parser.c
上传用户:sy_wanhua
上传日期:2013-07-25
资源大小:3048k
文件大小:295k
/*
 * parser.c : an XML 1.0 non-verifying parser
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
- #ifdef WIN32
- #include "win32config.h"
- #else
- #include "config.h"
- #endif
- #include <stdio.h>
- #include <string.h> /* for memset() only */
- #ifdef HAVE_CTYPE_H
- #include <ctype.h>
- #endif
- #ifdef HAVE_STDLIB_H
- #include <stdlib.h>
- #endif
- #ifdef HAVE_SYS_STAT_H
- #include <sys/stat.h>
- #endif
- #ifdef HAVE_FCNTL_H
- #include <fcntl.h>
- #endif
- #ifdef HAVE_UNISTD_H
- #include <unistd.h>
- #endif
- #ifdef HAVE_ZLIB_H
- #include <zlib.h>
- #endif
- #include <libxml/xmlmemory.h>
- #include <libxml/tree.h>
- #include <libxml/parser.h>
- #include <libxml/entities.h>
- #include <libxml/encoding.h>
- #include <libxml/valid.h>
- #include <libxml/parserInternals.h>
- #include <libxml/xmlIO.h>
- #include "xml-error.h"
- #define XML_PARSER_BIG_BUFFER_SIZE 1000
- #define XML_PARSER_BUFFER_SIZE 100
- const char *xmlParserVersion = LIBXML_VERSION_STRING;
- int xmlGetWarningsDefaultValue = 1;
- /*
- * List of XML prefixed PI allowed by W3C specs
- */
- const char *xmlW3CPIs[] = {
- "xml-stylesheet",
- NULL
- };
- void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
- void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
- xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
- const xmlChar **str);
/************************************************************************
 *                                                                      *
 *          Input handling functions for progressive parsing            *
 *                                                                      *
 ************************************************************************/

/* #define DEBUG_INPUT */
/* #define DEBUG_STACK */
/* #define DEBUG_PUSH */

/* Amount of lookahead requested whenever the input has to be refilled. */
#define INPUT_CHUNK 250
/* we need to keep enough input to show errors in context */
#define LINE_LEN 80

#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/**
 * check_buffer:
 * @in:  an XML parser input
 *
 * Debugging aid, only compiled when DEBUG_INPUT is defined. Reports on
 * stderr when the base/cur pointers of @in are inconsistent with the
 * underlying buffer, then dumps the buffer state.
 */
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        fprintf(stderr, "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        fprintf(stderr, "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        fprintf(stderr, "xmlParserInput: cur > base + use problem\n");
    }
    /* pointers are printed with %p: an (int) cast truncates on LP64 */
    fprintf(stderr, "buffer %p : content %p, cur %d, use %d, size %d\n",
            (void *) in, (void *) in->buf->buffer->content,
            (int) (in->cur - in->base),
            (int) in->buf->buffer->use, (int) in->buf->buffer->size);
}
#else
#define CHECK_BUFFER(in)
#endif
- /**
- * xmlParserInputRead:
- * @in: an XML parser input
- * @len: an indicative size for the lookahead
- *
- * This function refresh the input for the parser. It doesn't try to
- * preserve pointers to the input buffer, and discard already read data
- *
- * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
- * end of this entity
- */
- int
- xmlParserInputRead(xmlParserInputPtr in, int len) {
- int ret;
- int used;
- int index;
- #ifdef DEBUG_INPUT
- fprintf(stderr, "Readn");
- #endif
- if (in->buf == NULL) return(-1);
- if (in->base == NULL) return(-1);
- if (in->cur == NULL) return(-1);
- if (in->buf->buffer == NULL) return(-1);
- CHECK_BUFFER(in);
- used = in->cur - in->buf->buffer->content;
- ret = xmlBufferShrink(in->buf->buffer, used);
- if (ret > 0) {
- in->cur -= ret;
- in->consumed += ret;
- }
- ret = xmlParserInputBufferRead(in->buf, len);
- if (in->base != in->buf->buffer->content) {
- /*
- * the buffer has been realloced
- */
- index = in->cur - in->base;
- in->base = in->buf->buffer->content;
- in->cur = &in->buf->buffer->content[index];
- }
- CHECK_BUFFER(in);
- return(ret);
- }
- /**
- * xmlParserInputGrow:
- * @in: an XML parser input
- * @len: an indicative size for the lookahead
- *
- * This function increase the input for the parser. It tries to
- * preserve pointers to the input buffer, and keep already read data
- *
- * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
- * end of this entity
- */
- int
- xmlParserInputGrow(xmlParserInputPtr in, int len) {
- int ret;
- int index;
- #ifdef DEBUG_INPUT
- fprintf(stderr, "Grown");
- #endif
- if (in->buf == NULL) return(-1);
- if (in->base == NULL) return(-1);
- if (in->cur == NULL) return(-1);
- if (in->buf->buffer == NULL) return(-1);
- CHECK_BUFFER(in);
- index = in->cur - in->base;
- if (in->buf->buffer->use > index + INPUT_CHUNK) {
- CHECK_BUFFER(in);
- return(0);
- }
- if (in->buf->readcallback != NULL)
- ret = xmlParserInputBufferGrow(in->buf, len);
- else
- return(0);
- /*
- * NOTE : in->base may be a "dandling" i.e. freed pointer in this
- * block, but we use it really as an integer to do some
- * pointer arithmetic. Insure will raise it as a bug but in
- * that specific case, that's not !
- */
- if (in->base != in->buf->buffer->content) {
- /*
- * the buffer has been realloced
- */
- index = in->cur - in->base;
- in->base = in->buf->buffer->content;
- in->cur = &in->buf->buffer->content[index];
- }
- CHECK_BUFFER(in);
- return(ret);
- }
- /**
- * xmlParserInputShrink:
- * @in: an XML parser input
- *
- * This function removes used input for the parser.
- */
- void
- xmlParserInputShrink(xmlParserInputPtr in) {
- int used;
- int ret;
- int index;
- #ifdef DEBUG_INPUT
- fprintf(stderr, "Shrinkn");
- #endif
- if (in->buf == NULL) return;
- if (in->base == NULL) return;
- if (in->cur == NULL) return;
- if (in->buf->buffer == NULL) return;
- CHECK_BUFFER(in);
- used = in->cur - in->buf->buffer->content;
- if (used > INPUT_CHUNK) {
- ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
- if (ret > 0) {
- in->cur -= ret;
- in->consumed += ret;
- }
- }
- CHECK_BUFFER(in);
- if (in->buf->buffer->use > INPUT_CHUNK) {
- return;
- }
- xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
- if (in->base != in->buf->buffer->content) {
- /*
- * the buffer has been realloced
- */
- index = in->cur - in->base;
- in->base = in->buf->buffer->content;
- in->cur = &in->buf->buffer->content[index];
- }
- CHECK_BUFFER(in);
- }
/************************************************************************
 *                                                                      *
 *          Parser stacks related functions and macros                  *
 *                                                                      *
 ************************************************************************/

/* Initial value of ctxt->replaceEntities for a new parser context. */
int xmlSubstituteEntitiesDefaultValue = 0;
/* Initial value of ctxt->validate for a new parser context. */
int xmlDoValidityCheckingDefaultValue = 0;
/* Initial value of ctxt->keepBlanks for a new parser context. */
int xmlKeepBlanksDefaultValue = 1;
- xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
- const xmlChar ** str);
- /*
- * Generic function for accessing stacks in the Parser Context
- */
- #define PUSH_AND_POP(scope, type, name)
- scope int name##Push(xmlParserCtxtPtr ctxt, type value) {
- if (ctxt->name##Nr >= ctxt->name##Max) {
- ctxt->name##Max *= 2;
- ctxt->name##Tab = (void *) xmlRealloc(ctxt->name##Tab,
- ctxt->name##Max * sizeof(ctxt->name##Tab[0]));
- if (ctxt->name##Tab == NULL) {
- fprintf(stderr, "realloc failed !n");
- return(0);
- }
- }
- ctxt->name##Tab[ctxt->name##Nr] = value;
- ctxt->name = value;
- return(ctxt->name##Nr++);
- }
- scope type name##Pop(xmlParserCtxtPtr ctxt) {
- type ret;
- if (ctxt->name##Nr <= 0) return(0);
- ctxt->name##Nr--;
- if (ctxt->name##Nr > 0)
- ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1];
- else
- ctxt->name = NULL;
- ret = ctxt->name##Tab[ctxt->name##Nr];
- ctxt->name##Tab[ctxt->name##Nr] = 0;
- return(ret);
- }
- PUSH_AND_POP(extern, xmlParserInputPtr, input)
- PUSH_AND_POP(extern, xmlNodePtr, node)
- PUSH_AND_POP(extern, xmlChar*, name)
- int spacePush(xmlParserCtxtPtr ctxt, int val) {
- if (ctxt->spaceNr >= ctxt->spaceMax) {
- ctxt->spaceMax *= 2;
- ctxt->spaceTab = (void *) xmlRealloc(ctxt->spaceTab,
- ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
- if (ctxt->spaceTab == NULL) {
- fprintf(stderr, "realloc failed !n");
- return(0);
- }
- }
- ctxt->spaceTab[ctxt->spaceNr] = val;
- ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
- return(ctxt->spaceNr++);
- }
- int spacePop(xmlParserCtxtPtr ctxt) {
- int ret;
- if (ctxt->spaceNr <= 0) return(0);
- ctxt->spaceNr--;
- if (ctxt->spaceNr > 0)
- ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
- else
- ctxt->space = NULL;
- ret = ctxt->spaceTab[ctxt->spaceNr];
- ctxt->spaceTab[ctxt->spaceNr] = -1;
- return(ret);
- }
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * All of them expect a variable named `ctxt` (the parser context) in scope.
 *
 * Dirty macros, i.e. one needs to make assumptions on the context to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   RAW     returns the current xmlChar value, or -1 when a token is pending.
 *   CUR     returns the pending token if any, else the current xmlChar
 *           value, i.e. a 8 bit value if compiled in ISO-Latin or UTF-8.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip l xmlChars, updating the line/column counters.
 *   COPY_BUF(l,b,i,v) copy the char v of length l into b at index i and
 *           advance i accordingly.
 *   CUR_CHAR(l) Return the current char as an int as well as its length.
 *   CUR_SCHAR(s, l) Same, reading from the string s instead of the input.
 *
 * SKIP, SHRINK, GROW, NEXTL and COPY_BUF expand to several statements (and
 * some macros already end with ';'): use them only as a complete statement
 * inside a braced block, never as the unbraced body of an if/else/loop.
 */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

#define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val);     \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);       \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt)

#define SHRINK xmlParserInputShrink(ctxt->input);                       \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt)

#define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK);              \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt)

#define SKIP_BLANKS xmlSkipBlankChars(ctxt);

#define NEXT xmlNextChar(ctxt);

#define NEXTL(l)                                                        \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->token = 0; ctxt->input->cur += l;                             \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l);
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l);

#define COPY_BUF(l,b,i,v)                                               \
    if (l == 1) b[i++] = (xmlChar) v;                                   \
    else i += xmlCopyChar(l,&b[i],v);
- /**
- * xmlNextChar:
- * @ctxt: the XML parser context
- *
- * Skip to the next char input char.
- */
- void
- xmlNextChar(xmlParserCtxtPtr ctxt) {
- /*
- * TODO: 2.11 End-of-Line Handling
- * the literal two-character sequence "#xD#xA" or a standalone
- * literal #xD, an XML processor must pass to the application
- * the single character #xA.
- */
- if (ctxt->token != 0) ctxt->token = 0;
- else {
- if ((*ctxt->input->cur == 0) &&
- (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
- (ctxt->instate != XML_PARSER_COMMENT)) {
- /*
- * If we are at the end of the current entity and
- * the context allows it, we pop consumed entities
- * automatically.
- * TODO: the auto closing should be blocked in other cases
- */
- xmlPopInput(ctxt);
- } else {
- if (*(ctxt->input->cur) == 'n') {
- ctxt->input->line++; ctxt->input->col = 1;
- } else ctxt->input->col++;
- if (ctxt->encoding == NULL) {
- /*
- * We are supposed to handle UTF8, check it's valid
- * From rfc2044: encoding of the Unicode values on UTF-8:
- *
- * UCS-4 range (hex.) UTF-8 octet sequence (binary)
- * 0000 0000-0000 007F 0xxxxxxx
- * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
- * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
- *
- * Check for the 0x110000 limit too
- */
- const unsigned char *cur = ctxt->input->cur;
- unsigned char c;
- c = *cur;
- if (c & 0x80) {
- if (cur[1] == 0)
- xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
- if ((cur[1] & 0xc0) != 0x80)
- goto encoding_error;
- if ((c & 0xe0) == 0xe0) {
- unsigned int val;
- if (cur[2] == 0)
- xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
- if ((cur[2] & 0xc0) != 0x80)
- goto encoding_error;
- if ((c & 0xf0) == 0xf0) {
- if (cur[3] == 0)
- xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
- if (((c & 0xf8) != 0xf0) ||
- ((cur[3] & 0xc0) != 0x80))
- goto encoding_error;
- /* 4-byte code */
- ctxt->input->cur += 4;
- val = (cur[0] & 0x7) << 18;
- val |= (cur[1] & 0x3f) << 12;
- val |= (cur[2] & 0x3f) << 6;
- val |= cur[3] & 0x3f;
- } else {
- /* 3-byte code */
- ctxt->input->cur += 3;
- val = (cur[0] & 0xf) << 12;
- val |= (cur[1] & 0x3f) << 6;
- val |= cur[2] & 0x3f;
- }
- if (((val > 0xd7ff) && (val < 0xe000)) ||
- ((val > 0xfffd) && (val < 0x10000)) ||
- (val >= 0x110000)) {
- if ((ctxt->sax != NULL) &&
- (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "Char out of allowed rangen");
- ctxt->errNo = XML_ERR_INVALID_ENCODING;
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- }
- } else
- /* 2-byte code */
- ctxt->input->cur += 2;
- } else
- /* 1-byte code */
- ctxt->input->cur++;
- } else {
- /*
- * Assume it's a fixed lenght encoding (1) with
- * a compatibke encoding for the ASCII set, since
- * XML constructs only use < 128 chars
- */
- ctxt->input->cur++;
- }
- ctxt->nbChars++;
- if (*ctxt->input->cur == 0)
- xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
- }
- }
- if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
- if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
- if ((*ctxt->input->cur == 0) &&
- (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
- xmlPopInput(ctxt);
- return;
- encoding_error:
- /*
- * If we detect an UTF8 error that probably mean that the
- * input encoding didn't get properly advertized in the
- * declaration header. Report the error and switch the encoding
- * to ISO-Latin-1 (if you don't like this policy, just declare the
- * encoding !)
- */
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "Input is not proper UTF-8, indicate encoding !n");
- ctxt->errNo = XML_ERR_INVALID_ENCODING;
- ctxt->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
- ctxt->input->cur++;
- return;
- }
- /**
- * xmlCurrentChar:
- * @ctxt: the XML parser context
- * @len: pointer to the length of the char read
- *
- * The current char value, if using UTF-8 this may actaully span multiple
- * bytes in the input buffer. Implement the end of line normalization:
- * 2.11 End-of-Line Handling
- * Wherever an external parsed entity or the literal entity value
- * of an internal parsed entity contains either the literal two-character
- * sequence "#xD#xA" or a standalone literal #xD, an XML processor
- * must pass to the application the single character #xA.
- * This behavior can conveniently be produced by normalizing all
- * line breaks to #xA on input, before parsing.)
- *
- * Returns the current char value and its lenght
- */
- int
- xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
- if (ctxt->token != 0) {
- *len = 0;
- return(ctxt->token);
- }
- if (ctxt->encoding == NULL) {
- /*
- * We are supposed to handle UTF8, check it's valid
- * From rfc2044: encoding of the Unicode values on UTF-8:
- *
- * UCS-4 range (hex.) UTF-8 octet sequence (binary)
- * 0000 0000-0000 007F 0xxxxxxx
- * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
- * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
- *
- * Check for the 0x110000 limit too
- */
- const unsigned char *cur = ctxt->input->cur;
- unsigned char c;
- unsigned int val;
- c = *cur;
- if (c & 0x80) {
- if (cur[1] == 0)
- xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
- if ((cur[1] & 0xc0) != 0x80)
- goto encoding_error;
- if ((c & 0xe0) == 0xe0) {
- if (cur[2] == 0)
- xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
- if ((cur[2] & 0xc0) != 0x80)
- goto encoding_error;
- if ((c & 0xf0) == 0xf0) {
- if (cur[3] == 0)
- xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
- if (((c & 0xf8) != 0xf0) ||
- ((cur[3] & 0xc0) != 0x80))
- goto encoding_error;
- /* 4-byte code */
- *len = 4;
- val = (cur[0] & 0x7) << 18;
- val |= (cur[1] & 0x3f) << 12;
- val |= (cur[2] & 0x3f) << 6;
- val |= cur[3] & 0x3f;
- } else {
- /* 3-byte code */
- *len = 3;
- val = (cur[0] & 0xf) << 12;
- val |= (cur[1] & 0x3f) << 6;
- val |= cur[2] & 0x3f;
- }
- } else {
- /* 2-byte code */
- *len = 2;
- val = (cur[0] & 0x1f) << 6;
- val |= cur[1] & 0x3f;
- }
- if (!IS_CHAR(val)) {
- if ((ctxt->sax != NULL) &&
- (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "Char out of allowed rangen");
- ctxt->errNo = XML_ERR_INVALID_ENCODING;
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- }
- return(val);
- } else {
- /* 1-byte code */
- *len = 1;
- if (*ctxt->input->cur == 0xD) {
- if (ctxt->input->cur[1] == 0xA) {
- ctxt->nbChars++;
- ctxt->input->cur++;
- }
- return(0xA);
- }
- return((int) *ctxt->input->cur);
- }
- }
- /*
- * Assume it's a fixed lenght encoding (1) with
- * a compatibke encoding for the ASCII set, since
- * XML constructs only use < 128 chars
- */
- *len = 1;
- if (*ctxt->input->cur == 0xD) {
- if (ctxt->input->cur[1] == 0xA) {
- ctxt->nbChars++;
- ctxt->input->cur++;
- }
- return(0xA);
- }
- return((int) *ctxt->input->cur);
- encoding_error:
- /*
- * If we detect an UTF8 error that probably mean that the
- * input encoding didn't get properly advertized in the
- * declaration header. Report the error and switch the encoding
- * to ISO-Latin-1 (if you don't like this policy, just declare the
- * encoding !)
- */
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "Input is not proper UTF-8, indicate encoding !n");
- ctxt->errNo = XML_ERR_INVALID_ENCODING;
- ctxt->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
- *len = 1;
- return((int) *ctxt->input->cur);
- }
- /**
- * xmlStringCurrentChar:
- * @ctxt: the XML parser context
- * @cur: pointer to the beginning of the char
- * @len: pointer to the length of the char read
- *
- * The current char value, if using UTF-8 this may actaully span multiple
- * bytes in the input buffer.
- *
- * Returns the current char value and its lenght
- */
- int
- xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
- if (ctxt->encoding == NULL) {
- /*
- * We are supposed to handle UTF8, check it's valid
- * From rfc2044: encoding of the Unicode values on UTF-8:
- *
- * UCS-4 range (hex.) UTF-8 octet sequence (binary)
- * 0000 0000-0000 007F 0xxxxxxx
- * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
- * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
- *
- * Check for the 0x110000 limit too
- */
- unsigned char c;
- unsigned int val;
- c = *cur;
- if (c & 0x80) {
- if ((cur[1] & 0xc0) != 0x80)
- goto encoding_error;
- if ((c & 0xe0) == 0xe0) {
- if ((cur[2] & 0xc0) != 0x80)
- goto encoding_error;
- if ((c & 0xf0) == 0xf0) {
- if (((c & 0xf8) != 0xf0) ||
- ((cur[3] & 0xc0) != 0x80))
- goto encoding_error;
- /* 4-byte code */
- *len = 4;
- val = (cur[0] & 0x7) << 18;
- val |= (cur[1] & 0x3f) << 12;
- val |= (cur[2] & 0x3f) << 6;
- val |= cur[3] & 0x3f;
- } else {
- /* 3-byte code */
- *len = 3;
- val = (cur[0] & 0xf) << 12;
- val |= (cur[1] & 0x3f) << 6;
- val |= cur[2] & 0x3f;
- }
- } else {
- /* 2-byte code */
- *len = 2;
- val = (cur[0] & 0x1f) << 6;
- val |= cur[2] & 0x3f;
- }
- if (!IS_CHAR(val)) {
- if ((ctxt->sax != NULL) &&
- (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "Char out of allowed rangen");
- ctxt->errNo = XML_ERR_INVALID_ENCODING;
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- }
- return(val);
- } else {
- /* 1-byte code */
- *len = 1;
- return((int) *cur);
- }
- }
- /*
- * Assume it's a fixed lenght encoding (1) with
- * a compatibke encoding for the ASCII set, since
- * XML constructs only use < 128 chars
- */
- *len = 1;
- return((int) *cur);
- encoding_error:
- /*
- * If we detect an UTF8 error that probably mean that the
- * input encoding didn't get properly advertized in the
- * declaration header. Report the error and switch the encoding
- * to ISO-Latin-1 (if you don't like this policy, just declare the
- * encoding !)
- */
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "Input is not proper UTF-8, indicate encoding !n");
- ctxt->errNo = XML_ERR_INVALID_ENCODING;
- *len = 1;
- return((int) *cur);
- }
- /**
- * xmlCopyChar:
- * @len: pointer to the length of the char read (or zero)
- * @array: pointer to an arry of xmlChar
- * @val: the char value
- *
- * append the char value in the array
- *
- * Returns the number of xmlChar written
- */
- int
- xmlCopyChar(int len, xmlChar *out, int val) {
- /*
- * We are supposed to handle UTF8, check it's valid
- * From rfc2044: encoding of the Unicode values on UTF-8:
- *
- * UCS-4 range (hex.) UTF-8 octet sequence (binary)
- * 0000 0000-0000 007F 0xxxxxxx
- * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
- * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
- */
- if (len == 0) {
- if (val < 0) len = 0;
- else if (val < 0x80) len = 1;
- else if (val < 0x800) len = 2;
- else if (val < 0x10000) len = 3;
- else if (val < 0x110000) len = 4;
- if (len == 0) {
- fprintf(stderr, "Internal error, xmlCopyChar 0x%X out of boundn",
- val);
- return(0);
- }
- }
- if (len > 1) {
- int bits;
- if (val < 0x80) { *out++= val; bits= -6; }
- else if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
- else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6; }
- else { *out++= (val >> 18) | 0xF0; bits= 12; }
-
- for ( ; bits >= 0; bits-= 6)
- *out++= ((val >> bits) & 0x3F) | 0x80 ;
- return(len);
- }
- *out = (xmlChar) val;
- return(1);
- }
- /**
- * xmlSkipBlankChars:
- * @ctxt: the XML parser context
- *
- * skip all blanks character found at that point in the input streams.
- * It pops up finished entities in the process if allowable at that point.
- *
- * Returns the number of space chars skipped
- */
- int
- xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
- int cur, res = 0;
- do {
- cur = CUR;
- while (IS_BLANK(cur)) {
- NEXT;
- cur = CUR;
- res++;
- }
- while ((cur == 0) && (ctxt->inputNr > 1) &&
- (ctxt->instate != XML_PARSER_COMMENT)) {
- xmlPopInput(ctxt);
- cur = CUR;
- }
- if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
- if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
- } while (IS_BLANK(cur));
- return(res);
- }
- /************************************************************************
- * *
- * Commodity functions to handle entities processing *
- * *
- ************************************************************************/
- /**
- * xmlPopInput:
- * @ctxt: an XML parser context
- *
- * xmlPopInput: the current input pointed by ctxt->input came to an end
- * pop it and return the next char.
- *
- * Returns the current xmlChar in the parser context
- */
- xmlChar
- xmlPopInput(xmlParserCtxtPtr ctxt) {
- if (ctxt->inputNr == 1) return(0); /* End of main Input */
- xmlFreeInputStream(inputPop(ctxt));
- if ((*ctxt->input->cur == 0) &&
- (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
- return(xmlPopInput(ctxt));
- return(CUR);
- }
- /**
- * xmlPushInput:
- * @ctxt: an XML parser context
- * @input: an XML parser input fragment (entity, XML fragment ...).
- *
- * xmlPushInput: switch to a new input stream which is stacked on top
- * of the previous one(s).
- */
- void
- xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
- if (input == NULL) return;
- inputPush(ctxt, input);
- GROW;
- }
- /**
- * xmlFreeInputStream:
- * @input: an xmlParserInputPtr
- *
- * Free up an input stream.
- */
- void
- xmlFreeInputStream(xmlParserInputPtr input) {
- if (input == NULL) return;
- if (input->filename != NULL) xmlFree((char *) input->filename);
- if (input->directory != NULL) xmlFree((char *) input->directory);
- if (input->encoding != NULL) xmlFree((char *) input->encoding);
- if (input->version != NULL) xmlFree((char *) input->version);
- if ((input->free != NULL) && (input->base != NULL))
- input->free((xmlChar *) input->base);
- if (input->buf != NULL)
- xmlFreeParserInputBuffer(input->buf);
- memset(input, -1, sizeof(xmlParserInput));
- xmlFree(input);
- }
- /**
- * xmlNewInputStream:
- * @ctxt: an XML parser context
- *
- * Create a new input stream structure
- * Returns the new input stream or NULL
- */
- xmlParserInputPtr
- xmlNewInputStream(xmlParserCtxtPtr ctxt) {
- xmlParserInputPtr input;
- input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
- if (input == NULL) {
- if (ctxt != NULL) {
- ctxt->errNo = XML_ERR_NO_MEMORY;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "malloc: couldn't allocate a new input streamn");
- ctxt->errNo = XML_ERR_NO_MEMORY;
- }
- return(NULL);
- }
- memset(input, 0, sizeof(xmlParserInput));
- input->line = 1;
- input->col = 1;
- input->standalone = -1;
- return(input);
- }
- /**
- * xmlNewIOInputStream:
- * @ctxt: an XML parser context
- * @input: an I/O Input
- * @enc: the charset encoding if known
- *
- * Create a new input stream structure encapsulating the @input into
- * a stream suitable for the parser.
- *
- * Returns the new input stream or NULL
- */
- xmlParserInputPtr
- xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
- xmlCharEncoding enc) {
- xmlParserInputPtr inputStream;
- inputStream = xmlNewInputStream(ctxt);
- if (inputStream == NULL) {
- return(NULL);
- }
- inputStream->filename = NULL;
- inputStream->buf = input;
- inputStream->base = inputStream->buf->buffer->content;
- inputStream->cur = inputStream->buf->buffer->content;
- if (enc != XML_CHAR_ENCODING_NONE) {
- xmlSwitchEncoding(ctxt, enc);
- }
- return(inputStream);
- }
- /**
- * xmlNewEntityInputStream:
- * @ctxt: an XML parser context
- * @entity: an Entity pointer
- *
- * Create a new input stream based on an xmlEntityPtr
- *
- * Returns the new input stream or NULL
- */
- xmlParserInputPtr
- xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
- xmlParserInputPtr input;
- if (entity == NULL) {
- ctxt->errNo = XML_ERR_INTERNAL_ERROR;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "internal: xmlNewEntityInputStream entity = NULLn");
- ctxt->errNo = XML_ERR_INTERNAL_ERROR;
- return(NULL);
- }
- if (entity->content == NULL) {
- switch (entity->etype) {
- case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
- ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "xmlNewEntityInputStream unparsed entity !n");
- break;
- case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
- case XML_EXTERNAL_PARAMETER_ENTITY:
- return(xmlLoadExternalEntity((char *) entity->SystemID,
- (char *) entity->ExternalID, ctxt));
- case XML_INTERNAL_GENERAL_ENTITY:
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "Internal entity %s without content !n", entity->name);
- break;
- case XML_INTERNAL_PARAMETER_ENTITY:
- ctxt->errNo = XML_ERR_INTERNAL_ERROR;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "Internal parameter entity %s without content !n", entity->name);
- break;
- case XML_INTERNAL_PREDEFINED_ENTITY:
- ctxt->errNo = XML_ERR_INTERNAL_ERROR;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "Predefined entity %s without content !n", entity->name);
- break;
- }
- return(NULL);
- }
- input = xmlNewInputStream(ctxt);
- if (input == NULL) {
- return(NULL);
- }
- input->filename = (char *) entity->SystemID;
- input->base = entity->content;
- input->cur = entity->content;
- input->length = entity->length;
- return(input);
- }
- /**
- * xmlNewStringInputStream:
- * @ctxt: an XML parser context
- * @buffer: an memory buffer
- *
- * Create a new input stream based on a memory buffer.
- * Returns the new input stream
- */
- xmlParserInputPtr
- xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
- xmlParserInputPtr input;
- if (buffer == NULL) {
- ctxt->errNo = XML_ERR_INTERNAL_ERROR;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "internal: xmlNewStringInputStream string = NULLn");
- return(NULL);
- }
- input = xmlNewInputStream(ctxt);
- if (input == NULL) {
- return(NULL);
- }
- input->base = buffer;
- input->cur = buffer;
- input->length = xmlStrlen(buffer);
- return(input);
- }
- /**
- * xmlNewInputFromFile:
- * @ctxt: an XML parser context
- * @filename: the filename to use as entity
- *
- * Create a new input stream based on a file.
- *
- * Returns the new input stream or NULL in case of error
- */
- xmlParserInputPtr
- xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
- xmlParserInputBufferPtr buf;
- xmlParserInputPtr inputStream;
- char *directory = NULL;
- if (ctxt == NULL) return(NULL);
- buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
- if (buf == NULL) {
- char name[XML_PARSER_BIG_BUFFER_SIZE];
- if ((ctxt->input != NULL) && (ctxt->input->directory != NULL)) {
- #ifdef WIN32
- sprintf(name, "%s\%s", ctxt->input->directory, filename);
- #else
- sprintf(name, "%s/%s", ctxt->input->directory, filename);
- #endif
- buf = xmlParserInputBufferCreateFilename(name,
- XML_CHAR_ENCODING_NONE);
- if (buf != NULL)
- directory = xmlParserGetDirectory(name);
- }
- if ((buf == NULL) && (ctxt->directory != NULL)) {
- #ifdef WIN32
- sprintf(name, "%s\%s", ctxt->directory, filename);
- #else
- sprintf(name, "%s/%s", ctxt->directory, filename);
- #endif
- buf = xmlParserInputBufferCreateFilename(name,
- XML_CHAR_ENCODING_NONE);
- if (buf != NULL)
- directory = xmlParserGetDirectory(name);
- }
- if (buf == NULL)
- return(NULL);
- }
- if (directory == NULL)
- directory = xmlParserGetDirectory(filename);
- inputStream = xmlNewInputStream(ctxt);
- if (inputStream == NULL) {
- if (directory != NULL) xmlFree((char *) directory);
- return(NULL);
- }
- inputStream->filename = xmlMemStrdup(filename);
- inputStream->directory = directory;
- inputStream->buf = buf;
- inputStream->base = inputStream->buf->buffer->content;
- inputStream->cur = inputStream->buf->buffer->content;
- if ((ctxt->directory == NULL) && (directory != NULL))
- ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
- return(inputStream);
- }
- /************************************************************************
- * *
- * Commodity functions to handle parser contexts *
- * *
- ************************************************************************/
- /**
- * xmlInitParserCtxt:
- * @ctxt: an XML parser context
- *
- * Initialize a parser context
- */
- void
- xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
- {
- xmlSAXHandler *sax;
- xmlDefaultSAXHandlerInit();
- sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
- if (sax == NULL) {
- fprintf(stderr, "xmlInitParserCtxt: out of memoryn");
- }
- memset(sax, 0, sizeof(xmlSAXHandler));
- /* Allocate the Input stack */
- ctxt->inputTab = (xmlParserInputPtr *) xmlMalloc(5 * sizeof(xmlParserInputPtr));
- ctxt->inputNr = 0;
- ctxt->inputMax = 5;
- ctxt->input = NULL;
- ctxt->version = NULL;
- ctxt->encoding = NULL;
- ctxt->standalone = -1;
- ctxt->hasExternalSubset = 0;
- ctxt->hasPErefs = 0;
- ctxt->html = 0;
- ctxt->external = 0;
- ctxt->instate = XML_PARSER_START;
- ctxt->token = 0;
- ctxt->directory = NULL;
- /* Allocate the Node stack */
- ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
- ctxt->nodeNr = 0;
- ctxt->nodeMax = 10;
- ctxt->node = NULL;
- /* Allocate the Name stack */
- ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
- ctxt->nameNr = 0;
- ctxt->nameMax = 10;
- ctxt->name = NULL;
- /* Allocate the space stack */
- ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
- ctxt->spaceNr = 1;
- ctxt->spaceMax = 10;
- ctxt->spaceTab[0] = -1;
- ctxt->space = &ctxt->spaceTab[0];
- if (sax == NULL) {
- ctxt->sax = &xmlDefaultSAXHandler;
- } else {
- ctxt->sax = sax;
- memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
- }
- ctxt->userData = ctxt;
- ctxt->myDoc = NULL;
- ctxt->wellFormed = 1;
- ctxt->valid = 1;
- ctxt->validate = xmlDoValidityCheckingDefaultValue;
- ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
- ctxt->vctxt.userData = ctxt;
- if (ctxt->validate) {
- ctxt->vctxt.error = xmlParserValidityError;
- if (xmlGetWarningsDefaultValue == 0)
- ctxt->vctxt.warning = NULL;
- else
- ctxt->vctxt.warning = xmlParserValidityWarning;
- /* Allocate the Node stack */
- ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
- ctxt->vctxt.nodeNr = 0;
- ctxt->vctxt.nodeMax = 4;
- ctxt->vctxt.node = NULL;
- } else {
- ctxt->vctxt.error = NULL;
- ctxt->vctxt.warning = NULL;
- }
- ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
- ctxt->record_info = 0;
- ctxt->nbChars = 0;
- ctxt->checkIndex = 0;
- ctxt->inSubset = 0;
- ctxt->errNo = XML_ERR_OK;
- ctxt->depth = 0;
- xmlInitNodeInfoSeq(&ctxt->node_seq);
- }
- /**
- * xmlFreeParserCtxt:
- * @ctxt: an XML parser context
- *
- * Free all the memory used by a parser context. However the parsed
- * document in ctxt->myDoc is not freed.
- */
- void
- xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
- {
- xmlParserInputPtr input;
- xmlChar *oldname;
- if (ctxt == NULL) return;
- while ((input = inputPop(ctxt)) != NULL) {
- xmlFreeInputStream(input);
- }
- while ((oldname = namePop(ctxt)) != NULL) {
- xmlFree(oldname);
- }
- if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
- if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
- if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
- if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
- if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
- if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
- if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
- if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
- if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
- if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
- if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
- xmlFree(ctxt->sax);
- if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
- xmlFree(ctxt);
- }
- /**
- * xmlNewParserCtxt:
- *
- * Allocate and initialize a new parser context.
- *
- * Returns the xmlParserCtxtPtr or NULL
- */
- xmlParserCtxtPtr
- xmlNewParserCtxt()
- {
- xmlParserCtxtPtr ctxt;
- ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
- if (ctxt == NULL) {
- fprintf(stderr, "xmlNewParserCtxt : cannot allocate contextn");
- perror("malloc");
- return(NULL);
- }
- memset(ctxt, 0, sizeof(xmlParserCtxt));
- xmlInitParserCtxt(ctxt);
- return(ctxt);
- }
- /**
- * xmlClearParserCtxt:
- * @ctxt: an XML parser context
- *
- * Clear (release owned resources) and reinitialize a parser context
- */
- void
- xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
- {
- xmlClearNodeInfoSeq(&ctxt->node_seq);
- xmlInitParserCtxt(ctxt);
- }
- /************************************************************************
- * *
- * Commodity functions to handle entities *
- * *
- ************************************************************************/
- /**
- * xmlCheckEntity:
- * @ctxt: an XML parser context
- * @content: the entity content string
- *
- * Parse an entity content and checks the WF constraints
- *
- */
- void
- xmlCheckEntity(xmlParserCtxtPtr ctxt, const xmlChar *content) {
- }
- /**
- * xmlParseCharRef:
- * @ctxt: an XML parser context
- *
- * parse Reference declarations
- *
- * [66] CharRef ::= '&#' [0-9]+ ';' |
- * '&#x' [0-9a-fA-F]+ ';'
- *
- * [ WFC: Legal Character ]
- * Characters referred to using character references must match the
- * production for Char.
- *
- * Returns the value parsed (as an int), 0 in case of error
- */
- int
- xmlParseCharRef(xmlParserCtxtPtr ctxt) {
- int val = 0;
- if (ctxt->token != 0) {
- val = ctxt->token;
- ctxt->token = 0;
- return(val);
- }
- if ((RAW == '&') && (NXT(1) == '#') &&
- (NXT(2) == 'x')) {
- SKIP(3);
- while (RAW != ';') {
- if ((RAW >= '0') && (RAW <= '9'))
- val = val * 16 + (CUR - '0');
- else if ((RAW >= 'a') && (RAW <= 'f'))
- val = val * 16 + (CUR - 'a') + 10;
- else if ((RAW >= 'A') && (RAW <= 'F'))
- val = val * 16 + (CUR - 'A') + 10;
- else {
- ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "xmlParseCharRef: invalid hexadecimal valuen");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- val = 0;
- break;
- }
- NEXT;
- }
- if (RAW == ';') {
- /* on purpose to avoid reentrancy problems with NEXT and SKIP */
- ctxt->nbChars ++;
- ctxt->input->cur++;
- }
- } else if ((RAW == '&') && (NXT(1) == '#')) {
- SKIP(2);
- while (RAW != ';') {
- if ((RAW >= '0') && (RAW <= '9'))
- val = val * 10 + (CUR - '0');
- else {
- ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "xmlParseCharRef: invalid decimal valuen");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- val = 0;
- break;
- }
- NEXT;
- }
- if (RAW == ';') {
- /* on purpose to avoid reentrancy problems with NEXT and SKIP */
- ctxt->nbChars ++;
- ctxt->input->cur++;
- }
- } else {
- ctxt->errNo = XML_ERR_INVALID_CHARREF;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "xmlParseCharRef: invalid valuen");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- }
- /*
- * [ WFC: Legal Character ]
- * Characters referred to using character references must match the
- * production for Char.
- */
- if (IS_CHAR(val)) {
- return(val);
- } else {
- ctxt->errNo = XML_ERR_INVALID_CHAR;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %dn",
- val);
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- }
- return(0);
- }
- /**
- * xmlParseStringCharRef:
- * @ctxt: an XML parser context
- * @str: a pointer to an index in the string
- *
- * parse Reference declarations, variant parsing from a string rather
- * than an an input flow.
- *
- * [66] CharRef ::= '&#' [0-9]+ ';' |
- * '&#x' [0-9a-fA-F]+ ';'
- *
- * [ WFC: Legal Character ]
- * Characters referred to using character references must match the
- * production for Char.
- *
- * Returns the value parsed (as an int), 0 in case of error, str will be
- * updated to the current value of the index
- */
- int
- xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
- const xmlChar *ptr;
- xmlChar cur;
- int val = 0;
- if ((str == NULL) || (*str == NULL)) return(0);
- ptr = *str;
- cur = *ptr;
- if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
- ptr += 3;
- cur = *ptr;
- while (cur != ';') {
- if ((cur >= '0') && (cur <= '9'))
- val = val * 16 + (cur - '0');
- else if ((cur >= 'a') && (cur <= 'f'))
- val = val * 16 + (cur - 'a') + 10;
- else if ((cur >= 'A') && (cur <= 'F'))
- val = val * 16 + (cur - 'A') + 10;
- else {
- ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "xmlParseCharRef: invalid hexadecimal valuen");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- val = 0;
- break;
- }
- ptr++;
- cur = *ptr;
- }
- if (cur == ';')
- ptr++;
- } else if ((cur == '&') && (ptr[1] == '#')){
- ptr += 2;
- cur = *ptr;
- while (cur != ';') {
- if ((cur >= '0') && (cur <= '9'))
- val = val * 10 + (cur - '0');
- else {
- ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "xmlParseCharRef: invalid decimal valuen");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- val = 0;
- break;
- }
- ptr++;
- cur = *ptr;
- }
- if (cur == ';')
- ptr++;
- } else {
- ctxt->errNo = XML_ERR_INVALID_CHARREF;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "xmlParseCharRef: invalid valuen");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- return(0);
- }
- *str = ptr;
- /*
- * [ WFC: Legal Character ]
- * Characters referred to using character references must match the
- * production for Char.
- */
- if (IS_CHAR(val)) {
- return(val);
- } else {
- ctxt->errNo = XML_ERR_INVALID_CHAR;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "CharRef: invalid xmlChar value %dn", val);
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- }
- return(0);
- }
- /**
- * xmlParserHandleReference:
- * @ctxt: the parser context
- *
- * [67] Reference ::= EntityRef | CharRef
- *
- * [68] EntityRef ::= '&' Name ';'
- *
- * [ WFC: Entity Declared ]
- * the Name given in the entity reference must match that in an entity
- * declaration, except that well-formed documents need not declare any
- * of the following entities: amp, lt, gt, apos, quot.
- *
- * [ WFC: Parsed Entity ]
- * An entity reference must not contain the name of an unparsed entity
- *
- * [66] CharRef ::= '&#' [0-9]+ ';' |
- * '&#x' [0-9a-fA-F]+ ';'
- *
- * A Reference may have been detected in the current input stream;
- * the handling is done according to
- * http://www.w3.org/TR/REC-xml#entproc
- *
- * Does nothing when a token is already pending (ctxt->token != 0) or
- * when the current character is not '&'.
- *
- * For a CharRef ("&#") the action depends on ctxt->instate: the
- * reference is left untouched in most states, reported as a
- * well-formedness error at EOF and in the prolog, epilog and DTD, and in
- * content or attribute values it is parsed with xmlParseCharRef(), the
- * result being stored in ctxt->token.
- *
- * For an EntityRef, every state listed in the second switch returns
- * before any expansion; the lookup/expansion code after that switch is
- * only reached for a state that is not listed there.
- *
- * NOTE(review): the error strings end in a bare 'n'; this looks like a
- * newline escape whose backslash was lost -- confirm against upstream.
- */
- void
- xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
- xmlParserInputPtr input;
- xmlChar *name;
- xmlEntityPtr ent = NULL;
- if (ctxt->token != 0) { /* a pending token takes precedence */
- return;
- }
- if (RAW != '&') return;
- GROW;
- if ((RAW == '&') && (NXT(1) == '#')) { /* character reference */
- switch(ctxt->instate) {
- case XML_PARSER_ENTITY_DECL:
- case XML_PARSER_PI:
- case XML_PARSER_CDATA_SECTION:
- case XML_PARSER_COMMENT:
- case XML_PARSER_SYSTEM_LITERAL:
- /* we just ignore it there */
- return;
- case XML_PARSER_START_TAG:
- return;
- case XML_PARSER_END_TAG:
- return;
- case XML_PARSER_EOF:
- ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData, "CharRef at EOFn");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- return;
- case XML_PARSER_PROLOG:
- case XML_PARSER_START:
- case XML_PARSER_MISC:
- ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData, "CharRef in prolog!n");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- return;
- case XML_PARSER_EPILOG:
- ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData, "CharRef in epilog!n");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- return;
- case XML_PARSER_DTD:
- ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "CharRef are forbiden in DTDs!n");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- return;
- case XML_PARSER_ENTITY_VALUE:
- /*
- * NOTE: in the case of entity values, we don't do the
- * substitution here since we need the literal
- * entity value to be able to save the internal
- * subset of the document.
- * This will be handled by xmlDecodeEntities
- */
- return;
- case XML_PARSER_CONTENT:
- case XML_PARSER_ATTRIBUTE_VALUE:
- ctxt->token = xmlParseCharRef(ctxt); /* decoded char becomes the pending token */
- return;
- }
- return;
- }
- switch(ctxt->instate) { /* entity reference: '&' not followed by '#' */
- case XML_PARSER_CDATA_SECTION:
- return;
- case XML_PARSER_PI:
- case XML_PARSER_COMMENT:
- case XML_PARSER_SYSTEM_LITERAL:
- case XML_PARSER_CONTENT:
- return;
- case XML_PARSER_START_TAG:
- return;
- case XML_PARSER_END_TAG:
- return;
- case XML_PARSER_EOF:
- ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData, "Reference at EOFn");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- return;
- case XML_PARSER_PROLOG:
- case XML_PARSER_START:
- case XML_PARSER_MISC:
- ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData, "Reference in prolog!n");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- return;
- case XML_PARSER_EPILOG:
- ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData, "Reference in epilog!n");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- return;
- case XML_PARSER_ENTITY_VALUE:
- /*
- * NOTE: in the case of entity values, we don't do the
- * substitution here since we need the literal
- * entity value to be able to save the internal
- * subset of the document.
- * This will be handled by xmlDecodeEntities
- */
- return;
- case XML_PARSER_ATTRIBUTE_VALUE:
- /*
- * NOTE: in the case of attributes values, we don't do the
- * substitution here unless we are in a mode where
- * the parser is explicitly asked to substitute
- * entities. The SAX callback is called with values
- * without entity substitution.
- * This will then be handled by xmlDecodeEntities
- */
- return;
- case XML_PARSER_ENTITY_DECL:
- /*
- * we just ignore it there
- * the substitution will be done once the entity is referenced
- */
- return;
- case XML_PARSER_DTD:
- ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "Entity references are forbiden in DTDs!n");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- return;
- }
- NEXT; /* step over the '&' */
- name = xmlScanName(ctxt); /* presumably does not consume input: the name is skipped explicitly below -- TODO confirm */
- if (name == NULL) {
- ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData, "Entity reference: no namen");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->token = '&'; /* hand the already consumed '&' back as a token */
- return;
- }
- if (NXT(xmlStrlen(name)) != ';') {
- ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "Entity reference: ';' expectedn");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->token = '&';
- xmlFree(name);
- return;
- }
- SKIP(xmlStrlen(name) + 1); /* the name plus the trailing ';' */
- if (ctxt->sax != NULL) {
- if (ctxt->sax->getEntity != NULL)
- ent = ctxt->sax->getEntity(ctxt->userData, name);
- }
- /*
- * [ WFC: Entity Declared ]
- * the Name given in the entity reference must match that in an entity
- * declaration, except that well-formed documents need not declare any
- * of the following entities: amp, lt, gt, apos, quot.
- */
- if (ent == NULL)
- ent = xmlGetPredefinedEntity(name);
- if (ent == NULL) {
- ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "Entity reference: entity %s not declaredn",
- name);
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- xmlFree(name);
- return;
- }
- /*
- * [ WFC: Parsed Entity ]
- * An entity reference must not contain the name of an unparsed entity
- * (the error is recorded but processing continues below)
- */
- if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
- ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "Entity reference to unparsed entity %sn", name);
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- }
- if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
- ctxt->token = ent->content[0]; /* predefined entity: its first content char becomes the token */
- xmlFree(name);
- return;
- }
- input = xmlNewEntityInputStream(ctxt, ent); /* NOTE(review): result is pushed without a NULL check */
- xmlPushInput(ctxt, input);
- xmlFree(name);
- return;
- }
- /**
- * xmlParserHandlePEReference:
- * @ctxt: the parser context
- *
- * [69] PEReference ::= '%' Name ';'
- *
- * [ WFC: No Recursion ]
- * TODO A parsed entity must not contain a recursive
- * reference to itself, either directly or indirectly.
- *
- * [ WFC: Entity Declared ]
- * In a document without any DTD, a document with only an internal DTD
- * subset which contains no parameter entity references, or a document
- * with "standalone='yes'", ... ... The declaration of a parameter
- * entity must precede any reference to it...
- *
- * [ VC: Entity Declared ]
- * In a document with an external subset or external parameter entities
- * with "standalone='no'", ... ... The declaration of a parameter entity
- * must precede any reference to it...
- *
- * [ WFC: In DTD ]
- * Parameter-entity references may only appear in the DTD.
- * NOTE: misleading but this is handled.
- *
- * A PEReference may have been detected in the current input stream;
- * the handling is done according to
- * http://www.w3.org/TR/REC-xml#entproc
- * i.e.
- * - Included in literal in entity values
- * - Included as Parameter Entity reference within DTDs
- *
- * Does nothing when a token is already pending (ctxt->token != 0) or
- * when the current character is not '%'. The switch on ctxt->instate
- * returns for every listed state except XML_PARSER_DTD, which only
- * returns when reading the internal subset directly (external == 0 and
- * a single input on the stack); otherwise the reference is parsed and,
- * for a known parameter entity, a new input stream is pushed.
- *
- * NOTE(review): the error strings end in a bare 'n'; this looks like a
- * newline escape whose backslash was lost -- confirm against upstream.
- */
- void
- xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
- xmlChar *name;
- xmlEntityPtr entity = NULL;
- xmlParserInputPtr input;
- if (ctxt->token != 0) { /* a pending token takes precedence */
- return;
- }
- if (RAW != '%') return;
- switch(ctxt->instate) {
- case XML_PARSER_CDATA_SECTION:
- return;
- case XML_PARSER_COMMENT:
- return;
- case XML_PARSER_START_TAG:
- return;
- case XML_PARSER_END_TAG:
- return;
- case XML_PARSER_EOF:
- ctxt->errNo = XML_ERR_PEREF_AT_EOF;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData, "PEReference at EOFn");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- return;
- case XML_PARSER_PROLOG:
- case XML_PARSER_START:
- case XML_PARSER_MISC:
- ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData, "PEReference in prolog!n");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- return;
- case XML_PARSER_ENTITY_DECL:
- case XML_PARSER_CONTENT:
- case XML_PARSER_ATTRIBUTE_VALUE:
- case XML_PARSER_PI:
- case XML_PARSER_SYSTEM_LITERAL:
- /* we just ignore it there */
- return;
- case XML_PARSER_EPILOG:
- ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData, "PEReference in epilog!n");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- return;
- case XML_PARSER_ENTITY_VALUE:
- /*
- * NOTE: in the case of entity values, we don't do the
- * substitution here since we need the literal
- * entity value to be able to save the internal
- * subset of the document.
- * This will be handled by xmlDecodeEntities
- */
- return;
- case XML_PARSER_DTD:
- /*
- * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
- * In the internal DTD subset, parameter-entity references
- * can occur only where markup declarations can occur, not
- * within markup declarations.
- * In that case this is handled in xmlParseMarkupDecl
- */
- if ((ctxt->external == 0) && (ctxt->inputNr == 1))
- return;
- }
- NEXT; /* step over the '%' */
- name = xmlParseName(ctxt);
- if (name == NULL) {
- ctxt->errNo = XML_ERR_PEREF_NO_NAME;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no namen");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- } else {
- if (RAW == ';') {
- NEXT; /* step over the ';' */
- if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
- entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
- if (entity == NULL) {
- 
- /*
- * [ WFC: Entity Declared ]
- * In a document without any DTD, a document with only an
- * internal DTD subset which contains no parameter entity
- * references, or a document with "standalone='yes'", ...
- * ... The declaration of a parameter entity must precede
- * any reference to it...
- */
- if ((ctxt->standalone == 1) ||
- ((ctxt->hasExternalSubset == 0) &&
- (ctxt->hasPErefs == 0))) {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "PEReference: %%%s; not foundn", name);
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- } else {
- /*
- * [ VC: Entity Declared ]
- * In a document with an external subset or external
- * parameter entities with "standalone='no'", ...
- * ... The declaration of a parameter entity must precede
- * any reference to it...
- * Only a validity problem here: a warning is issued and
- * ctxt->valid is cleared, wellFormed is left untouched.
- */
- if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
- ctxt->sax->warning(ctxt->userData,
- "PEReference: %%%s; not foundn", name);
- ctxt->valid = 0;
- }
- } else {
- if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
- (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
- /*
- * TODO !!! handle the extra spaces added before and after
- * c.f. http://www.w3.org/TR/REC-xml#as-PE
- */
- input = xmlNewEntityInputStream(ctxt, entity); /* NOTE(review): result is pushed without a NULL check */
- xmlPushInput(ctxt, input);
- if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
- (RAW == '<') && (NXT(1) == '?') &&
- (NXT(2) == 'x') && (NXT(3) == 'm') &&
- (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
- xmlParseTextDecl(ctxt); /* external entity starting with "<?xml" + blank */
- }
- if (ctxt->token == 0)
- ctxt->token = ' '; /* a space is queued as the next token */
- } else {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "xmlHandlePEReference: %s is not a parameter entityn",
- name);
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- }
- }
- } else {
- ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "xmlHandlePEReference: expecting ';'n");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- }
- xmlFree(name);
- }
- }
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates `buffer' accordingly.  The caller
 * must have an `int buffer##_size' variable in scope and must be a
 * function returning a pointer: on allocation failure the old buffer is
 * released and the enclosing function returns NULL.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_grown;						\
    buffer##_size *= 2;							\
    /* keep the old pointer until the reallocation is known to work */	\
    buffer##_grown = (xmlChar *)					\
	xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));		\
    if (buffer##_grown == NULL) {					\
	perror("realloc failed");					\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = buffer##_grown;						\
}
- /**
- * xmlDecodeEntities:
- * @ctxt: the parser context
- * @len: the len to decode (in bytes !), -1 for no size limit
- * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
- * @end: an end marker xmlChar, 0 if none
- * @end2: an end marker xmlChar, 0 if none
- * @end3: an end marker xmlChar, 0 if none
- *
- * [67] Reference ::= EntityRef | CharRef
- *
- * [69] PEReference ::= '%' Name ';'
- *
- * Reads from the current input of @ctxt and copies characters into a
- * freshly allocated buffer until one of the end markers, a 0 character
- * or the size limit is reached. Character references are always decoded;
- * entity references are handled only if @what has XML_SUBSTITUTE_REF
- * and parameter-entity references only if it has XML_SUBSTITUTE_PEREF.
- * Gives up with an entity-loop error when ctxt->depth exceeds 40.
- *
- * Returns A newly allocated string with the substitution done. The caller
- * must deallocate it ! NULL is returned on entity loop or allocation
- * failure.
- */
- xmlChar *
- xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
- xmlChar end, xmlChar end2, xmlChar end3) {
- xmlChar *buffer = NULL;
- int buffer_size = 0;
- int nbchars = 0;
- xmlChar *current = NULL;
- xmlEntityPtr ent;
- unsigned int max = (unsigned int) len; /* len == -1 converts to the largest unsigned value, i.e. no limit */
- int c,l;
- if (ctxt->depth > 40) {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "Detected entity reference loopn");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->errNo = XML_ERR_ENTITY_LOOP;
- return(NULL);
- }
- /*
- * allocate a translation buffer.
- */
- buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
- buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
- if (buffer == NULL) {
- perror("xmlDecodeEntities: malloc failed");
- return(NULL);
- }
- /*
- * Ok loop until we reach one of the ending char or a size limit.
- */
- c = CUR_CHAR(l);
- while ((nbchars < max) && (c != end) &&
- (c != end2) && (c != end3)) {
- if (c == 0) break;
- if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
- int val = xmlParseCharRef(ctxt);
- COPY_BUF(0,buffer,nbchars,val);
- NEXTL(l);
- } else if ((c == '&') && (ctxt->token != '&') &&
- (what & XML_SUBSTITUTE_REF)) {
- ent = xmlParseEntityRef(ctxt);
- if ((ent != NULL) &&
- (ctxt->replaceEntities != 0)) {
- current = ent->content; /* NOTE(review): dereferenced below without a NULL check */
- while (*current != 0) {
- buffer[nbchars++] = *current++;
- if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
- growBuffer(buffer);
- }
- }
- } else if (ent != NULL) {
- /* no substitution: the reference is re-emitted as "&name;" */
- const xmlChar *cur = ent->name;
- buffer[nbchars++] = '&';
- if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
- growBuffer(buffer);
- }
- while (*cur != 0) { /* NOTE(review): no growth check inside this copy loop */
- buffer[nbchars++] = *cur++;
- }
- buffer[nbchars++] = ';';
- }
- } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
- /*
- * a PEReference induce to switch the entity flow,
- * we break here to flush the current set of chars
- * parsed if any. We will be called back later.
- */
- if (nbchars != 0) break;
- xmlParsePEReference(ctxt);
- /*
- * Pop-up of finished entities.
- */
- while ((RAW == 0) && (ctxt->inputNr > 1))
- xmlPopInput(ctxt);
- break;
- } else {
- COPY_BUF(l,buffer,nbchars,c);
- NEXTL(l);
- if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
- growBuffer(buffer);
- }
- }
- c = CUR_CHAR(l);
- }
- buffer[nbchars++] = 0; /* terminate the result string */
- return(buffer);
- }
- /**
- * xmlStringDecodeEntities:
- * @ctxt: the parser context
- * @str: the input string
- * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
- * @end: an end marker xmlChar, 0 if none
- * @end2: an end marker xmlChar, 0 if none
- * @end3: an end marker xmlChar, 0 if none
- *
- * [67] Reference ::= EntityRef | CharRef
- *
- * [69] PEReference ::= '%' Name ';'
- *
- * Walks @str and copies it into a freshly allocated buffer until a 0
- * character or one of the end markers is found. Character references are
- * always decoded (a value of 0 is dropped); entity references are
- * handled only if @what has XML_SUBSTITUTE_REF and parameter-entity
- * references only if it has XML_SUBSTITUTE_PEREF. Entity content is
- * expanded by calling this function recursively, with ctxt->depth
- * incremented around the call; an entity-loop error is raised when
- * ctxt->depth exceeds 40.
- *
- * Returns A newly allocated string with the substitution done. The caller
- * must deallocate it ! NULL is returned on entity loop or allocation
- * failure.
- */
- xmlChar *
- xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
- xmlChar end, xmlChar end2, xmlChar end3) {
- xmlChar *buffer = NULL;
- int buffer_size = 0;
- xmlChar *current = NULL;
- xmlEntityPtr ent;
- int c,l;
- int nbchars = 0;
- if (ctxt->depth > 40) {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "Detected entity reference loopn");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->errNo = XML_ERR_ENTITY_LOOP;
- return(NULL);
- }
- /*
- * allocate a translation buffer.
- */
- buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
- buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
- if (buffer == NULL) {
- perror("xmlDecodeEntities: malloc failed");
- return(NULL);
- }
- /*
- * Ok loop until we reach one of the ending char or a size limit.
- */
- c = CUR_SCHAR(str, l);
- while ((c != 0) && (c != end) && (c != end2) && (c != end3)) {
- if (c == 0) break;
- if ((c == '&') && (str[1] == '#')) {
- int val = xmlParseStringCharRef(ctxt, &str); /* advances str itself */
- if (val != 0) {
- COPY_BUF(0,buffer,nbchars,val);
- }
- } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
- ent = xmlParseStringEntityRef(ctxt, &str);
- if ((ent != NULL) && (ent->content != NULL)) {
- xmlChar *rep;
- ctxt->depth++; /* track nesting for the loop detection above */
- rep = xmlStringDecodeEntities(ctxt, ent->content, what,
- 0, 0, 0);
- ctxt->depth--;
- if (rep != NULL) {
- current = rep;
- while (*current != 0) {
- buffer[nbchars++] = *current++;
- if (nbchars >
- buffer_size - XML_PARSER_BUFFER_SIZE) {
- growBuffer(buffer); /* NOTE(review): rep is not freed if this returns NULL */
- }
- }
- xmlFree(rep);
- }
- } else if (ent != NULL) {
- /* entity without content: re-emitted as "&name;" */
- int i = xmlStrlen(ent->name);
- const xmlChar *cur = ent->name;
- buffer[nbchars++] = '&';
- if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
- growBuffer(buffer);
- }
- for (;i > 0;i--)
- buffer[nbchars++] = *cur++;
- buffer[nbchars++] = ';';
- }
- } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
- ent = xmlParseStringPEReference(ctxt, &str);
- if (ent != NULL) {
- xmlChar *rep;
- ctxt->depth++;
- rep = xmlStringDecodeEntities(ctxt, ent->content, what,
- 0, 0, 0); /* NOTE(review): ent->content is not checked for NULL here */
- ctxt->depth--;
- if (rep != NULL) {
- current = rep;
- while (*current != 0) {
- buffer[nbchars++] = *current++;
- if (nbchars >
- buffer_size - XML_PARSER_BUFFER_SIZE) {
- growBuffer(buffer);
- }
- }
- xmlFree(rep);
- }
- }
- } else {
- COPY_BUF(l,buffer,nbchars,c);
- str += l;
- if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
- growBuffer(buffer);
- }
- }
- c = CUR_SCHAR(str, l);
- }
- buffer[nbchars++] = 0; /* terminate the result string */
- return(buffer);
- }
- /************************************************************************
- * *
- * Commodity functions to handle encodings *
- * *
- ************************************************************************/
- /*
- * xmlCheckLanguageID
- * @lang: pointer to the string value
- *
- * Checks that the value conforms to the LanguageID production:
- *
- * [33] LanguageID ::= Langcode ('-' Subcode)*
- * [34] Langcode ::= ISO639Code | IanaCode | UserCode
- * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
- * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
- * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
- * [38] Subcode ::= ([a-z] | [A-Z])+
- *
- * Returns 1 if correct 0 otherwise
- **/
- int
- xmlCheckLanguageID(const xmlChar *lang) {
- const xmlChar *cur = lang;
- if (cur == NULL)
- return(0);
- if (((cur[0] == 'i') && (cur[1] == '-')) ||
- ((cur[0] == 'I') && (cur[1] == '-'))) {
- /*
- * IANA code
- */
- cur += 2;
- while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
- ((cur[0] >= 'a') && (cur[0] <= 'z')))
- cur++;
- } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
- ((cur[0] == 'X') && (cur[1] == '-'))) {
- /*
- * User code
- */
- cur += 2;
- while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
- ((cur[0] >= 'a') && (cur[0] <= 'z')))
- cur++;
- } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
- ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
- /*
- * ISO639
- */
- cur++;
- if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
- ((cur[0] >= 'a') && (cur[0] <= 'z')))
- cur++;
- else
- return(0);
- } else
- return(0);
- while (cur[0] != 0) {
- if (cur[0] != '-')
- return(0);
- cur++;
- if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
- ((cur[0] >= 'a') && (cur[0] <= 'z')))
- cur++;
- else
- return(0);
- while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
- ((cur[0] >= 'a') && (cur[0] <= 'z')))
- cur++;
- }
- return(1);
- }
- /**
- * xmlSwitchEncoding:
- * @ctxt: the parser context
- * @enc: the encoding value (number)
- *
- * change the input functions when discovering the character encoding
- * of a given entity.
- */
- void
- xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
- {
- xmlCharEncodingHandlerPtr handler;
- handler = xmlGetCharEncodingHandler(enc);
- if (handler != NULL) {
- if (ctxt->input != NULL) {
- if (ctxt->input->buf != NULL) {
- if (ctxt->input->buf->encoder != NULL) {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "xmlSwitchEncoding : encoder already regiteredn");
- return;
- }
- ctxt->input->buf->encoder = handler;
- /*
- * Is there already some content down the pipe to convert
- */
- if ((ctxt->input->buf->buffer != NULL) &&
- (ctxt->input->buf->buffer->use > 0)) {
- xmlChar *buf;
- int res, len, size;
- int processed;
- /*
- * Specific handling of the Byte Order Mark for
- * UTF-16
- */
- if ((enc == XML_CHAR_ENCODING_UTF16LE) &&
- (ctxt->input->cur[0] == 0xFF) &&
- (ctxt->input->cur[1] == 0xFE)) {
- SKIP(2);
- }
- if ((enc == XML_CHAR_ENCODING_UTF16BE) &&
- (ctxt->input->cur[0] == 0xFE) &&
- (ctxt->input->cur[1] == 0xFF)) {
- SKIP(2);
- }
- /*
- * convert the non processed part
- */
- processed = ctxt->input->cur - ctxt->input->base;
- len = ctxt->input->buf->buffer->use - processed;
- if (len <= 0) {
- return;
- }
- size = ctxt->input->buf->buffer->use * 4;
- if (size < 4000)
- size = 4000;
- retry_larger:
- buf = (xmlChar *) xmlMalloc(size + 1);
- if (buf == NULL) {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "xmlSwitchEncoding : out of memoryn");
- return;
- }
- /* TODO !!! Handling of buf too small */
- res = handler->input(buf, size, ctxt->input->cur, &len);
- if (res == -1) {
- size *= 2;
- xmlFree(buf);
- goto retry_larger;
- }
- if ((res < 0) ||
- (len != ctxt->input->buf->buffer->use - processed)) {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "xmlSwitchEncoding : conversion failedn");
- xmlFree(buf);
- return;
- }
- /*
- * Conversion succeeded, get rid of the old buffer
- */
- xmlFree(ctxt->input->buf->buffer->content);
- ctxt->input->buf->buffer->content = buf;
- ctxt->input->base = buf;
- ctxt->input->cur = buf;
- ctxt->input->buf->buffer->size = size;
- ctxt->input->buf->buffer->use = res;
- buf[res] = 0;
- }
- return;
- } else {
- if (ctxt->input->length == 0) {
- /*
- * When parsing a static memory array one must know the
- * size to be able to convert the buffer.
- */
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "xmlSwitchEncoding : no inputn");
- return;
- } else {
- xmlChar *buf;
- int res, len;
- int processed = ctxt->input->cur - ctxt->input->base;
- /*
- * convert the non processed part
- */
- len = ctxt->input->length - processed;
- if (len <= 0) {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "xmlSwitchEncoding : input fully consumed?n");
- return;
- }
- buf = (xmlChar *) xmlMalloc(ctxt->input->length * 4);
- if (buf == NULL) {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "xmlSwitchEncoding : out of memoryn");
- return;
- }
- res = handler->input(buf, ctxt->input->length * 4,
- ctxt->input->cur, &len);
- if ((res < 0) ||
- (len != ctxt->input->length - processed)) {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "xmlSwitchEncoding : conversion failedn");
- xmlFree(buf);
- return;
- }
- /*
- * Conversion succeeded, get rid of the old buffer
- */
- if ((ctxt->input->free != NULL) &&
- (ctxt->input->base != NULL))
- ctxt->input->free((xmlChar *) ctxt->input->base);
- ctxt->input->base = ctxt->input->cur = buf;
- ctxt->input->length = res;
- }
- }
- } else {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "xmlSwitchEncoding : no inputn");
- }
- }
- switch (enc) {
- case XML_CHAR_ENCODING_ERROR:
- ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData, "encoding unknownn");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- break;
- case XML_CHAR_ENCODING_NONE:
- /* let's assume it's UTF-8 without the XML decl */
- return;
- case XML_CHAR_ENCODING_UTF8:
- /* default encoding, no conversion should be needed */
- return;
- case XML_CHAR_ENCODING_UTF16LE:
- ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "char encoding UTF16 little endian not supportedn");
- break;
- case XML_CHAR_ENCODING_UTF16BE:
- ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "char encoding UTF16 big endian not supportedn");
- break;
- case XML_CHAR_ENCODING_UCS4LE:
- ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "char encoding USC4 little endian not supportedn");
- break;
- case XML_CHAR_ENCODING_UCS4BE:
- ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "char encoding USC4 big endian not supportedn");
- break;
- case XML_CHAR_ENCODING_EBCDIC:
- ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "char encoding EBCDIC not supportedn");
- break;
- case XML_CHAR_ENCODING_UCS4_2143:
- ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "char encoding UCS4 2143 not supportedn");
- break;
- case XML_CHAR_ENCODING_UCS4_3412:
- ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "char encoding UCS4 3412 not supportedn");
- break;
- case XML_CHAR_ENCODING_UCS2:
- ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "char encoding UCS2 not supportedn");
- break;
- case XML_CHAR_ENCODING_8859_1:
- ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "char encoding ISO_8859_1 ISO Latin 1 not supportedn");
- break;
- case XML_CHAR_ENCODING_8859_2:
- ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "char encoding ISO_8859_2 ISO Latin 2 not supportedn");
- break;
- case XML_CHAR_ENCODING_8859_3:
- ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "char encoding ISO_8859_3 not supportedn");
- break;
- case XML_CHAR_ENCODING_8859_4:
- ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "char encoding ISO_8859_4 not supportedn");
- break;
- case XML_CHAR_ENCODING_8859_5:
- ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "char encoding ISO_8859_5 not supportedn");
- break;
- case XML_CHAR_ENCODING_8859_6:
- ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "char encoding ISO_8859_6 not supportedn");
- break;
- case XML_CHAR_ENCODING_8859_7:
- ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "char encoding ISO_8859_7 not supportedn");
- break;
- case XML_CHAR_ENCODING_8859_8:
- ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "char encoding ISO_8859_8 not supportedn");
- break;
- case XML_CHAR_ENCODING_8859_9:
- ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "char encoding ISO_8859_9 not supportedn");
- break;
- case XML_CHAR_ENCODING_2022_JP:
- ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "char encoding ISO-2022-JPnot supportedn");
- break;
- case XML_CHAR_ENCODING_SHIFT_JIS:
- ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "char encoding Shift_JISnot supportedn");
- break;
- case XML_CHAR_ENCODING_EUC_JP:
- ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "char encoding EUC-JPnot supportedn");
- break;
- }
- }
- /************************************************************************
- * *
- * Commodity functions to handle xmlChars *
- * *
- ************************************************************************/
- /**
- * xmlStrndup:
- * @cur: the input xmlChar *
- * @len: the len of @cur
- *
- * a strndup for array of xmlChar's
- *
- * Returns a new xmlChar * or NULL
- */
- xmlChar *
- xmlStrndup(const xmlChar *cur, int len) {
- xmlChar *ret;
-
- if ((cur == NULL) || (len < 0)) return(NULL);
- ret = xmlMalloc((len + 1) * sizeof(xmlChar));
- if (ret == NULL) {
- fprintf(stderr, "malloc of %ld byte failedn",
- (len + 1) * (long)sizeof(xmlChar));
- return(NULL);
- }
- memcpy(ret, cur, len * sizeof(xmlChar));
- ret[len] = 0;
- return(ret);
- }
- /**
- * xmlStrdup:
- * @cur: the input xmlChar *
- *
- * a strdup for array of xmlChar's. Since they are supposed to be
- * encoded in UTF-8 or an encoding with 8bit based chars, we assume
- * a termination mark of '0'.
- *
- * Returns a new xmlChar * or NULL
- */
- xmlChar *
- xmlStrdup(const xmlChar *cur) {
- const xmlChar *p = cur;
- if (cur == NULL) return(NULL);
- while (*p != 0) p++;
- return(xmlStrndup(cur, p - cur));
- }
- /**
- * xmlCharStrndup:
- * @cur: the input char *
- * @len: the len of @cur
- *
- * a strndup for char's to xmlChar's
- *
- * Returns a new xmlChar * or NULL
- */
- xmlChar *
- xmlCharStrndup(const char *cur, int len) {
- int i;
- xmlChar *ret;
-
- if ((cur == NULL) || (len < 0)) return(NULL);
- ret = xmlMalloc((len + 1) * sizeof(xmlChar));
- if (ret == NULL) {
- fprintf(stderr, "malloc of %ld byte failedn",
- (len + 1) * (long)sizeof(xmlChar));
- return(NULL);
- }
- for (i = 0;i < len;i++)
- ret[i] = (xmlChar) cur[i];
- ret[len] = 0;
- return(ret);
- }
- /**
- * xmlCharStrdup:
- * @cur: the input char *
- * @len: the len of @cur
- *
- * a strdup for char's to xmlChar's
- *
- * Returns a new xmlChar * or NULL
- */
- xmlChar *
- xmlCharStrdup(const char *cur) {
- const char *p = cur;
- if (cur == NULL) return(NULL);
- while (*p != ' ') p++;
- return(xmlCharStrndup(cur, p - cur));
- }
- /**
- * xmlStrcmp:
- * @str1: the first xmlChar *
- * @str2: the second xmlChar *
- *
- * a strcmp for xmlChar's
- *
- * Returns the integer result of the comparison
- */
- int
- xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
- register int tmp;
- if ((str1 == NULL) && (str2 == NULL)) return(0);
- if (str1 == NULL) return(-1);
- if (str2 == NULL) return(1);
- do {
- tmp = *str1++ - *str2++;
- if (tmp != 0) return(tmp);
- } while ((*str1 != 0) && (*str2 != 0));
- return (*str1 - *str2);
- }
- /**
- * xmlStrncmp:
- * @str1: the first xmlChar *
- * @str2: the second xmlChar *
- * @len: the max comparison length
- *
- * a strncmp for xmlChar's
- *
- * Returns the integer result of the comparison
- */
- int
- xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
- register int tmp;
- if (len <= 0) return(0);
- if ((str1 == NULL) && (str2 == NULL)) return(0);
- if (str1 == NULL) return(-1);
- if (str2 == NULL) return(1);
- do {
- tmp = *str1++ - *str2++;
- if (tmp != 0) return(tmp);
- len--;
- if (len <= 0) return(0);
- } while ((*str1 != 0) && (*str2 != 0));
- return (*str1 - *str2);
- }
- /**
- * xmlStrchr:
- * @str: the xmlChar * array
- * @val: the xmlChar to search
- *
- * a strchr for xmlChar's
- *
- * Returns the xmlChar * for the first occurence or NULL.
- */
- const xmlChar *
- xmlStrchr(const xmlChar *str, xmlChar val) {
- if (str == NULL) return(NULL);
- while (*str != 0) {
- if (*str == val) return((xmlChar *) str);
- str++;
- }
- return(NULL);
- }
- /**
- * xmlStrstr:
- * @str: the xmlChar * array (haystack)
- * @val: the xmlChar to search (needle)
- *
- * a strstr for xmlChar's
- *
- * Returns the xmlChar * for the first occurence or NULL.
- */
- const xmlChar *
- xmlStrstr(const xmlChar *str, xmlChar *val) {
- int n;
-
- if (str == NULL) return(NULL);
- if (val == NULL) return(NULL);
- n = xmlStrlen(val);
- if (n == 0) return(str);
- while (*str != 0) {
- if (*str == *val) {
- if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
- }
- str++;
- }
- return(NULL);
- }
- /**
- * xmlStrsub:
- * @str: the xmlChar * array (haystack)
- * @start: the index of the first char (zero based)
- * @len: the length of the substring
- *
- * Extract a substring of a given string
- *
- * Returns the xmlChar * for the first occurence or NULL.
- */
- xmlChar *
- xmlStrsub(const xmlChar *str, int start, int len) {
- int i;
-
- if (str == NULL) return(NULL);
- if (start < 0) return(NULL);
- if (len < 0) return(NULL);
- for (i = 0;i < start;i++) {
- if (*str == 0) return(NULL);
- str++;
- }
- if (*str == 0) return(NULL);
- return(xmlStrndup(str, len));
- }
- /**
- * xmlStrlen:
- * @str: the xmlChar * array
- *
- * length of a xmlChar's string
- *
- * Returns the number of xmlChar contained in the ARRAY.
- */
- int
- xmlStrlen(const xmlChar *str) {
- int len = 0;
- if (str == NULL) return(0);
- while (*str != 0) {
- str++;
- len++;
- }
- return(len);
- }
- /**
- * xmlStrncat:
- * @cur: the original xmlChar * array
- * @add: the xmlChar * array added
- * @len: the length of @add
- *
- * a strncat for array of xmlChar's
- *
- * Returns a new xmlChar * containing the concatenated string.
- */
- xmlChar *
- xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
- int size;
- xmlChar *ret;
- if ((add == NULL) || (len == 0))
- return(cur);
- if (cur == NULL)
- return(xmlStrndup(add, len));
- size = xmlStrlen(cur);
- ret = xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
- if (ret == NULL) {
- fprintf(stderr, "xmlStrncat: realloc of %ld byte failedn",
- (size + len + 1) * (long)sizeof(xmlChar));
- return(cur);
- }
- memcpy(&ret[size], add, len * sizeof(xmlChar));
- ret[size + len] = 0;
- return(ret);
- }
- /**
- * xmlStrcat:
- * @cur: the original xmlChar * array
- * @add: the xmlChar * array added
- *
- * a strcat for array of xmlChar's. Since they are supposed to be
- * encoded in UTF-8 or an encoding with 8bit based chars, we assume
- * a termination mark of '0'.
- *
- * Returns a new xmlChar * containing the concatenated string.
- */
- xmlChar *
- xmlStrcat(xmlChar *cur, const xmlChar *add) {
- const xmlChar *p = add;
- if (add == NULL) return(cur);
- if (cur == NULL)
- return(xmlStrdup(add));
- while (*p != 0) p++;
- return(xmlStrncat(cur, add, p - add));
- }
- /************************************************************************
- * *
- * Commodity functions, cleanup needed ? *
- * *
- ************************************************************************/
- /**
- * areBlanks:
- * @ctxt: an XML parser context
- * @str: a xmlChar *
- * @len: the size of @str
- *
- * Is this a sequence of blank chars that one can ignore ?
- *
- * Returns 1 if ignorable 0 otherwise.
- */
- static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
- int i, ret;
- xmlNodePtr lastChild;
- /*
- * Check for xml:space value.
- */
- if (*(ctxt->space) == 1)
- return(0);
- /*
- * Check that the string is made of blanks
- */
- for (i = 0;i < len;i++)
- if (!(IS_BLANK(str[i]))) return(0);
- /*
- * Look if the element is mixed content in the Dtd if available
- */
- if (ctxt->myDoc != NULL) {
- ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
- if (ret == 0) return(1);
- if (ret == 1) return(0);
- }
- /*
- * Otherwise, heuristic :-
- */
- if (ctxt->keepBlanks)
- return(0);
- if (RAW != '<') return(0);
- if (ctxt->node == NULL) return(0);
- if ((ctxt->node->children == NULL) &&
- (RAW == '<') && (NXT(1) == '/')) return(0);
- lastChild = xmlGetLastChild(ctxt->node);
- if (lastChild == NULL) {
- if (ctxt->node->content != NULL) return(0);
- } else if (xmlNodeIsText(lastChild))
- return(0);
- else if ((ctxt->node->children != NULL) &&
- (xmlNodeIsText(ctxt->node->children)))
- return(0);
- return(1);
- }
- /**
- * xmlHandleEntity:
- * @ctxt: an XML parser context
- * @entity: an XML entity pointer.
- *
- * Default handling of defined entities, when should we define a new input
- * stream ? When do we just handle that as a set of chars ?
- *
- * OBSOLETE: to be removed at some point.
- */
- void
- xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
- int len;
- xmlParserInputPtr input;
- if (entity->content == NULL) {
- ctxt->errNo = XML_ERR_INTERNAL_ERROR;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULLn",
- entity->name);
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- return;
- }
- len = xmlStrlen(entity->content);
- if (len <= 2) goto handle_as_char;
- /*
- * Redefine its content as an input stream.
- */
- input = xmlNewEntityInputStream(ctxt, entity);
- xmlPushInput(ctxt, input);
- return;
- handle_as_char:
- /*
- * Just handle the content as a set of chars.
- */
- if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
- (ctxt->sax->characters != NULL))
- ctxt->sax->characters(ctxt->userData, entity->content, len);
- }
- /*
- * Forward declarations for recursive behaviour.
- */
- void xmlParsePEReference(xmlParserCtxtPtr ctxt);
- void xmlParseReference(xmlParserCtxtPtr ctxt);
- /************************************************************************
- * *
- * Extra stuff for namespace support *
- * Relates to http://www.w3.org/TR/WD-xml-names *
- * *
- ************************************************************************/
- /**
- * xmlNamespaceParseNCName:
- * @ctxt: an XML parser context
- *
- * parse an XML namespace name.
- *
- * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
- *
- * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
- * CombiningChar | Extender
- *
- * Returns the namespace name or NULL
- */
- xmlChar *
- xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
- xmlChar buf[XML_MAX_NAMELEN + 5];
- int len = 0, l;
- int cur = CUR_CHAR(l);
- /* load first the value of the char !!! */
- if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
- while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
- (cur == '.') || (cur == '-') ||
- (cur == '_') ||
- (IS_COMBINING(cur)) ||
- (IS_EXTENDER(cur))) {
- COPY_BUF(l,buf,len,cur);
- NEXTL(l);
- cur = CUR_CHAR(l);
- if (len >= XML_MAX_NAMELEN) {
- fprintf(stderr,
- "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limitn");
- while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
- (cur == '.') || (cur == '-') ||
- (cur == '_') ||
- (IS_COMBINING(cur)) ||
- (IS_EXTENDER(cur))) {
- NEXTL(l);
- cur = CUR_CHAR(l);
- }
- break;
- }
- }
- return(xmlStrndup(buf, len));
- }
- /**
- * xmlNamespaceParseQName:
- * @ctxt: an XML parser context
- * @prefix: a xmlChar **
- *
- * parse an XML qualified name
- *
- * [NS 5] QName ::= (Prefix ':')? LocalPart
- *
- * [NS 6] Prefix ::= NCName
- *
- * [NS 7] LocalPart ::= NCName
- *
- * Returns the local part, and prefix is updated
- * to get the Prefix if any.
- */
- xmlChar *
- xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, xmlChar **prefix) {
- xmlChar *ret = NULL;
- *prefix = NULL;
- ret = xmlNamespaceParseNCName(ctxt);
- if (RAW == ':') {
- *prefix = ret;
- NEXT;
- ret = xmlNamespaceParseNCName(ctxt);
- }
- return(ret);
- }
- /**
- * xmlSplitQName:
- * @ctxt: an XML parser context
- * @name: an XML parser context
- * @prefix: a xmlChar **
- *
- * parse an XML qualified name string
- *
- * [NS 5] QName ::= (Prefix ':')? LocalPart
- *
- * [NS 6] Prefix ::= NCName
- *
- * [NS 7] LocalPart ::= NCName
- *
- * Returns the local part, and prefix is updated
- * to get the Prefix if any.
- */
- xmlChar *
- xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
- xmlChar buf[XML_MAX_NAMELEN + 5];
- int len = 0;
- xmlChar *ret = NULL;
- const xmlChar *cur = name;
- int c,l;
- *prefix = NULL;
- /* xml: prefix is not really a namespace */
- if ((cur[0] == 'x') && (cur[1] == 'm') &&
- (cur[2] == 'l') && (cur[3] == ':'))
- return(xmlStrdup(name));
- /* nasty but valid */
- if (cur[0] == ':')
- return(xmlStrdup(name));
- c = CUR_SCHAR(cur, l);
- if (!IS_LETTER(c) && (c != '_')) return(NULL);
- while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
- (c == '.') || (c == '-') ||
- (c == '_') ||
- (IS_COMBINING(c)) ||
- (IS_EXTENDER(c))) {
- COPY_BUF(l,buf,len,c);
- cur += l;
- c = CUR_SCHAR(cur, l);
- }
-
- ret = xmlStrndup(buf, len);
- if (c == ':') {
- cur += l;
- c = CUR_SCHAR(cur, l);
- if (!IS_LETTER(c) && (c != '_')) return(ret);
- *prefix = ret;
- len = 0;
- while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
- (c == '.') || (c == '-') ||
- (c == '_') ||
- (IS_COMBINING(c)) ||
- (IS_EXTENDER(c))) {
- COPY_BUF(l,buf,len,c);
- cur += l;
- c = CUR_SCHAR(cur, l);
- }
-
- ret = xmlStrndup(buf, len);
- }
- return(ret);
- }
- /**
- * xmlNamespaceParseNSDef:
- * @ctxt: an XML parser context
- *
- * parse a namespace prefix declaration
- *
- * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
- *
- * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
- *
- * Returns the namespace name
- */
- xmlChar *
- xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
- xmlChar *name = NULL;
- if ((RAW == 'x') && (NXT(1) == 'm') &&
- (NXT(2) == 'l') && (NXT(3) == 'n') &&
- (NXT(4) == 's')) {
- SKIP(5);
- if (RAW == ':') {
- NEXT;
- name = xmlNamespaceParseNCName(ctxt);
- }
- }
- return(name);
- }
- /**
- * xmlParseQuotedString:
- * @ctxt: an XML parser context
- *
- * [OLD] Parse and return a string between quotes or doublequotes
- * To be removed at next drop of binary compatibility
- *
- * Returns the string parser or NULL.
- */
- xmlChar *
- xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
- xmlChar *buf = NULL;
- int len = 0,l;
- int size = XML_PARSER_BUFFER_SIZE;
- int c;
- buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
- if (buf == NULL) {
- fprintf(stderr, "malloc of %d byte failedn", size);
- return(NULL);
- }
- if (RAW == '"') {
- NEXT;
- c = CUR_CHAR(l);
- while (IS_CHAR(c) && (c != '"')) {
- if (len + 5 >= size) {
- size *= 2;
- buf = xmlRealloc(buf, size * sizeof(xmlChar));
- if (buf == NULL) {
- fprintf(stderr, "realloc of %d byte failedn", size);
- return(NULL);
- }
- }
- COPY_BUF(l,buf,len,c);
- NEXTL(l);
- c = CUR_CHAR(l);
- }
- if (c != '"') {
- ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "String not closed "%.50s"n", buf);
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- } else {
- NEXT;
- }
- } else if (RAW == '''){
- NEXT;
- c = CUR;
- while (IS_CHAR(c) && (c != ''')) {
- if (len + 1 >= size) {
- size *= 2;
- buf = xmlRealloc(buf, size * sizeof(xmlChar));
- if (buf == NULL) {
- fprintf(stderr, "realloc of %d byte failedn", size);
- return(NULL);
- }
- }
- buf[len++] = c;
- NEXT;
- c = CUR;
- }
- if (RAW != ''') {
- ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "String not closed "%.50s"n", buf);
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- } else {
- NEXT;
- }
- }
- return(buf);
- }
- /**
- * xmlParseNamespace:
- * @ctxt: an XML parser context
- *
- * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
- *
- * This is what the older xml-name Working Draft specified, a bunch of
- * other stuff may still rely on it, so support is still here as
- * if it was declared on the root of the Tree:-(
- *
- * To be removed at next drop of binary compatibility
- */
- void
- xmlParseNamespace(xmlParserCtxtPtr ctxt) {
- xmlChar *href = NULL;
- xmlChar *prefix = NULL;
- int garbage = 0;
- /*
- * We just skipped "namespace" or "xml:namespace"
- */
- SKIP_BLANKS;
- while (IS_CHAR(RAW) && (RAW != '>')) {
- /*
- * We can have "ns" or "prefix" attributes
- * Old encoding as 'href' or 'AS' attributes is still supported
- */
- if ((RAW == 'n') && (NXT(1) == 's')) {
- garbage = 0;
- SKIP(2);
- SKIP_BLANKS;
- if (RAW != '=') continue;
- NEXT;
- SKIP_BLANKS;
- href = xmlParseQuotedString(ctxt);
- SKIP_BLANKS;
- } else if ((RAW == 'h') && (NXT(1) == 'r') &&
- (NXT(2) == 'e') && (NXT(3) == 'f')) {
- garbage = 0;
- SKIP(4);
- SKIP_BLANKS;
- if (RAW != '=') continue;
- NEXT;
- SKIP_BLANKS;
- href = xmlParseQuotedString(ctxt);
- SKIP_BLANKS;
- } else if ((RAW == 'p') && (NXT(1) == 'r') &&
- (NXT(2) == 'e') && (NXT(3) == 'f') &&
- (NXT(4) == 'i') && (NXT(5) == 'x')) {
- garbage = 0;
- SKIP(6);
- SKIP_BLANKS;
- if (RAW != '=') continue;
- NEXT;
- SKIP_BLANKS;
- prefix = xmlParseQuotedString(ctxt);
- SKIP_BLANKS;
- } else if ((RAW == 'A') && (NXT(1) == 'S')) {
- garbage = 0;
- SKIP(2);
- SKIP_BLANKS;
- if (RAW != '=') continue;
- NEXT;
- SKIP_BLANKS;
- prefix = xmlParseQuotedString(ctxt);
- SKIP_BLANKS;
- } else if ((RAW == '?') && (NXT(1) == '>')) {
- garbage = 0;
- NEXT;
- } else {
- /*
- * Found garbage when parsing the namespace
- */
- if (!garbage) {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "xmlParseNamespace found garbagen");
- }
- ctxt->errNo = XML_ERR_NS_DECL_ERROR;
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- NEXT;
- }
- }
- MOVETO_ENDTAG(CUR_PTR);
- NEXT;
- /*
- * Register the DTD.
- if (href != NULL)
- if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
- ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
- */
- if (prefix != NULL) xmlFree(prefix);
- if (href != NULL) xmlFree(href);
- }
- /************************************************************************
- * *
- * The parser itself *
- * Relates to http://www.w3.org/TR/REC-xml *
- * *
- ************************************************************************/
- /**
- * xmlScanName:
- * @ctxt: an XML parser context
- *
- * Trickery: parse an XML name but without consuming the input flow
- * Needed for rollback cases.
- *
- * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
- * CombiningChar | Extender
- *
- * [5] Name ::= (Letter | '_' | ':') (NameChar)*
- *
- * [6] Names ::= Name (S Name)*
- *
- * Returns the Name parsed or NULL
- */
- xmlChar *
- xmlScanName(xmlParserCtxtPtr ctxt) {
- xmlChar buf[XML_MAX_NAMELEN];
- int len = 0;
- GROW;
- if (!IS_LETTER(RAW) && (RAW != '_') &&
- (RAW != ':')) {
- return(NULL);
- }
- while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
- (NXT(len) == '.') || (NXT(len) == '-') ||
- (NXT(len) == '_') || (NXT(len) == ':') ||
- (IS_COMBINING(NXT(len))) ||
- (IS_EXTENDER(NXT(len)))) {
- buf[len] = NXT(len);
- len++;
- if (len >= XML_MAX_NAMELEN) {
- fprintf(stderr,
- "xmlScanName: reached XML_MAX_NAMELEN limitn");
- while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
- (NXT(len) == '.') || (NXT(len) == '-') ||
- (NXT(len) == '_') || (NXT(len) == ':') ||
- (IS_COMBINING(NXT(len))) ||
- (IS_EXTENDER(NXT(len))))
- len++;
- break;
- }
- }
- return(xmlStrndup(buf, len));
- }
- /**
- * xmlParseName:
- * @ctxt: an XML parser context
- *
- * parse an XML name.
- *
- * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
- * CombiningChar | Extender
- *
- * [5] Name ::= (Letter | '_' | ':') (NameChar)*
- *
- * [6] Names ::= Name (S Name)*
- *
- * Returns the Name parsed or NULL
- */
- xmlChar *
- xmlParseName(xmlParserCtxtPtr ctxt) {
- xmlChar buf[XML_MAX_NAMELEN + 5];
- int len = 0, l;
- int c;
- GROW;
- c = CUR_CHAR(l);
- if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
- (!IS_LETTER(c) && (c != '_') &&
- (c != ':'))) {
- return(NULL);
- }
- while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
- ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
- (c == '.') || (c == '-') ||
- (c == '_') || (c == ':') ||
- (IS_COMBINING(c)) ||
- (IS_EXTENDER(c)))) {
- COPY_BUF(l,buf,len,c);
- NEXTL(l);
- c = CUR_CHAR(l);
- if (len >= XML_MAX_NAMELEN) {
- fprintf(stderr,
- "xmlParseName: reached XML_MAX_NAMELEN limitn");
- while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
- (c == '.') || (c == '-') ||
- (c == '_') || (c == ':') ||
- (IS_COMBINING(c)) ||
- (IS_EXTENDER(c))) {
- NEXTL(l);
- c = CUR_CHAR(l);
- }
- break;
- }
- }
- return(xmlStrndup(buf, len));
- }
- /**
- * xmlParseStringName:
- * @ctxt: an XML parser context
- * @str: a pointer to an index in the string
- *
- * parse an XML name.
- *
- * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
- * CombiningChar | Extender
- *
- * [5] Name ::= (Letter | '_' | ':') (NameChar)*
- *
- * [6] Names ::= Name (S Name)*
- *
- * Returns the Name parsed or NULL. The str pointer
- * is updated to the current location in the string.
- */
- xmlChar *
- xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
- xmlChar buf[XML_MAX_NAMELEN + 5];
- const xmlChar *cur = *str;
- int len = 0, l;
- int c;
- c = CUR_SCHAR(cur, l);
- if (!IS_LETTER(c) && (c != '_') &&
- (c != ':')) {
- return(NULL);
- }
- while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
- (c == '.') || (c == '-') ||
- (c == '_') || (c == ':') ||
- (IS_COMBINING(c)) ||
- (IS_EXTENDER(c))) {
- COPY_BUF(l,buf,len,c);
- cur += l;
- c = CUR_SCHAR(cur, l);
- if (len >= XML_MAX_NAMELEN) {
- fprintf(stderr,
- "xmlParseName: reached XML_MAX_NAMELEN limitn");
- while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
- (c == '.') || (c == '-') ||
- (c == '_') || (c == ':') ||
- (IS_COMBINING(c)) ||
- (IS_EXTENDER(c))) {
- cur += l;
- c = CUR_SCHAR(cur, l);
- }
- break;
- }
- }
- *str = cur;
- return(xmlStrndup(buf, len));
- }
- /**
- * xmlParseNmtoken:
- * @ctxt: an XML parser context
- *
- * parse an XML Nmtoken.
- *
- * [7] Nmtoken ::= (NameChar)+
- *
- * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
- *
- * Returns the Nmtoken parsed or NULL
- */
- xmlChar *
- xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
- xmlChar buf[XML_MAX_NAMELEN];
- int len = 0;
- int c,l;
- GROW;
- c = CUR_CHAR(l);
- while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
- (c == '.') || (c == '-') ||
- (c == '_') || (c == ':') ||
- (IS_COMBINING(c)) ||
- (IS_EXTENDER(c))) {
- COPY_BUF(l,buf,len,c);
- NEXTL(l);
- c = CUR_CHAR(l);
- if (len >= XML_MAX_NAMELEN) {
- fprintf(stderr,
- "xmlParseNmtoken: reached XML_MAX_NAMELEN limitn");
- while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
- (c == '.') || (c == '-') ||
- (c == '_') || (c == ':') ||
- (IS_COMBINING(c)) ||
- (IS_EXTENDER(c))) {
- NEXTL(l);
- c = CUR_CHAR(l);
- }
- break;
- }
- }
- if (len == 0)
- return(NULL);
- return(xmlStrndup(buf, len));
- }
- /**
- * xmlParseEntityValue:
- * @ctxt: an XML parser context
- * @orig: if non-NULL store a copy of the original entity value
- *
- * parse a value for ENTITY decl.
- *
- * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
- * "'" ([^%&'] | PEReference | Reference)* "'"
- *
- * Returns the EntityValue parsed with reference substitued or NULL
- */
- xmlChar *
- xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
- xmlChar *buf = NULL;
- int len = 0;
- int size = XML_PARSER_BUFFER_SIZE;
- int c, l;
- xmlChar stop;
- xmlChar *ret = NULL;
- const xmlChar *cur = NULL;
- xmlParserInputPtr input;
- if (RAW == '"') stop = '"';
- else if (RAW == ''') stop = ''';
- else {
- ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData, "EntityValue: " or ' expectedn");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- return(NULL);
- }