parser.c
上传用户:sy_wanhua
上传日期:2013-07-25
资源大小:3048k
文件大小:295k
- /*
- * We should be at the end of the DOCTYPE declaration.
- */
- if (RAW != '>') {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminatedn");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
- }
- NEXT;
- }
- /**
- * xmlParseAttribute:
- * @ctxt: an XML parser context
- * @value: a xmlChar ** used to store the value of the attribute
- *
- * parse an attribute
- *
- * [41] Attribute ::= Name Eq AttValue
- *
- * [ WFC: No External Entity References ]
- * Attribute values cannot contain direct or indirect entity references
- * to external entities.
- *
- * [ WFC: No < in Attribute Values ]
- * The replacement text of any entity referred to directly or indirectly in
- * an attribute value (other than "<") must not contain a <.
- *
- * [ VC: Attribute Value Type ]
- * The attribute must have been declared; the value must be of the type
- * declared for it.
- *
- * [25] Eq ::= S? '=' S?
- *
- * With namespace:
- *
- * [NS 11] Attribute ::= QName Eq AttValue
- *
- * Also the case QName == xmlns:??? is handled independently as a namespace
- * definition.
- *
- * Returns the attribute name, and the value in *value.
- */
- xmlChar *
- xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
- xmlChar *name, *val;
- *value = NULL;
- name = xmlParseName(ctxt);
- if (name == NULL) {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData, "error parsing attribute namen");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->errNo = XML_ERR_NAME_REQUIRED;
- return(NULL);
- }
- /*
- * read the value
- */
- SKIP_BLANKS;
- if (RAW == '=') {
- NEXT;
- SKIP_BLANKS;
- val = xmlParseAttValue(ctxt);
- ctxt->instate = XML_PARSER_CONTENT;
- } else {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "Specification mandate value for attribute %sn", name);
- ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- xmlFree(name);
- return(NULL);
- }
- /*
- * Check that xml:lang conforms to the specification
- */
- if (!xmlStrcmp(name, BAD_CAST "xml:lang")) {
- if (!xmlCheckLanguageID(val)) {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "Invalid value for xml:lang : %sn", val);
- ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- }
- }
- /*
- * Check that xml:space conforms to the specification
- */
- if (!xmlStrcmp(name, BAD_CAST "xml:space")) {
- if (!xmlStrcmp(val, BAD_CAST "default"))
- *(ctxt->space) = 0;
- else if (!xmlStrcmp(val, BAD_CAST "preserve"))
- *(ctxt->space) = 1;
- else {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "Invalid value for xml:space : "%s", "default" or "preserve" expectedn",
- val);
- ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- }
- }
- *value = val;
- return(name);
- }
- /**
- * xmlParseStartTag:
- * @ctxt: an XML parser context
- *
- * parse a start of tag either for rule element or
- * EmptyElement. In both case we don't parse the tag closing chars.
- *
- * [40] STag ::= '<' Name (S Attribute)* S? '>'
- *
- * [ WFC: Unique Att Spec ]
- * No attribute name may appear more than once in the same start-tag or
- * empty-element tag.
- *
- * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
- *
- * [ WFC: Unique Att Spec ]
- * No attribute name may appear more than once in the same start-tag or
- * empty-element tag.
- *
- * With namespace:
- *
- * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
- *
- * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
- *
- * Returne the element name parsed
- */
- xmlChar *
- xmlParseStartTag(xmlParserCtxtPtr ctxt) {
- xmlChar *name;
- xmlChar *attname;
- xmlChar *attvalue;
- const xmlChar **atts = NULL;
- int nbatts = 0;
- int maxatts = 0;
- int i;
- if (RAW != '<') return(NULL);
- NEXT;
- name = xmlParseName(ctxt);
- if (name == NULL) {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "xmlParseStartTag: invalid element namen");
- ctxt->errNo = XML_ERR_NAME_REQUIRED;
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- return(NULL);
- }
- /*
- * Now parse the attributes, it ends up with the ending
- *
- * (S Attribute)* S?
- */
- SKIP_BLANKS;
- GROW;
- while ((IS_CHAR(RAW)) &&
- (RAW != '>') &&
- ((RAW != '/') || (NXT(1) != '>'))) {
- const xmlChar *q = CUR_PTR;
- int cons = ctxt->input->consumed;
- attname = xmlParseAttribute(ctxt, &attvalue);
- if ((attname != NULL) && (attvalue != NULL)) {
- /*
- * [ WFC: Unique Att Spec ]
- * No attribute name may appear more than once in the same
- * start-tag or empty-element tag.
- */
- for (i = 0; i < nbatts;i += 2) {
- if (!xmlStrcmp(atts[i], attname)) {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "Attribute %s redefinedn",
- attname);
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
- xmlFree(attname);
- xmlFree(attvalue);
- goto failed;
- }
- }
- /*
- * Add the pair to atts
- */
- if (atts == NULL) {
- maxatts = 10;
- atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
- if (atts == NULL) {
- fprintf(stderr, "malloc of %ld byte failedn",
- maxatts * (long)sizeof(xmlChar *));
- return(NULL);
- }
- } else if (nbatts + 4 > maxatts) {
- maxatts *= 2;
- atts = (const xmlChar **) xmlRealloc(atts,
- maxatts * sizeof(xmlChar *));
- if (atts == NULL) {
- fprintf(stderr, "realloc of %ld byte failedn",
- maxatts * (long)sizeof(xmlChar *));
- return(NULL);
- }
- }
- atts[nbatts++] = attname;
- atts[nbatts++] = attvalue;
- atts[nbatts] = NULL;
- atts[nbatts + 1] = NULL;
- } else {
- if (attname != NULL)
- xmlFree(attname);
- if (attvalue != NULL)
- xmlFree(attvalue);
- }
- failed:
- if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
- break;
- if (!IS_BLANK(RAW)) {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "attributes construct errorn");
- ctxt->errNo = XML_ERR_SPACE_REQUIRED;
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- }
- SKIP_BLANKS;
- if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "xmlParseStartTag: problem parsing attributesn");
- ctxt->errNo = XML_ERR_INTERNAL_ERROR;
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- break;
- }
- GROW;
- }
- /*
- * SAX: Start of Element !
- */
- if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
- (!ctxt->disableSAX))
- ctxt->sax->startElement(ctxt->userData, name, atts);
- if (atts != NULL) {
- for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
- xmlFree(atts);
- }
- return(name);
- }
- /**
- * xmlParseEndTag:
- * @ctxt: an XML parser context
- *
- * parse an end of tag
- *
- * [42] ETag ::= '</' Name S? '>'
- *
- * With namespace
- *
- * [NS 9] ETag ::= '</' QName S? '>'
- */
- void
- xmlParseEndTag(xmlParserCtxtPtr ctxt) {
- xmlChar *name;
- xmlChar *oldname;
- GROW;
- if ((RAW != '<') || (NXT(1) != '/')) {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not foundn");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
- return;
- }
- SKIP(2);
- name = xmlParseName(ctxt);
- /*
- * We should definitely be at the ending "S? '>'" part
- */
- GROW;
- SKIP_BLANKS;
- if ((!IS_CHAR(RAW)) || (RAW != '>')) {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData, "End tag : expected '>'n");
- ctxt->errNo = XML_ERR_GT_REQUIRED;
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- } else
- NEXT;
- /*
- * [ WFC: Element Type Match ]
- * The Name in an element's end-tag must match the element type in the
- * start-tag.
- *
- */
- if ((name == NULL) || (ctxt->name == NULL) ||
- (xmlStrcmp(name, ctxt->name))) {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
- if ((name != NULL) && (ctxt->name != NULL)) {
- ctxt->sax->error(ctxt->userData,
- "Opening and ending tag mismatch: %s and %sn",
- ctxt->name, name);
- } else if (ctxt->name != NULL) {
- ctxt->sax->error(ctxt->userData,
- "Ending tag eror for: %sn", ctxt->name);
- } else {
- ctxt->sax->error(ctxt->userData,
- "Ending tag error: internal error ???n");
- }
- }
- ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- }
- /*
- * SAX: End of Tag
- */
- if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
- (!ctxt->disableSAX))
- ctxt->sax->endElement(ctxt->userData, name);
- if (name != NULL)
- xmlFree(name);
- oldname = namePop(ctxt);
- spacePop(ctxt);
- if (oldname != NULL) {
- #ifdef DEBUG_STACK
- fprintf(stderr,"Close: popped %sn", oldname);
- #endif
- xmlFree(oldname);
- }
- return;
- }
- /**
- * xmlParseCDSect:
- * @ctxt: an XML parser context
- *
- * Parse escaped pure raw content.
- *
- * [18] CDSect ::= CDStart CData CDEnd
- *
- * [19] CDStart ::= '<![CDATA['
- *
- * [20] Data ::= (Char* - (Char* ']]>' Char*))
- *
- * [21] CDEnd ::= ']]>'
- */
- void
- xmlParseCDSect(xmlParserCtxtPtr ctxt) {
- xmlChar *buf = NULL;
- int len = 0;
- int size = XML_PARSER_BUFFER_SIZE;
- int r, rl;
- int s, sl;
- int cur, l;
- if ((NXT(0) == '<') && (NXT(1) == '!') &&
- (NXT(2) == '[') && (NXT(3) == 'C') &&
- (NXT(4) == 'D') && (NXT(5) == 'A') &&
- (NXT(6) == 'T') && (NXT(7) == 'A') &&
- (NXT(8) == '[')) {
- SKIP(9);
- } else
- return;
- ctxt->instate = XML_PARSER_CDATA_SECTION;
- r = CUR_CHAR(rl);
- if (!IS_CHAR(r)) {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "CData section not finishedn");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
- ctxt->instate = XML_PARSER_CONTENT;
- return;
- }
- NEXTL(rl);
- s = CUR_CHAR(sl);
- if (!IS_CHAR(s)) {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "CData section not finishedn");
- ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->instate = XML_PARSER_CONTENT;
- return;
- }
- NEXTL(sl);
- cur = CUR_CHAR(l);
- buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
- if (buf == NULL) {
- fprintf(stderr, "malloc of %d byte failedn", size);
- return;
- }
- while (IS_CHAR(cur) &&
- ((r != ']') || (s != ']') || (cur != '>'))) {
- if (len + 5 >= size) {
- size *= 2;
- buf = xmlRealloc(buf, size * sizeof(xmlChar));
- if (buf == NULL) {
- fprintf(stderr, "realloc of %d byte failedn", size);
- return;
- }
- }
- COPY_BUF(rl,buf,len,r);
- r = s;
- rl = sl;
- s = cur;
- sl = l;
- NEXTL(l);
- cur = CUR_CHAR(l);
- }
- buf[len] = 0;
- ctxt->instate = XML_PARSER_CONTENT;
- if (cur != '>') {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "CData section not finishedn%.50sn", buf);
- ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- xmlFree(buf);
- return;
- }
- NEXTL(l);
- /*
- * Ok the buffer is to be consumed as cdata.
- */
- if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
- if (ctxt->sax->cdataBlock != NULL)
- ctxt->sax->cdataBlock(ctxt->userData, buf, len);
- }
- xmlFree(buf);
- }
- /**
- * xmlParseContent:
- * @ctxt: an XML parser context
- *
- * Parse a content:
- *
- * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
- */
- void
- xmlParseContent(xmlParserCtxtPtr ctxt) {
- GROW;
- while (((RAW != 0) || (ctxt->token != 0)) &&
- ((RAW != '<') || (NXT(1) != '/'))) {
- const xmlChar *test = CUR_PTR;
- int cons = ctxt->input->consumed;
- xmlChar tok = ctxt->token;
- /*
- * Handle possible processed charrefs.
- */
- if (ctxt->token != 0) {
- xmlParseCharData(ctxt, 0);
- }
- /*
- * First case : a Processing Instruction.
- */
- else if ((RAW == '<') && (NXT(1) == '?')) {
- xmlParsePI(ctxt);
- }
- /*
- * Second case : a CDSection
- */
- else if ((RAW == '<') && (NXT(1) == '!') &&
- (NXT(2) == '[') && (NXT(3) == 'C') &&
- (NXT(4) == 'D') && (NXT(5) == 'A') &&
- (NXT(6) == 'T') && (NXT(7) == 'A') &&
- (NXT(8) == '[')) {
- xmlParseCDSect(ctxt);
- }
- /*
- * Third case : a comment
- */
- else if ((RAW == '<') && (NXT(1) == '!') &&
- (NXT(2) == '-') && (NXT(3) == '-')) {
- xmlParseComment(ctxt);
- ctxt->instate = XML_PARSER_CONTENT;
- }
- /*
- * Fourth case : a sub-element.
- */
- else if (RAW == '<') {
- xmlParseElement(ctxt);
- }
- /*
- * Fifth case : a reference. If if has not been resolved,
- * parsing returns it's Name, create the node
- */
- else if (RAW == '&') {
- xmlParseReference(ctxt);
- }
- /*
- * Last case, text. Note that References are handled directly.
- */
- else {
- xmlParseCharData(ctxt, 0);
- }
- GROW;
- /*
- * Pop-up of finished entities.
- */
- while ((RAW == 0) && (ctxt->inputNr > 1))
- xmlPopInput(ctxt);
- SHRINK;
- if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
- (tok == ctxt->token)) {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "detected an error in element contentn");
- ctxt->errNo = XML_ERR_INTERNAL_ERROR;
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- break;
- }
- }
- }
- /**
- * xmlParseElement:
- * @ctxt: an XML parser context
- *
- * parse an XML element, this is highly recursive
- *
- * [39] element ::= EmptyElemTag | STag content ETag
- *
- * [ WFC: Element Type Match ]
- * The Name in an element's end-tag must match the element type in the
- * start-tag.
- *
- * [ VC: Element Valid ]
- * An element is valid if there is a declaration matching elementdecl
- * where the Name matches the element type and one of the following holds:
- * - The declaration matches EMPTY and the element has no content.
- * - The declaration matches children and the sequence of child elements
- * belongs to the language generated by the regular expression in the
- * content model, with optional white space (characters matching the
- * nonterminal S) between each pair of child elements.
- * - The declaration matches Mixed and the content consists of character
- * data and child elements whose types match names in the content model.
- * - The declaration matches ANY, and the types of any child elements have
- * been declared.
- */
- void
- xmlParseElement(xmlParserCtxtPtr ctxt) {
- const xmlChar *openTag = CUR_PTR;
- xmlChar *name;
- xmlChar *oldname;
- xmlParserNodeInfo node_info;
- xmlNodePtr ret;
- /* Capture start position */
- if (ctxt->record_info) {
- node_info.begin_pos = ctxt->input->consumed +
- (CUR_PTR - ctxt->input->base);
- node_info.begin_line = ctxt->input->line;
- }
- if (ctxt->spaceNr == 0)
- spacePush(ctxt, -1);
- else
- spacePush(ctxt, *ctxt->space);
- name = xmlParseStartTag(ctxt);
- if (name == NULL) {
- spacePop(ctxt);
- return;
- }
- namePush(ctxt, name);
- ret = ctxt->node;
- /*
- * [ VC: Root Element Type ]
- * The Name in the document type declaration must match the element
- * type of the root element.
- */
- if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
- ctxt->node && (ctxt->node == ctxt->myDoc->children))
- ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
- /*
- * Check for an Empty Element.
- */
- if ((RAW == '/') && (NXT(1) == '>')) {
- SKIP(2);
- if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
- (!ctxt->disableSAX))
- ctxt->sax->endElement(ctxt->userData, name);
- oldname = namePop(ctxt);
- spacePop(ctxt);
- if (oldname != NULL) {
- #ifdef DEBUG_STACK
- fprintf(stderr,"Close: popped %sn", oldname);
- #endif
- xmlFree(oldname);
- }
- return;
- }
- if (RAW == '>') {
- NEXT;
- } else {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "Couldn't find end of Start Tagn%.30sn",
- openTag);
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->errNo = XML_ERR_GT_REQUIRED;
- /*
- * end of parsing of this node.
- */
- nodePop(ctxt);
- oldname = namePop(ctxt);
- spacePop(ctxt);
- if (oldname != NULL) {
- #ifdef DEBUG_STACK
- fprintf(stderr,"Close: popped %sn", oldname);
- #endif
- xmlFree(oldname);
- }
- /*
- * Capture end position and add node
- */
- if ( ret != NULL && ctxt->record_info ) {
- node_info.end_pos = ctxt->input->consumed +
- (CUR_PTR - ctxt->input->base);
- node_info.end_line = ctxt->input->line;
- node_info.node = ret;
- xmlParserAddNodeInfo(ctxt, &node_info);
- }
- return;
- }
- /*
- * Parse the content of the element:
- */
- xmlParseContent(ctxt);
- if (!IS_CHAR(RAW)) {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "Premature end of data in tag %.30sn", openTag);
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
- /*
- * end of parsing of this node.
- */
- nodePop(ctxt);
- oldname = namePop(ctxt);
- spacePop(ctxt);
- if (oldname != NULL) {
- #ifdef DEBUG_STACK
- fprintf(stderr,"Close: popped %sn", oldname);
- #endif
- xmlFree(oldname);
- }
- return;
- }
- /*
- * parse the end of tag: '</' should be here.
- */
- xmlParseEndTag(ctxt);
- /*
- * Capture end position and add node
- */
- if ( ret != NULL && ctxt->record_info ) {
- node_info.end_pos = ctxt->input->consumed +
- (CUR_PTR - ctxt->input->base);
- node_info.end_line = ctxt->input->line;
- node_info.node = ret;
- xmlParserAddNodeInfo(ctxt, &node_info);
- }
- }
- /**
- * xmlParseVersionNum:
- * @ctxt: an XML parser context
- *
- * parse the XML version value.
- *
- * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
- *
- * Returns the string giving the XML version number, or NULL
- */
- xmlChar *
- xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
- xmlChar *buf = NULL;
- int len = 0;
- int size = 10;
- xmlChar cur;
- buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
- if (buf == NULL) {
- fprintf(stderr, "malloc of %d byte failedn", size);
- return(NULL);
- }
- cur = CUR;
- while (((cur >= 'a') && (cur <= 'z')) ||
- ((cur >= 'A') && (cur <= 'Z')) ||
- ((cur >= '0') && (cur <= '9')) ||
- (cur == '_') || (cur == '.') ||
- (cur == ':') || (cur == '-')) {
- if (len + 1 >= size) {
- size *= 2;
- buf = xmlRealloc(buf, size * sizeof(xmlChar));
- if (buf == NULL) {
- fprintf(stderr, "realloc of %d byte failedn", size);
- return(NULL);
- }
- }
- buf[len++] = cur;
- NEXT;
- cur=CUR;
- }
- buf[len] = 0;
- return(buf);
- }
- /**
- * xmlParseVersionInfo:
- * @ctxt: an XML parser context
- *
- * parse the XML version.
- *
- * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
- *
- * [25] Eq ::= S? '=' S?
- *
- * Returns the version string, e.g. "1.0"
- */
- xmlChar *
- xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
- xmlChar *version = NULL;
- const xmlChar *q;
- if ((RAW == 'v') && (NXT(1) == 'e') &&
- (NXT(2) == 'r') && (NXT(3) == 's') &&
- (NXT(4) == 'i') && (NXT(5) == 'o') &&
- (NXT(6) == 'n')) {
- SKIP(7);
- SKIP_BLANKS;
- if (RAW != '=') {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "xmlParseVersionInfo : expected '='n");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
- return(NULL);
- }
- NEXT;
- SKIP_BLANKS;
- if (RAW == '"') {
- NEXT;
- q = CUR_PTR;
- version = xmlParseVersionNum(ctxt);
- if (RAW != '"') {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "String not closedn%.50sn", q);
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
- } else
- NEXT;
- } else if (RAW == '''){
- NEXT;
- q = CUR_PTR;
- version = xmlParseVersionNum(ctxt);
- if (RAW != ''') {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "String not closedn%.50sn", q);
- ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- } else
- NEXT;
- } else {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "xmlParseVersionInfo : expected ' or "n");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
- }
- }
- return(version);
- }
- /**
- * xmlParseEncName:
- * @ctxt: an XML parser context
- *
- * parse the XML encoding name
- *
- * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
- *
- * Returns the encoding name value or NULL
- */
- xmlChar *
- xmlParseEncName(xmlParserCtxtPtr ctxt) {
- xmlChar *buf = NULL;
- int len = 0;
- int size = 10;
- xmlChar cur;
- cur = CUR;
- if (((cur >= 'a') && (cur <= 'z')) ||
- ((cur >= 'A') && (cur <= 'Z'))) {
- buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
- if (buf == NULL) {
- fprintf(stderr, "malloc of %d byte failedn", size);
- return(NULL);
- }
-
- buf[len++] = cur;
- NEXT;
- cur = CUR;
- while (((cur >= 'a') && (cur <= 'z')) ||
- ((cur >= 'A') && (cur <= 'Z')) ||
- ((cur >= '0') && (cur <= '9')) ||
- (cur == '.') || (cur == '_') ||
- (cur == '-')) {
- if (len + 1 >= size) {
- size *= 2;
- buf = xmlRealloc(buf, size * sizeof(xmlChar));
- if (buf == NULL) {
- fprintf(stderr, "realloc of %d byte failedn", size);
- return(NULL);
- }
- }
- buf[len++] = cur;
- NEXT;
- cur = CUR;
- if (cur == 0) {
- SHRINK;
- GROW;
- cur = CUR;
- }
- }
- buf[len] = 0;
- } else {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData, "Invalid XML encoding namen");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->errNo = XML_ERR_ENCODING_NAME;
- }
- return(buf);
- }
- /**
- * xmlParseEncodingDecl:
- * @ctxt: an XML parser context
- *
- * parse the XML encoding declaration
- *
- * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
- *
- * TODO: this should setup the conversion filters.
- *
- * Returns the encoding value or NULL
- */
- xmlChar *
- xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
- xmlChar *encoding = NULL;
- const xmlChar *q;
- SKIP_BLANKS;
- if ((RAW == 'e') && (NXT(1) == 'n') &&
- (NXT(2) == 'c') && (NXT(3) == 'o') &&
- (NXT(4) == 'd') && (NXT(5) == 'i') &&
- (NXT(6) == 'n') && (NXT(7) == 'g')) {
- SKIP(8);
- SKIP_BLANKS;
- if (RAW != '=') {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "xmlParseEncodingDecl : expected '='n");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
- return(NULL);
- }
- NEXT;
- SKIP_BLANKS;
- if (RAW == '"') {
- NEXT;
- q = CUR_PTR;
- encoding = xmlParseEncName(ctxt);
- if (RAW != '"') {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "String not closedn%.50sn", q);
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
- } else
- NEXT;
- } else if (RAW == '''){
- NEXT;
- q = CUR_PTR;
- encoding = xmlParseEncName(ctxt);
- if (RAW != ''') {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "String not closedn%.50sn", q);
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
- } else
- NEXT;
- } else if (RAW == '"'){
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "xmlParseEncodingDecl : expected ' or "n");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
- }
- }
- return(encoding);
- }
- /**
- * xmlParseSDDecl:
- * @ctxt: an XML parser context
- *
- * parse the XML standalone declaration
- *
- * [32] SDDecl ::= S 'standalone' Eq
- * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
- *
- * [ VC: Standalone Document Declaration ]
- * TODO The standalone document declaration must have the value "no"
- * if any external markup declarations contain declarations of:
- * - attributes with default values, if elements to which these
- * attributes apply appear in the document without specifications
- * of values for these attributes, or
- * - entities (other than amp, lt, gt, apos, quot), if references
- * to those entities appear in the document, or
- * - attributes with values subject to normalization, where the
- * attribute appears in the document with a value which will change
- * as a result of normalization, or
- * - element types with element content, if white space occurs directly
- * within any instance of those types.
- *
- * Returns 1 if standalone, 0 otherwise
- */
- int
- xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
- int standalone = -1;
- SKIP_BLANKS;
- if ((RAW == 's') && (NXT(1) == 't') &&
- (NXT(2) == 'a') && (NXT(3) == 'n') &&
- (NXT(4) == 'd') && (NXT(5) == 'a') &&
- (NXT(6) == 'l') && (NXT(7) == 'o') &&
- (NXT(8) == 'n') && (NXT(9) == 'e')) {
- SKIP(10);
- SKIP_BLANKS;
- if (RAW != '=') {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "XML standalone declaration : expected '='n");
- ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- return(standalone);
- }
- NEXT;
- SKIP_BLANKS;
- if (RAW == '''){
- NEXT;
- if ((RAW == 'n') && (NXT(1) == 'o')) {
- standalone = 0;
- SKIP(2);
- } else if ((RAW == 'y') && (NXT(1) == 'e') &&
- (NXT(2) == 's')) {
- standalone = 1;
- SKIP(3);
- } else {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "standalone accepts only 'yes' or 'no'n");
- ctxt->errNo = XML_ERR_STANDALONE_VALUE;
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- }
- if (RAW != ''') {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData, "String not closedn");
- ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- } else
- NEXT;
- } else if (RAW == '"'){
- NEXT;
- if ((RAW == 'n') && (NXT(1) == 'o')) {
- standalone = 0;
- SKIP(2);
- } else if ((RAW == 'y') && (NXT(1) == 'e') &&
- (NXT(2) == 's')) {
- standalone = 1;
- SKIP(3);
- } else {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "standalone accepts only 'yes' or 'no'n");
- ctxt->errNo = XML_ERR_STANDALONE_VALUE;
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- }
- if (RAW != '"') {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData, "String not closedn");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
- } else
- NEXT;
- } else {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "Standalone value not foundn");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
- }
- }
- return(standalone);
- }
- /**
- * xmlParseXMLDecl:
- * @ctxt: an XML parser context
- *
- * parse an XML declaration header
- *
- * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
- */
- void
- xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
- xmlChar *version;
- /*
- * We know that '<?xml' is here.
- */
- SKIP(5);
- if (!IS_BLANK(RAW)) {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'n");
- ctxt->errNo = XML_ERR_SPACE_REQUIRED;
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- }
- SKIP_BLANKS;
- /*
- * We should have the VersionInfo here.
- */
- version = xmlParseVersionInfo(ctxt);
- if (version == NULL)
- version = xmlCharStrdup(XML_DEFAULT_VERSION);
- ctxt->version = xmlStrdup(version);
- xmlFree(version);
- /*
- * We may have the encoding declaration
- */
- if (!IS_BLANK(RAW)) {
- if ((RAW == '?') && (NXT(1) == '>')) {
- SKIP(2);
- return;
- }
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData, "Blank needed heren");
- ctxt->errNo = XML_ERR_SPACE_REQUIRED;
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- }
- ctxt->input->encoding = xmlParseEncodingDecl(ctxt);
- /*
- * We may have the standalone status.
- */
- if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
- if ((RAW == '?') && (NXT(1) == '>')) {
- SKIP(2);
- return;
- }
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData, "Blank needed heren");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->errNo = XML_ERR_SPACE_REQUIRED;
- }
- SKIP_BLANKS;
- ctxt->input->standalone = xmlParseSDDecl(ctxt);
- SKIP_BLANKS;
- if ((RAW == '?') && (NXT(1) == '>')) {
- SKIP(2);
- } else if (RAW == '>') {
- /* Deprecated old WD ... */
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "XML declaration must end-up with '?>'n");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
- NEXT;
- } else {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "parsing XML declaration: '?>' expectedn");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
- MOVETO_ENDTAG(CUR_PTR);
- NEXT;
- }
- }
- /**
- * xmlParseMisc:
- * @ctxt: an XML parser context
- *
- * parse an XML Misc* optionnal field.
- *
- * [27] Misc ::= Comment | PI | S
- */
- void
- xmlParseMisc(xmlParserCtxtPtr ctxt) {
- while (((RAW == '<') && (NXT(1) == '?')) ||
- ((RAW == '<') && (NXT(1) == '!') &&
- (NXT(2) == '-') && (NXT(3) == '-')) ||
- IS_BLANK(CUR)) {
- if ((RAW == '<') && (NXT(1) == '?')) {
- xmlParsePI(ctxt);
- } else if (IS_BLANK(CUR)) {
- NEXT;
- } else
- xmlParseComment(ctxt);
- }
- }
- /**
- * xmlParseDocument:
- * @ctxt: an XML parser context
- *
- * parse an XML document (and build a tree if using the standard SAX
- * interface).
- *
- * [1] document ::= prolog element Misc*
- *
- * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
- *
- * Returns 0, -1 in case of error. the parser context is augmented
- * as a result of the parsing.
- */
- int
- xmlParseDocument(xmlParserCtxtPtr ctxt) {
- xmlChar start[4];
- xmlCharEncoding enc;
- xmlDefaultSAXHandlerInit();
- GROW;
- /*
- * SAX: beginning of the document processing.
- */
- if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
- ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
- /*
- * Get the 4 first bytes and decode the charset
- * if enc != XML_CHAR_ENCODING_NONE
- * plug some encoding conversion routines.
- */
- start[0] = RAW;
- start[1] = NXT(1);
- start[2] = NXT(2);
- start[3] = NXT(3);
- enc = xmlDetectCharEncoding(start, 4);
- if (enc != XML_CHAR_ENCODING_NONE) {
- xmlSwitchEncoding(ctxt, enc);
- }
- if (CUR == 0) {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData, "Document is emptyn");
- ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- }
- /*
- * Check for the XMLDecl in the Prolog.
- */
- GROW;
- if ((RAW == '<') && (NXT(1) == '?') &&
- (NXT(2) == 'x') && (NXT(3) == 'm') &&
- (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
- xmlParseXMLDecl(ctxt);
- ctxt->standalone = ctxt->input->standalone;
- SKIP_BLANKS;
- if ((ctxt->encoding == NULL) && (ctxt->input->encoding != NULL))
- ctxt->encoding = xmlStrdup(ctxt->input->encoding);
- } else {
- ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
- }
- if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
- ctxt->sax->startDocument(ctxt->userData);
- /*
- * The Misc part of the Prolog
- */
- GROW;
- xmlParseMisc(ctxt);
- /*
- * Then possibly doc type declaration(s) and more Misc
- * (doctypedecl Misc*)?
- */
- GROW;
- if ((RAW == '<') && (NXT(1) == '!') &&
- (NXT(2) == 'D') && (NXT(3) == 'O') &&
- (NXT(4) == 'C') && (NXT(5) == 'T') &&
- (NXT(6) == 'Y') && (NXT(7) == 'P') &&
- (NXT(8) == 'E')) {
- ctxt->inSubset = 1;
- xmlParseDocTypeDecl(ctxt);
- if (RAW == '[') {
- ctxt->instate = XML_PARSER_DTD;
- xmlParseInternalSubset(ctxt);
- }
- /*
- * Create and update the external subset.
- */
- ctxt->inSubset = 2;
- if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
- (!ctxt->disableSAX))
- ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
- ctxt->extSubSystem, ctxt->extSubURI);
- ctxt->inSubset = 0;
- ctxt->instate = XML_PARSER_PROLOG;
- xmlParseMisc(ctxt);
- }
- /*
- * Time to start parsing the tree itself
- */
- GROW;
- if (RAW != '<') {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "Start tag expected, '<' not foundn");
- ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->instate = XML_PARSER_EOF;
- } else {
- ctxt->instate = XML_PARSER_CONTENT;
- xmlParseElement(ctxt);
- ctxt->instate = XML_PARSER_EPILOG;
- /*
- * The Misc part at the end
- */
- xmlParseMisc(ctxt);
- if (RAW != 0) {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "Extra content at the end of the documentn");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->errNo = XML_ERR_DOCUMENT_END;
- }
- ctxt->instate = XML_PARSER_EOF;
- }
- /*
- * SAX: end of the document processing.
- */
- if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
- (!ctxt->disableSAX))
- ctxt->sax->endDocument(ctxt->userData);
- /*
- * Grab the encoding if it was added on-the-fly
- */
- if ((ctxt->encoding != NULL) && (ctxt->myDoc != NULL) &&
- (ctxt->myDoc->encoding == NULL)) {
- ctxt->myDoc->encoding = ctxt->encoding;
- ctxt->encoding = NULL;
- }
- if (! ctxt->wellFormed) return(-1);
- return(0);
- }
- /************************************************************************
- * *
- * Progressive parsing interfaces *
- * *
- ************************************************************************/
- /**
- * xmlParseLookupSequence:
- * @ctxt: an XML parser context
- * @first: the first char to lookup
- * @next: the next char to lookup or zero
- * @third: the next char to lookup or zero
- *
- * Try to find if a sequence (first, next, third) or just (first next) or
- * (first) is available in the input stream.
- * This function has a side effect of (possibly) incrementing ctxt->checkIndex
- * to avoid rescanning sequences of bytes, it DOES change the state of the
- * parser, do not use liberally.
- *
- * Returns the index to the current parsing point if the full sequence
- * is available, -1 otherwise.
- */
- int
- xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
- xmlChar next, xmlChar third) {
- int base, len;
- xmlParserInputPtr in;
- const xmlChar *buf;
- in = ctxt->input;
- if (in == NULL) return(-1);
- base = in->cur - in->base;
- if (base < 0) return(-1);
- if (ctxt->checkIndex > base)
- base = ctxt->checkIndex;
- if (in->buf == NULL) {
- buf = in->base;
- len = in->length;
- } else {
- buf = in->buf->buffer->content;
- len = in->buf->buffer->use;
- }
- /* take into account the sequence length */
- if (third) len -= 2;
- else if (next) len --;
- for (;base < len;base++) {
- if (buf[base] == first) {
- if (third != 0) {
- if ((buf[base + 1] != next) ||
- (buf[base + 2] != third)) continue;
- } else if (next != 0) {
- if (buf[base + 1] != next) continue;
- }
- ctxt->checkIndex = 0;
- #ifdef DEBUG_PUSH
- if (next == 0)
- fprintf(stderr, "PP: lookup '%c' found at %dn",
- first, base);
- else if (third == 0)
- fprintf(stderr, "PP: lookup '%c%c' found at %dn",
- first, next, base);
- else
- fprintf(stderr, "PP: lookup '%c%c%c' found at %dn",
- first, next, third, base);
- #endif
- return(base - (in->cur - in->base));
- }
- }
- ctxt->checkIndex = base;
- #ifdef DEBUG_PUSH
- if (next == 0)
- fprintf(stderr, "PP: lookup '%c' failedn", first);
- else if (third == 0)
- fprintf(stderr, "PP: lookup '%c%c' failedn", first, next);
- else
- fprintf(stderr, "PP: lookup '%c%c%c' failedn", first, next, third);
- #endif
- return(-1);
- }
- /**
- * xmlParseTryOrFinish:
- * @ctxt: an XML parser context
- * @terminate: last chunk indicator
- *
- * Try to progress on parsing
- *
- * Returns zero if no parsing was possible
- */
- int
- xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
- int ret = 0;
- int avail;
- xmlChar cur, next;
- #ifdef DEBUG_PUSH
- switch (ctxt->instate) {
- case XML_PARSER_EOF:
- fprintf(stderr, "PP: try EOFn"); break;
- case XML_PARSER_START:
- fprintf(stderr, "PP: try STARTn"); break;
- case XML_PARSER_MISC:
- fprintf(stderr, "PP: try MISCn");break;
- case XML_PARSER_COMMENT:
- fprintf(stderr, "PP: try COMMENTn");break;
- case XML_PARSER_PROLOG:
- fprintf(stderr, "PP: try PROLOGn");break;
- case XML_PARSER_START_TAG:
- fprintf(stderr, "PP: try START_TAGn");break;
- case XML_PARSER_CONTENT:
- fprintf(stderr, "PP: try CONTENTn");break;
- case XML_PARSER_CDATA_SECTION:
- fprintf(stderr, "PP: try CDATA_SECTIONn");break;
- case XML_PARSER_END_TAG:
- fprintf(stderr, "PP: try END_TAGn");break;
- case XML_PARSER_ENTITY_DECL:
- fprintf(stderr, "PP: try ENTITY_DECLn");break;
- case XML_PARSER_ENTITY_VALUE:
- fprintf(stderr, "PP: try ENTITY_VALUEn");break;
- case XML_PARSER_ATTRIBUTE_VALUE:
- fprintf(stderr, "PP: try ATTRIBUTE_VALUEn");break;
- case XML_PARSER_DTD:
- fprintf(stderr, "PP: try DTDn");break;
- case XML_PARSER_EPILOG:
- fprintf(stderr, "PP: try EPILOGn");break;
- case XML_PARSER_PI:
- fprintf(stderr, "PP: try PIn");break;
- }
- #endif
- while (1) {
- /*
- * Pop-up of finished entities.
- */
- while ((RAW == 0) && (ctxt->inputNr > 1))
- xmlPopInput(ctxt);
- if (ctxt->input ==NULL) break;
- if (ctxt->input->buf == NULL)
- avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
- else
- avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
- if (avail < 1)
- goto done;
- switch (ctxt->instate) {
- case XML_PARSER_EOF:
- /*
- * Document parsing is done !
- */
- goto done;
- case XML_PARSER_START:
- /*
- * Very first chars read from the document flow.
- */
- cur = ctxt->input->cur[0];
- if (IS_BLANK(cur)) {
- if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
- ctxt->sax->setDocumentLocator(ctxt->userData,
- &xmlDefaultSAXLocator);
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "Extra spaces at the beginning of the document are not allowedn");
- ctxt->errNo = XML_ERR_DOCUMENT_START;
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- SKIP_BLANKS;
- ret++;
- if (ctxt->input->buf == NULL)
- avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
- else
- avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
- }
- if (avail < 2)
- goto done;
- cur = ctxt->input->cur[0];
- next = ctxt->input->cur[1];
- if (cur == 0) {
- if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
- ctxt->sax->setDocumentLocator(ctxt->userData,
- &xmlDefaultSAXLocator);
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData, "Document is emptyn");
- ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->instate = XML_PARSER_EOF;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: entering EOFn");
- #endif
- if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
- ctxt->sax->endDocument(ctxt->userData);
- goto done;
- }
- if ((cur == '<') && (next == '?')) {
- /* PI or XML decl */
- if (avail < 5) return(ret);
- if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
- return(ret);
- if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
- ctxt->sax->setDocumentLocator(ctxt->userData,
- &xmlDefaultSAXLocator);
- if ((ctxt->input->cur[2] == 'x') &&
- (ctxt->input->cur[3] == 'm') &&
- (ctxt->input->cur[4] == 'l') &&
- (IS_BLANK(ctxt->input->cur[5]))) {
- ret += 5;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: Parsing XML Decln");
- #endif
- xmlParseXMLDecl(ctxt);
- ctxt->standalone = ctxt->input->standalone;
- if ((ctxt->encoding == NULL) &&
- (ctxt->input->encoding != NULL))
- ctxt->encoding = xmlStrdup(ctxt->input->encoding);
- if ((ctxt->sax) && (ctxt->sax->startDocument) &&
- (!ctxt->disableSAX))
- ctxt->sax->startDocument(ctxt->userData);
- ctxt->instate = XML_PARSER_MISC;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: entering MISCn");
- #endif
- } else {
- ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
- if ((ctxt->sax) && (ctxt->sax->startDocument) &&
- (!ctxt->disableSAX))
- ctxt->sax->startDocument(ctxt->userData);
- ctxt->instate = XML_PARSER_MISC;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: entering MISCn");
- #endif
- }
- } else {
- if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
- ctxt->sax->setDocumentLocator(ctxt->userData,
- &xmlDefaultSAXLocator);
- ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
- if ((ctxt->sax) && (ctxt->sax->startDocument) &&
- (!ctxt->disableSAX))
- ctxt->sax->startDocument(ctxt->userData);
- ctxt->instate = XML_PARSER_MISC;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: entering MISCn");
- #endif
- }
- break;
- case XML_PARSER_MISC:
- SKIP_BLANKS;
- if (ctxt->input->buf == NULL)
- avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
- else
- avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
- if (avail < 2)
- goto done;
- cur = ctxt->input->cur[0];
- next = ctxt->input->cur[1];
- if ((cur == '<') && (next == '?')) {
- if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
- goto done;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: Parsing PIn");
- #endif
- xmlParsePI(ctxt);
- } else if ((cur == '<') && (next == '!') &&
- (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
- if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
- goto done;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: Parsing Commentn");
- #endif
- xmlParseComment(ctxt);
- ctxt->instate = XML_PARSER_MISC;
- } else if ((cur == '<') && (next == '!') &&
- (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
- (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
- (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
- (ctxt->input->cur[8] == 'E')) {
- if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
- goto done;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: Parsing internal subsetn");
- #endif
- ctxt->inSubset = 1;
- xmlParseDocTypeDecl(ctxt);
- if (RAW == '[') {
- ctxt->instate = XML_PARSER_DTD;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: entering DTDn");
- #endif
- } else {
- /*
- * Create and update the external subset.
- */
- ctxt->inSubset = 2;
- if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
- (ctxt->sax->externalSubset != NULL))
- ctxt->sax->externalSubset(ctxt->userData,
- ctxt->intSubName, ctxt->extSubSystem,
- ctxt->extSubURI);
- ctxt->inSubset = 0;
- ctxt->instate = XML_PARSER_PROLOG;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: entering PROLOGn");
- #endif
- }
- } else if ((cur == '<') && (next == '!') &&
- (avail < 9)) {
- goto done;
- } else {
- ctxt->instate = XML_PARSER_START_TAG;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: entering START_TAGn");
- #endif
- }
- break;
- case XML_PARSER_PROLOG:
- SKIP_BLANKS;
- if (ctxt->input->buf == NULL)
- avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
- else
- avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
- if (avail < 2)
- goto done;
- cur = ctxt->input->cur[0];
- next = ctxt->input->cur[1];
- if ((cur == '<') && (next == '?')) {
- if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
- goto done;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: Parsing PIn");
- #endif
- xmlParsePI(ctxt);
- } else if ((cur == '<') && (next == '!') &&
- (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
- if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
- goto done;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: Parsing Commentn");
- #endif
- xmlParseComment(ctxt);
- ctxt->instate = XML_PARSER_PROLOG;
- } else if ((cur == '<') && (next == '!') &&
- (avail < 4)) {
- goto done;
- } else {
- ctxt->instate = XML_PARSER_START_TAG;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: entering START_TAGn");
- #endif
- }
- break;
- case XML_PARSER_EPILOG:
- SKIP_BLANKS;
- if (ctxt->input->buf == NULL)
- avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
- else
- avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
- if (avail < 2)
- goto done;
- cur = ctxt->input->cur[0];
- next = ctxt->input->cur[1];
- if ((cur == '<') && (next == '?')) {
- if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
- goto done;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: Parsing PIn");
- #endif
- xmlParsePI(ctxt);
- ctxt->instate = XML_PARSER_EPILOG;
- } else if ((cur == '<') && (next == '!') &&
- (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
- if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
- goto done;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: Parsing Commentn");
- #endif
- xmlParseComment(ctxt);
- ctxt->instate = XML_PARSER_EPILOG;
- } else if ((cur == '<') && (next == '!') &&
- (avail < 4)) {
- goto done;
- } else {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "Extra content at the end of the documentn");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->errNo = XML_ERR_DOCUMENT_END;
- ctxt->instate = XML_PARSER_EOF;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: entering EOFn");
- #endif
- if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
- (!ctxt->disableSAX))
- ctxt->sax->endDocument(ctxt->userData);
- goto done;
- }
- break;
- case XML_PARSER_START_TAG: {
- xmlChar *name, *oldname;
- if ((avail < 2) && (ctxt->inputNr == 1))
- goto done;
- cur = ctxt->input->cur[0];
- if (cur != '<') {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "Start tag expect, '<' not foundn");
- ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->instate = XML_PARSER_EOF;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: entering EOFn");
- #endif
- if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
- (!ctxt->disableSAX))
- ctxt->sax->endDocument(ctxt->userData);
- goto done;
- }
- if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
- goto done;
- if (ctxt->spaceNr == 0)
- spacePush(ctxt, -1);
- else
- spacePush(ctxt, *ctxt->space);
- name = xmlParseStartTag(ctxt);
- if (name == NULL) {
- spacePop(ctxt);
- ctxt->instate = XML_PARSER_EOF;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: entering EOFn");
- #endif
- if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
- (!ctxt->disableSAX))
- ctxt->sax->endDocument(ctxt->userData);
- goto done;
- }
- namePush(ctxt, xmlStrdup(name));
- /*
- * [ VC: Root Element Type ]
- * The Name in the document type declaration must match
- * the element type of the root element.
- */
- if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
- ctxt->node && (ctxt->node == ctxt->myDoc->children))
- ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
- /*
- * Check for an Empty Element.
- */
- if ((RAW == '/') && (NXT(1) == '>')) {
- SKIP(2);
- if ((ctxt->sax != NULL) &&
- (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
- ctxt->sax->endElement(ctxt->userData, name);
- xmlFree(name);
- oldname = namePop(ctxt);
- spacePop(ctxt);
- if (oldname != NULL) {
- #ifdef DEBUG_STACK
- fprintf(stderr,"Close: popped %sn", oldname);
- #endif
- xmlFree(oldname);
- }
- if (ctxt->name == NULL) {
- ctxt->instate = XML_PARSER_EPILOG;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: entering EPILOGn");
- #endif
- } else {
- ctxt->instate = XML_PARSER_CONTENT;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: entering CONTENTn");
- #endif
- }
- break;
- }
- if (RAW == '>') {
- NEXT;
- } else {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "Couldn't find end of Start Tag %sn",
- name);
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->errNo = XML_ERR_GT_REQUIRED;
- /*
- * end of parsing of this node.
- */
- nodePop(ctxt);
- oldname = namePop(ctxt);
- spacePop(ctxt);
- if (oldname != NULL) {
- #ifdef DEBUG_STACK
- fprintf(stderr,"Close: popped %sn", oldname);
- #endif
- xmlFree(oldname);
- }
- }
- xmlFree(name);
- ctxt->instate = XML_PARSER_CONTENT;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: entering CONTENTn");
- #endif
- break;
- }
- case XML_PARSER_CONTENT:
- /*
- * Handle preparsed entities and charRef
- */
- if (ctxt->token != 0) {
- xmlChar cur[2] = { 0 , 0 } ;
- cur[0] = (xmlChar) ctxt->token;
- if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
- (ctxt->sax->characters != NULL))
- ctxt->sax->characters(ctxt->userData, cur, 1);
- ctxt->token = 0;
- }
- if ((avail < 2) && (ctxt->inputNr == 1))
- goto done;
- cur = ctxt->input->cur[0];
- next = ctxt->input->cur[1];
- if ((cur == '<') && (next == '?')) {
- if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
- goto done;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: Parsing PIn");
- #endif
- xmlParsePI(ctxt);
- } else if ((cur == '<') && (next == '!') &&
- (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
- if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
- goto done;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: Parsing Commentn");
- #endif
- xmlParseComment(ctxt);
- ctxt->instate = XML_PARSER_CONTENT;
- } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
- (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
- (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
- (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
- (ctxt->input->cur[8] == '[')) {
- SKIP(9);
- ctxt->instate = XML_PARSER_CDATA_SECTION;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: entering CDATA_SECTIONn");
- #endif
- break;
- } else if ((cur == '<') && (next == '!') &&
- (avail < 9)) {
- goto done;
- } else if ((cur == '<') && (next == '/')) {
- ctxt->instate = XML_PARSER_END_TAG;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: entering END_TAGn");
- #endif
- break;
- } else if (cur == '<') {
- ctxt->instate = XML_PARSER_START_TAG;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: entering START_TAGn");
- #endif
- break;
- } else if (cur == '&') {
- if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
- goto done;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: Parsing Referencen");
- #endif
- /* TODO: check generation of subtrees if noent !!! */
- xmlParseReference(ctxt);
- } else {
- /* TODO Avoid the extra copy, handle directly !!! */
- /*
- * Goal of the following test is:
- * - minimize calls to the SAX 'character' callback
- * when they are mergeable
- * - handle an problem for isBlank when we only parse
- * a sequence of blank chars and the next one is
- * not available to check against '<' presence.
- * - tries to homogenize the differences in SAX
- * callbacks beween the push and pull versions
- * of the parser.
- */
- if ((ctxt->inputNr == 1) &&
- (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
- if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
- goto done;
- }
- ctxt->checkIndex = 0;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: Parsing char datan");
- #endif
- xmlParseCharData(ctxt, 0);
- }
- /*
- * Pop-up of finished entities.
- */
- while ((RAW == 0) && (ctxt->inputNr > 1))
- xmlPopInput(ctxt);
- break;
- case XML_PARSER_CDATA_SECTION: {
- /*
- * The Push mode need to have the SAX callback for
- * cdataBlock merge back contiguous callbacks.
- */
- int base;
- base = xmlParseLookupSequence(ctxt, ']', ']', '>');
- if (base < 0) {
- if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
- if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
- if (ctxt->sax->cdataBlock != NULL)
- ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
- XML_PARSER_BIG_BUFFER_SIZE);
- }
- SKIP(XML_PARSER_BIG_BUFFER_SIZE);
- ctxt->checkIndex = 0;
- }
- goto done;
- } else {
- if ((ctxt->sax != NULL) && (base > 0) &&
- (!ctxt->disableSAX)) {
- if (ctxt->sax->cdataBlock != NULL)
- ctxt->sax->cdataBlock(ctxt->userData,
- ctxt->input->cur, base);
- }
- SKIP(base + 3);
- ctxt->checkIndex = 0;
- ctxt->instate = XML_PARSER_CONTENT;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: entering CONTENTn");
- #endif
- }
- break;
- }
- case XML_PARSER_END_TAG:
- if (avail < 2)
- goto done;
- if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
- goto done;
- xmlParseEndTag(ctxt);
- if (ctxt->name == NULL) {
- ctxt->instate = XML_PARSER_EPILOG;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: entering EPILOGn");
- #endif
- } else {
- ctxt->instate = XML_PARSER_CONTENT;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: entering CONTENTn");
- #endif
- }
- break;
- case XML_PARSER_DTD: {
- /*
- * Sorry but progressive parsing of the internal subset
- * is not expected to be supported. We first check that
- * the full content of the internal subset is available and
- * the parsing is launched only at that point.
- * Internal subset ends up with "']' S? '>'" in an unescaped
- * section and not in a ']]>' sequence which are conditional
- * sections (whoever argued to keep that crap in XML deserve
- * a place in hell !).
- */
- int base, i;
- xmlChar *buf;
- xmlChar quote = 0;
- base = ctxt->input->cur - ctxt->input->base;
- if (base < 0) return(0);
- if (ctxt->checkIndex > base)
- base = ctxt->checkIndex;
- buf = ctxt->input->buf->buffer->content;
- for (;base < ctxt->input->buf->buffer->use;base++) {
- if (quote != 0) {
- if (buf[base] == quote)
- quote = 0;
- continue;
- }
- if (buf[base] == '"') {
- quote = '"';
- continue;
- }
- if (buf[base] == ''') {
- quote = ''';
- continue;
- }
- if (buf[base] == ']') {
- if (base +1 >= ctxt->input->buf->buffer->use)
- break;
- if (buf[base + 1] == ']') {
- /* conditional crap, skip both ']' ! */
- base++;
- continue;
- }
- for (i = 0;base + i < ctxt->input->buf->buffer->use;i++) {
- if (buf[base + i] == '>')
- goto found_end_int_subset;
- }
- break;
- }
- }
- /*
- * We didn't found the end of the Internal subset
- */
- if (quote == 0)
- ctxt->checkIndex = base;
- #ifdef DEBUG_PUSH
- if (next == 0)
- fprintf(stderr, "PP: lookup of int subset end filedn");
- #endif
- goto done;
- found_end_int_subset:
- xmlParseInternalSubset(ctxt);
- ctxt->inSubset = 2;
- if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
- (ctxt->sax->externalSubset != NULL))
- ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
- ctxt->extSubSystem, ctxt->extSubURI);
- ctxt->inSubset = 0;
- ctxt->instate = XML_PARSER_PROLOG;
- ctxt->checkIndex = 0;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: entering PROLOGn");
- #endif
- break;
- }
- case XML_PARSER_COMMENT:
- fprintf(stderr, "PP: internal error, state == COMMENTn");
- ctxt->instate = XML_PARSER_CONTENT;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: entering CONTENTn");
- #endif
- break;
- case XML_PARSER_PI:
- fprintf(stderr, "PP: internal error, state == PIn");
- ctxt->instate = XML_PARSER_CONTENT;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: entering CONTENTn");
- #endif
- break;
- case XML_PARSER_ENTITY_DECL:
- fprintf(stderr, "PP: internal error, state == ENTITY_DECLn");
- ctxt->instate = XML_PARSER_DTD;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: entering DTDn");
- #endif
- break;
- case XML_PARSER_ENTITY_VALUE:
- fprintf(stderr, "PP: internal error, state == ENTITY_VALUEn");
- ctxt->instate = XML_PARSER_CONTENT;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: entering DTDn");
- #endif
- break;
- case XML_PARSER_ATTRIBUTE_VALUE:
- fprintf(stderr, "PP: internal error, state == ATTRIBUTE_VALUEn");
- ctxt->instate = XML_PARSER_START_TAG;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: entering START_TAGn");
- #endif
- break;
- case XML_PARSER_SYSTEM_LITERAL:
- fprintf(stderr, "PP: internal error, state == SYSTEM_LITERALn");
- ctxt->instate = XML_PARSER_START_TAG;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: entering START_TAGn");
- #endif
- break;
- }
- }
- done:
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: done %dn", ret);
- #endif
- return(ret);
- }
- /**
- * xmlParseTry:
- * @ctxt: an XML parser context
- *
- * Try to progress on parsing
- *
- * Returns zero if no parsing was possible
- */
- int
- xmlParseTry(xmlParserCtxtPtr ctxt) {
- return(xmlParseTryOrFinish(ctxt, 0));
- }
- /**
- * xmlParseChunk:
- * @ctxt: an XML parser context
- * @chunk: an char array
- * @size: the size in byte of the chunk
- * @terminate: last chunk indicator
- *
- * Parse a Chunk of memory
- *
- * Returns zero if no error, the xmlParserErrors otherwise.
- */
- int
- xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
- int terminate) {
- if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
- (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
- int base = ctxt->input->base - ctxt->input->buf->buffer->content;
- int cur = ctxt->input->cur - ctxt->input->base;
-
- xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
- ctxt->input->base = ctxt->input->buf->buffer->content + base;
- ctxt->input->cur = ctxt->input->base + cur;
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: pushed %dn", size);
- #endif
- if ((terminate) || (ctxt->input->buf->buffer->use > 80))
- xmlParseTryOrFinish(ctxt, terminate);
- } else if (ctxt->instate != XML_PARSER_EOF)
- xmlParseTryOrFinish(ctxt, terminate);
- if (terminate) {
- /*
- * Grab the encoding if it was added on-the-fly
- */
- if ((ctxt->encoding != NULL) && (ctxt->myDoc != NULL) &&
- (ctxt->myDoc->encoding == NULL)) {
- ctxt->myDoc->encoding = ctxt->encoding;
- ctxt->encoding = NULL;
- }
- /*
- * Check for termination
- */
- if ((ctxt->instate != XML_PARSER_EOF) &&
- (ctxt->instate != XML_PARSER_EPILOG)) {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "Extra content at the end of the documentn");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->errNo = XML_ERR_DOCUMENT_END;
- }
- if (ctxt->instate != XML_PARSER_EOF) {
- if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
- (!ctxt->disableSAX))
- ctxt->sax->endDocument(ctxt->userData);
- }
- ctxt->instate = XML_PARSER_EOF;
- }
- return((xmlParserErrors) ctxt->errNo);
- }
- /************************************************************************
- * *
- * I/O front end functions to the parser *
- * *
- ************************************************************************/
- /**
- * xmlCreatePushParserCtxt:
- * @sax: a SAX handler
- * @user_data: The user data returned on SAX callbacks
- * @chunk: a pointer to an array of chars
- * @size: number of chars in the array
- * @filename: an optional file name or URI
- *
- * Create a parser context for using the XML parser in push mode
- * To allow content encoding detection, @size should be >= 4
- * The value of @filename is used for fetching external entities
- * and error/warning reports.
- *
- * Returns the new parser context or NULL
- */
- xmlParserCtxtPtr
- xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
- const char *chunk, int size, const char *filename) {
- xmlParserCtxtPtr ctxt;
- xmlParserInputPtr inputStream;
- xmlParserInputBufferPtr buf;
- xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
- /*
- * plug some encoding conversion routines
- */
- if ((chunk != NULL) && (size >= 4))
- enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
- buf = xmlAllocParserInputBuffer(enc);
- if (buf == NULL) return(NULL);
- ctxt = xmlNewParserCtxt();
- if (ctxt == NULL) {
- xmlFree(buf);
- return(NULL);
- }
- if (sax != NULL) {
- if (ctxt->sax != &xmlDefaultSAXHandler)
- xmlFree(ctxt->sax);
- ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
- if (ctxt->sax == NULL) {
- xmlFree(buf);
- xmlFree(ctxt);
- return(NULL);
- }
- memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
- if (user_data != NULL)
- ctxt->userData = user_data;
- }
- if (filename == NULL) {
- ctxt->directory = NULL;
- } else {
- ctxt->directory = xmlParserGetDirectory(filename);
- }
- inputStream = xmlNewInputStream(ctxt);
- if (inputStream == NULL) {
- xmlFreeParserCtxt(ctxt);
- return(NULL);
- }
- if (filename == NULL)
- inputStream->filename = NULL;
- else
- inputStream->filename = xmlMemStrdup(filename);
- inputStream->buf = buf;
- inputStream->base = inputStream->buf->buffer->content;
- inputStream->cur = inputStream->buf->buffer->content;
- if (enc != XML_CHAR_ENCODING_NONE) {
- xmlSwitchEncoding(ctxt, enc);
- }
- inputPush(ctxt, inputStream);
- if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
- (ctxt->input->buf != NULL)) {
- xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
- #ifdef DEBUG_PUSH
- fprintf(stderr, "PP: pushed %dn", size);
- #endif
- }
- return(ctxt);
- }
- /**
- * xmlCreateIOParserCtxt:
- * @sax: a SAX handler
- * @user_data: The user data returned on SAX callbacks
- * @ioread: an I/O read function
- * @ioclose: an I/O close function
- * @ioctx: an I/O handler
- * @enc: the charset encoding if known
- *
- * Create a parser context for using the XML parser with an existing
- * I/O stream
- *
- * Returns the new parser context or NULL
- */
- xmlParserCtxtPtr
- xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
- xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
- void *ioctx, xmlCharEncoding enc) {
- xmlParserCtxtPtr ctxt;
- xmlParserInputPtr inputStream;
- xmlParserInputBufferPtr buf;
- buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
- if (buf == NULL) return(NULL);
- ctxt = xmlNewParserCtxt();
- if (ctxt == NULL) {
- xmlFree(buf);
- return(NULL);
- }
- if (sax != NULL) {
- if (ctxt->sax != &xmlDefaultSAXHandler)
- xmlFree(ctxt->sax);
- ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
- if (ctxt->sax == NULL) {
- xmlFree(buf);
- xmlFree(ctxt);
- return(NULL);
- }
- memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
- if (user_data != NULL)
- ctxt->userData = user_data;
- }
- inputStream = xmlNewIOInputStream(ctxt, buf, enc);
- if (inputStream == NULL) {
- xmlFreeParserCtxt(ctxt);
- return(NULL);
- }
- inputPush(ctxt, inputStream);
- return(ctxt);
- }
- /**
- * xmlCreateDocParserCtxt:
- * @cur: a pointer to an array of xmlChar
- *
- * Create a parser context for an XML in-memory document.
- *
- * Returns the new parser context or NULL
- */
- xmlParserCtxtPtr
- xmlCreateDocParserCtxt(xmlChar *cur) {
- xmlParserCtxtPtr ctxt;
- xmlParserInputPtr input;
- ctxt = xmlNewParserCtxt();
- if (ctxt == NULL) {
- return(NULL);
- }
- input = xmlNewInputStream(ctxt);
- if (input == NULL) {
- xmlFreeParserCtxt(ctxt);
- return(NULL);
- }
- input->base = cur;
- input->cur = cur;
- inputPush(ctxt, input);
- return(ctxt);
- }
- /**
- * xmlSAXParseDoc:
- * @sax: the SAX handler block
- * @cur: a pointer to an array of xmlChar
- * @recovery: work in recovery mode, i.e. tries to read no Well Formed
- * documents
- *
- * parse an XML in-memory document and build a tree.
- * It use the given SAX function block to handle the parsing callback.
- * If sax is NULL, fallback to the default DOM tree building routines.
- *
- * Returns the resulting document tree
- */
- xmlDocPtr
- xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
- xmlDocPtr ret;
- xmlParserCtxtPtr ctxt;
- if (cur == NULL) return(NULL);
- ctxt = xmlCreateDocParserCtxt(cur);
- if (ctxt == NULL) return(NULL);
- if (sax != NULL) {
- ctxt->sax = sax;
- ctxt->userData = NULL;
- }
- xmlParseDocument(ctxt);
- if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
- else {
- ret = NULL;
- xmlFreeDoc(ctxt->myDoc);
- ctxt->myDoc = NULL;
- }
- if (sax != NULL)
- ctxt->sax = NULL;
- xmlFreeParserCtxt(ctxt);
-
- return(ret);
- }
- /**
- * xmlParseDoc:
- * @cur: a pointer to an array of xmlChar
- *
- * parse an XML in-memory document and build a tree.
- *
- * Returns the resulting document tree
- */
- xmlDocPtr
- xmlParseDoc(xmlChar *cur) {
- return(xmlSAXParseDoc(NULL, cur, 0));
- }
- /**
- * xmlSAXParseDTD:
- * @sax: the SAX handler block
- * @ExternalID: a NAME* containing the External ID of the DTD
- * @SystemID: a NAME* containing the URL to the DTD
- *
- * Load and parse an external subset.
- *
- * Returns the resulting xmlDtdPtr or NULL in case of error.
- */
- xmlDtdPtr
- xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
- const xmlChar *SystemID) {
- xmlDtdPtr ret = NULL;
- xmlParserCtxtPtr ctxt;
- xmlParserInputPtr input = NULL;
- xmlCharEncoding enc;
- if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
- ctxt = xmlNewParserCtxt();
- if (ctxt == NULL) {
- return(NULL);
- }
- /*
- * Set-up the SAX context
- */
- if (ctxt == NULL) return(NULL);
- if (sax != NULL) {
- if (ctxt->sax != NULL)
- xmlFree(ctxt->sax);
- ctxt->sax = sax;
- ctxt->userData = NULL;
- }
- /*
- * Ask the Entity resolver to load the damn thing
- */
- if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
- input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
- if (input == NULL) {
- if (sax != NULL) ctxt->sax = NULL;
- xmlFreeParserCtxt(ctxt);
- return(NULL);
- }
- /*
- * plug some encoding conversion routines here.
- */
- xmlPushInput(ctxt, input);
- enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
- xmlSwitchEncoding(ctxt, enc);
- if (input->filename == NULL)
- input->filename = (char *) xmlStrdup(SystemID);
- input->line = 1;
- input->col = 1;
- input->base = ctxt->input->cur;
- input->cur = ctxt->input->cur;
- input->free = NULL;
- /*
- * let's parse that entity knowing it's an external subset.
- */
- xmlParseExternalSubset(ctxt, ExternalID, SystemID);
- if (ctxt->myDoc != NULL) {
- if (ctxt->wellFormed) {
- ret = ctxt->myDoc->intSubset;
- ctxt->myDoc->intSubset = NULL;
- } else {
- ret = NULL;
- }
- xmlFreeDoc(ctxt->myDoc);
- ctxt->myDoc = NULL;
- }
- if (sax != NULL) ctxt->sax = NULL;
- xmlFreeParserCtxt(ctxt);
-
- return(ret);
- }
- /**
- * xmlParseDTD:
- * @ExternalID: a NAME* containing the External ID of the DTD
- * @SystemID: a NAME* containing the URL to the DTD
- *
- * Load and parse an external subset.
- *
- * Returns the resulting xmlDtdPtr or NULL in case of error.
- */
- xmlDtdPtr
- xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
- return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
- }
- /**
- * xmlSAXParseBalancedChunk:
- * @ctx: an XML parser context (possibly NULL)
- * @sax: the SAX handler bloc (possibly NULL)
- * @user_data: The user data returned on SAX callbacks (possibly NULL)
- * @input: a parser input stream
- * @enc: the encoding
- *
- * Parse a well-balanced chunk of an XML document
- * The user has to provide SAX callback block whose routines will be
- * called by the parser
- * The allowed sequence for the Well Balanced Chunk is the one defined by
- * the content production in the XML grammar:
- *
- * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
- *
- * Returns 0 if the chunk is well balanced, -1 in case of args problem and
- * the error code otherwise
- */
- int
- xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
- void *user_data, xmlParserInputPtr input,
- xmlCharEncoding enc) {
- xmlParserCtxtPtr ctxt;
- int ret;
- if (input == NULL) return(-1);
- if (ctx != NULL)
- ctxt = ctx;
- else {
- ctxt = xmlNewParserCtxt();
- if (ctxt == NULL)
- return(-1);
- if (sax == NULL)
- ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
- }
- /*
- * Set-up the SAX context
- */
- if (sax != NULL) {
- if (ctxt->sax != NULL)
- xmlFree(ctxt->sax);
- ctxt->sax = sax;
- ctxt->userData = user_data;
- }
- /*
- * plug some encoding conversion routines here.
- */
- xmlPushInput(ctxt, input);
- if (enc != XML_CHAR_ENCODING_NONE)
- xmlSwitchEncoding(ctxt, enc);
- /*
- * let's parse that entity knowing it's an external subset.
- */
- xmlParseContent(ctxt);
- ret = ctxt->errNo;
- if (ctx == NULL) {
- if (sax != NULL)
- ctxt->sax = NULL;
- else
- xmlFreeDoc(ctxt->myDoc);
- xmlFreeParserCtxt(ctxt);
- }
- return(ret);
- }
- /**
- * xmlParseExternalEntity:
- * @doc: the document the chunk pertains to
- * @sax: the SAX handler bloc (possibly NULL)
- * @user_data: The user data returned on SAX callbacks (possibly NULL)
- * @depth: Used for loop detection, use 0
- * @URL: the URL for the entity to load
- * @ID: the System ID for the entity to load
- * @list: the return value for the set of parsed nodes
- *
- * Parse an external general entity
- * An external general parsed entity is well-formed if it matches the
- * production labeled extParsedEnt.
- *
- * [78] extParsedEnt ::= TextDecl? content
- *
- * Returns 0 if the entity is well formed, -1 in case of args problem and
- * the parser error code otherwise
- */
- int
- xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
- int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
- xmlParserCtxtPtr ctxt;
- xmlDocPtr newDoc;
- xmlSAXHandlerPtr oldsax = NULL;
- int ret = 0;
- if (depth > 40) {
- return(XML_ERR_ENTITY_LOOP);
- }
- if (list != NULL)
- *list = NULL;
- if ((URL == NULL) && (ID == NULL))
- return(-1);
- ctxt = xmlCreateEntityParserCtxt(URL, ID, doc->URL);
- if (ctxt == NULL) return(-1);
- ctxt->userData = ctxt;
- if (sax != NULL) {
- oldsax = ctxt->sax;
- ctxt->sax = sax;
- if (user_data != NULL)
- ctxt->userData = user_data;
- }
- newDoc = xmlNewDoc(BAD_CAST "1.0");
- if (newDoc == NULL) {
- xmlFreeParserCtxt(ctxt);
- return(-1);
- }
- if (doc != NULL) {
- newDoc->intSubset = doc->intSubset;
- newDoc->extSubset = doc->extSubset;
- }
- if (doc->URL != NULL) {
- newDoc->URL = xmlStrdup(doc->URL);
- }
- newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
- if (newDoc->children == NULL) {
- if (sax != NULL)
- ctxt->sax = oldsax;
- xmlFreeParserCtxt(ctxt);
- newDoc->intSubset = NULL;
- newDoc->extSubset = NULL;
- xmlFreeDoc(newDoc);
- return(-1);
- }
- nodePush(ctxt, newDoc->children);
- if (doc == NULL) {
- ctxt->myDoc = newDoc;
- } else {
- ctxt->myDoc = doc;
- newDoc->children->doc = doc;
- }
- /*
- * Parse a possible text declaration first
- */
- GROW;
- if ((RAW == '<') && (NXT(1) == '?') &&
- (NXT(2) == 'x') && (NXT(3) == 'm') &&
- (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
- xmlParseTextDecl(ctxt);
- }
- /*
- * Doing validity checking on chunk doesn't make sense
- */
- ctxt->instate = XML_PARSER_CONTENT;
- ctxt->validate = 0;
- ctxt->depth = depth;
- xmlParseContent(ctxt);
-
- if ((RAW == '<') && (NXT(1) == '/')) {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "chunk is not well balancedn");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
- } else if (RAW != 0) {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "extra content at the end of well balanced chunkn");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->errNo = XML_ERR_EXTRA_CONTENT;
- }
- if (ctxt->node != newDoc->children) {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "chunk is not well balancedn");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
- }
- if (!ctxt->wellFormed) {
- if (ctxt->errNo == 0)
- ret = 1;
- else
- ret = ctxt->errNo;
- } else {
- if (list != NULL) {
- xmlNodePtr cur;
- /*
- * Return the newly created nodeset after unlinking it from
- * they pseudo parent.
- */
- cur = newDoc->children->children;
- *list = cur;
- while (cur != NULL) {
- cur->parent = NULL;
- cur = cur->next;
- }
- newDoc->children->children = NULL;
- }
- ret = 0;
- }
- if (sax != NULL)
- ctxt->sax = oldsax;
- xmlFreeParserCtxt(ctxt);
- newDoc->intSubset = NULL;
- newDoc->extSubset = NULL;
- xmlFreeDoc(newDoc);
-
- return(ret);
- }
- /**
- * xmlParseBalancedChunk:
- * @doc: the document the chunk pertains to
- * @sax: the SAX handler bloc (possibly NULL)
- * @user_data: The user data returned on SAX callbacks (possibly NULL)
- * @depth: Used for loop detection, use 0
- * @string: the input string in UTF8 or ISO-Latin (zero terminated)
- * @list: the return value for the set of parsed nodes
- *
- * Parse a well-balanced chunk of an XML document
- * called by the parser
- * The allowed sequence for the Well Balanced Chunk is the one defined by
- * the content production in the XML grammar:
- *
- * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
- *
- * Returns 0 if the chunk is well balanced, -1 in case of args problem and
- * the parser error code otherwise
- */
- int
- xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
- void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
- xmlParserCtxtPtr ctxt;
- xmlDocPtr newDoc;
- xmlSAXHandlerPtr oldsax = NULL;
- int size;
- int ret = 0;
- if (depth > 40) {
- return(XML_ERR_ENTITY_LOOP);
- }
- if (list != NULL)
- *list = NULL;
- if (string == NULL)
- return(-1);
- size = xmlStrlen(string);
- ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
- if (ctxt == NULL) return(-1);
- ctxt->userData = ctxt;
- if (sax != NULL) {
- oldsax = ctxt->sax;
- ctxt->sax = sax;
- if (user_data != NULL)
- ctxt->userData = user_data;
- }
- newDoc = xmlNewDoc(BAD_CAST "1.0");
- if (newDoc == NULL) {
- xmlFreeParserCtxt(ctxt);
- return(-1);
- }
- if (doc != NULL) {
- newDoc->intSubset = doc->intSubset;
- newDoc->extSubset = doc->extSubset;
- }
- newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
- if (newDoc->children == NULL) {
- if (sax != NULL)
- ctxt->sax = oldsax;
- xmlFreeParserCtxt(ctxt);
- newDoc->intSubset = NULL;
- newDoc->extSubset = NULL;
- xmlFreeDoc(newDoc);
- return(-1);
- }
- nodePush(ctxt, newDoc->children);
- if (doc == NULL) {
- ctxt->myDoc = newDoc;
- } else {
- ctxt->myDoc = doc;
- newDoc->children->doc = doc;
- }
- ctxt->instate = XML_PARSER_CONTENT;
- ctxt->depth = depth;
- /*
- * Doing validity checking on chunk doesn't make sense
- */
- ctxt->validate = 0;
- xmlParseContent(ctxt);
-
- if ((RAW == '<') && (NXT(1) == '/')) {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "chunk is not well balancedn");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
- } else if (RAW != 0) {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "extra content at the end of well balanced chunkn");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->errNo = XML_ERR_EXTRA_CONTENT;
- }
- if (ctxt->node != newDoc->children) {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "chunk is not well balancedn");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
- }
- if (!ctxt->wellFormed) {
- if (ctxt->errNo == 0)
- ret = 1;
- else
- ret = ctxt->errNo;
- } else {
- if (list != NULL) {
- xmlNodePtr cur;
- /*
- * Return the newly created nodeset after unlinking it from
- * they pseudo parent.
- */
- cur = newDoc->children->children;
- *list = cur;
- while (cur != NULL) {
- cur->parent = NULL;
- cur = cur->next;
- }
- newDoc->children->children = NULL;
- }
- ret = 0;
- }
- if (sax != NULL)
- ctxt->sax = oldsax;
- xmlFreeParserCtxt(ctxt);
- newDoc->intSubset = NULL;
- newDoc->extSubset = NULL;
- xmlFreeDoc(newDoc);
-
- return(ret);
- }
- /**
- * xmlParseBalancedChunkFile:
- * @doc: the document the chunk pertains to
- *
- * Parse a well-balanced chunk of an XML document contained in a file
- *
- * Returns the resulting list of nodes resulting from the parsing,
- * they are not added to @node
- */
- xmlNodePtr
- xmlParseBalancedChunkFile(xmlDocPtr doc, xmlNodePtr node) {
- /* TODO !!! */
- return(NULL);
- }
- /**
- * xmlRecoverDoc:
- * @cur: a pointer to an array of xmlChar
- *
- * parse an XML in-memory document and build a tree.
- * In the case the document is not Well Formed, a tree is built anyway
- *
- * Returns the resulting document tree
- */
- xmlDocPtr
- xmlRecoverDoc(xmlChar *cur) {
- return(xmlSAXParseDoc(NULL, cur, 1));
- }
- /**
- * xmlCreateEntityParserCtxt:
- * @URL: the entity URL
- * @ID: the entity PUBLIC ID
- * @base: a posible base for the target URI
- *
- * Create a parser context for an external entity
- * Automatic support for ZLIB/Compress compressed document is provided
- * by default if found at compile-time.
- *
- * Returns the new parser context or NULL
- */
- xmlParserCtxtPtr
- xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
- const xmlChar *base) {
- xmlParserCtxtPtr ctxt;
- xmlParserInputPtr inputStream;
- char *directory = NULL;
- ctxt = xmlNewParserCtxt();
- if (ctxt == NULL) {
- return(NULL);
- }
- inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
- if (inputStream == NULL) {
- xmlFreeParserCtxt(ctxt);
- return(NULL);
- }
- inputPush(ctxt, inputStream);
- if ((ctxt->directory == NULL) && (directory == NULL))
- directory = xmlParserGetDirectory((char *)URL);
- if ((ctxt->directory == NULL) && (directory != NULL))
- ctxt->directory = directory;
- return(ctxt);
- }
- /**
- * xmlCreateFileParserCtxt:
- * @filename: the filename
- *
- * Create a parser context for a file content.
- * Automatic support for ZLIB/Compress compressed document is provided
- * by default if found at compile-time.
- *
- * Returns the new parser context or NULL
- */
- xmlParserCtxtPtr
- xmlCreateFileParserCtxt(const char *filename)
- {
- xmlParserCtxtPtr ctxt;
- xmlParserInputPtr inputStream;
- xmlParserInputBufferPtr buf;
- char *directory = NULL;
- buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
- if (buf == NULL) return(NULL);
- ctxt = xmlNewParserCtxt();
- if (ctxt == NULL) {
- return(NULL);
- }
- inputStream = xmlNewInputStream(ctxt);
- if (inputStream == NULL) {
- xmlFreeParserCtxt(ctxt);
- return(NULL);
- }
- inputStream->filename = xmlMemStrdup(filename);
- inputStream->buf = buf;
- inputStream->base = inputStream->buf->buffer->content;
- inputStream->cur = inputStream->buf->buffer->content;
- inputPush(ctxt, inputStream);
- if ((ctxt->directory == NULL) && (directory == NULL))
- directory = xmlParserGetDirectory(filename);
- if ((ctxt->directory == NULL) && (directory != NULL))
- ctxt->directory = directory;
- return(ctxt);
- }
- /**
- * xmlSAXParseFile:
- * @sax: the SAX handler block
- * @filename: the filename
- * @recovery: work in recovery mode, i.e. tries to read no Well Formed
- * documents
- *
- * parse an XML file and build a tree. Automatic support for ZLIB/Compress
- * compressed document is provided by default if found at compile-time.
- * It use the given SAX function block to handle the parsing callback.
- * If sax is NULL, fallback to the default DOM tree building routines.
- *
- * Returns the resulting document tree
- */
- xmlDocPtr
- xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
- int recovery) {
- xmlDocPtr ret;
- xmlParserCtxtPtr ctxt;
- char *directory = NULL;
- ctxt = xmlCreateFileParserCtxt(filename);
- if (ctxt == NULL) return(NULL);
- if (sax != NULL) {
- if (ctxt->sax != NULL)
- xmlFree(ctxt->sax);
- ctxt->sax = sax;
- ctxt->userData = NULL;
- }
- if ((ctxt->directory == NULL) && (directory == NULL))
- directory = xmlParserGetDirectory(filename);
- if ((ctxt->directory == NULL) && (directory != NULL))
- ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
- xmlParseDocument(ctxt);
- if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
- else {
- ret = NULL;
- xmlFreeDoc(ctxt->myDoc);
- ctxt->myDoc = NULL;
- }
- if (sax != NULL)
- ctxt->sax = NULL;
- xmlFreeParserCtxt(ctxt);
-
- return(ret);
- }
- /**
- * xmlParseFile:
- * @filename: the filename
- *
- * parse an XML file and build a tree. Automatic support for ZLIB/Compress
- * compressed document is provided by default if found at compile-time.
- *
- * Returns the resulting document tree
- */
- xmlDocPtr
- xmlParseFile(const char *filename) {
- return(xmlSAXParseFile(NULL, filename, 0));
- }
- /**
- * xmlRecoverFile:
- * @filename: the filename
- *
- * parse an XML file and build a tree. Automatic support for ZLIB/Compress
- * compressed document is provided by default if found at compile-time.
- * In the case the document is not Well Formed, a tree is built anyway
- *
- * Returns the resulting document tree
- */
- xmlDocPtr
- xmlRecoverFile(const char *filename) {
- return(xmlSAXParseFile(NULL, filename, 1));
- }
- /**
- * xmlCreateMemoryParserCtxt:
- * @buffer: a pointer to a zero terminated char array
- * @size: the size of the array (without the trailing 0)
- *
- * Create a parser context for an XML in-memory document.
- *
- * Returns the new parser context or NULL
- */
- xmlParserCtxtPtr
- xmlCreateMemoryParserCtxt(char *buffer, int size) {
- xmlParserCtxtPtr ctxt;
- xmlParserInputPtr input;
- if (buffer[size] != 0)
- return(NULL);
- ctxt = xmlNewParserCtxt();
- if (ctxt == NULL)
- return(NULL);
- input = xmlNewInputStream(ctxt);
- if (input == NULL) {
- xmlFreeParserCtxt(ctxt);
- return(NULL);
- }
- input->filename = NULL;
- input->line = 1;
- input->col = 1;
- input->buf = NULL;
- input->consumed = 0;
- input->base = BAD_CAST buffer;
- input->cur = BAD_CAST buffer;
- input->free = NULL;
- inputPush(ctxt, input);
- return(ctxt);
- }
- /**
- * xmlSAXParseMemory:
- * @sax: the SAX handler block
- * @buffer: an pointer to a char array
- * @size: the size of the array
- * @recovery: work in recovery mode, i.e. tries to read not Well Formed
- * documents
- *
- * parse an XML in-memory block and use the given SAX function block
- * to handle the parsing callback. If sax is NULL, fallback to the default
- * DOM tree building routines.
- *
- * Returns the resulting document tree
- */
- xmlDocPtr
- xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
- xmlDocPtr ret;
- xmlParserCtxtPtr ctxt;
- ctxt = xmlCreateMemoryParserCtxt(buffer, size);
- if (ctxt == NULL) return(NULL);
- if (sax != NULL) {
- ctxt->sax = sax;
- ctxt->userData = NULL;
- }
- xmlParseDocument(ctxt);
- if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
- else {
- ret = NULL;
- xmlFreeDoc(ctxt->myDoc);
- ctxt->myDoc = NULL;
- }
- if (sax != NULL)
- ctxt->sax = NULL;
- xmlFreeParserCtxt(ctxt);
-
- return(ret);
- }
- /**
- * xmlParseMemory:
- * @buffer: an pointer to a char array
- * @size: the size of the array
- *
- * parse an XML in-memory block and build a tree.
- *
- * Returns the resulting document tree
- */
- xmlDocPtr xmlParseMemory(char *buffer, int size) {
- return(xmlSAXParseMemory(NULL, buffer, size, 0));
- }
- /**
- * xmlRecoverMemory:
- * @buffer: an pointer to a char array
- * @size: the size of the array
- *
- * parse an XML in-memory block and build a tree.
- * In the case the document is not Well Formed, a tree is built anyway
- *
- * Returns the resulting document tree
- */
- xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
- return(xmlSAXParseMemory(NULL, buffer, size, 1));
- }
- /**
- * xmlSetupParserForBuffer:
- * @ctxt: an XML parser context
- * @buffer: a xmlChar * buffer
- * @filename: a file name
- *
- * Setup the parser context to parse a new buffer; Clears any prior
- * contents from the parser context. The buffer parameter must not be
- * NULL, but the filename parameter can be
- */
- void
- xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
- const char* filename)
- {
- xmlParserInputPtr input;
- input = xmlNewInputStream(ctxt);
- if (input == NULL) {
- perror("malloc");
- xmlFree(ctxt);
- return;
- }
-
- xmlClearParserCtxt(ctxt);
- if (filename != NULL)
- input->filename = xmlMemStrdup(filename);
- input->base = buffer;
- input->cur = buffer;
- inputPush(ctxt, input);
- }
- /**
- * xmlSAXUserParseFile:
- * @sax: a SAX handler
- * @user_data: The user data returned on SAX callbacks
- * @filename: a file name
- *
- * parse an XML file and call the given SAX handler routines.
- * Automatic support for ZLIB/Compress compressed document is provided
- *
- * Returns 0 in case of success or a error number otherwise
- */
- int
- xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
- const char *filename) {
- int ret = 0;
- xmlParserCtxtPtr ctxt;
-
- ctxt = xmlCreateFileParserCtxt(filename);
- if (ctxt == NULL) return -1;
- if (ctxt->sax != &xmlDefaultSAXHandler)
- xmlFree(ctxt->sax);
- ctxt->sax = sax;
- if (user_data != NULL)
- ctxt->userData = user_data;
-
- xmlParseDocument(ctxt);
-
- if (ctxt->wellFormed)
- ret = 0;
- else {
- if (ctxt->errNo != 0)
- ret = ctxt->errNo;
- else
- ret = -1;
- }
- if (sax != NULL)
- ctxt->sax = NULL;
- xmlFreeParserCtxt(ctxt);
-
- return ret;
- }
- /**
- * xmlSAXUserParseMemory:
- * @sax: a SAX handler
- * @user_data: The user data returned on SAX callbacks
- * @buffer: an in-memory XML document input
- * @size: the length of the XML document in bytes
- *
- * A better SAX parsing routine.
- * parse an XML in-memory buffer and call the given SAX handler routines.
- *
- * Returns 0 in case of success or a error number otherwise
- */
- int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
- char *buffer, int size) {
- int ret = 0;
- xmlParserCtxtPtr ctxt;
-
- ctxt = xmlCreateMemoryParserCtxt(buffer, size);
- if (ctxt == NULL) return -1;
- ctxt->sax = sax;
- ctxt->userData = user_data;
-
- xmlParseDocument(ctxt);
-
- if (ctxt->wellFormed)
- ret = 0;
- else {
- if (ctxt->errNo != 0)
- ret = ctxt->errNo;
- else
- ret = -1;
- }
- if (sax != NULL)
- ctxt->sax = NULL;
- xmlFreeParserCtxt(ctxt);
-
- return ret;
- }
- /************************************************************************
- * *
- * Miscellaneous *
- * *
- ************************************************************************/
/**
 * xmlCleanupParser:
 *
 * Cleanup function for the XML parser. It releases the registered
 * character encoding handlers and the predefined entities.
 * It doesn't deallocate any document related memory. Calling this
 * function should not prevent reusing the parser.
 */
void
xmlCleanupParser(void)
{
    /* Encoding handlers first, then the predefined entities. */
    xmlCleanupCharEncodingHandlers();
    xmlCleanupPredefinedEntities();
}
- /**
- * xmlParserFindNodeInfo:
- * @ctxt: an XML parser context
- * @node: an XML node within the tree
- *
- * Find the parser node info struct for a given node
- *
- * Returns an xmlParserNodeInfo block pointer or NULL
- */
- const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
- const xmlNode* node)
- {
- unsigned long pos;
- /* Find position where node should be at */
- pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
- if ( ctx->node_seq.buffer[pos].node == node )
- return &ctx->node_seq.buffer[pos];
- else
- return NULL;
- }
- /**
- * xmlInitNodeInfoSeq:
- * @seq: a node info sequence pointer
- *
- * -- Initialize (set to initial state) node info sequence
- */
- void
- xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
- {
- seq->length = 0;
- seq->maximum = 0;
- seq->buffer = NULL;
- }
- /**
- * xmlClearNodeInfoSeq:
- * @seq: a node info sequence pointer
- *
- * -- Clear (release memory and reinitialize) node
- * info sequence
- */
- void
- xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
- {
- if ( seq->buffer != NULL )
- xmlFree(seq->buffer);
- xmlInitNodeInfoSeq(seq);
- }
- /**
- * xmlParserFindNodeInfoIndex:
- * @seq: a node info sequence pointer
- * @node: an XML node pointer
- *
- *
- * xmlParserFindNodeInfoIndex : Find the index that the info record for
- * the given node is or should be at in a sorted sequence
- *
- * Returns a long indicating the position of the record
- */
- unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
- const xmlNode* node)
- {
- unsigned long upper, lower, middle;
- int found = 0;
- /* Do a binary search for the key */
- lower = 1;
- upper = seq->length;
- middle = 0;
- while ( lower <= upper && !found) {
- middle = lower + (upper - lower) / 2;
- if ( node == seq->buffer[middle - 1].node )
- found = 1;
- else if ( node < seq->buffer[middle - 1].node )
- upper = middle - 1;
- else
- lower = middle + 1;
- }
- /* Return position */
- if ( middle == 0 || seq->buffer[middle - 1].node < node )
- return middle;
- else
- return middle - 1;
- }
- /**
- * xmlParserAddNodeInfo:
- * @ctxt: an XML parser context
- * @info: a node info sequence pointer
- *
- * Insert node info record into the sorted sequence
- */
- void
- xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
- const xmlParserNodeInfo* info)
- {
- unsigned long pos;
- static unsigned int block_size = 5;
- /* Find pos and check to see if node is already in the sequence */
- pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
- if ( pos < ctxt->node_seq.length
- && ctxt->node_seq.buffer[pos].node == info->node ) {
- ctxt->node_seq.buffer[pos] = *info;
- }
- /* Otherwise, we need to add new node to buffer */
- else {
- /* Expand buffer by 5 if needed */
- if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
- xmlParserNodeInfo* tmp_buffer;
- unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
- *(ctxt->node_seq.maximum + block_size));
- if ( ctxt->node_seq.buffer == NULL )
- tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
- else
- tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
- if ( tmp_buffer == NULL ) {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData, "Out of memoryn");
- ctxt->errNo = XML_ERR_NO_MEMORY;
- return;
- }
- ctxt->node_seq.buffer = tmp_buffer;
- ctxt->node_seq.maximum += block_size;
- }
- /* If position is not at end, move elements out of the way */
- if ( pos != ctxt->node_seq.length ) {
- unsigned long i;
- for ( i = ctxt->node_seq.length; i > pos; i-- )
- ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
- }
-
- /* Copy element and increase length */
- ctxt->node_seq.buffer[pos] = *info;
- ctxt->node_seq.length++;
- }
- }
- /**
- * xmlSubstituteEntitiesDefault:
- * @val: int 0 or 1
- *
- * Set and return the previous value for default entity support.
- * Initially the parser always keep entity references instead of substituting
- * entity values in the output. This function has to be used to change the
- * default parser behaviour
- * SAX::subtituteEntities() has to be used for changing that on a file by
- * file basis.
- *
- * Returns the last value for 0 for no substitution, 1 for substitution.
- */
- int
- xmlSubstituteEntitiesDefault(int val) {
- int old = xmlSubstituteEntitiesDefaultValue;
- xmlSubstituteEntitiesDefaultValue = val;
- return(old);
- }
- /**
- * xmlKeepBlanksDefault:
- * @val: int 0 or 1
- *
- * Set and return the previous value for default blanks text nodes support.
- * The 1.x version of the parser used an heuristic to try to detect
- * ignorable white spaces. As a result the SAX callback was generating
- * ignorableWhitespace() callbacks instead of characters() one, and when
- * using the DOM output text nodes containing those blanks were not generated.
- * The 2.x and later version will switch to the XML standard way and
- * ignorableWhitespace() are only generated when running the parser in
- * validating mode and when the current element doesn't allow CDATA or
- * mixed content.
- * This function is provided as a way to force the standard behaviour
- * on 1.X libs and to switch back to the old mode for compatibility when
- * running 1.X client code on 2.X . Upgrade of 1.X code should be done
- * by using xmlIsBlankNode() commodity function to detect the "empty"
- * nodes generated.
- * This value also affect autogeneration of indentation when saving code
- * if blanks sections are kept, indentation is not generated.
- *
- * Returns the last value for 0 for no substitution, 1 for substitution.
- */
- int
- xmlKeepBlanksDefault(int val) {
- int old = xmlKeepBlanksDefaultValue;
- xmlKeepBlanksDefaultValue = val;
- xmlIndentTreeOutput = !val;
- return(old);
- }