词法分析

开发平台：
Visual C++

EncodingTest.cpp：源码内容
							/*
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 1999-2000 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Xerces" and "Apache Software Foundation" must
 *    not be used to endorse or promote products derived from this
 *    software without prior written permission. For written
 *    permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    nor may "Apache" appear in their name, without prior written
 *    permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation, and was
 * originally based on software copyright (c) 1999, International
 * Business Machines, Inc., http://www.ibm.com .  For more information
 * on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */
//---------------------------------------------------------------------
//
//  This test program is used, in conjunction with a set of test data files,
//  to verify support for different character encodings in XML.
//
//---------------------------------------------------------------------
// ---------------------------------------------------------------------------
//  Includes
// ---------------------------------------------------------------------------
#include <xercesc/framework/XMLBuffer.hpp>
#include <xercesc/util/PlatformUtils.hpp>
#include <xercesc/util/XMLString.hpp>
#include <xercesc/util/XMLException.hpp>
#include <xercesc/sax/SAXException.hpp>
#include <xercesc/sax/ErrorHandler.hpp>
#include <xercesc/sax/SAXParseException.hpp>
#include <xercesc/parsers/XercesDOMParser.hpp>
#include <xercesc/dom/DOM.hpp>
#include <stdio.h>
XERCES_CPP_NAMESPACE_USE
// Number of test files for which processTestFile() returned false
// (incremented in main()).
static int gTestsFailed = 0;
// Number of test files attempted so far (incremented in main()).
static int gTestsRun    = 0;
// The one parser used for every parse. parseFile() creates it on first use
// and main() deletes it once all tests have been run.
static XercesDOMParser* parser = 0;
//-----------------------------------------------------------------------
//
//  ErrorHandler.   The DOM Parser will report any parsing errors by means
//                  of call-backs to the methods of this class.
//                  This is just necessary boilerplate, as far as this
//                  program is concerned.
//
//-----------------------------------------------------------------------
class  ParseErrorHandler: public ErrorHandler
{
public:
    void warning(const SAXParseException& e);
    void error(const SAXParseException& e);
    void fatalError(const SAXParseException& e);
    void resetErrors() {};
};
void ParseErrorHandler::error(const SAXParseException& e)
{
    char* systemId = XMLString::transcode(e.getSystemId());
    char* message = XMLString::transcode(e.getMessage());
    fprintf(stderr, "nError at file "%s", line %d, char %d:  %sn",
        systemId, e.getLineNumber(),
        e.getColumnNumber(), message);
    XMLString::release(&systemId);
    XMLString::release(&message);
    throw e;
};
void ParseErrorHandler::fatalError(const SAXParseException& e)
{
    char* systemId = XMLString::transcode(e.getSystemId());
    char* message = XMLString::transcode(e.getMessage());
    fprintf(stderr, "nFatal Error at file "%s", line %d, char %d:  %sn",
        systemId, e.getLineNumber(),
        e.getColumnNumber(), message);
    XMLString::release(&systemId);
    XMLString::release(&message);
    throw e;
};
void ParseErrorHandler::warning(const SAXParseException& e)
{
    char* systemId = XMLString::transcode(e.getSystemId());
    char* message = XMLString::transcode(e.getMessage());
    fprintf(stderr, "nWarning at file "%s", line %d, char %d:  %sn",
        systemId, e.getLineNumber(),
        e.getColumnNumber(), message);
    XMLString::release(&systemId);
    XMLString::release(&message);
    throw e;
};
//------------------------------------------------------------------------
//
//   parseFile  - a simpler to use function for just parsing an XML file
//                and getting the DOM Document back.
//
//------------------------------------------------------------------------
static DOMDocument* parseFile(char *fileName)
{
    ParseErrorHandler eh;
    if (!parser)
        parser = new XercesDOMParser;
    parser->setDoValidation(false);
    parser->setErrorHandler(&eh);
    try
    {
        parser->parse(fileName);
    }
    catch (const XMLException& e )
    {
		fprintf(stderr, "Exception Occurred "%s".  n",
			XMLString::transcode(e.getMessage()));
		fprintf(stderr, "File being parsed is "%s".n", fileName);
        return 0;  // A null document.
    }
	catch (...)
	{
		fprintf(stderr, "Unexpected Exception thrown during parse of file "%s".n",
		                 fileName);
		return 0;
	}
    return parser->getDocument();
}
//------------------------------------------------------------------------
//
//  writeUData - Write out a udata xml element for a XMLCh* contents.
//
//------------------------------------------------------------------------
static void writeUData(const XMLCh* s)
{
    unsigned int i;
    printf("<udata>n");
    size_t len = XMLString::stringLen(s);
    for (i=0; i<len; i++)
    {
        if (i % 16 == 0)
            printf("n");
        XMLCh c = s[i];
        printf("%4x ", c);
    }
    printf("n</udata>n");
};
//------------------------------------------------------------------------
//
//  eatWhiteSpace -  XMLCh*s are kind of short on utility functions :-(
//
//------------------------------------------------------------------------
static void eatWhiteSpace(XMLCh* s, unsigned int &i)
{
    while (i < XMLString::stringLen(s))
    {
    XMLCh c = s[i];
    if (!(c == 0x20 ||           // These are the official XML space characters,
        c == 0x09 ||             //   expressed as Unicode constants.
        c == 0x0A))
        break;
    i++;
    }
}
//------------------------------------------------------------------------
//
//   convertHexValue     if the XMLCh* contains a hex number at position i,
//                       convert it and return it, and update i to index the
//                       first char after the hex number.
//                       return 0 if string[i] didn't have a hex digit.
//                       0 return is ambiguous, but it doesn't matter for XML,
//                       where 0 is not a valid character.
//
//------------------------------------------------------------------------
static int convertHexValue(XMLCh* s, unsigned int &i)
{
    int value = 0;
                                   // For reference, the digits  0-9 are Unicode 0x30-39
                                   //                the letters A-F are Unicode 0x41-0x46
                                   //                the letters a-f are Unicode 0x61-66
                                   // We can't use character literals - we might be
                                   //  building on an EBCDIC machine.
    while (i < XMLString::stringLen(s))
    {
        XMLCh c = s[i];
        if (c >= 0x61 && c <= 0x66)     // Uppercase a-f to A-F.
            c -= 0x20;
        if (c < 0x30 || c >0x46)        // Stop if not a hex digit
            break;
        if (c > 0x39 && c <0x41)
            break;
        value = value << 4;             // Append this digit to accumulating value
        if (c <= 0x39)
            value += c-0x30;
        else
            value += 0xA + c - 0x41;
        i++;
    }
    return value;
}
//------------------------------------------------------------------------
//
//  processTestFile   Given the file name of an encoding test xml file,
//                    run it.
//
//------------------------------------------------------------------------
static bool  processTestFile(const XMLCh* fileName)
{
    //
    //  Send the input file through the parse, create a DOM document for it.
    //
    char cFileName[4000];
    XMLString::transcode(fileName, cFileName, 3999);
    DOMDocument* testDoc = parseFile(cFileName);
    if (testDoc == 0)
        return false;    // parse errors in the source xml.
    //
    //  Pull the "data" element out of the document.
    //
    XMLCh tempStr[4000];
    XMLString::transcode("data", tempStr, 3999);
    DOMNodeList* nl = testDoc->getElementsByTagName(tempStr);
    if (nl->getLength() != 1) {
        fprintf(stderr, "Test file "%s" must have exactly one "data" element.n", cFileName);
        return false;
    };
    DOMNode* tmpNode = nl->item(0);
    DOMElement* data = (DOMElement*) tmpNode;
    //
    //  Build up a string containing the character data contents of the data element.
    //
    DOMNode* child;
    XMLBuffer elData;
    for (child=data->getFirstChild(); child != 0; child= child->getNextSibling())
    {
		if (child->getNodeType() == DOMNode::COMMENT_NODE)
			continue;
        if (! (child->getNodeType() == DOMNode::TEXT_NODE ||
               child->getNodeType() == DOMNode::CDATA_SECTION_NODE ||
               child->getNodeType() == DOMNode::ENTITY_REFERENCE_NODE))
        {
               fprintf(stderr, "Test file "%s": data element contains unexpected children.",
                    cFileName);
               return false;
        }
        elData.append(((DOMCharacterData *)child)->getData());
    };
    //
    //  Pull the "udata" element out of the document
    //
    XMLString::transcode("udata", tempStr, 3999);
    nl = testDoc->getElementsByTagName(tempStr);
    if (nl->getLength() != 1) {
        fprintf(stderr, "Test file "%s" must have exactly one "udata" element.n", cFileName);
        return false;
    };
    DOMNode* tmpNode1 = nl->item(0);
    DOMElement* udata = (DOMElement*) tmpNode1;
    //
    //  Build up a string containing the character data contents of the udata element.
    //  This will consist of a whole bunch hex numbers, still in string from
    //
    XMLBuffer rawUData;
    for (child=udata->getFirstChild(); child != 0; child= child->getNextSibling())
    {
        if (child->getNodeType() == DOMNode::COMMENT_NODE)
            continue;
        if (! (child->getNodeType() == DOMNode::TEXT_NODE ||
            child->getNodeType() == DOMNode::CDATA_SECTION_NODE ||
            child->getNodeType() == DOMNode::ENTITY_REFERENCE_NODE))
        {
            fprintf(stderr, "Test file "%s": udata element contains unexpected children.",
                cFileName);
            return false;
        }
        rawUData.append(((DOMCharacterData *)child)->getData());
    };
    //
    // Convert the raw (hex numbers)  form of the udata to the corresponding string.
    //
    XMLBuffer uData;
    unsigned int rawIndex = 0;
    while (rawIndex < rawUData.getLen())
    {
        eatWhiteSpace(rawUData.getRawBuffer(), rawIndex);
        XMLCh c = convertHexValue(rawUData.getRawBuffer(), rawIndex);
        if (c > 0)
            uData.append(c);
        else
            if (rawIndex < rawUData.getLen())
            {
                fprintf(stderr, "Test file "%s": Bad hex number in udata element.  "
                    "Data character number %dn", cFileName, uData.getLen());
                return false;
            }
    }
    //
    // Compare the two strings.
    //
    unsigned int i;
    for (i=0; i< elData.getLen(); i++)
    {
        XMLCh* elDataRaw = elData.getRawBuffer();
        XMLCh* uDataRaw = uData.getRawBuffer();
        if (i >= uData.getLen())
        {
            fprintf(stderr, "Test file "%s": udata element shorter than data at char number %dn",
                cFileName, i);
            writeUData(elDataRaw);
            return false;
        }
        if (uDataRaw[i] != elDataRaw[i])
        {
            fprintf(stderr, "Test file "%s": comparison failure at character number %dn",
                cFileName, i);
            writeUData(elDataRaw);
            return false;
        };
    }
    if (elData.getLen() != uData.getLen())
    {
        fprintf(stderr, "Test file "%s": udata element longer than data at char number %dn",
            cFileName, i);
        writeUData(elData.getRawBuffer());
        return false;
    }
    return true;
}
int main(int argc, char ** argv) {
   //
    // Initialize the Xerces-c environment
    //
	try
    {
        XMLPlatformUtils::Initialize();
    }
    catch (const XMLException& toCatch)
    {
        fprintf(stderr, "Error during initialization of xerces-c: %sn",
            XMLString::transcode(toCatch.getMessage()));
         return 1;
    }
    //
    // Parse the command line, which should specify exactly one file, which is an
    //   xml file containing the list of test files to be processed.
    //
    if (argc != 2) {
        printf("usage: %s file_name n"
               "   where file name is the xml file specifying the list of test files.", argv[0]);
        return 1;
    }
    DOMDocument* fileListDoc = parseFile(argv[1]);
    if (fileListDoc == 0) return 1;
    //
    // Iterate over the list of files, running each as a test.
    //
    XMLCh tempStr[4000];
    XMLString::transcode("testFile", tempStr, 3999);
    DOMNodeList* list = fileListDoc->getElementsByTagName(tempStr);
    int i;
    int numFiles = list->getLength();
    for (i=0; i<numFiles; i++)
    {
        ++gTestsRun;
        DOMNode* tmpNode3 = list->item(i);
        XMLString::transcode("name", tempStr, 3999);
        const XMLCh* fileName = ((DOMElement*) tmpNode3)->getAttribute(tempStr);
        if (processTestFile(fileName) == false)
            ++gTestsFailed;
    };
    //
    // We are done.  Print out a summary of the results
    //
    printf("Encoding Tests Results Summary: n"
           "   %d encoding tests run.n"
           "   %d tests passed,n"
           "   %d tests failedn", gTestsRun, gTestsRun-gTestsFailed, gTestsFailed);
    delete parser;
    parser = 0;
   return 0;
};