EncodingTest.cpp
上传用户:zhuqijet
上传日期:2013-06-25
资源大小:10074k
文件大小:16k
- /*
- * The Apache Software License, Version 1.1
- *
- * Copyright (c) 1999-2000 The Apache Software Foundation. All rights
- * reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. The end-user documentation included with the redistribution,
- * if any, must include the following acknowledgment:
- * "This product includes software developed by the
- * Apache Software Foundation (http://www.apache.org/)."
- * Alternately, this acknowledgment may appear in the software itself,
- * if and wherever such third-party acknowledgments normally appear.
- *
- * 4. The names "Xerces" and "Apache Software Foundation" must
- * not be used to endorse or promote products derived from this
- * software without prior written permission. For written
- * permission, please contact apache@apache.org.
- *
- * 5. Products derived from this software may not be called "Apache",
- * nor may "Apache" appear in their name, without prior written
- * permission of the Apache Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * ====================================================================
- *
- * This software consists of voluntary contributions made by many
- * individuals on behalf of the Apache Software Foundation, and was
- * originally based on software copyright (c) 1999, International
- * Business Machines, Inc., http://www.ibm.com . For more information
- * on the Apache Software Foundation, please see
- * <http://www.apache.org/>.
- */
- //---------------------------------------------------------------------
- //
- // This test program is used, in conjunction with a set of test data files,
- // to verify support for different character encodings in XML.
- //
- //---------------------------------------------------------------------
- // ---------------------------------------------------------------------------
- // Includes
- // ---------------------------------------------------------------------------
- #include <xercesc/framework/XMLBuffer.hpp>
- #include <xercesc/util/PlatformUtils.hpp>
- #include <xercesc/util/XMLString.hpp>
- #include <xercesc/util/XMLException.hpp>
- #include <xercesc/sax/SAXException.hpp>
- #include <xercesc/sax/ErrorHandler.hpp>
- #include <xercesc/sax/SAXParseException.hpp>
- #include <xercesc/parsers/XercesDOMParser.hpp>
- #include <xercesc/dom/DOM.hpp>
- #include <stdio.h>
- XERCES_CPP_NAMESPACE_USE // NOTE(review): Xerces-C macro, presumably a using-directive for the library namespace - confirm
- static int gTestsFailed = 0; // Number of test files for which processTestFile() returned false (incremented in main).
- static int gTestsRun = 0; // Number of test files attempted (incremented once per listed file in main).
- static XercesDOMParser* parser = 0; // Shared parser: created on first use in parseFile(), deleted at the end of main().
- //-----------------------------------------------------------------------
- //
- // ErrorHandler. The DOM Parser will report any parsing errors by means
- // of call-backs to the methods of this class.
- // This is just necessary boilerplate, as far as this
- // program is concerned.
- //
- //-----------------------------------------------------------------------
- class ParseErrorHandler: public ErrorHandler
- {
- public:
- void warning(const SAXParseException& e);
- void error(const SAXParseException& e);
- void fatalError(const SAXParseException& e);
- void resetErrors() {};
- };
- void ParseErrorHandler::error(const SAXParseException& e)
- {
- char* systemId = XMLString::transcode(e.getSystemId());
- char* message = XMLString::transcode(e.getMessage());
- fprintf(stderr, "nError at file "%s", line %d, char %d: %sn",
- systemId, e.getLineNumber(),
- e.getColumnNumber(), message);
- XMLString::release(&systemId);
- XMLString::release(&message);
- throw e;
- };
- void ParseErrorHandler::fatalError(const SAXParseException& e)
- {
- char* systemId = XMLString::transcode(e.getSystemId());
- char* message = XMLString::transcode(e.getMessage());
- fprintf(stderr, "nFatal Error at file "%s", line %d, char %d: %sn",
- systemId, e.getLineNumber(),
- e.getColumnNumber(), message);
- XMLString::release(&systemId);
- XMLString::release(&message);
- throw e;
- };
- void ParseErrorHandler::warning(const SAXParseException& e)
- {
- char* systemId = XMLString::transcode(e.getSystemId());
- char* message = XMLString::transcode(e.getMessage());
- fprintf(stderr, "nWarning at file "%s", line %d, char %d: %sn",
- systemId, e.getLineNumber(),
- e.getColumnNumber(), message);
- XMLString::release(&systemId);
- XMLString::release(&message);
- throw e;
- };
- //------------------------------------------------------------------------
- //
- // parseFile - a simpler to use function for just parsing an XML file
- // and getting the DOM Document back.
- //
- //------------------------------------------------------------------------
- static DOMDocument* parseFile(char *fileName)
- {
- ParseErrorHandler eh;
- if (!parser)
- parser = new XercesDOMParser;
- parser->setDoValidation(false);
- parser->setErrorHandler(&eh);
- try
- {
- parser->parse(fileName);
- }
- catch (const XMLException& e )
- {
- fprintf(stderr, "Exception Occurred "%s". n",
- XMLString::transcode(e.getMessage()));
- fprintf(stderr, "File being parsed is "%s".n", fileName);
- return 0; // A null document.
- }
- catch (...)
- {
- fprintf(stderr, "Unexpected Exception thrown during parse of file "%s".n",
- fileName);
- return 0;
- }
- return parser->getDocument();
- }
- //------------------------------------------------------------------------
- //
- // writeUData - Write out a udata xml element for a XMLCh* contents.
- //
- //------------------------------------------------------------------------
- static void writeUData(const XMLCh* s)
- {
- unsigned int i;
- printf("<udata>n");
- size_t len = XMLString::stringLen(s);
- for (i=0; i<len; i++)
- {
- if (i % 16 == 0)
- printf("n");
- XMLCh c = s[i];
- printf("%4x ", c);
- }
- printf("n</udata>n");
- };
- //------------------------------------------------------------------------
- //
- // eatWhiteSpace - XMLCh*s are kind of short on utility functions :-(
- //
- //------------------------------------------------------------------------
- static void eatWhiteSpace(XMLCh* s, unsigned int &i)
- {
- while (i < XMLString::stringLen(s))
- {
- XMLCh c = s[i];
- if (!(c == 0x20 || // These are the official XML space characters,
- c == 0x09 || // expressed as Unicode constants.
- c == 0x0A))
- break;
- i++;
- }
- }
- //------------------------------------------------------------------------
- //
- // convertHexValue if the XMLCh* contains a hex number at position i,
- // convert it and return it, and update i to index the
- // first char not in the string.
- // return 0 if string[i] didn't have a hex digit.
- // 0 return is ambiguous, but it doesn't matter for XML,
- // where 0 is not a valid character.
- //
- //------------------------------------------------------------------------
- static int convertHexValue(XMLCh* s, unsigned int &i)
- {
- int value = 0;
- // For reference, the digits 0-9 are Unicode 0x30-39
- // the letters A-F are Unicode 0x41-0x46
- // the letters a-f are Unicode 0x61-66
- // We can't use character literals - we might be
- // building on an EBCDIC machine.
- while (i < XMLString::stringLen(s))
- {
- XMLCh c = s[i];
- if (c >= 0x61 && c <= 0x66) // Uppercase a-f to A-F.
- c -= 0x20;
- if (c < 0x30 || c >0x46) // Stop if not a hex digit
- break;
- if (c > 0x39 && c <0x41)
- break;
- value = value << 4; // Append this digit to accumulating value
- if (c <= 0x39)
- value += c-0x30;
- else
- value += 0xA + c - 0x41;
- i++;
- }
- return value;
- }
- //------------------------------------------------------------------------
- //
- // processTestFile Given the file name of an encoding test xml file,
- // run it.
- //
- //------------------------------------------------------------------------
- static bool processTestFile(const XMLCh* fileName)
- {
- //
- // Send the input file through the parse, create a DOM document for it.
- //
- char cFileName[4000];
- XMLString::transcode(fileName, cFileName, 3999);
- DOMDocument* testDoc = parseFile(cFileName);
- if (testDoc == 0)
- return false; // parse errors in the source xml.
- //
- // Pull the "data" element out of the document.
- //
- XMLCh tempStr[4000];
- XMLString::transcode("data", tempStr, 3999);
- DOMNodeList* nl = testDoc->getElementsByTagName(tempStr);
- if (nl->getLength() != 1) {
- fprintf(stderr, "Test file "%s" must have exactly one "data" element.n", cFileName);
- return false;
- };
- DOMNode* tmpNode = nl->item(0);
- DOMElement* data = (DOMElement*) tmpNode;
- //
- // Build up a string containing the character data contents of the data element.
- //
- DOMNode* child;
- XMLBuffer elData;
- for (child=data->getFirstChild(); child != 0; child= child->getNextSibling())
- {
- if (child->getNodeType() == DOMNode::COMMENT_NODE)
- continue;
- if (! (child->getNodeType() == DOMNode::TEXT_NODE ||
- child->getNodeType() == DOMNode::CDATA_SECTION_NODE ||
- child->getNodeType() == DOMNode::ENTITY_REFERENCE_NODE))
- {
- fprintf(stderr, "Test file "%s": data element contains unexpected children.",
- cFileName);
- return false;
- }
- elData.append(((DOMCharacterData *)child)->getData());
- };
- //
- // Pull the "udata" element out of the document
- //
- XMLString::transcode("udata", tempStr, 3999);
- nl = testDoc->getElementsByTagName(tempStr);
- if (nl->getLength() != 1) {
- fprintf(stderr, "Test file "%s" must have exactly one "udata" element.n", cFileName);
- return false;
- };
- DOMNode* tmpNode1 = nl->item(0);
- DOMElement* udata = (DOMElement*) tmpNode1;
- //
- // Build up a string containing the character data contents of the udata element.
- // This will consist of a whole bunch hex numbers, still in string from
- //
- XMLBuffer rawUData;
- for (child=udata->getFirstChild(); child != 0; child= child->getNextSibling())
- {
- if (child->getNodeType() == DOMNode::COMMENT_NODE)
- continue;
- if (! (child->getNodeType() == DOMNode::TEXT_NODE ||
- child->getNodeType() == DOMNode::CDATA_SECTION_NODE ||
- child->getNodeType() == DOMNode::ENTITY_REFERENCE_NODE))
- {
- fprintf(stderr, "Test file "%s": udata element contains unexpected children.",
- cFileName);
- return false;
- }
- rawUData.append(((DOMCharacterData *)child)->getData());
- };
- //
- // Convert the raw (hex numbers) form of the udata to the corresponding string.
- //
- XMLBuffer uData;
- unsigned int rawIndex = 0;
- while (rawIndex < rawUData.getLen())
- {
- eatWhiteSpace(rawUData.getRawBuffer(), rawIndex);
- XMLCh c = convertHexValue(rawUData.getRawBuffer(), rawIndex);
- if (c > 0)
- uData.append(c);
- else
- if (rawIndex < rawUData.getLen())
- {
- fprintf(stderr, "Test file "%s": Bad hex number in udata element. "
- "Data character number %dn", cFileName, uData.getLen());
- return false;
- }
- }
- //
- // Compare the two strings.
- //
- unsigned int i;
- for (i=0; i< elData.getLen(); i++)
- {
- XMLCh* elDataRaw = elData.getRawBuffer();
- XMLCh* uDataRaw = uData.getRawBuffer();
- if (i >= uData.getLen())
- {
- fprintf(stderr, "Test file "%s": udata element shorter than data at char number %dn",
- cFileName, i);
- writeUData(elDataRaw);
- return false;
- }
- if (uDataRaw[i] != elDataRaw[i])
- {
- fprintf(stderr, "Test file "%s": comparison failure at character number %dn",
- cFileName, i);
- writeUData(elDataRaw);
- return false;
- };
- }
- if (elData.getLen() != uData.getLen())
- {
- fprintf(stderr, "Test file "%s": udata element longer than data at char number %dn",
- cFileName, i);
- writeUData(elData.getRawBuffer());
- return false;
- }
- return true;
- }
- int main(int argc, char ** argv) {
- //
- // Initialize the Xerces-c environment
- //
- try
- {
- XMLPlatformUtils::Initialize();
- }
- catch (const XMLException& toCatch)
- {
- fprintf(stderr, "Error during initialization of xerces-c: %sn",
- XMLString::transcode(toCatch.getMessage()));
- return 1;
- }
- //
- // Parse the command line, which should specify exactly one file, which is an
- // xml file containing the list of test files to be processed.
- //
- if (argc != 2) {
- printf("usage: %s file_name n"
- " where file name is the xml file specifying the list of test files.", argv[0]);
- return 1;
- }
- DOMDocument* fileListDoc = parseFile(argv[1]);
- if (fileListDoc == 0) return 1;
- //
- // Iterate over the list of files, running each as a test.
- //
- XMLCh tempStr[4000];
- XMLString::transcode("testFile", tempStr, 3999);
- DOMNodeList* list = fileListDoc->getElementsByTagName(tempStr);
- int i;
- int numFiles = list->getLength();
- for (i=0; i<numFiles; i++)
- {
- ++gTestsRun;
- DOMNode* tmpNode3 = list->item(i);
- XMLString::transcode("name", tempStr, 3999);
- const XMLCh* fileName = ((DOMElement*) tmpNode3)->getAttribute(tempStr);
- if (processTestFile(fileName) == false)
- ++gTestsFailed;
- };
- //
- // We are done. Print out a summary of the results
- //
- printf("Encoding Tests Results Summary: n"
- " %d encoding tests run.n"
- " %d tests passed,n"
- " %d tests failedn", gTestsRun, gTestsRun-gTestsFailed, gTestsFailed);
- delete parser;
- parser = 0;
- return 0;
- };