XMLUCSTranscoder.cpp
上传用户:zhuqijet
上传日期:2013-06-25
资源大小:10074k
文件大小:10k
- /*
- * The Apache Software License, Version 1.1
- *
- * Copyright (c) 1999-2001 The Apache Software Foundation. All rights
- * reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. The end-user documentation included with the redistribution,
- * if any, must include the following acknowledgment:
- * "This product includes software developed by the
- * Apache Software Foundation (http://www.apache.org/)."
- * Alternately, this acknowledgment may appear in the software itself,
- * if and wherever such third-party acknowledgments normally appear.
- *
- * 4. The names "Xerces" and "Apache Software Foundation" must
- * not be used to endorse or promote products derived from this
- * software without prior written permission. For written
- * permission, please contact apache@apache.org.
- *
- * 5. Products derived from this software may not be called "Apache",
- * nor may "Apache" appear in their name, without prior written
- * permission of the Apache Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * ====================================================================
- *
- * This software consists of voluntary contributions made by many
- * individuals on behalf of the Apache Software Foundation, and was
- * originally based on software copyright (c) 1999, International
- * Business Machines, Inc., http://www.ibm.com . For more information
- * on the Apache Software Foundation, please see
- * <http://www.apache.org/>.
- */
- // ---------------------------------------------------------------------------
- // Includes
- // ---------------------------------------------------------------------------
- #include <xercesc/util/BitOps.hpp>
- #include <xercesc/util/XMLUCS4Transcoder.hpp>
- #include <xercesc/util/TranscodingException.hpp>
- #include <string.h>
- XERCES_CPP_NAMESPACE_BEGIN
- // ---------------------------------------------------------------------------
- // XMLUCS4Transcoder: Constructors and Destructor
- // ---------------------------------------------------------------------------
- XMLUCS4Transcoder::XMLUCS4Transcoder(const XMLCh* const encodingName
- , const unsigned int blockSize
- , const bool swapped) :
- XMLTranscoder(encodingName, blockSize)
- , fSwapped(swapped)
- {
- }
- XMLUCS4Transcoder::~XMLUCS4Transcoder()
- {
- }
- // ---------------------------------------------------------------------------
- // XMLUCS4Transcoder: Implementation of the transcoder API
- // ---------------------------------------------------------------------------
- unsigned int
- XMLUCS4Transcoder::transcodeFrom(const XMLByte* const srcData
- , const unsigned int srcCount
- , XMLCh* const toFill
- , const unsigned int maxChars
- , unsigned int& bytesEaten
- , unsigned char* const charSizes)
- {
- // If debugging, make sure that the block size is legal
- #if defined(XERCES_DEBUG)
- checkBlockSize(maxChars);
- #endif
- //
- // Get pointers to the start and end of the source buffer in terms of
- // UCS-4 characters.
- //
- const UCS4Ch* srcPtr = (const UCS4Ch*)srcData;
- const UCS4Ch* srcEnd = srcPtr + (srcCount / sizeof(UCS4Ch));
- //
- // Get pointers to the start and end of the target buffer, which is
- // in terms of the XMLCh chars we output.
- //
- XMLCh* outPtr = toFill;
- XMLCh* outEnd = toFill + maxChars;
- //
- // And get a pointer into the char sizes buffer. We will run this
- // up as we put chars into the output buffer.
- //
- unsigned char* sizePtr = charSizes;
- //
- // Now process chars until we either use up all our source or all of
- // our output space.
- //
- while ((outPtr < outEnd) && (srcPtr < srcEnd))
- {
- //
- // Get the next UCS char out of the buffer. Don't bump the ptr
- // yet since we might not have enough storage for it in the target
- // (if its causes a surrogate pair to be created.
- //
- UCS4Ch nextVal = *srcPtr;
- // If it needs to be swapped, then do it
- if (fSwapped)
- nextVal = BitOps::swapBytes(nextVal);
- // Handle a surrogate pair if needed
- if (nextVal & 0xFFFF0000)
- {
- //
- // If we don't have room for both of the chars, then we
- // bail out now.
- //
- if (outPtr + 1 == outEnd)
- break;
- const XMLCh ch1 = XMLCh(((nextVal - 0x10000) >> 10) + 0xD800);
- const XMLCh ch2 = XMLCh(((nextVal - 0x10000) & 0x3FF) + 0xDC00);
- //
- // We have room so store them both. But note that the
- // second one took up no source bytes!
- //
- *sizePtr++ = sizeof(UCS4Ch);
- *outPtr++ = ch1;
- *sizePtr++ = 0;
- *outPtr++ = ch2;
- }
- else
- {
- //
- // No surrogate, so just store it and bump the count of chars
- // read. Update the char sizes buffer for this char's entry.
- //
- *sizePtr++ = sizeof(UCS4Ch);
- *outPtr++ = XMLCh(nextVal);
- }
- // Indicate that we ate another UCS char's worth of bytes
- srcPtr++;
- }
- // Set the bytes eaten parameter
- bytesEaten = ((const XMLByte*)srcPtr) - srcData;
- // And return the chars written into the output buffer
- return outPtr - toFill;
- }
- unsigned int
- XMLUCS4Transcoder::transcodeTo( const XMLCh* const srcData
- , const unsigned int srcCount
- , XMLByte* const toFill
- , const unsigned int maxBytes
- , unsigned int& charsEaten
- , const UnRepOpts options)
- {
- // If debugging, make sure that the block size is legal
- #if defined(XERCES_DEBUG)
- checkBlockSize(maxBytes);
- #endif
- //
- // Get pointers to the start and end of the source buffer, which
- // is in terms of XMLCh chars.
- //
- const XMLCh* srcPtr = srcData;
- const XMLCh* srcEnd = srcData + srcCount;
- //
- // Get pointers to the start and end of the target buffer, in terms
- // of UCS-4 chars.
- //
- UCS4Ch* outPtr = (UCS4Ch*)toFill;
- UCS4Ch* outEnd = outPtr + (maxBytes / sizeof(UCS4Ch));
- //
- // Now loop until we either run out of source characters or we
- // fill up our output buffer.
- //
- XMLCh trailCh;
- while ((outPtr < outEnd) && (srcPtr < srcEnd))
- {
- //
- // Get out an XMLCh char from the source. Don't bump up the
- // pointer yet, since it might be a leading for which we don't
- // have the trailing.
- //
- const XMLCh curCh = *srcPtr;
- //
- // If its a leading char of a surrogate pair handle it one way,
- // else just cast it over into the target.
- //
- if ((curCh >= 0xD800) && (curCh <= 0xDBFF))
- {
- //
- // Ok, we have to have another source char available or we
- // just give up without eating the leading char.
- //
- if (srcPtr + 1 == srcEnd)
- break;
- //
- // We have the trailing char, so eat the first char and the
- // trailing char from the source.
- //
- srcPtr++;
- trailCh = *srcPtr++;
- //
- // Then make sure its a legal trailing char. If not, throw
- // an exception.
- //
- if ( !( (trailCh >= 0xDC00) && (trailCh <= 0xDFFF) ) )
- ThrowXML(TranscodingException, XMLExcepts::Trans_BadTrailingSurrogate);
- // And now combine the two into a single output char
- *outPtr++ = ((curCh - 0xD800) << 10)
- + (trailCh - 0xDC00) + 0x10000;
- }
- else
- {
- //
- // Its just a char, so we can take it as is. If we need to
- // swap it, then swap it. Because of flakey compilers, use
- // a temp first.
- //
- const UCS4Ch tmpCh = UCS4Ch(curCh);
- if (fSwapped)
- *outPtr++ = BitOps::swapBytes(tmpCh);
- else
- *outPtr++ = tmpCh;
- // Bump the source pointer
- srcPtr++;
- }
- }
- // Set the chars we ate from the source
- charsEaten = srcPtr - srcData;
- // Return the bytes we wrote to the output
- return ((XMLByte*)outPtr) - toFill;
- }
- bool XMLUCS4Transcoder::canTranscodeTo(const unsigned int toCheck) const
- {
- // We can handle anything
- return true;
- }
- XERCES_CPP_NAMESPACE_END