xmlencod.cpp
上传用户:zhongxx05
上传日期:2007-06-06
资源大小:33641k
文件大小:10k
- /* ***** BEGIN LICENSE BLOCK *****
- * Version: RCSL 1.0/RPSL 1.0
- *
- * Portions Copyright (c) 1995-2002 RealNetworks, Inc. All Rights Reserved.
- *
- * The contents of this file, and the files included with this file, are
- * subject to the current version of the RealNetworks Public Source License
- * Version 1.0 (the "RPSL") available at
- * http://www.helixcommunity.org/content/rpsl unless you have licensed
- * the file under the RealNetworks Community Source License Version 1.0
- * (the "RCSL") available at http://www.helixcommunity.org/content/rcsl,
- * in which case the RCSL will apply. You may also obtain the license terms
- * directly from RealNetworks. You may not use this file except in
- * compliance with the RPSL or, if you have a valid RCSL with RealNetworks
- * applicable to this file, the RCSL. Please see the applicable RPSL or
- * RCSL for the rights, obligations and limitations governing use of the
- * contents of the file.
- *
- * This file is part of the Helix DNA Technology. RealNetworks is the
- * developer of the Original Code and owns the copyrights in the portions
- * it created.
- *
- * This file, and the files included with this file, is distributed and made
- * available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS
- * FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- *
- * Technology Compatibility Kit Test Suite(s) Location:
- * http://www.helixcommunity.org/content/tck
- *
- * Contributor(s):
- *
- * ***** END LICENSE BLOCK ***** */
- #include "hxresult.h"
- #include "hxassert.h"
- #include "hxheap.h"
- #include "hxstrutl.h"
- #include "xmlencod.h"
- #include "xmlvalid.h"
- #ifdef _DEBUG
- #undef HX_THIS_FILE
- static const char HX_THIS_FILE[] = __FILE__;
- #endif
- struct xmlEncodingInfo
- {
- char* m_pIANAName; // name of charset, case-insensitive
- BOOL m_bDoubleByte; // true if DBCS charset
- BYTE m_szLeadingByteRange[12]; // up to 5 zero-terminated
- // pairs of lead byte ranges
- } ;
- // Removed static here because it exercises a bug in gcc. pgodman, 3/2/2000
- const xmlEncodingInfo XMLEncodingInfo[] =
- {
- { "Default", TRUE, // assume DBCS; any non-ASCII char is a lead byte
- 0x81, 0xFF, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
- { "US-ASCII", FALSE,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
- { "Shift-JIS", TRUE,
- 0x81, 0x9F, 0xE0, 0xFC, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
- { "Big5", TRUE,
- 0x81, 0xFE, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
- { "GB2312", TRUE,
- 0xA1, 0xFE, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
- { "EUC-KR", TRUE,
- 0x84, 0xD3, 0xD8, 0xD8, 0xD9, 0xDE,
- 0xE0, 0xF9, 0x00, 0x00, 0x00, 0x00 },
- { "ISO-2022-KR", TRUE,
- 0x81, 0xFE, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
- { "ISO-8859-1", FALSE,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
- };
- CHXXMLEncode::CHXXMLEncode(const char* pEncoding,
- BYTE* pBuffer,
- UINT32 ulLength):
- m_ulBufferLength(ulLength),
- m_pBuffer(pBuffer),
- m_pCurrent(pBuffer)
- {
- m_lEncodingIndex = GetEncodingIndex(pEncoding);
- }
- CHXXMLEncode::CHXXMLEncode(const char* pEncoding,
- BYTE* pStr):
- m_pBuffer(pStr),
- m_pCurrent(pStr)
- {
- m_ulBufferLength = strlen((char*)pStr) + 1;
- m_lEncodingIndex = GetEncodingIndex(pEncoding);
- }
- CHXXMLEncode::CHXXMLEncode(const CHXXMLEncode& lhs)
- {
- m_ulBufferLength = lhs.m_ulBufferLength;
- m_pBuffer = lhs.m_pBuffer;
- m_pCurrent = m_pBuffer;
- m_lEncodingIndex = lhs.m_lEncodingIndex;
- }
- CHXXMLEncode::~CHXXMLEncode()
- {
- }
- const CHXXMLEncode&
- CHXXMLEncode::operator=(const CHXXMLEncode& lhs)
- {
- m_ulBufferLength = lhs.m_ulBufferLength;
- m_pBuffer = lhs.m_pBuffer;
- m_pCurrent = m_pBuffer; // start at 0
- m_lEncodingIndex = lhs.m_lEncodingIndex;
- return *this;
- }
- CHXXMLEncode::operator const BYTE*() const
- {
- return (const BYTE*)m_pBuffer;
- }
- BYTE*
- CHXXMLEncode::operator+(int offset)
- {
- BYTE* pCh = m_pCurrent;
- for(int nCount = 0; nCount < offset; ++nCount)
- {
- UINT16 uLen = 0;
- GetNextChar(uLen);
- }
- BYTE* pOffset = m_pCurrent;
- m_pCurrent = pCh;
- return pOffset;
- }
- BYTE*
- CHXXMLEncode::operator+=(int offset)
- {
- BYTE* pCh = m_pCurrent;
- for(int nCount = 0; nCount < offset; ++nCount)
- {
- UINT16 uLen = 0;
- pCh = GetNextChar(uLen);
- }
- return pCh;
- }
- BYTE*
- CHXXMLEncode::operator++(int)
- {
- UINT16 uLen = 0;
- return GetNextChar(uLen);
- }
- BYTE*
- CHXXMLEncode::operator--(int)
- {
- UINT16 uLen = 0;
- return GetPrevChar(uLen);
- }
- BOOL
- CHXXMLEncode::IsLeadByte(BYTE ch)
- {
- int bRange = 0;
- const xmlEncodingInfo* pEncInfo = &(XMLEncodingInfo[m_lEncodingIndex]);
- if(pEncInfo->m_bDoubleByte)
- {
- while((bRange < 12) &&
- (pEncInfo->m_szLeadingByteRange[bRange] != 0))
- {
- if(ch >= pEncInfo->m_szLeadingByteRange[bRange] &&
- ch <= pEncInfo->m_szLeadingByteRange[bRange + 1])
- {
- return TRUE;
- }
- bRange += 2;
- }
- }
- return FALSE;
- }
- BYTE*
- CHXXMLEncode::GetNextChar(UINT16& uLen)
- {
- BYTE* pCh = m_pCurrent;
- if(IsLeadByte(*m_pCurrent))
- {
- m_pCurrent += 2; // skip 2 bytes
- uLen = 2;
- return pCh;
- }
- m_pCurrent++; // skip 1 byte
- uLen = 1;
- return pCh;
- }
- BYTE*
- CHXXMLEncode::GetPrevChar(UINT16& uLen)
- {
- BYTE* pCh = m_pCurrent;
- // assumes the m_pCurrent is on a boundary, i.e. it doesn't
- // point to the middle of a DBCS character
- // if current is at beginning or the previous byte is
- // at the beginning, return current
- if(m_pCurrent - 1 <= m_pBuffer)
- {
- return pCh;
- }
- // point to the previous byte
- BYTE* pTemp = m_pCurrent - 1;
- // if pTemp points to a lead byte, then it must be a trail byte;
- // decrement current 2 bytes to lead byte
- if(IsLeadByte(*pTemp))
- {
- m_pCurrent -= 2;
- pCh = m_pCurrent;
- uLen = 2;
- return pCh;
- }
- // otherwise, step back unil a non-lead byte is found
- while(m_pCurrent <= --pTemp && IsLeadByte(*pTemp))
- {
- ;
- }
- // now pTemp + 1 must point to the beginning of a character,
- // so figure out whether we went back to an even or an odd
- // number of bytes and go back 1 or 2 bytes, respectively.
- m_pCurrent = m_pCurrent - 1 - ((m_pCurrent - pTemp) & 1);
- pCh = m_pCurrent;
- if(IsLeadByte(*pCh))
- {
- uLen = 2;
- }
- else
- {
- uLen = 1;
- }
- return pCh;
- }
- BYTE*
- CHXXMLEncode::SetCurrent(BYTE* pCh)
- {
- // no checking other than boundary;
- // if it is set on the trail byte
- // of a DBCS character, bad things
- // will happen...
- HX_ASSERT(pCh >= m_pBuffer &&
- pCh <= m_pBuffer + m_ulBufferLength);
- m_pCurrent = pCh;
- return m_pCurrent;
- }
- UINT32
- CHXXMLEncode::CharCount()
- {
- UINT32 ulLength = 0;
- BYTE* pCh = m_pBuffer;
- if(XMLEncodingInfo[m_lEncodingIndex].m_bDoubleByte)
- {
- UINT16 uLen = 0;
- for(; *pCh; pCh = GetNextChar(uLen))
- {
- ++ulLength;
- }
- }
- else
- {
- for(; *pCh; pCh++)
- {
- ++ulLength;
- }
- }
- return ulLength;
- }
- INT32
- CHXXMLEncode::GetEncodingIndex(const char* pEncoding)
- {
- INT32 lIndex = 0; // default
- for(INT32 lCount = 0;
- lCount < sizeof(XMLEncodingInfo) / sizeof(XMLEncodingInfo[0]);
- ++lCount)
- {
- if(strcasecmp(pEncoding, XMLEncodingInfo[lCount].m_pIANAName) == 0)
- {
- lIndex = lCount;
- break;
- }
- }
- return lIndex;
- }
- BOOL
- CHXXMLEncode::IsRefValid(const BYTE* p, UINT32 len)
- {
- if (!XMLEncodingInfo[m_lEncodingIndex].m_bDoubleByte)
- {
- return ISO8859Valid::IsRefValid(p, len);
- }
- else
- {
- // TODO: Double byte Validation
- return TRUE;
- }
- }
- BOOL
- CHXXMLEncode::IsNameValid(const BYTE* p, UINT32 len)
- {
- if (!XMLEncodingInfo[m_lEncodingIndex].m_bDoubleByte)
- {
- return ISO8859Valid::IsNameValid(p, len);
- }
- else
- {
- // TODO: Double byte Validation.
- return TRUE;
- }
- }
- BOOL
- CHXXMLEncode::IsNmtokenValid(const BYTE*p, UINT32 len)
- {
- if (!XMLEncodingInfo[m_lEncodingIndex].m_bDoubleByte)
- {
- return ISO8859Valid::IsNmtokenValid(p, len);
- }
- else
- {
- // TODO: Double byte Validation.
- return TRUE;
- }
- }
- BOOL
- CHXXMLEncode::IsEntityValueValid(const BYTE* p, UINT32 len)
- {
- if (!XMLEncodingInfo[m_lEncodingIndex].m_bDoubleByte)
- {
- return ISO8859Valid::IsEntityValueValid(p, len);
- }
- else
- {
- return TRUE;
- }
- }
- BOOL
- CHXXMLEncode::IsAttValueValid(const BYTE* p, UINT32 len)
- {
- if (!XMLEncodingInfo[m_lEncodingIndex].m_bDoubleByte)
- {
- return ISO8859Valid::IsAttValueValid(p, len);
- }
- else
- {
- // TODO: Double byte Validation.
- return TRUE;
- }
- }
- BOOL
- CHXXMLEncode::IsSystemLiteralValid(const BYTE* p, UINT32 len)
- {
- if (!XMLEncodingInfo[m_lEncodingIndex].m_bDoubleByte)
- {
- return ISO8859Valid::IsSystemLiteralValid(p, len);
- }
- else
- {
- // TODO: Double byte Validation.
- return TRUE;
- }
- }
- BOOL
- CHXXMLEncode::IsPubidLiteralValid(const BYTE* p, UINT32 len)
- {
- if (!XMLEncodingInfo[m_lEncodingIndex].m_bDoubleByte)
- {
- return ISO8859Valid::IsPubidLiteralValid(p, len);
- }
- else
- {
- // TODO: Double byte Validation.
- return TRUE;
- }
- }