XTPMarkupParser.cpp
上传用户:szled88
上传日期:2015-04-09
资源大小:43957k
文件大小:11k
- // XTPMarkupParser.cpp: implementation of the CXTPMarkupParser class.
- //
- // This file is a part of the XTREME TOOLKIT PRO MFC class library.
- // (c)1998-2008 Codejock Software, All Rights Reserved.
- //
- // THIS SOURCE FILE IS THE PROPERTY OF CODEJOCK SOFTWARE AND IS NOT TO BE
- // RE-DISTRIBUTED BY ANY MEANS WHATSOEVER WITHOUT THE EXPRESSED WRITTEN
- // CONSENT OF CODEJOCK SOFTWARE.
- //
- // THIS SOURCE CODE CAN ONLY BE USED UNDER THE TERMS AND CONDITIONS OUTLINED
- // IN THE XTREME TOOLKIT PRO LICENSE AGREEMENT. CODEJOCK SOFTWARE GRANTS TO
- // YOU (ONE SOFTWARE DEVELOPER) THE LIMITED RIGHT TO USE THIS SOFTWARE ON A
- // SINGLE COMPUTER.
- //
- // CONTACT INFORMATION:
- // support@codejock.com
- // http://www.codejock.com
- //
- /////////////////////////////////////////////////////////////////////////////
- #include "stdafx.h"
- #include "Common/XTPVc80Helpers.h"
- #include "XTPMarkupParser.h"
- // Based on code of Andrew Fedoniouk @ terrainformatica.com
- #ifdef _DEBUG
- #undef THIS_FILE
- static char THIS_FILE[]=__FILE__;
- #define new DEBUG_NEW
- #endif
- //////////////////////////////////////////////////////////////////////
- // Construction/Destruction
- //////////////////////////////////////////////////////////////////////
- CXTPMarkupParser::CXTPMarkupParser()
- : m_cInputChar(0),
- m_nValueLength(0),
- m_nTagNameLength(0),
- m_nAttributeNameLength(0),
- m_bGotTail(FALSE)
- {
- m_lpszPos = NULL;
- m_lpszEnd = NULL;
- m_nLine = 0;
- m_nPosition = 0;
- m_bUnicode = FALSE;
- m_nEncoding = CP_ACP;
- m_scan = &CXTPMarkupParser::ScanBody;
- }
- CXTPMarkupParser::~CXTPMarkupParser()
- {
- }
- void CXTPMarkupParser::SetBuffer(LPCSTR lpszStart, LPCSTR lpszEnd)
- {
- m_lpszPos = lpszStart;
- m_lpszEnd = lpszEnd;
- m_bUnicode = FALSE;
- }
- void CXTPMarkupParser::SetBuffer(LPCWSTR lpszStart, LPCWSTR lpszEnd)
- {
- m_lpszPos = (LPCSTR)lpszStart;
- m_lpszEnd = (LPCSTR)lpszEnd;
- m_bUnicode = TRUE;
- }
- CXTPMarkupParser::TokenType CXTPMarkupParser::GetNextToken()
- {
- return (this->*m_scan)();
- }
- const WCHAR* CXTPMarkupParser::GetValue()
- {
- m_lpszValue[m_nValueLength] = 0;
- return m_lpszValue;
- }
- const WCHAR* CXTPMarkupParser::GetAttributeName()
- {
- m_lpszAttributeName[m_nAttributeNameLength] = 0;
- return m_lpszAttributeName;
- }
- const WCHAR* CXTPMarkupParser::GetTagName()
- {
- m_lpszTagName[m_nTagNameLength] = 0;
- return m_lpszTagName;
- }
- CXTPMarkupParser::TokenType CXTPMarkupParser::ReportError(LPCWSTR lpszError)
- {
- WCSNCPY_S(m_lpszValue, lpszError, 1024);
- m_nValueLength = (int)wcslen(m_lpszValue);
- return tokenError;
- }
- BOOL CXTPMarkupParser::FindFirstTag()
- {
- WCHAR c = GetChar();
- while(c != 0)
- {
- if (c == '<')
- {
- PushBack(c);
- return TRUE;
- }
- else if (!IsWhitespace(c))
- {
- return FALSE;
- }
- c = GetChar();
- }
- return FALSE;
- }
- CXTPMarkupParser::TokenType CXTPMarkupParser::ScanBody()
- {
- WCHAR c = GetChar();
- m_nValueLength = 0;
- BOOL ws = FALSE;
- if (c == 0) return tokenEof;
- else if (c == '<') return ScanTag();
- else if (c == '&')
- c = ScanEntity();
- else
- ws = IsWhitespace(c);
- while(TRUE)
- {
- AppendValue(c);
- c = GetNextChar();
- if (c == 0) { PushBack(c); break; }
- if (c == '<') { PushBack(c); break; }
- if (c == '&') { PushBack(c); break; }
- if (IsWhitespace(c) != ws)
- {
- PushBack(c);
- break;
- }
- }
- return ws? tokenSpace : tokenWord;
- }
- CXTPMarkupParser::TokenType CXTPMarkupParser::ScanHead()
- {
- WCHAR c = SkipWhitespace();
- if (c == '>') { m_scan = &CXTPMarkupParser::ScanBody; return ScanBody(); }
- if (c == '/')
- {
- WCHAR t = GetChar();
- if (t == '>') { m_scan = &CXTPMarkupParser::ScanBody; return tokenTagEnd; }
- else { PushBack(t); return ReportError(L"Unexpected token. The expected token is '>'"); }
- }
- m_nAttributeNameLength = 0;
- m_nValueLength = 0;
- // attribute name...
- while(c != '=')
- {
- if ( c == 0) return tokenEof;
- if ( c == '>' ) return ReportError(L"'>' is an unexpected token. The expected token is '='");
- if ( IsWhitespace(c) )
- {
- c = SkipWhitespace();
- if (c != '=') return ReportError(L"Unexpected token. The expected token is '='");
- else break;
- }
- if ( c == '<') return ReportError(L"'<' is an unexpected token. The expected token is '='");
- AppendAttributeName(c);
- c = GetChar();
- }
- c = SkipWhitespace();
- // attribute m_lpszValue...
- if (c == '"')
- {
- while((c = GetChar()) != NULL)
- {
- if (c == '"') return tokenAttribute;
- AppendValue(c);
- }
- }
- else if (c == ''')
- {
- while((c = GetChar()) != NULL)
- {
- if (c == ''') return tokenAttribute;
- AppendValue(c);
- }
- }
- return ReportError(L"Unexpected token. The expected token is '"' or '''");
- }
- // caller already consumed '<'
- // scan header start or tag tail
- CXTPMarkupParser::TokenType CXTPMarkupParser::ScanTag()
- {
- m_nTagNameLength = 0;
- WCHAR c = GetChar();
- BOOL is_tail = c == '/';
- if (is_tail) c = GetChar();
- else if ( c == '?' )
- {
- m_scan = &CXTPMarkupParser::ScanPI;
- return tokenPIStart;
- }
- while(c)
- {
- if (IsWhitespace(c)) { c = SkipWhitespace(); break; }
- if (c == '/' || c == '>') break;
- AppendTagName(c);
- switch(m_nTagNameLength)
- {
- case 3:
- if (wcsncmp(m_lpszTagName, L"!--", 3) == 0) { m_scan = &CXTPMarkupParser::ScanComment; return tokenCommentStart; }
- break;
- case 8:
- if ( wcsncmp(m_lpszTagName, L"![CDATA[", 8) == 0 ) { m_scan = &CXTPMarkupParser::ScanCData; return tokenCDataStart; }
- break;
- }
- c = GetChar();
- }
- if (c == 0) return ReportError(L"Unexpected end of file has occurred.");
- if (is_tail)
- {
- if (c == '>') return tokenTagEnd;
- return ReportError(L"Unexpected token. The expected token is '>'");
- }
- else
- PushBack(c);
- m_scan = &CXTPMarkupParser::ScanHead;
- return tokenTagStart;
- }
- // skip whitespaces.
- // returns first non-whitespace WCHAR
- WCHAR CXTPMarkupParser::SkipWhitespace()
- {
- for (WCHAR c = GetChar(); c != 0; c = GetChar())
- {
- if (!IsWhitespace(c)) return c;
- }
- return 0;
- }
- void CXTPMarkupParser::PushBack(WCHAR c)
- {
- m_cInputChar = c;
- }
- WCHAR CXTPMarkupParser::GetNextChar()
- {
- if (m_lpszPos >= m_lpszEnd)
- return NULL;
- WCHAR c = 0;
- if (m_bUnicode)
- {
- c = *((LPCWSTR)m_lpszPos);
- m_lpszPos += sizeof(WCHAR);
- }
- else
- {
- char t = *m_lpszPos;
- if (m_nEncoding == CP_UTF8)
- {
- if( 0 == ( t & 'x80' ) )
- {
- c = t;
- }
- else if('xF0' == (t & 'xF0')) // 1111 - error, more than 16-bit char
- {
- }
- else if( 'xE0' == (t & 'xF0')) // 1110xxxx 10xxxxxx 10xxxxxx
- {
- char t2 = *(++m_lpszPos);
- char t3 = *(++m_lpszPos);
- c = (WCHAR)((WCHAR(t & 'x0F') << 12 ) | ( WCHAR(t2 & 'x3F' ) << 6 ) | WCHAR(t3 & 'x3F' ));
- }
- else if( 'xC0' == (t & 'xE0')) // 110xxxxx 10xxxxxx
- {
- char t2 = *(++m_lpszPos);
- c = (WCHAR)((WCHAR( t & 'x1F' ) << 6 ) | ( t2 & 'x3F' ));
- }
- else
- {
- }
- }
- else if (_istlead(t))
- {
- MultiByteToWideChar(m_nEncoding, 0, m_lpszPos, 2, &c, 1);
- m_lpszPos++;
- }
- else if (t > 0 && t < 128)
- {
- c = t;
- }
- else
- {
- MultiByteToWideChar(m_nEncoding, 0, m_lpszPos, 1, &c, 1);
- }
- m_lpszPos++;
- }
- m_nPosition++;
- if (c == 'r' || c == 'n')
- {
- m_nLine++;
- m_nPosition = 0;
- }
- return c;
- }
- WCHAR CXTPMarkupParser::GetChar()
- {
- if (m_cInputChar) { WCHAR t(m_cInputChar); m_cInputChar = 0; return t; }
- return GetNextChar();
- }
- WCHAR CXTPMarkupParser::ResolveEntity(const WCHAR* buf, int buf_size)
- {
- if (buf[0] == '#')
- {
- int nAscii = 0;
- if (buf[1] == 'x' && buf_size > 2)
- {
- if (WSCANF_S(buf + 2, L"%x", &nAscii) != 1)
- return 0;
- return (WCHAR)nAscii;
- }
- else
- {
- if (WSCANF_S(buf + 1, L"%i", &nAscii) != 1)
- return 0;
- return (WCHAR)nAscii;
- }
- }
- return 0;
- }
- // caller consumed '&'
- WCHAR CXTPMarkupParser::ScanEntity()
- {
- WCHAR buf[32];
- int i = 0;
- WCHAR t;
- for (; i < 31 ; ++i )
- {
- t = GetChar();
- if (t == 0) return tokenEof;
- buf[i] = t;
- if (t == ';')
- break;
- }
- buf[i] = 0;
- if (i == 2)
- {
- if (wcsncmp(buf, L"gt", 2) == 0) return '>';
- if (wcsncmp(buf, L"lt", 2) == 0) return '<';
- }
- else if (i == 3 && (wcsncmp(buf, L"amp", 3) == 0))
- return '&';
- else if (i == 4)
- {
- if (wcsncmp(buf, L"apos", 4) == 0) return ''';
- if (wcsncmp(buf, L"quot", 4) == 0) return '"';
- }
- t = ResolveEntity(buf, i);
- if (t) return t;
- // no luck ...
- AppendValue('&');
- for (int n = 0; n < i; ++n)
- AppendValue(buf[n]);
- return ';';
- }
- BOOL CXTPMarkupParser::IsWhitespace(WCHAR c) const
- {
- return c <= ' '
- && (c == ' ' || c == 't' || c == 'n' || c == 'r' || c == 'f');
- }
- void CXTPMarkupParser::AppendValue(WCHAR c)
- {
- if (m_nValueLength < (XTP_MAX_TOKEN_SIZE - 1))
- m_lpszValue[m_nValueLength++] = c;
- }
- void CXTPMarkupParser::AppendAttributeName(WCHAR c)
- {
- if (m_nAttributeNameLength < (XTP_MAX_NAME_SIZE - 1))
- m_lpszAttributeName[m_nAttributeNameLength++] = c;
- }
- void CXTPMarkupParser::AppendTagName(WCHAR c)
- {
- if (m_nTagNameLength < (XTP_MAX_NAME_SIZE - 1))
- m_lpszTagName[m_nTagNameLength++] = c;
- }
- CXTPMarkupParser::TokenType CXTPMarkupParser::ScanComment()
- {
- if (m_bGotTail)
- {
- m_scan = &CXTPMarkupParser::ScanBody;
- m_bGotTail = FALSE;
- return tokenCommentEnd;
- }
- for (m_nValueLength = 0; m_nValueLength < (XTP_MAX_TOKEN_SIZE - 1); ++m_nValueLength)
- {
- WCHAR c = GetChar();
- if ( c == 0) return tokenEof;
- m_lpszValue[m_nValueLength] = c;
- if (m_nValueLength >= 2
- && m_lpszValue[m_nValueLength] == '>'
- && m_lpszValue[m_nValueLength - 1] == '-'
- && m_lpszValue[m_nValueLength - 2] == '-')
- {
- m_bGotTail = TRUE;
- m_nValueLength -= 2;
- break;
- }
- }
- return tokenData;
- }
- CXTPMarkupParser::TokenType CXTPMarkupParser::ScanCData()
- {
- if (m_bGotTail)
- {
- m_scan = &CXTPMarkupParser::ScanBody;
- m_bGotTail = FALSE;
- return tokenCDataEnd;
- }
- for (m_nValueLength = 0; m_nValueLength < (XTP_MAX_TOKEN_SIZE - 1); ++m_nValueLength)
- {
- WCHAR c = GetChar();
- if ( c == 0) return tokenEof;
- m_lpszValue[m_nValueLength] = c;
- if (m_nValueLength >= 2
- && m_lpszValue[m_nValueLength] == '>'
- && m_lpszValue[m_nValueLength - 1] == ']'
- && m_lpszValue[m_nValueLength - 2] == ']')
- {
- m_bGotTail = TRUE;
- m_nValueLength -= 2;
- break;
- }
- }
- return tokenData;
- }
- CXTPMarkupParser::TokenType CXTPMarkupParser::ScanPI()
- {
- if (m_bGotTail)
- {
- m_scan = &CXTPMarkupParser::ScanBody;
- m_bGotTail = FALSE;
- return tokenPIEnd;
- }
- for (m_nValueLength = 0; m_nValueLength < (XTP_MAX_TOKEN_SIZE - 1); ++m_nValueLength)
- {
- WCHAR c = GetChar();
- if ( c == 0)
- return tokenEof;
- if (IsWhitespace(c))
- {
- m_nValueLength--;
- continue;
- }
- m_lpszValue[m_nValueLength] = c;
- if (m_nValueLength >= 1
- && m_lpszValue[m_nValueLength] == '>'
- && m_lpszValue[m_nValueLength - 1] == '?')
- {
- m_bGotTail = TRUE;
- m_nValueLength -= 1;
- break;
- }
- }
- return tokenData;
- }