RTHtmlRegex.h
上传用户:qhonly
上传日期:2013-06-10
资源大小:487k
文件大小:3k
- #pragma once
- class CRTHtmlRegex
- {
- public:
- CRTHtmlRegex(void)
- {
- }
- virtual ~CRTHtmlRegex(void)
- {
- }
- BOOL Match(const char * input,char ** start,char **end,char **next)
- {
- if(input == NULL)return NULL;
- if(*input == NULL)return NULL;
- char *ptr = (char *)input;
- while(*ptr != ' ')
- {
- if(*ptr == '<')break;
- ptr ++;
- }
- if(*ptr == ' ')return FALSE;
- ptr ++;
- while(*ptr != ' ' && *ptr != '>')
- {
- if(*ptr == 'h' || *ptr == 'H')
- {
- if(MatchHref(ptr,start,end,next))
- return TRUE;
- }
- else if(*ptr == 's' || *ptr == 'S')
- {
- if(MatchSrc(ptr,start,end,next))
- return TRUE;
- }
- ptr ++;
- }
- if(*ptr == ' ')return FALSE;
- ptr ++;
- return Match(ptr,start,end,next);
- }
- private:
- BOOL MatchHref(const char *input,char **start,char **end,char **next)
- {
- char *ptr = (char *)input;
- ptr++;
- if(*ptr != 'r' && *ptr != 'R')return FALSE;
- ptr++;
- if(*ptr != 'e' && *ptr != 'E')return FALSE;
- ptr ++;
- if(*ptr != 'f' && *ptr != 'F')return FALSE;
- //begin with href
- ptr ++;
- while(*ptr > 0 && *ptr < 33)
- ptr ++;
- if(*ptr != '=')return FALSE;
- ptr++;
- while(*ptr > 0 && *ptr < 33)
- ptr ++;
- return MatchLink(ptr,start,end,next);
-
- }
- BOOL MatchSrc(const char *input,char** start,char** end,char **next)
- {
- char *ptr = (char *)input;
- ptr++;
- if(*ptr != 'r' && *ptr != 'R')return FALSE;
- ptr++;
- if(*ptr != 'c' && *ptr != 'C')return FALSE;
- ptr ++;
- //begin with src
- while(*ptr > 0 && *ptr < 33)
- ptr ++;
- if(*ptr != '=')return FALSE;
- ptr++;
- while(*ptr > 0 && *ptr < 33)
- ptr ++;
- return MatchLink(ptr,start,end,next);
- }
- BOOL MatchLink(const char *input,char **start,char**end,char **next)
- {
- if(*input == NULL)return FALSE;
- char *ptr = (char *)input;
- char *ptrStart = NULL;
- char *ptrEnd = NULL;
- if(*ptr == '"')
- {
- ptr++;
- ptrStart = ptr;
- while(*ptr != ' ')
- {
- if(*ptr == '>')break;
- if(*ptr == '"' || *ptr == '#')
- {
- if(ptrEnd == NULL)ptrEnd = ptr;
- }
- ptr ++;
- }
- if(ptrEnd == NULL)return FALSE;
- }
- else if(*ptr == ''')
- {
- ptr++;
- ptrStart = ptr;
- while(*ptr != ' ')
- {
- if(*ptr == '>')break;
- if(*ptr == ''' || *ptr == '#')
- {
- if(ptrEnd == NULL)ptrEnd = ptr;
- }
- ptr ++;
- }
- if(ptrEnd == NULL)return FALSE;
- }
- else
- {
- ptr++;
- ptrStart = ptr;
- while(*ptr != ' ')
- {
- if(*ptr == '>')
- {
- if(ptrEnd == NULL)ptrEnd = ptr;
- break;
- }
- if(*ptr == '#' || (*ptr > 0 && *ptr < 33))
- {
- if(ptrEnd == NULL)ptrEnd = ptr;
- }
- ptr ++;
- }
- if(ptrEnd == NULL)return FALSE;
- }
- if(*ptr != NULL)ptr ++;
-
- *start = ptrStart;
- *end = ptrEnd;
- *next = ptr;
- return TRUE;
- }
- };