html.c
资源名称:gateway-1.2.1 [点击查看]
上传用户:gzpyjq
上传日期:2013-01-31
资源大小:1852k
文件大小:6k
源码类别:
手机WAP编程
开发平台:
WINDOWS
- /*
- * html.c - routines for manipulating HTML.
- *
- * Lars Wirzenius
- */
- #include <ctype.h>
- #include <stdio.h>
- #include <string.h>
- #include "html.h"
- #include "gwlib/gwlib.h"
- #define SMS_MAX 161
- /* Is there a comment beginning at offset `pos'? */
- static int html_comment_begins(Octstr *html, long pos)
- {
- char buf[10];
- octstr_get_many_chars(buf, html, pos, 4);
- buf[5] = ' ';
- return strcmp(buf, "<!--") == 0;
- }
- /* Skip a comment in HTML. */
- static void skip_html_comment(Octstr *html, long *pos)
- {
- long i;
- *pos += 4; /* Skip "<!--" at beginning of comment. */
- i = octstr_search(html, octstr_imm("-->"), *pos);
- if (i == -1)
- *pos = octstr_len(html);
- else
- *pos = i;
- }
- /* Skip a beginning or ending tag in HTML, including any attributes. */
- static void skip_html_tag(Octstr *html, long *pos)
- {
- long i, len;
- int c;
- /* Skip leading '<'. */
- ++(*pos);
- /* Skip name of tag and attributes with values. */
- len = octstr_len(html);
- while (*pos < len && (c = octstr_get_char(html, *pos)) != '>') {
- if (c == '"' || c == ''') {
- i = octstr_search_char(html, c, *pos + 1);
- if (i == -1)
- *pos = len;
- else
- *pos = i + 1;
- } else
- ++(*pos);
- }
- /* Skip trailing '>' if it is there. */
- if (octstr_get_char(html, *pos) == '>')
- ++(*pos);
- }
- /* Convert an HTML entity into a single character and advance `*html' past
- the entity. */
- static void convert_html_entity(Octstr *sms, Octstr *html, long *pos)
- {
- static struct {
- char *entity;
- int latin1;
- }
- tab[] = {
- { "&", '&' },
- { "<", '<' },
- { ">", '>' },
- /* The following is copied from
- http://www.hut.fi/~jkorpela/HTML3.2/latin1.html
- by Jukka Korpela. Hand and script edited to form this
- table. */
- { " ", ' ' },
- { "¡", 161 },
- { "¢", 162 },
- { "£", 163 },
- { "¤", 164 },
- { "¥", 165 },
- { "¦", 166 },
- { "§", 167 },
- { "¨", 168 },
- { "©", 169 },
- { "ª", 170 },
- { "«", 171 },
- { "¬", 172 },
- { "­", 173 },
- { "®", 174 },
- { "¯", 175 },
- { "°", 176 },
- { "±", 177 },
- { "²", 178 },
- { "³", 179 },
- { "´", 180 },
- { "µ", 181 },
- { "¶", 182 },
- { "·", 183 },
- { "¸", 184 },
- { "¹", 185 },
- { "º", 186 },
- { "»", 187 },
- { "¼", 188 },
- { "½", 189 },
- { "¾", 190 },
- { "¿", 191 },
- { "À", 192 },
- { "Á", 193 },
- { "Â", 194 },
- { "Ã", 195 },
- { "Ä", 196 },
- { "Å", 197 },
- { "Æ", 198 },
- { "Ç", 199 },
- { "È", 200 },
- { "É", 201 },
- { "Ê", 202 },
- { "Ë", 203 },
- { "Ì", 204 },
- { "Í", 205 },
- { "Î", 206 },
- { "Ï", 207 },
- { "Ð", 208 },
- { "Ñ", 209 },
- { "Ò", 210 },
- { "Ó", 211 },
- { "Ô", 212 },
- { "Õ", 213 },
- { "Ö", 214 },
- { "×", 215 },
- { "Ø", 216 },
- { "Ù", 217 },
- { "Ú", 218 },
- { "Û", 219 },
- { "Ü", 220 },
- { "Ý", 221 },
- { "Þ", 222 },
- { "ß", 223 },
- { "à", 224 },
- { "á", 225 },
- { "â", 226 },
- { "ã", 227 },
- { "ä", 228 },
- { "å", 229 },
- { "æ", 230 },
- { "ç", 231 },
- { "è", 232 },
- { "é", 233 },
- { "ê", 234 },
- { "ë", 235 },
- { "ì", 236 },
- { "í", 237 },
- { "î", 238 },
- { "ï", 239 },
- { "ð", 240 },
- { "ñ", 241 },
- { "ò", 242 },
- { "ó", 243 },
- { "ô", 244 },
- { "õ", 245 },
- { "ö", 246 },
- { "÷", 247 },
- { "ø", 248 },
- { "ù", 249 },
- { "ú", 250 },
- { "û", 251 },
- { "ü", 252 },
- { "ý", 253 },
- { "þ", 254 },
- { "ÿ", 255 },
- };
- int num_tab = sizeof(tab) / sizeof(tab[0]);
- long i, code;
- size_t len;
- char buf[1024];
- if (octstr_get_char(html, (*pos) + 1) == '#') {
- i = octstr_parse_long(&code, html, (*pos) + 2, 10);
- if (i > 0) {
- if (code < 256)
- octstr_append_char(sms, code);
- *pos = i + 1;
- if (octstr_get_char(html, *pos) == ';')
- ++(*pos);
- }
- } else {
- for (i = 0; i < num_tab; ++i) {
- len = strlen(tab[i].entity);
- octstr_get_many_chars(buf, html, *pos, len);
- buf[len] = ' ';
- if (strcmp(buf, tab[i].entity) == 0) {
- *pos += len;
- octstr_append_char(sms, tab[i].latin1);
- break;
- }
- }
- if (i == num_tab) {
- ++(*pos);
- octstr_append_char(sms, '&');
- }
- }
- }
- Octstr *html_to_sms(Octstr *html)
- {
- long i, len;
- int c;
- Octstr *sms;
- sms = octstr_create("");
- len = octstr_len(html);
- i = 0;
- while (i < len) {
- c = octstr_get_char(html, i);
- switch (c) {
- case '<':
- if (html_comment_begins(html, i))
- skip_html_comment(html, &i);
- else
- skip_html_tag(html, &i);
- break;
- case '&':
- convert_html_entity(sms, html, &i);
- break;
- default:
- octstr_append_char(sms, c);
- ++i;
- break;
- }
- }
- octstr_shrink_blanks(sms);
- octstr_strip_blanks(sms);
- return sms;
- }