/*
 * Mobile WAP programming
 *
 * Development platform: Windows
 *
 * html.c: source code contents
 */
							/*
 * html.c - routines for manipulating HTML.
 *
 * Lars Wirzenius
 */
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include "html.h"
#include "gwlib/gwlib.h"
#define SMS_MAX 161
/*
 * Is there a comment beginning at offset `pos'?
 *
 * Copies four octets starting at `pos' into a local buffer and compares
 * them with the comment opener "<!--". Returns non-zero when they match
 * and zero otherwise.
 */
static int html_comment_begins(Octstr *html, long pos)
{
    /* Zero-filled so that the comparison below never reads indeterminate
       bytes. NOTE(review): this assumes octstr_get_many_chars leaves the
       remainder of the buffer untouched when fewer than four octets are
       available at `pos' -- confirm against gwlib. */
    char buf[10] = { 0 };

    octstr_get_many_chars(buf, html, pos, 4);
    /* Terminate directly after the four copied octets. */
    buf[4] = '\0';
    return strcmp(buf, "<!--") == 0;
}
/*
 * Skip a comment in HTML.
 *
 * On entry `*pos' is the offset of the "<!--" that opens the comment.
 * On return `*pos' is the offset just past the closing "-->", or the
 * length of `html' when the comment is never closed.
 */
static void skip_html_comment(Octstr *html, long *pos)
{
    long end;

    *pos += 4; 	/* Skip "<!--" at beginning of comment. */
    end = octstr_search(html, octstr_imm("-->"), *pos);
    if (end == -1) {
        /* Unterminated comment: everything up to the end is comment. */
        *pos = octstr_len(html);
    } else {
        /* Step over the "-->" as well; stopping on it would make the
           caller copy the terminator into the output as ordinary text. */
        *pos = end + 3;
    }
}
/*
 * Skip a beginning or ending tag in HTML, including any attributes.
 *
 * On entry `*pos' is the offset of the '<' that opens the tag. On return
 * `*pos' is the offset just past the closing '>', or the length of `html'
 * when the tag (or a quoted attribute value inside it) is never closed.
 * A '>' inside a single- or double-quoted attribute value does not end
 * the tag.
 */
static void skip_html_tag(Octstr *html, long *pos)
{
    long i, len;
    int c;

    /* Skip leading '<'. */
    ++(*pos);

    /* Skip name of tag and attributes with values. */
    len = octstr_len(html);
    while (*pos < len && (c = octstr_get_char(html, *pos)) != '>') {
        if (c == '"' || c == '\'') {
            /* Jump to the matching closing quote of the same kind. */
            i = octstr_search_char(html, c, *pos + 1);
            if (i == -1)
                *pos = len;
            else
                *pos = i + 1;
        } else {
            ++(*pos);
        }
    }

    /* Skip trailing '>' if it is there. The loop above only stops before
       the end of the string when the current octet is '>', so a bounds
       test is enough and avoids reading at an out-of-range offset. */
    if (*pos < len)
        ++(*pos);
}
/*
 * Convert an HTML entity into a single character and advance `*pos' past
 * the entity.
 *
 * sms   output string; the converted character is appended to it.
 * html  input string being scanned.
 * pos   in/out offset into `html'; on entry it is the offset of the '&'.
 *
 * Numeric entities ("&#NNN;", decimal) append the character with that
 * code when it lies in 0..255; other codes are consumed without output.
 * Named entities are looked up in the table below. Anything that is not
 * recognised is treated as a literal '&': the ampersand is appended and
 * `*pos' moves forward by one. `*pos' always advances, so a caller that
 * loops on it is guaranteed to make progress.
 */
static void convert_html_entity(Octstr *sms, Octstr *html, long *pos)
{
    static const struct {
        const char *entity;
        int latin1;
    }
    tab[] = {
        { "&amp;", '&' },
        { "&lt;", '<' },
        { "&gt;", '>' },
        /* The following is copied from
        	http://www.hut.fi/~jkorpela/HTML3.2/latin1.html
           by Jukka Korpela. Hand and script edited to form this
           table. */
        { "&nbsp;", ' ' },
        { "&iexcl;", 161 },
        { "&cent;", 162 },
        { "&pound;", 163 },
        { "&curren;", 164 },
        { "&yen;", 165 },
        { "&brvbar;", 166 },
        { "&sect;", 167 },
        { "&uml;", 168 },
        { "&copy;", 169 },
        { "&ordf;", 170 },
        { "&laquo;", 171 },
        { "&not;", 172 },
        { "&shy;", 173 },
        { "&reg;", 174 },
        { "&macr;", 175 },
        { "&deg;", 176 },
        { "&plusmn;", 177 },
        { "&sup2;", 178 },
        { "&sup3;", 179 },
        { "&acute;", 180 },
        { "&micro;", 181 },
        { "&para;", 182 },
        { "&middot;", 183 },
        { "&cedil;", 184 },
        { "&sup1;", 185 },
        { "&ordm;", 186 },
        { "&raquo;", 187 },
        { "&frac14;", 188 },
        { "&frac12;", 189 },
        { "&frac34;", 190 },
        { "&iquest;", 191 },
        { "&Agrave;", 192 },
        { "&Aacute;", 193 },
        { "&Acirc;", 194 },
        { "&Atilde;", 195 },
        { "&Auml;", 196 },
        { "&Aring;", 197 },
        { "&AElig;", 198 },
        { "&Ccedil;", 199 },
        { "&Egrave;", 200 },
        { "&Eacute;", 201 },
        { "&Ecirc;", 202 },
        { "&Euml;", 203 },
        { "&Igrave;", 204 },
        { "&Iacute;", 205 },
        { "&Icirc;", 206 },
        { "&Iuml;", 207 },
        { "&ETH;", 208 },
        { "&Ntilde;", 209 },
        { "&Ograve;", 210 },
        { "&Oacute;", 211 },
        { "&Ocirc;", 212 },
        { "&Otilde;", 213 },
        { "&Ouml;", 214 },
        { "&times;", 215 },
        { "&Oslash;", 216 },
        { "&Ugrave;", 217 },
        { "&Uacute;", 218 },
        { "&Ucirc;", 219 },
        { "&Uuml;", 220 },
        { "&Yacute;", 221 },
        { "&THORN;", 222 },
        { "&szlig;", 223 },
        { "&agrave;", 224 },
        { "&aacute;", 225 },
        { "&acirc;", 226 },
        { "&atilde;", 227 },
        { "&auml;", 228 },
        { "&aring;", 229 },
        { "&aelig;", 230 },
        { "&ccedil;", 231 },
        { "&egrave;", 232 },
        { "&eacute;", 233 },
        { "&ecirc;", 234 },
        { "&euml;", 235 },
        { "&igrave;", 236 },
        { "&iacute;", 237 },
        { "&icirc;", 238 },
        { "&iuml;", 239 },
        { "&eth;", 240 },
        { "&ntilde;", 241 },
        { "&ograve;", 242 },
        { "&oacute;", 243 },
        { "&ocirc;", 244 },
        { "&otilde;", 245 },
        { "&ouml;", 246 },
        { "&divide;", 247 },
        { "&oslash;", 248 },
        { "&ugrave;", 249 },
        { "&uacute;", 250 },
        { "&ucirc;", 251 },
        { "&uuml;", 252 },
        { "&yacute;", 253 },
        { "&thorn;", 254 },
        { "&yuml;", 255 },
    };
    const long num_tab = (long) (sizeof(tab) / sizeof(tab[0]));
    long i, code;
    size_t len;
    /* Scratch space for one candidate entity name; the longest name in
       the table is 8 characters. Zero-filled so that strcmp never reads
       indeterminate bytes. NOTE(review): this assumes
       octstr_get_many_chars leaves the rest of the buffer untouched when
       fewer than `len' octets remain in `html' -- confirm against gwlib. */
    char buf[32] = { 0 };

    if (octstr_get_char(html, (*pos) + 1) == '#') {
        /* Numeric character reference: parse the decimal code after "&#". */
        i = octstr_parse_long(&code, html, (*pos) + 2, 10);
        if (i > 0) {
            /* Only codes that fit a single octet can be represented;
               negative or larger values are dropped. */
            if (code >= 0 && code < 256)
                octstr_append_char(sms, (int) code);
            /* NOTE(review): kept from the original; whether `i + 1' is
               the first offset after the digits depends on the return
               convention of octstr_parse_long -- confirm. */
            *pos = i + 1;
            if (octstr_get_char(html, *pos) == ';')
                ++(*pos);
        } else {
            /* "&#" without a parsable number. Treat the '&' as plain text
               and step over it; leaving `*pos' unchanged here would make
               the caller's scan loop spin forever on the same offset. */
            ++(*pos);
            octstr_append_char(sms, '&');
        }
    } else {
        /* Named entity: compare the text at `*pos' with each table entry. */
        for (i = 0; i < num_tab; ++i) {
            len = strlen(tab[i].entity);
            octstr_get_many_chars(buf, html, *pos, len);
            buf[len] = '\0';
            if (strcmp(buf, tab[i].entity) == 0) {
                *pos += len;
                octstr_append_char(sms, tab[i].latin1);
                break;
            }
        }
        if (i == num_tab) {
            /* Unknown entity: pass the '&' through unchanged. */
            ++(*pos);
            octstr_append_char(sms, '&');
        }
    }
}
/*
 * Produce a plain-text rendering of `html'.
 *
 * Scans `html' from start to end: comments and tags are skipped,
 * entities are converted by convert_html_entity, and every other octet
 * is copied through unchanged. The accumulated text is then passed to
 * octstr_shrink_blanks and octstr_strip_blanks before being returned.
 *
 * Returns a string obtained from octstr_create; `html' itself is only
 * read here. NOTE(review): the caller presumably owns the returned
 * string and must destroy it -- confirm against gwlib conventions.
 */
Octstr *html_to_sms(Octstr *html)
{
    Octstr *out = octstr_create("");
    const long total = octstr_len(html);
    long cursor = 0;

    while (cursor < total) {
        int ch = octstr_get_char(html, cursor);

        if (ch == '<') {
            /* Markup: each helper moves `cursor' forward itself. */
            if (html_comment_begins(html, cursor))
                skip_html_comment(html, &cursor);
            else
                skip_html_tag(html, &cursor);
        } else if (ch == '&') {
            convert_html_entity(out, html, &cursor);
        } else {
            /* Ordinary text is copied as-is. */
            octstr_append_char(out, ch);
            ++cursor;
        }
    }

    octstr_shrink_blanks(out);
    octstr_strip_blanks(out);
    return out;
}