- Visual C++源码
- Visual Basic源码
- C++ Builder源码
- Java源码
- Delphi源码
- C/C++源码
- PHP源码
- Perl源码
- Python源码
- Asm源码
- Pascal源码
- Borland C++源码
- Others源码
- SQL源码
- VBScript源码
- JavaScript源码
- ASP/ASPX源码
- C#源码
- Flash/ActionScript源码
- matlab源码
- PowerBuilder源码
- LabView源码
- Flex源码
- MathCAD源码
- VBA源码
- IDL源码
- Lisp/Scheme源码
- VHDL源码
- Objective-C源码
- Fortran源码
- tcl/tk源码
- QT源码
clex.c
资源名称:parse.tar.Z [点击查看]
上传用户:hbdengju
上传日期:2007-01-06
资源大小:11k
文件大小:15k
源码类别:
编译器/解释器
开发平台:
C/C++
- #ifndef INCLUDED_STREAM
- #include <stream.h>
- #endif
- #ifndef INCLUDED_STRING
- #include <string.h>
- #endif
- #ifndef INCLUDED_STDLIB
- #include <stdlib.h>
- #endif
- #ifndef INCLUDED_ASSERT
- #include <assert.h>
- #endif
- #ifndef INCLUDED_CTYPE
- #include <ctype.h>
- #endif
- #include "clex.h"
- // get string value tables, sym_str[] and keyword[] :
- #define CLEX_IMPLEMENTATION 1
- #include "clex_sym.h"
- /******************************************************************************
- * *
- * KWTABLE -- keyword hash table (internal use only) *
- * KWtable implements a collision-free hash table of C++ keywords. The *
- * table size and hash function are computed by use of a standalone C *
- * program, kwhash.c, included in this directory. *
- * *
- ******************************************************************************/
- #define U_short unsigned short
- #define U_char unsigned char
- struct KWtable
- {
- enum { HASHSIZE = 131 }; // as computed by kwhash.c, for a=9,b=2,c=2
- struct {
- char* kwp;
- Clex_sym sym;
- } kwhash[HASHSIZE];
- KWtable(char**);
- U_short hash(const U_char*, U_short len);
- void insert(char*, Clex_sym);
- Clex_sym lookup(char*, short len);
- };
- static KWtable kwt = KWtable(keywords); // keywords[] defined in Clex_sym.h
- KWtable::
- KWtable (char** kwl)
- {
- short int i;
- for (i = 0; i < HASHSIZE; ++i)
- kwhash[i].kwp = NULL;
- for (i = 0; i < CLEX_NUMKEYS; ++i)
- insert(kwl[i], KEYWORD_S + i);
- // rely on assert() to prevent hash collisions -- may need
- // a new hash function or table size when keyword added.
- }
- // the values used in the following hash function, and HASHSIZE, were
- // determined by use of the standalone C program kwhash.c, to
- // ensure that no collisions occur.
- inline
- U_short KWtable::
- hash (const U_char* cp, U_short len)
- {
- return (((U_short)cp[0] ) ^
- ((U_short)cp[1] << 9) ^
- ((U_short)cp[len-1] << 2) ^
- (len << 2) ) % HASHSIZE;
- }
- void KWtable::
- insert (char* cp, Clex_sym s)
- {
- U_short h = hash(cp, strlen(cp));
- assert(kwt.kwhash[h].kwp == NULL); // collisions not permitted.
- kwt.kwhash[h].kwp = cp;
- kwt.kwhash[h].sym = s;
- }
- Clex_sym KWtable::
- lookup (char* cp, short len)
- {
- if (len < 2 || len > 9) return (IDENT_S);
- short h = hash(cp, len);
- if (kwt.kwhash[h].kwp == NULL) return (IDENT_S);
- if (strcmp(kwt.kwhash[h].kwp, cp)) return (IDENT_S);
- return (kwt.kwhash[h].sym);
- }
- /******************************************************************************
- * *
- * CLEX -- c++ lexical scanner *
- * *
- ******************************************************************************/
- // CONSTRUCTOR Clex:
- // The argument block_brack, if TRUE, dictates that the contents
- // of square brackets "[]" be returned as a string in the string
- // buffer. If false, square brackets are treated as simple tokens.
- Clex::
- Clex (FILE* f, Boolean b)
- {
- fp = f;
- block_brack = b;
- filename[0] = '';
- bufsiz = 0; buf[0] = '';
- // prime the pipeline:
- line_num = 0;
- look = 'n'; // be prepared to handle '#' as first char
- }
- Clex_sym Clex::
- num (char c)
- {
- Clex_sym s = NUM_S;
- bufsiz = 0;
- put_in_buf(c);
- while (isdigit(look))
- buf_one();
- // hexadecimal
- if (bufsiz == 1 && *buf == '0' && (look == 'x' || look == 'X'))
- {
- do { buf_one(); }
- while (isxdigit(look));
- if (look == 'L' || look == 'l' || look == 'U' || look == 'u')
- buf_one();
- return terminate(s);
- }
- // long or unsigned
- if (look == 'L' || look == 'l' || look == 'U' || look == 'u')
- { buf_one(); return terminate(NUM_S); }
- // floating point
- else if (look == '.')
- {
- s = FLOATNUM_S;
- do { buf_one(); }
- while (isdigit(look));
- }
- // scientific notation
- if (look == 'e' || look == 'E')
- {
- s = FLOATNUM_S;
- do { buf_one(); }
- while (isdigit(look));
- }
- else
- return terminate(s);
- if (look == '+' || look == '-')
- do { buf_one(); }
- while (isdigit(look));
- return terminate(s);
- }
- Clex_sym Clex::
- ident (char first)
- {
- register Boolean maybe_kw = TRUE;
- register short bs = 0;
- buf[bs++] = first;
- while (isalnum(look) || look == '_' || look == '$')
- {
- // note: this function accounts for 30% of the total scan time
- if (maybe_kw && (isupper(look) || look == '_' ))
- maybe_kw = FALSE;
- buf[bs++] = look; // don't worry about overflow
- eat_one();
- }
- buf[bs] = '';
- bufsiz = bs;
- if (maybe_kw)
- return kwt.lookup(buf, bufsiz);
- return IDENT_S;
- }
- Clex_sym Clex::
- quote (char c, Clex_sym s, Clex_mode m)
- {
- if (m == CL_NONE)
- bufsiz = 0;
- while (look != c)
- {
- if (look == EOF)
- { return terminate(ERROR_EOF_S); }
- else if (look == 'n')
- { return terminate(ERROR_EOLN_S); }
- else if (look == '\')
- {
- eat_one();
- if (look == 'n')
- { eat_one(); eoln(m|CL_QUOTE); continue; }
- else if (look == EOF)
- { return terminate(ERROR_EOF_S); }
- else
- put_in_buf('\'); // this handles ' and " too.
- }
- buf_one();
- }
- eat_one(); // eat the closing quote
- return terminate(s);
- }
- // lbrack() accumulates the contents between "[" and "]" into
- // the string buffer, handling syntactically quoted strings,
- // comments, and nested brackets. Note that lbrack() is
- // called recursively in the case of nested brackets.
- Clex_sym Clex::
- lbrack (Clex_mode m)
- {
- if (m == CL_NONE)
- bufsiz = 0;
- while (look != ']')
- {
- if (look == EOF)
- return terminate(ERROR_EOF_S);
- else if (look == 'n')
- { eat_one(); eoln(m|CL_BRACK); }
- else if (look == '[')
- {
- buf_one();
- if (lbrack(m|CL_BRACK) == ERROR_EOF_S)
- return ERROR_EOF_S; // already cleaned up.
- else put_in_buf(']');
- }
- else if (look == ''' || look == '"')
- {
- char c = look;
- buf_one();
- (void) quote(c, NONE_S, m|CL_BRACK);
- put_in_buf(c);
- }
- else if (look == '/') // maybe a comment
- {
- eat_one();
- if (look == '/')
- line_comment();
- else if (look == '*')
- {
- block_comment(m|CL_BRACK);
- if (look == EOF) return terminate(ERROR_EOF_S);
- }
- else // stash the '/' and the char after
- { put_in_buf('/'); buf_one(); }
- }
- else // just a character to save
- buf_one();
- }
- eat_one(); // eat the ']'.
- return terminate(LBRACK_S);
- }
- void Clex::
- block_comment(Clex_mode m)
- {
- eat_one(); // eat the '*'
- while (! (look == '*' && (eat_one(), look == '/')) )
- {
- if (look == EOF) return;
- if (look == 'n') { eat_one(); eoln(m|CL_COMMENT); }
- else if (look != '*') eat_one();
- }
- eat_one(); // eat the '/'
- }
- void Clex::
- line_comment()
- {
- do { eat_one(); }
- while (look != 'n' && look != EOF);
- }
- // eat_return() is intended to save space in Clex::next() -- the
- // inline function eat_one() produces quite a lot of code.
- Clex_sym Clex::
- eat_return(Clex_sym s)
- { eat_one(); return s; }
- Clex_sym Clex::
- next()
- {
- short val;
- while (val = look, eat_one(), val != EOF)
- {
- char ch = char(val);
- switch (ch)
- {
- case ' ' : continue;
- case '_' :
- case '$' : return ident(ch);
- case '0' : case '1' : case '2' : case '3' : case '4' :
- case '5' : case '6' : case '7' : case '8' : case '9' :
- return num(ch);
- case ',' : return COMMA_S;
- case ';' : return SEMI_S;
- case '[' : if (block_brack) return lbrack(CL_NONE);
- else return LBRACK_S;
- case ']' : return RBRACK_S;
- case '{' : return LBRACE_S;
- case '}' : return RBRACE_S;
- case '(' : return LPAR_S;
- case ')' : return RPAR_S;
- case '~' : return TILDE_S;
- case '?' : return QUEST_S;
- case '"' : return quote(ch, QUOTE_S, CL_NONE);
- case ''': return quote(ch, APOS_S, CL_NONE);
- case '=' : // '=', '=='
- if (look != '=') return AS_S;
- else return eat_return(EQ_S);
- case ':' : // ":", "::"
- if (look != ':') return COLON_S;
- else return eat_return(SCOPE_S);
- case '!' : // "!", "!="
- if (look != '=') return BANG_S;
- else return eat_return(NE_S);
- case '^' : // "^", "^="
- if (look != '=') return CARET_S;
- else return eat_return(XORAS_S);
- case '*' : // '*', '*='
- if (look != '=') return STAR_S;
- else return eat_return(MULAS_S);
- case '%' : // '%', '%='
- if (look != '=') return MOD_S;
- else return eat_return(MODAS_S);
- case '|' : // "|=", "||", "|"
- if (look == '|') return eat_return(LOR_S);
- else if (look == '=') return eat_return(ORAS_S);
- else return VBAR_S;
- case '&' : // "&", "&=", "&&"
- if (look == '&') return eat_return(LAND_S);
- else if (look == '=') return eat_return(ANDAS_S);
- else return AMPER_S;
- case '+' : // '+', '++', '+='
- if (look == '+') return eat_return(INCRE_S);
- else if (look == '=') return eat_return(ADDAS_S);
- else return PLUS_S;
- case '-' : // '--', '-=', '->', '-',
- if (look == '-') return eat_return(DECRE_S);
- else if (look == '=') return eat_return(SUBAS_S);
- else if (look == '>') return eat_return(DEREF_S);
- else return MINUS_S;
- case '/' : // '/*', '//', '/=', '/'
- if (look == '*')
- {
- block_comment(CL_NONE);
- if (look == EOF) // almost certainly a mistake:
- return ERROR_EOF_S;
- else continue;
- }
- else if (look == '/')
- { line_comment(); continue; }
- else if (look == '=') return eat_return(DIVAS_S);
- else return SLASH_S;
- case '.' : // ".", "..."
- if (isdigit(look)) return num(ch);
- else if (look == '.')
- {
- eat_one(); // check for "..", undefined.
- if (look != '.') return ERROR_UNKN_S;
- else return eat_return(ELLIP_S);
- }
- else return DOT_S;
- case '<' : // '<=', '<', '<<', '<<='
- if (look == '=') return eat_return(LE_S);
- else if (look == '<')
- {
- eat_one();
- if (look != '=') return SHL_S;
- else return eat_return(SHLAS_S);
- }
- else return LT_S;
- case '>' : // '>=', '>', '>>', '>>='
- if (look == '=') return eat_return(GE_S);
- else if (look == '>')
- {
- eat_one();
- if (look != '=') return SHR_S;
- else return eat_return(SHRAS_S);
- }
- else return GT_S;
- default:
- if (isalpha(ch))
- return ident(ch);
- if (ch == 'n')
- eoln(CL_NONE);
- else if (iscntrl(ch))
- continue;
- else
- return ERROR_UNKN_S;
- }
- }
- return EOF_S;
- }
// Quickbuf -- fixed-size character buffer used to collect one '#' line.
// Characters that do not fit are silently dropped; one byte is always
// kept free for the terminating NUL.
struct Quickbuf
{
    short len;                  // number of characters stored in line
    char line[10240];           // the characters; not NUL-terminated
                                // until terminate() is called

    // appends c unless the buffer is full
    void put_in(char c) { if (len < int(sizeof(line)) - 1) line[len++] = c; }

    // writes the terminating NUL after the stored characters
    void terminate() { line[len] = '\0'; }

    Quickbuf() { len = 0; }
};
- void Clex::
- eoln(Clex_mode m)
- {
- // assume NL character already eaten.
- ++line_num;
- // don't process '#' lines in quotes, comments, or '#' continuations.
- if (m & (CL_QUOTE|CL_POUND|CL_COMMENT))
- return;
- // eat whitespace
- while (look != EOF && look != 'n')
- {
- if (look == ' ' || iscntrl(char(look))) eat_one();
- else break;
- }
- if (look != '#')
- return;
- // eat the '#' and subsequent whitespace
- do { eat_one(); if (look == EOF || look == 'n') break; }
- while (look == ' ' || iscntrl(char(look)));
- // collect the '#' line
- Quickbuf b;
- do { // record line
- if (look == '\') // check for continuation line
- {
- eat_one();
- if (look == 'n') { eat_one(); eoln(m|CL_POUND); }
- else { b.put_in('\'); }
- }
- else if (look == '/') // check for comment in '#' line
- {
- eat_one();
- if (look == '*')
- {
- block_comment(m|CL_POUND);
- if (look == EOF) break;
- }
- else if (look == '/') line_comment();
- else { b.put_in('/'); }
- }
- else
- {
- if (iscntrl(char(look))) look = ' ';
- b.put_in(look);
- eat_one();
- }
- } while (look != 'n' && look != EOF);
- b.terminate();
- (void) pound(m, b.line, b.len); // call virtual handler
- }
- Boolean Clex::
- pound (Clex_mode m, char* line, short len)
- {
- void(m); // to keep cfront blissful
- char* cp = line;
- if (!isdigit(*cp))
- {
- if (len < 5) return FALSE;
- if (strncmp(cp, "line ", 5) != 0)
- return FALSE; // don't know what it is
- cp += 4;
- while (*cp == ' ') ++cp;
- if (!isdigit(*cp))
- return FALSE;
- }
- // # <line> "<filename>" or #line <line> "<filename>"
- line_num = atoi(cp) - 1; // will be incremented by eoln() later
- while (isdigit(*cp)) ++cp;
- while (*cp == ' ') ++cp;
- if (*cp == '"')
- {
- char* cpq = cp;
- do { ++cpq; }
- while (*cpq != '"' && *cpq != '');
- strncpy(filename, cp+1, cpq - cp - 1);
- filename[cpq - cp - 1] = '';
- }
- return TRUE;
- }
- const char* Clex::
- debug (Clex_sym s)
- {
- return (s >= KEYWORD_S) ? keywords[s - KEYWORD_S] : sym_str[s] ;
- }