rexpr.c
资源名称:pccts133.zip [点击查看]
上传用户:itx_2006
上传日期:2007-01-06
资源大小:493k
文件大小:10k
源码类别:
编译器/解释器
开发平台:
Others
- /*
- * This file contains code for
- *
- * int rexpr(char *expr, char *s);
- *
- * which answers
- *
- * 1 if 's' is in the language described by the regular expression 'expr'
- * 0 if it is not
- * -1 if the regular expression is invalid
- *
- * Language membership is determined by constructing a non-deterministic
- * finite automata (NFA) from the regular expression. A depth-
- * first-search is performed on the NFA (graph) to check for a match of 's'.
- * Each non-epsilon arc consumes one character from 's'. Backtracking is
- * performed to check all possible paths through the NFA.
- *
- * Regular expressions follow the meta-language:
- *
- * <regExpr> ::= <andExpr> ( '|' <andExpr> )*
- *
- * <andExpr> ::= <expr> ( <expr> )*
- *
- * <expr> ::= {'~'} '[' <atomList> ']' <repeatSymbol>
- * | '(' <regExpr> ')' <repeatSymbol>
- * | '{' <regExpr> '}' <repeatSymbol>
- * | <atom> <repeatSymbol>
- *
- * <repeatSymbol> ::= { '*' | '+' }
- *
- * <atomList> ::= <atom> ( <atom> )*
- * | { <atomList> } <atom> '-' <atom> { <atomList> }
- *
- * <atom> ::= Token[Atom]
- *
- * Notes:
- * ~ means complement the set in [..]. i.e. all characters not listed
- * * means match 0 or more times (can be on expression or atom)
- * + means match 1 or more times (can be on expression or atom)
- * {} optional
- * () grouping
- * [] set of atoms
- * x-y all characters from x to y (found only in [..])
- * xx the character with value xx
- *
- * Examples:
- * [a-z]+
- * match 1 or more lower-case letters (e.g. variable)
- *
- * 0x[0-9A-Fa-f]+
- * match a hex number with 0x on front (e.g. 0xA1FF)
- *
- * [0-9]+.[0-9]+{e[0-9]+}
- * match a floating point number (e.g. 3.14e21)
- *
- * Code example:
- * if ( rexpr("[a-zA-Z][a-zA-Z0-9]+", str) ) then str is keyword
- *
- * Terence Parr
- * Purdue University
- * April 1991
- */
- #include <stdio.h>
- #include <ctype.h>
- #ifdef __STDC__
- #include <stdlib.h>
- #else
- #include <malloc.h>
- #endif
- #include "rexpr.h"
- #ifdef __STDC__
- static int regExpr( GraphPtr g );
- static int andExpr( GraphPtr g );
- static int expr( GraphPtr g );
- static int repeatSymbol( GraphPtr g );
- static int atomList( char *p, int complement );
- static int next( void );
- static ArcPtr newGraphArc( void );
- static NodePtr newNode( void );
- static int ArcBetweenGraphNode( NodePtr i, NodePtr j, int label );
- static Graph BuildNFA_atom( int label );
- static Graph BuildNFA_AB( Graph A, Graph B );
- static Graph BuildNFA_AorB( Graph A, Graph B );
- static Graph BuildNFA_set( char *s );
- static Graph BuildNFA_Astar( Graph A );
- static Graph BuildNFA_Aplus( Graph A );
- static Graph BuildNFA_Aoptional( Graph A );
- #else
- static int regExpr();
- static int andExpr();
- static int expr();
- static int repeatSymbol();
- static int atomList();
- static int next();
- static ArcPtr newGraphArc();
- static NodePtr newNode();
- static int ArcBetweenGraphNode();
- static Graph BuildNFA_atom();
- static Graph BuildNFA_AB();
- static Graph BuildNFA_AorB();
- static Graph BuildNFA_set();
- static Graph BuildNFA_Astar();
- static Graph BuildNFA_Aplus();
- static Graph BuildNFA_Aoptional();
- #endif
- static char *_c;
- static int token, tokchar;
- static NodePtr accept;
- static NodePtr freelist = NULL;
- /*
- * return 1 if s in language described by expr
- * 0 if s is not
- * -1 if expr is an invalid regular expression
- */
- rexpr(expr, s)
- char *expr, *s;
- {
- NodePtr p,q;
- Graph nfa;
- int result;
- fprintf(stderr, "rexpr(%s,%s);n", expr,s);
- freelist = NULL;
- _c = expr;
- next();
- if ( regExpr(&nfa) == -1 ) return -1;
- accept = nfa.right;
- result = match(nfa.left, s);
- /* free all your memory */
- p = q = freelist;
- while ( p!=NULL ) { q = p->track; free(p); p = q; }
- return result;
- }
- /*
- * do a depth-first-search on the NFA looking for a path from start to
- * accept state labelled with the characters of 's'.
- */
- match(automaton, s)
- NodePtr automaton;
- char *s;
- {
- ArcPtr p;
- if ( automaton == accept && *s == ' ' ) return 1; /* match */
- for (p=automaton->arcs; p!=NULL; p=p->next) /* try all arcs */
- {
- if ( p->label == Epsilon )
- {
- if ( match(p->target, s) ) return 1;
- }
- else if ( p->label == *s )
- if ( match(p->target, s+1) ) return 1;
- }
- return 0;
- }
- /*
- * <regExpr> ::= <andExpr> ( '|' {<andExpr>} )*
- *
- * Return -1 if syntax error
- * Return 0 if none found
- * Return 1 if a regExrp was found
- */
- static
- regExpr(g)
- GraphPtr g;
- {
- Graph g1, g2;
- if ( andExpr(&g1) == -1 )
- {
- return -1;
- }
- while ( token == '|' )
- {
- int a;
- next();
- a = andExpr(&g2);
- if ( a == -1 ) return -1; /* syntax error below */
- else if ( !a ) return 1; /* empty alternative */
- g1 = BuildNFA_AorB(g1, g2);
- }
- if ( token!=' ' ) return -1;
- *g = g1;
- return 1;
- }
- /*
- * <andExpr> ::= <expr> ( <expr> )*
- */
- static
- andExpr(g)
- GraphPtr g;
- {
- Graph g1, g2;
- if ( expr(&g1) == -1 )
- {
- return -1;
- }
- while ( token==Atom || token=='{' || token=='(' || token=='~' || token=='[' )
- {
- if (expr(&g2) == -1) return -1;
- g1 = BuildNFA_AB(g1, g2);
- }
- *g = g1;
- return 1;
- }
- /*
- * <expr> ::= {'~'} '[' <atomList> ']' <repeatSymbol>
- * | '(' <regExpr> ')' <repeatSymbol>
- * | '{' <regExpr> '}' <repeatSymbol>
- * | <atom> <repeatSymbol>
- */
- static
- expr(g)
- GraphPtr g;
- {
- int complement = 0;
- char s[257]; /* alloc space for string of char in [] */
- if ( token == '~' || token == '[' )
- {
- if ( token == '~' ) {complement = 1; next();}
- if ( token != '[' ) return -1;
- next();
- if ( atomList( s, complement ) == -1 ) return -1;
- *g = BuildNFA_set( s );
- if ( token != ']' ) return -1;
- next();
- repeatSymbol( g );
- return 1;
- }
- if ( token == '(' )
- {
- next();
- if ( regExpr( g ) == -1 ) return -1;
- if ( token != ')' ) return -1;
- next();
- repeatSymbol( g );
- return 1;
- }
- if ( token == '{' )
- {
- next();
- if ( regExpr( g ) == -1 ) return -1;
- if ( token != '}' ) return -1;
- next();
- /* S p e c i a l C a s e O p t i o n a l { } */
- if ( token != '*' && token != '+' )
- {
- *g = BuildNFA_Aoptional( *g );
- }
- repeatSymbol( g );
- return 1;
- }
- if ( token == Atom )
- {
- *g = BuildNFA_atom( tokchar );
- next();
- repeatSymbol( g );
- return 1;
- }
- return -1;
- }
- /*
- * <repeatSymbol> ::= { '*' | '+' }
- */
- static
- repeatSymbol(g)
- GraphPtr g;
- {
- switch ( token )
- {
- case '*' : *g = BuildNFA_Astar( *g ); next(); break;
- case '+' : *g = BuildNFA_Aplus( *g ); next(); break;
- }
- return 1;
- }
- /*
- * <atomList> ::= <atom> { <atom> }*
- * { <atomList> } <atom> '-' <atom> { <atomList> }
- *
- * a-b is same as ab
- * q-a is same as q
- */
- static
- atomList(p, complement)
- char *p;
- int complement;
- {
- static unsigned char set[256]; /* no duplicates */
- int first, last, i;
- char *s = p;
- if ( token != Atom ) return -1;
- for (i=0; i<256; i++) set[i] = 0;
- while ( token == Atom )
- {
- if ( !set[tokchar] ) *s++ = tokchar;
- set[tokchar] = 1; /* Add atom to set */
- next();
- if ( token == '-' ) /* have we found '-' */
- {
- first = *(s-1); /* Get last char */
- next();
- if ( token != Atom ) return -1;
- else
- {
- last = tokchar;
- }
- for (i = first+1; i <= last; i++)
- {
- if ( !set[tokchar] ) *s++ = i;
- set[i] = 1; /* Add atom to set */
- }
- next();
- }
- }
- *s = ' ';
- if ( complement )
- {
- for (i=0; i<256; i++) set[i] = !set[i];
- for (i=1,s=p; i<256; i++) if ( set[i] ) *s++ = i;
- *s = ' ';
- }
- return 1;
- }
- /* a somewhat stupid lexical analyzer */
- static
- next()
- {
- while ( *_c==' ' || *_c=='t' || *_c=='n' ) _c++;
- if ( *_c=='\' )
- {
- _c++;
- if ( isdigit(*_c) )
- {
- int n=0;
- while ( isdigit(*_c) )
- {
- n = n*10 + (*_c++ - '0');
- }
- if ( n>255 ) n=255;
- tokchar = n;
- }
- else
- {
- switch (*_c)
- {
- case 'n' : tokchar = 'n'; break;
- case 't' : tokchar = 't'; break;
- case 'r' : tokchar = 'r'; break;
- default : tokchar = *_c;
- }
- _c++;
- }
- token = Atom;
- }
- else if ( isgraph(*_c) && *_c!='[' && *_c!='(' && *_c!='{' &&
- *_c!='-' && *_c!='}' && *_c!=')' && *_c!=']' &&
- *_c!='+' && *_c!='*' && *_c!='~' && *_c!='|' )
- {
- token = Atom;
- tokchar = *_c++;
- }
- else
- {
- token = tokchar = *_c++;
- }
- }
- /* N F A B u i l d i n g R o u t i n e s */
- static
- ArcPtr
- newGraphArc()
- {
- ArcPtr p;
- p = (ArcPtr) calloc(1, sizeof(Arc));
- if ( p==NULL ) {fprintf(stderr,"rexpr: out of memoryn"); exit(-1);}
- if ( freelist != NULL ) p->track = (ArcPtr) freelist;
- freelist = (NodePtr) p;
- return p;
- }
- static
- NodePtr
- newNode()
- {
- NodePtr p;
- p = (NodePtr) calloc(1, sizeof(Node));
- if ( p==NULL ) {fprintf(stderr,"rexpr: out of memoryn"); exit(-1);}
- if ( freelist != NULL ) p->track = freelist;
- freelist = p;
- return p;
- }
- static
- ArcBetweenGraphNodes(i, j, label)
- NodePtr i, j;
- int label;
- {
- ArcPtr a;
- a = newGraphArc();
- if ( i->arcs == NULL ) i->arctail = i->arcs = a;
- else {(i->arctail)->next = a; i->arctail = a;}
- a->label = label;
- a->target = j;
- }
- static Graph
- BuildNFA_atom(label)
- int label;
- {
- Graph g;
- g.left = newNode();
- g.right = newNode();
- ArcBetweenGraphNodes(g.left, g.right, label);
- return( g );
- }
- static Graph
- BuildNFA_AB(A, B)
- Graph A, B;
- {
- Graph g;
- ArcBetweenGraphNodes(A.right, B.left, Epsilon);
- g.left = A.left;
- g.right = B.right;
- return( g );
- }
- static Graph
- BuildNFA_AorB(A, B)
- Graph A, B;
- {
- Graph g;
- g.left = newNode();
- ArcBetweenGraphNodes(g.left, A.left, Epsilon);
- ArcBetweenGraphNodes(g.left, B.left, Epsilon);
- g.right = newNode();
- ArcBetweenGraphNodes(A.right, g.right, Epsilon);
- ArcBetweenGraphNodes(B.right, g.right, Epsilon);
- return( g );
- }
- static Graph
- BuildNFA_set( s )
- char *s;
- {
- Graph g;
- if ( s == NULL ) return g;
- g.left = newNode();
- g.right = newNode();
- while ( *s != ' ' )
- {
- ArcBetweenGraphNodes(g.left, g.right, *s++);
- }
- return g;
- }
- static Graph
- BuildNFA_Astar( A )
- Graph A;
- {
- Graph g;
- g.left = newNode();
- g.right = newNode();
- ArcBetweenGraphNodes(g.left, A.left, Epsilon);
- ArcBetweenGraphNodes(g.left, g.right, Epsilon);
- ArcBetweenGraphNodes(A.right, g.right, Epsilon);
- ArcBetweenGraphNodes(A.right, A.left, Epsilon);
- return( g );
- }
- static Graph
- BuildNFA_Aplus( A )
- Graph A;
- {
- ArcBetweenGraphNodes(A.right, A.left, Epsilon);
- return( A );
- }
- static Graph
- BuildNFA_Aoptional( A )
- Graph A;
- {
- Graph g;
- g.left = newNode();
- g.right = newNode();
- ArcBetweenGraphNodes(g.left, A.left, Epsilon);
- ArcBetweenGraphNodes(g.left, g.right, Epsilon);
- ArcBetweenGraphNodes(A.right, g.right, Epsilon);
- return( g );
- }