rx.c
上传用户:aoeyumen
上传日期:2007-01-06
资源大小:3329k
文件大小:173k
- /* Copyright (C) 1992, 1993 Free Software Foundation, Inc.
- This file is part of the librx library.
- Librx is free software; you can redistribute it and/or modify it under
- the terms of the GNU Library General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
- Librx is distributed in the hope that it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- for more details.
- You should have received a copy of the GNU Library General Public
- License along with this software; see the file COPYING.LIB. If not,
- write to the Free Software Foundation, 675 Mass Ave, Cambridge, MA
- 02139, USA. */
- /* NOTE!!! AIX is so losing it requires this to be the first thing in the
- * file.
- * Do not put ANYTHING before it!
- */
- #if !defined (__GNUC__) && defined (_AIX)
- #pragma alloca
- #endif
/* Human-readable name and version of this library.  */
char rx_version_string[] = "GNU Rx version 0.06";
- /* ``Too hard!''
- * -- anon.
- */
- #include <stdio.h>
- #include <ctype.h>
- #ifndef isgraph
- #define isgraph(c) (isprint (c) && !isspace (c))
- #endif
/* Fallback for C libraries that do not supply isblank as a macro:
 * true for a space or a horizontal tab.
 */
#ifndef isblank
#define isblank(c) ((c) == ' ' || (c) == '\t')
#endif
- #include <sys/types.h>
- #undef MAX
- #undef MIN
- #define MAX(a, b) ((a) > (b) ? (a) : (b))
- #define MIN(a, b) ((a) < (b) ? (a) : (b))
- typedef char boolean;
- #define false 0
- #define true 1
- #ifndef RX_DECL
- #define RX_DECL static
- #endif
- #ifndef __GCC__
- #undef __inline__
- #define __inline__
- #endif
- /* Emacs already defines alloca, sometimes. */
- #ifndef alloca
- /* Make alloca work the best possible way. */
- #ifdef __GNUC__
- #define alloca __builtin_alloca
- #else /* not __GNUC__ */
- #if HAVE_ALLOCA_H
- #include <alloca.h>
- #else /* not __GNUC__ or HAVE_ALLOCA_H */
- #ifndef _AIX /* Already did AIX, up at the top. */
- char *alloca ();
- #endif /* not _AIX */
- #endif /* not HAVE_ALLOCA_H */
- #endif /* not __GNUC__ */
- #endif /* not alloca */
- /* Memory management and stuff for emacs. */
- #define CHARBITS 8
/* Grow a heap block, or allocate it the first time: realloc when M is
 * non-null, malloc otherwise.  Every argument is parenthesized so that
 * an expression argument (for instance a conditional) expands as one
 * operand.  M is evaluated twice, so it must have no side effects.
 */
#define remalloc(M, S) ((M) ? realloc ((M), (S)) : malloc (S))
- /* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
- * use `alloca' instead of `malloc' for the backtracking stack.
- *
- * Emacs will die miserably if we don't do this.
- */
- #ifdef REGEX_MALLOC
- #define REGEX_ALLOCATE malloc
- #else /* not REGEX_MALLOC */
- #define REGEX_ALLOCATE alloca
- #endif /* not REGEX_MALLOC */
/* Choose the linkage used for the rx entry points.  A client that
 * defines RX_WANT_RX_DEFS gets `extern' declarations and unqualified
 * definitions; otherwise everything is file-local (`static').
 *
 * RX_DECL may already carry a default definition at this point, so it
 * is dropped first; redefining a macro with a different body without
 * an intervening #undef is not valid C.
 */
#undef RX_DECL
#ifdef RX_WANT_RX_DEFS
#define RX_DECL extern
#define RX_DEF_QUAL
#else
#define RX_WANT_RX_DEFS
#define RX_DECL static
#define RX_DEF_QUAL static
#endif
- #include "rx.h"
- #undef RX_DECL
- #define RX_DECL RX_DEF_QUAL
- #ifndef emacs
- #ifdef SYNTAX_TABLE
- extern char *re_syntax_table;
- #else /* not SYNTAX_TABLE */
- /* RX_DECL char re_syntax_table[CHAR_SET_SIZE]; */
- #ifdef __STDC__
- static void
- init_syntax_once (void)
- #else
- static void
- init_syntax_once ()
- #endif
- {
- register int c;
- static int done = 0;
- if (done)
- return;
- bzero (re_syntax_table, sizeof re_syntax_table);
- for (c = 'a'; c <= 'z'; c++)
- re_syntax_table[c] = Sword;
- for (c = 'A'; c <= 'Z'; c++)
- re_syntax_table[c] = Sword;
- for (c = '0'; c <= '9'; c++)
- re_syntax_table[c] = Sword;
- re_syntax_table['_'] = Sword;
- done = 1;
- }
- #endif /* not SYNTAX_TABLE */
- #endif /* not emacs */
/* Compile with `-DRX_DEBUG' and use the following flags.
 *
 * Debugging flags:
 *   rx_debug_compile - print information as a regexp is compiled
 *   rx_debug_trace - print information as a regexp is executed
 */
- #ifdef RX_DEBUG
- int rx_debug_compile = 0;
- int rx_debug_trace = 0;
- static struct re_pattern_buffer * dbug_rxb = 0;
- #ifdef __STDC__
- typedef void (*side_effect_printer) (struct rx *, void *, FILE *);
- #else
- typedef void (*side_effect_printer) ();
- #endif
- #ifdef __STDC__
- static void print_cset (struct rx *rx, rx_Bitset cset, FILE * fp);
- #else
- static void print_cset ();
- #endif
- #ifdef __STDC__
- static void
- print_rexp (struct rx *rx,
- struct rexp_node *node, int depth,
- side_effect_printer seprint, FILE * fp)
- #else
- static void
- print_rexp (rx, node, depth, seprint, fp)
- struct rx *rx;
- struct rexp_node *node;
- int depth;
- side_effect_printer seprint;
- FILE * fp;
- #endif
- {
- if (!node)
- return;
- else
- {
- switch (node->type)
- {
- case r_cset:
- {
- fprintf (fp, "%*s", depth, "");
- print_cset (rx, node->params.cset, fp);
- fputc ('n', fp);
- break;
- }
- case r_opt:
- case r_star:
- fprintf (fp, "%*s%sn", depth, "",
- node->type == r_opt ? "opt" : "star");
- print_rexp (rx, node->params.pair.left, depth + 3, seprint, fp);
- break;
- case r_2phase_star:
- fprintf (fp, "%*s2phase starn", depth, "");
- print_rexp (rx, node->params.pair.right, depth + 3, seprint, fp);
- print_rexp (rx, node->params.pair.left, depth + 3, seprint, fp);
- break;
- case r_alternate:
- case r_concat:
- fprintf (fp, "%*s%sn", depth, "",
- node->type == r_alternate ? "alt" : "concat");
- print_rexp (rx, node->params.pair.left, depth + 3, seprint, fp);
- print_rexp (rx, node->params.pair.right, depth + 3, seprint, fp);
- break;
- case r_side_effect:
- fprintf (fp, "%*sSide effect: ", depth, "");
- seprint (rx, node->params.side_effect, fp);
- fputc ('n', fp);
- }
- }
- }
- #ifdef __STDC__
- static void
- print_nfa (struct rx * rx,
- struct rx_nfa_state * n,
- side_effect_printer seprint, FILE * fp)
- #else
- static void
- print_nfa (rx, n, seprint, fp)
- struct rx * rx;
- struct rx_nfa_state * n;
- side_effect_printer seprint;
- FILE * fp;
- #endif
- {
- while (n)
- {
- struct rx_nfa_edge *e = n->edges;
- struct rx_possible_future *ec = n->futures;
- fprintf (fp, "node %d %sn", n->id,
- n->is_final ? "final" : (n->is_start ? "start" : ""));
- while (e)
- {
- fprintf (fp, " edge to %d, ", e->dest->id);
- switch (e->type)
- {
- case ne_epsilon:
- fprintf (fp, "epsilonn");
- break;
- case ne_side_effect:
- fprintf (fp, "side effect ");
- seprint (rx, e->params.side_effect, fp);
- fputc ('n', fp);
- break;
- case ne_cset:
- fprintf (fp, "cset ");
- print_cset (rx, e->params.cset, fp);
- fputc ('n', fp);
- break;
- }
- e = e->next;
- }
- while (ec)
- {
- int x;
- struct rx_nfa_state_set * s;
- struct rx_se_list * l;
- fprintf (fp, " eclosure to {");
- for (s = ec->destset; s; s = s->cdr)
- fprintf (fp, "%d ", s->car->id);
- fprintf (fp, "} (");
- for (l = ec->effects; l; l = l->cdr)
- {
- seprint (rx, l->car, fp);
- fputc (' ', fp);
- }
- fprintf (fp, ")n");
- ec = ec->next;
- }
- n = n->next;
- }
- }
/* Names of the side effects that are encoded as negative numbers.
 * re_seprint indexes this table with the negated effect value, so the
 * entry at index K names effect -K; index 0 is a placeholder.
 */
static char * efnames [] =
{
  "bogon",                      /*  0 */
  "re_se_try",                  /*  1 */
  "re_se_pushback",             /*  2 */
  "re_se_push0",                /*  3 */
  "re_se_pushpos",              /*  4 */
  "re_se_chkpos",               /*  5 */
  "re_se_poppos",               /*  6 */
  "re_se_at_dot",               /*  7 */
  "re_se_syntax",               /*  8 */
  "re_se_not_syntax",           /*  9 */
  "re_se_begbuf",               /* 10 */
  "re_se_hat",                  /* 11 */
  "re_se_wordbeg",              /* 12 */
  "re_se_wordbound",            /* 13 */
  "re_se_notwordbound",         /* 14 */
  "re_se_wordend",              /* 15 */
  "re_se_endbuf",               /* 16 */
  "re_se_dollar",               /* 17 */
  "re_se_fail",                 /* 18 */
};
/* Names of the parameterized side effects; re_seprint indexes this
 * table with the `se' field of a struct re_se_params.
 */
static char * efnames2[] =
{
  "re_se_win",                  /* 0 */
  "re_se_lparen",               /* 1 */
  "re_se_rparen",               /* 2 */
  "re_se_backref",              /* 3 */
  "re_se_iter",                 /* 4 */
  "re_se_end_iter",             /* 5 */
  "re_se_tv"                    /* 6 */
};
/* Printable names of the rx instruction codes.
 * NOTE(review): presumably ordered like the instruction enumeration
 * in rx.h -- confirm against that header.
 */
static char * inx_names[] =
{
  "rx_backtrack_point",         /* 0 */
  "rx_do_side_effects",         /* 1 */
  "rx_cache_miss",              /* 2 */
  "rx_next_char",               /* 3 */
  "rx_backtrack",               /* 4 */
  "rx_error_inx",               /* 5 */
  "rx_num_instructions"         /* 6 */
};
- #ifdef __STDC__
- static void
- re_seprint (struct rx * rx, void * effect, FILE * fp)
- #else
- static void
- re_seprint (rx, effect, fp)
- struct rx * rx;
- void * effect;
- FILE * fp;
- #endif
- {
- if ((int)effect < 0)
- fputs (efnames[-(int)effect], fp);
- else if (dbug_rxb)
- {
- struct re_se_params * p = &dbug_rxb->se_params[(int)effect];
- fprintf (fp, "%s(%d,%d)", efnames2[p->se], p->op1, p->op2);
- }
- else
- fprintf (fp, "[complex op # %d]", (int)effect);
- }
/* Empty on purpose: this and print_fastmap exist only so that the
 * regex.c regression tests will compile.  RXB is ignored.
 */
void
print_compiled_pattern (rxb)
     struct re_pattern_buffer * rxb;
{
  /* Nothing to print.  */
}
/* Empty stub; FM is ignored and nothing is printed.  */
void
print_fastmap (fm)
     char * fm;
{
  /* Nothing to print.  */
}
- #endif /* RX_DEBUG */
/* This page: Bitsets. Completely uninteresting. */
- #ifdef __STDC__
- RX_DECL int
- rx_bitset_is_equal (int size, rx_Bitset a, rx_Bitset b)
- #else
- RX_DECL int
- rx_bitset_is_equal (size, a, b)
- int size;
- rx_Bitset a;
- rx_Bitset b;
- #endif
- {
- int x;
- RX_subset s = b[0];
- b[0] = ~a[0];
- for (x = rx_bitset_numb_subsets(size) - 1; a[x] == b[x]; --x)
- ;
- b[0] = s;
- return !x && s == a[0];
- }
- #ifdef __STDC__
- RX_DECL int
- rx_bitset_is_subset (int size, rx_Bitset a, rx_Bitset b)
- #else
- RX_DECL int
- rx_bitset_is_subset (size, a, b)
- int size;
- rx_Bitset a;
- rx_Bitset b;
- #endif
- {
- int x = rx_bitset_numb_subsets(size) - 1;
- while (x-- && (a[x] & b[x]) == a[x]);
- return x == -1;
- }
- #ifdef __STDC__
- RX_DECL int
- rx_bitset_empty (int size, rx_Bitset set)
- #else
- RX_DECL int
- rx_bitset_empty (size, set)
- int size;
- rx_Bitset set;
- #endif
- {
- int x;
- RX_subset s = set[0];
- set[0] = 1;
- for (x = rx_bitset_numb_subsets(size) - 1; !set[x]; --x)
- ;
- set[0] = s;
- return !s;
- }
- #ifdef __STDC__
- RX_DECL void
- rx_bitset_null (int size, rx_Bitset b)
- #else
- RX_DECL void
- rx_bitset_null (size, b)
- int size;
- rx_Bitset b;
- #endif
- {
- bzero (b, rx_sizeof_bitset(size));
- }
- #ifdef __STDC__
- RX_DECL void
- rx_bitset_universe (int size, rx_Bitset b)
- #else
- RX_DECL void
- rx_bitset_universe (size, b)
- int size;
- rx_Bitset b;
- #endif
- {
- int x = rx_bitset_numb_subsets (size);
- while (x--)
- *b++ = ~(RX_subset)0;
- }
- #ifdef __STDC__
- RX_DECL void
- rx_bitset_complement (int size, rx_Bitset b)
- #else
- RX_DECL void
- rx_bitset_complement (size, b)
- int size;
- rx_Bitset b;
- #endif
- {
- int x = rx_bitset_numb_subsets (size);
- while (x--)
- {
- *b = ~*b;
- ++b;
- }
- }
- #ifdef __STDC__
- RX_DECL void
- rx_bitset_assign (int size, rx_Bitset a, rx_Bitset b)
- #else
- RX_DECL void
- rx_bitset_assign (size, a, b)
- int size;
- rx_Bitset a;
- rx_Bitset b;
- #endif
- {
- int x;
- for (x = rx_bitset_numb_subsets(size) - 1; x >=0; --x)
- a[x] = b[x];
- }
- #ifdef __STDC__
- RX_DECL void
- rx_bitset_union (int size, rx_Bitset a, rx_Bitset b)
- #else
- RX_DECL void
- rx_bitset_union (size, a, b)
- int size;
- rx_Bitset a;
- rx_Bitset b;
- #endif
- {
- int x;
- for (x = rx_bitset_numb_subsets(size) - 1; x >=0; --x)
- a[x] |= b[x];
- }
- #ifdef __STDC__
- RX_DECL void
- rx_bitset_intersection (int size,
- rx_Bitset a, rx_Bitset b)
- #else
- RX_DECL void
- rx_bitset_intersection (size, a, b)
- int size;
- rx_Bitset a;
- rx_Bitset b;
- #endif
- {
- int x;
- for (x = rx_bitset_numb_subsets(size) - 1; x >=0; --x)
- a[x] &= b[x];
- }
- #ifdef __STDC__
- RX_DECL void
- rx_bitset_difference (int size, rx_Bitset a, rx_Bitset b)
- #else
- RX_DECL void
- rx_bitset_difference (size, a, b)
- int size;
- rx_Bitset a;
- rx_Bitset b;
- #endif
- {
- int x;
- for (x = rx_bitset_numb_subsets(size) - 1; x >=0; --x)
- a[x] &= ~ b[x];
- }
- #ifdef __STDC__
- RX_DECL void
- rx_bitset_revdifference (int size,
- rx_Bitset a, rx_Bitset b)
- #else
- RX_DECL void
- rx_bitset_revdifference (size, a, b)
- int size;
- rx_Bitset a;
- rx_Bitset b;
- #endif
- {
- int x;
- for (x = rx_bitset_numb_subsets(size) - 1; x >=0; --x)
- a[x] = ~a[x] & b[x];
- }
- #ifdef __STDC__
- RX_DECL void
- rx_bitset_xor (int size, rx_Bitset a, rx_Bitset b)
- #else
- RX_DECL void
- rx_bitset_xor (size, a, b)
- int size;
- rx_Bitset a;
- rx_Bitset b;
- #endif
- {
- int x;
- for (x = rx_bitset_numb_subsets(size) - 1; x >=0; --x)
- a[x] ^= b[x];
- }
- #ifdef __STDC__
- RX_DECL unsigned long
- rx_bitset_hash (int size, rx_Bitset b)
- #else
- RX_DECL unsigned long
- rx_bitset_hash (size, b)
- int size;
- rx_Bitset b;
- #endif
- {
- int x;
- unsigned long hash = (unsigned long)rx_bitset_hash;
- for (x = rx_bitset_numb_subsets(size) - 1; x >= 0; --x)
- hash ^= rx_bitset_subset_val(b, x);
- return hash;
- }
- RX_DECL RX_subset rx_subset_singletons [RX_subset_bits] =
- {
- 0x1,
- 0x2,
- 0x4,
- 0x8,
- 0x10,
- 0x20,
- 0x40,
- 0x80,
- 0x100,
- 0x200,
- 0x400,
- 0x800,
- 0x1000,
- 0x2000,
- 0x4000,
- 0x8000,
- 0x10000,
- 0x20000,
- 0x40000,
- 0x80000,
- 0x100000,
- 0x200000,
- 0x400000,
- 0x800000,
- 0x1000000,
- 0x2000000,
- 0x4000000,
- 0x8000000,
- 0x10000000,
- 0x20000000,
- 0x40000000,
- 0x80000000
- };
#ifdef RX_DEBUG
/* Print the members of character set CSET to FP inside square
 * brackets.  Member codes run from 0 up to rx->local_cset_size - 1;
 * printable ones are written as themselves, the others as a
 * backslash followed by `0' and the octal code and a space.
 */
#ifdef __STDC__
static void
print_cset (struct rx *rx, rx_Bitset cset, FILE * fp)
#else
static void
print_cset (rx, cset, fp)
     struct rx *rx;
     rx_Bitset cset;
     FILE * fp;
#endif
{
  int x;

  fputc ('[', fp);
  for (x = 0; x < rx->local_cset_size; ++x)
    {
      if (!RX_bitset_member (cset, x))
        continue;
      if (isprint (x))
        fputc (x, fp);
      else
        /* The backslash must be escaped: a format that starts with a
           NUL byte would print nothing at all.  */
        fprintf (fp, "\\0%o ", x);
    }
  fputc (']', fp);
}
#endif /* RX_DEBUG */
/* Bit masks applied to a hash value before it is reduced to a bucket
 * number.  The hash-table routines use entry 0 in the top-level table
 * and move to the next entry each time they descend into a child
 * table.
 */
static unsigned long rx_hash_masks[4] =
{
  0x12488421,                   /* level 0 */
  0x96699669,                   /* level 1 */
  0xbe7dd7eb,                   /* level 2 */
  0xffffffff                    /* level 3 */
};
- /* Hash tables */
- #ifdef __STDC__
- RX_DECL struct rx_hash_item *
- rx_hash_find (struct rx_hash * table,
- unsigned long hash,
- void * value,
- struct rx_hash_rules * rules)
- #else
- RX_DECL struct rx_hash_item *
- rx_hash_find (table, hash, value, rules)
- struct rx_hash * table;
- unsigned long hash;
- void * value;
- struct rx_hash_rules * rules;
- #endif
- {
- rx_hash_eq eq = rules->eq;
- int maskc = 0;
- long mask = rx_hash_masks [0];
- int bucket = (hash & mask) % 13;
- while (table->children [bucket])
- {
- table = table->children [bucket];
- ++maskc;
- mask = rx_hash_masks[maskc];
- bucket = (hash & mask) % 13;
- }
- {
- struct rx_hash_item * it = table->buckets[bucket];
- while (it)
- if (eq (it->data, value))
- return it;
- else
- it = it->next_same_hash;
- }
- return 0;
- }
- #ifdef __STDC__
- RX_DECL struct rx_hash_item *
- rx_hash_store (struct rx_hash * table,
- unsigned long hash,
- void * value,
- struct rx_hash_rules * rules)
- #else
- RX_DECL struct rx_hash_item *
- rx_hash_store (table, hash, value, rules)
- struct rx_hash * table;
- unsigned long hash;
- void * value;
- struct rx_hash_rules * rules;
- #endif
- {
- rx_hash_eq eq = rules->eq;
- int maskc = 0;
- long mask = rx_hash_masks[0];
- int bucket = (hash & mask) % 13;
- int depth = 0;
-
- while (table->children [bucket])
- {
- table = table->children [bucket];
- ++maskc;
- mask = rx_hash_masks[maskc];
- bucket = (hash & mask) % 13;
- ++depth;
- }
-
- {
- struct rx_hash_item * it = table->buckets[bucket];
- while (it)
- if (eq (it->data, value))
- return it;
- else
- it = it->next_same_hash;
- }
-
- {
- if ( (depth < 3)
- && (table->bucket_size [bucket] >= 4))
- {
- struct rx_hash * newtab = ((struct rx_hash *)
- rules->hash_alloc (rules));
- if (!newtab)
- goto add_to_bucket;
- bzero (newtab, sizeof (*newtab));
- newtab->parent = table;
- {
- struct rx_hash_item * them = table->buckets[bucket];
- unsigned long newmask = rx_hash_masks[maskc + 1];
- while (them)
- {
- struct rx_hash_item * save = them->next_same_hash;
- int new_buck = (them->hash & newmask) % 13;
- them->next_same_hash = newtab->buckets[new_buck];
- newtab->buckets[new_buck] = them;
- them->table = newtab;
- them = save;
- ++newtab->bucket_size[new_buck];
- ++newtab->refs;
- }
- table->refs = (table->refs - table->bucket_size[bucket] + 1);
- table->bucket_size[bucket] = 0;
- table->buckets[bucket] = 0;
- table->children[bucket] = newtab;
- table = newtab;
- bucket = (hash & newmask) % 13;
- }
- }
- }
- add_to_bucket:
- {
- struct rx_hash_item * it = ((struct rx_hash_item *)
- rules->hash_item_alloc (rules, value));
- if (!it)
- return 0;
- it->hash = hash;
- it->table = table;
- /* DATA and BINDING are to be set in hash_item_alloc */
- it->next_same_hash = table->buckets [bucket];
- table->buckets[bucket] = it;
- ++table->bucket_size [bucket];
- ++table->refs;
- return it;
- }
- }
- #ifdef __STDC__
- RX_DECL void
- rx_hash_free (struct rx_hash_item * it, struct rx_hash_rules * rules)
- #else
- RX_DECL void
- rx_hash_free (it, rules)
- struct rx_hash_item * it;
- struct rx_hash_rules * rules;
- #endif
- {
- if (it)
- {
- struct rx_hash * table = it->table;
- unsigned long hash = it->hash;
- int depth = (table->parent
- ? (table->parent->parent
- ? (table->parent->parent->parent
- ? 3
- : 2)
- : 1)
- : 0);
- int bucket = (hash & rx_hash_masks [depth]) % 13;
- struct rx_hash_item ** pos = &table->buckets [bucket];
-
- while (*pos != it)
- pos = &(*pos)->next_same_hash;
- *pos = it->next_same_hash;
- rules->free_hash_item (it, rules);
- --table->bucket_size[bucket];
- --table->refs;
- while (!table->refs && depth)
- {
- struct rx_hash * save = table;
- table = table->parent;
- --depth;
- bucket = (hash & rx_hash_masks [depth]) % 13;
- --table->refs;
- table->children[bucket] = 0;
- rules->free_hash (save, rules);
- }
- }
- }
- #ifdef __STDC__
- RX_DECL void
- rx_free_hash_table (struct rx_hash * tab, rx_hash_freefn freefn,
- struct rx_hash_rules * rules)
- #else
- RX_DECL void
- rx_free_hash_table (tab, freefn, rules)
- struct rx_hash * tab;
- rx_hash_freefn freefn;
- struct rx_hash_rules * rules;
- #endif
- {
- int x;
- for (x = 0; x < 13; ++x)
- if (tab->children[x])
- {
- rx_free_hash_table (tab->children[x], freefn, rules);
- rules->free_hash (tab->children[x], rules);
- }
- else
- {
- struct rx_hash_item * them = tab->buckets[x];
- while (them)
- {
- struct rx_hash_item * that = them;
- them = that->next_same_hash;
- freefn (that);
- rules->free_hash_item (that, rules);
- }
- }
- }
/* Utilities for manipulating bitset representations of character sets. */
- #ifdef __STDC__
- RX_DECL rx_Bitset
- rx_cset (struct rx *rx)
- #else
- RX_DECL rx_Bitset
- rx_cset (rx)
- struct rx *rx;
- #endif
- {
- rx_Bitset b = (rx_Bitset) malloc (rx_sizeof_bitset (rx->local_cset_size));
- if (b)
- rx_bitset_null (rx->local_cset_size, b);
- return b;
- }
- #ifdef __STDC__
- RX_DECL rx_Bitset
- rx_copy_cset (struct rx *rx, rx_Bitset a)
- #else
- RX_DECL rx_Bitset
- rx_copy_cset (rx, a)
- struct rx *rx;
- rx_Bitset a;
- #endif
- {
- rx_Bitset cs = rx_cset (rx);
- if (cs)
- rx_bitset_union (rx->local_cset_size, cs, a);
- return cs;
- }
- #ifdef __STDC__
- RX_DECL void
- rx_free_cset (struct rx * rx, rx_Bitset c)
- #else
- RX_DECL void
- rx_free_cset (rx, c)
- struct rx * rx;
- rx_Bitset c;
- #endif
- {
- if (c)
- free ((char *)c);
- }
- /* Hash table memory allocation policy for the regexp compiler */
- #ifdef __STDC__
- static struct rx_hash *
- compiler_hash_alloc (struct rx_hash_rules * rules)
- #else
- static struct rx_hash *
- compiler_hash_alloc (rules)
- struct rx_hash_rules * rules;
- #endif
- {
- return (struct rx_hash *)malloc (sizeof (struct rx_hash));
- }
- #ifdef __STDC__
- static struct rx_hash_item *
- compiler_hash_item_alloc (struct rx_hash_rules * rules, void * value)
- #else
- static struct rx_hash_item *
- compiler_hash_item_alloc (rules, value)
- struct rx_hash_rules * rules;
- void * value;
- #endif
- {
- struct rx_hash_item * it;
- it = (struct rx_hash_item *)malloc (sizeof (*it));
- if (it)
- {
- it->data = value;
- it->binding = 0;
- }
- return it;
- }
/* Release the storage of hash table TAB with free.  RULES is not
 * used.
 */
#ifdef __STDC__
static void
compiler_free_hash (struct rx_hash * tab,
                    struct rx_hash_rules * rules)
#else
static void
compiler_free_hash (tab, rules)
     struct rx_hash * tab;
     struct rx_hash_rules * rules;
#endif
{
  char * storage = (char *)tab;

  free (storage);
}
/* Release the storage of hash item ITEM with free.  RULES is not
 * used.
 */
#ifdef __STDC__
static void
compiler_free_hash_item (struct rx_hash_item * item,
                         struct rx_hash_rules * rules)
#else
static void
compiler_free_hash_item (item, rules)
     struct rx_hash_item * item;
     struct rx_hash_rules * rules;
#endif
{
  char * storage = (char *)item;

  free (storage);
}
- /* This page: REXP_NODE (expression tree) structures. */
- #ifdef __STDC__
- RX_DECL struct rexp_node *
- rexp_node (struct rx *rx,
- enum rexp_node_type type)
- #else
- RX_DECL struct rexp_node *
- rexp_node (rx, type)
- struct rx *rx;
- enum rexp_node_type type;
- #endif
- {
- struct rexp_node *n;
- n = (struct rexp_node *)malloc (sizeof (*n));
- bzero (n, sizeof (*n));
- if (n)
- n->type = type;
- return n;
- }
- /* free_rexp_node assumes that the bitset passed to rx_mk_r_cset
- * can be freed using rx_free_cset.
- */
- #ifdef __STDC__
- RX_DECL struct rexp_node *
- rx_mk_r_cset (struct rx * rx,
- rx_Bitset b)
- #else
- RX_DECL struct rexp_node *
- rx_mk_r_cset (rx, b)
- struct rx * rx;
- rx_Bitset b;
- #endif
- {
- struct rexp_node * n = rexp_node (rx, r_cset);
- if (n)
- n->params.cset = b;
- return n;
- }
- #ifdef __STDC__
- RX_DECL struct rexp_node *
- rx_mk_r_concat (struct rx * rx,
- struct rexp_node * a,
- struct rexp_node * b)
- #else
- RX_DECL struct rexp_node *
- rx_mk_r_concat (rx, a, b)
- struct rx * rx;
- struct rexp_node * a;
- struct rexp_node * b;
- #endif
- {
- struct rexp_node * n = rexp_node (rx, r_concat);
- if (n)
- {
- n->params.pair.left = a;
- n->params.pair.right = b;
- }
- return n;
- }
- #ifdef __STDC__
- RX_DECL struct rexp_node *
- rx_mk_r_alternate (struct rx * rx,
- struct rexp_node * a,
- struct rexp_node * b)
- #else
- RX_DECL struct rexp_node *
- rx_mk_r_alternate (rx, a, b)
- struct rx * rx;
- struct rexp_node * a;
- struct rexp_node * b;
- #endif
- {
- struct rexp_node * n = rexp_node (rx, r_alternate);
- if (n)
- {
- n->params.pair.left = a;
- n->params.pair.right = b;
- }
- return n;
- }
- #ifdef __STDC__
- RX_DECL struct rexp_node *
- rx_mk_r_opt (struct rx * rx,
- struct rexp_node * a)
- #else
- RX_DECL struct rexp_node *
- rx_mk_r_opt (rx, a)
- struct rx * rx;
- struct rexp_node * a;
- #endif
- {
- struct rexp_node * n = rexp_node (rx, r_opt);
- if (n)
- {
- n->params.pair.left = a;
- n->params.pair.right = 0;
- }
- return n;
- }
- #ifdef __STDC__
- RX_DECL struct rexp_node *
- rx_mk_r_star (struct rx * rx,
- struct rexp_node * a)
- #else
- RX_DECL struct rexp_node *
- rx_mk_r_star (rx, a)
- struct rx * rx;
- struct rexp_node * a;
- #endif
- {
- struct rexp_node * n = rexp_node (rx, r_star);
- if (n)
- {
- n->params.pair.left = a;
- n->params.pair.right = 0;
- }
- return n;
- }
- #ifdef __STDC__
- RX_DECL struct rexp_node *
- rx_mk_r_2phase_star (struct rx * rx,
- struct rexp_node * a,
- struct rexp_node * b)
- #else
- RX_DECL struct rexp_node *
- rx_mk_r_2phase_star (rx, a, b)
- struct rx * rx;
- struct rexp_node * a;
- struct rexp_node * b;
- #endif
- {
- struct rexp_node * n = rexp_node (rx, r_2phase_star);
- if (n)
- {
- n->params.pair.left = a;
- n->params.pair.right = b;
- }
- return n;
- }
- #ifdef __STDC__
- RX_DECL struct rexp_node *
- rx_mk_r_side_effect (struct rx * rx,
- rx_side_effect a)
- #else
- RX_DECL struct rexp_node *
- rx_mk_r_side_effect (rx, a)
- struct rx * rx;
- rx_side_effect a;
- #endif
- {
- struct rexp_node * n = rexp_node (rx, r_side_effect);
- if (n)
- {
- n->params.side_effect = a;
- n->params.pair.right = 0;
- }
- return n;
- }
- #ifdef __STDC__
- RX_DECL struct rexp_node *
- rx_mk_r_data (struct rx * rx,
- void * a)
- #else
- RX_DECL struct rexp_node *
- rx_mk_r_data (rx, a)
- struct rx * rx;
- void * a;
- #endif
- {
- struct rexp_node * n = rexp_node (rx, r_data);
- if (n)
- {
- n->params.pair.left = a;
- n->params.pair.right = 0;
- }
- return n;
- }
- #ifdef __STDC__
- RX_DECL void
- rx_free_rexp (struct rx * rx, struct rexp_node * node)
- #else
- RX_DECL void
- rx_free_rexp (rx, node)
- struct rx * rx;
- struct rexp_node * node;
- #endif
- {
- if (node)
- {
- switch (node->type)
- {
- case r_cset:
- if (node->params.cset)
- rx_free_cset (rx, node->params.cset);
- case r_side_effect:
- break;
-
- case r_concat:
- case r_alternate:
- case r_2phase_star:
- case r_opt:
- case r_star:
- rx_free_rexp (rx, node->params.pair.left);
- rx_free_rexp (rx, node->params.pair.right);
- break;
- case r_data:
- /* This shouldn't occur. */
- break;
- }
- free ((char *)node);
- }
- }
- #ifdef __STDC__
- RX_DECL struct rexp_node *
- rx_copy_rexp (struct rx *rx,
- struct rexp_node *node)
- #else
- RX_DECL struct rexp_node *
- rx_copy_rexp (rx, node)
- struct rx *rx;
- struct rexp_node *node;
- #endif
- {
- if (!node)
- return 0;
- else
- {
- struct rexp_node *n = rexp_node (rx, node->type);
- if (!n)
- return 0;
- switch (node->type)
- {
- case r_cset:
- n->params.cset = rx_copy_cset (rx, node->params.cset);
- if (!n->params.cset)
- {
- rx_free_rexp (rx, n);
- return 0;
- }
- break;
- case r_side_effect:
- n->params.side_effect = node->params.side_effect;
- break;
- case r_concat:
- case r_alternate:
- case r_opt:
- case r_2phase_star:
- case r_star:
- n->params.pair.left =
- rx_copy_rexp (rx, node->params.pair.left);
- n->params.pair.right =
- rx_copy_rexp (rx, node->params.pair.right);
- if ( (node->params.pair.left && !n->params.pair.left)
- || (node->params.pair.right && !n->params.pair.right))
- {
- rx_free_rexp (rx, n);
- return 0;
- }
- break;
- case r_data:
- /* shouldn't happen */
- break;
- }
- return n;
- }
- }
- /* This page: functions to build and destroy graphs that describe nfa's */
- /* Constructs a new nfa node. */
- #ifdef __STDC__
- RX_DECL struct rx_nfa_state *
- rx_nfa_state (struct rx *rx)
- #else
- RX_DECL struct rx_nfa_state *
- rx_nfa_state (rx)
- struct rx *rx;
- #endif
- {
- struct rx_nfa_state * n = (struct rx_nfa_state *)malloc (sizeof (*n));
- if (!n)
- return 0;
- bzero (n, sizeof (*n));
- n->next = rx->nfa_states;
- rx->nfa_states = n;
- return n;
- }
- #ifdef __STDC__
- RX_DECL void
- rx_free_nfa_state (struct rx_nfa_state * n)
- #else
- RX_DECL void
- rx_free_nfa_state (n)
- struct rx_nfa_state * n;
- #endif
- {
- free ((char *)n);
- }
- /* This looks up an nfa node, given a numeric id. Numeric id's are
- * assigned after the nfa has been built.
- */
- #ifdef __STDC__
- RX_DECL struct rx_nfa_state *
- rx_id_to_nfa_state (struct rx * rx,
- int id)
- #else
- RX_DECL struct rx_nfa_state *
- rx_id_to_nfa_state (rx, id)
- struct rx * rx;
- int id;
- #endif
- {
- struct rx_nfa_state * n;
- for (n = rx->nfa_states; n; n = n->next)
- if (n->id == id)
- return n;
- return 0;
- }
- /* This adds an edge between two nodes, but doesn't initialize the
- * edge label.
- */
- #ifdef __STDC__
- RX_DECL struct rx_nfa_edge *
- rx_nfa_edge (struct rx *rx,
- enum rx_nfa_etype type,
- struct rx_nfa_state *start,
- struct rx_nfa_state *dest)
- #else
- RX_DECL struct rx_nfa_edge *
- rx_nfa_edge (rx, type, start, dest)
- struct rx *rx;
- enum rx_nfa_etype type;
- struct rx_nfa_state *start;
- struct rx_nfa_state *dest;
- #endif
- {
- struct rx_nfa_edge *e;
- e = (struct rx_nfa_edge *)malloc (sizeof (*e));
- if (!e)
- return 0;
- e->next = start->edges;
- start->edges = e;
- e->type = type;
- e->dest = dest;
- return e;
- }
- #ifdef __STDC__
- RX_DECL void
- rx_free_nfa_edge (struct rx_nfa_edge * e)
- #else
- RX_DECL void
- rx_free_nfa_edge (e)
- struct rx_nfa_edge * e;
- #endif
- {
- free ((char *)e);
- }
- /* This constructs a POSSIBLE_FUTURE, which is a kind epsilon-closure
- * of an NFA. These are added to an nfa automaticly by eclose_nfa.
- */
- #ifdef __STDC__
- static struct rx_possible_future *
- rx_possible_future (struct rx * rx,
- struct rx_se_list * effects)
- #else
- static struct rx_possible_future *
- rx_possible_future (rx, effects)
- struct rx * rx;
- struct rx_se_list * effects;
- #endif
- {
- struct rx_possible_future *ec;
- ec = (struct rx_possible_future *) malloc (sizeof (*ec));
- if (!ec)
- return 0;
- ec->destset = 0;
- ec->next = 0;
- ec->effects = effects;
- return ec;
- }
/* Release the storage of PF with free.  What PF points to (its
 * destination set and effect list) is left alone.
 */
#ifdef __STDC__
static void
rx_free_possible_future (struct rx_possible_future * pf)
#else
static void
rx_free_possible_future (pf)
     struct rx_possible_future * pf;
#endif
{
  char * storage = (char *)pf;

  free (storage);
}
- #ifdef __STDC__
- RX_DECL void
- rx_free_nfa (struct rx *rx)
- #else
- RX_DECL void
- rx_free_nfa (rx)
- struct rx *rx;
- #endif
- {
- while (rx->nfa_states)
- {
- while (rx->nfa_states->edges)
- {
- switch (rx->nfa_states->edges->type)
- {
- case ne_cset:
- rx_free_cset (rx, rx->nfa_states->edges->params.cset);
- break;
- default:
- break;
- }
- {
- struct rx_nfa_edge * e;
- e = rx->nfa_states->edges;
- rx->nfa_states->edges = rx->nfa_states->edges->next;
- rx_free_nfa_edge (e);
- }
- } /* while (rx->nfa_states->edges) */
- {
- /* Iterate over the partial epsilon closures of rx->nfa_states */
- struct rx_possible_future * pf = rx->nfa_states->futures;
- while (pf)
- {
- struct rx_possible_future * pft = pf;
- pf = pf->next;
- rx_free_possible_future (pft);
- }
- }
- {
- struct rx_nfa_state *n;
- n = rx->nfa_states;
- rx->nfa_states = rx->nfa_states->next;
- rx_free_nfa_state (n);
- }
- }
- }
- /* This page: translating a pattern expression into an nfa and doing the
- * static part of the nfa->super-nfa translation.
- */
- /* This is the thompson regexp->nfa algorithm.
- * It is modified to allow for `side-effect epsilons.' Those are
- * edges that are taken whenever a similar epsilon edge would be,
- * but which imply that some side effect occurs when the edge
- * is taken.
- *
- * Side effects are used to model parts of the pattern langauge
- * that are not regular (in the formal sense).
- */
- #ifdef __STDC__
- RX_DECL int
- rx_build_nfa (struct rx *rx,
- struct rexp_node *rexp,
- struct rx_nfa_state **start,
- struct rx_nfa_state **end)
- #else
- RX_DECL int
- rx_build_nfa (rx, rexp, start, end)
- struct rx *rx;
- struct rexp_node *rexp;
- struct rx_nfa_state **start;
- struct rx_nfa_state **end;
- #endif
- {
- struct rx_nfa_edge *edge;
- /* Start & end nodes may have been allocated by the caller. */
- *start = *start ? *start : rx_nfa_state (rx);
- if (!*start)
- return 0;
- if (!rexp)
- {
- *end = *start;
- return 1;
- }
- *end = *end ? *end : rx_nfa_state (rx);
- if (!*end)
- {
- rx_free_nfa_state (*start);
- return 0;
- }
- switch (rexp->type)
- {
- case r_data:
- return 0;
- case r_cset:
- edge = rx_nfa_edge (rx, ne_cset, *start, *end);
- if (!edge)
- return 0;
- edge->params.cset = rx_copy_cset (rx, rexp->params.cset);
- if (!edge->params.cset)
- {
- rx_free_nfa_edge (edge);
- return 0;
- }
- return 1;
-
- case r_opt:
- return (rx_build_nfa (rx, rexp->params.pair.left, start, end)
- && rx_nfa_edge (rx, ne_epsilon, *start, *end));
- case r_star:
- {
- struct rx_nfa_state * star_start = 0;
- struct rx_nfa_state * star_end = 0;
- return (rx_build_nfa (rx, rexp->params.pair.left,
- &star_start, &star_end)
- && star_start
- && star_end
- && rx_nfa_edge (rx, ne_epsilon, star_start, star_end)
- && rx_nfa_edge (rx, ne_epsilon, *start, star_start)
- && rx_nfa_edge (rx, ne_epsilon, star_end, *end)
- && rx_nfa_edge (rx, ne_epsilon, star_end, star_start));
- }
- case r_2phase_star:
- {
- struct rx_nfa_state * star_start = 0;
- struct rx_nfa_state * star_end = 0;
- struct rx_nfa_state * loop_exp_start = 0;
- struct rx_nfa_state * loop_exp_end = 0;
- return (rx_build_nfa (rx, rexp->params.pair.left,
- &star_start, &star_end)
- && rx_build_nfa (rx, rexp->params.pair.right,
- &loop_exp_start, &loop_exp_end)
- && star_start
- && star_end
- && loop_exp_end
- && loop_exp_start
- && rx_nfa_edge (rx, ne_epsilon, star_start, *end)
- && rx_nfa_edge (rx, ne_epsilon, *start, star_start)
- && rx_nfa_edge (rx, ne_epsilon, star_end, *end)
- && rx_nfa_edge (rx, ne_epsilon, star_end, loop_exp_start)
- && rx_nfa_edge (rx, ne_epsilon, loop_exp_end, star_start));
- }
- case r_concat:
- {
- struct rx_nfa_state *shared = 0;
- return
- (rx_build_nfa (rx, rexp->params.pair.left, start, &shared)
- && rx_build_nfa (rx, rexp->params.pair.right, &shared, end));
- }
- case r_alternate:
- {
- struct rx_nfa_state *ls = 0;
- struct rx_nfa_state *le = 0;
- struct rx_nfa_state *rs = 0;
- struct rx_nfa_state *re = 0;
- return (rx_build_nfa (rx, rexp->params.pair.left, &ls, &le)
- && rx_build_nfa (rx, rexp->params.pair.right, &rs, &re)
- && rx_nfa_edge (rx, ne_epsilon, *start, ls)
- && rx_nfa_edge (rx, ne_epsilon, *start, rs)
- && rx_nfa_edge (rx, ne_epsilon, le, *end)
- && rx_nfa_edge (rx, ne_epsilon, re, *end));
- }
- case r_side_effect:
- edge = rx_nfa_edge (rx, ne_side_effect, *start, *end);
- if (!edge)
- return 0;
- edge->params.side_effect = rexp->params.side_effect;
- return 1;
- }
- /* this should never happen */
- return 0;
- }
- /* RX_NAME_NFA_STATES identifies all nodes with outgoing non-epsilon
- * transitions. Only these nodes can occur in super-states.
- * All nodes are given an integer id.
- * The id is non-negative if the node has non-epsilon out-transitions, negative
- * otherwise (this is because we want the non-negative ids to be used as
- * array indexes in a few places).
- */
- #ifdef __STDC__
- RX_DECL void
- rx_name_nfa_states (struct rx *rx)
- #else
- RX_DECL void
- rx_name_nfa_states (rx)
- struct rx *rx;
- #endif
- {
- struct rx_nfa_state *n = rx->nfa_states;
- rx->nodec = 0;
- rx->epsnodec = -1;
- while (n)
- {
- struct rx_nfa_edge *e = n->edges;
- if (n->is_start)
- n->eclosure_needed = 1;
- while (e)
- {
- switch (e->type)
- {
- case ne_epsilon:
- case ne_side_effect:
- break;
- case ne_cset:
- n->id = rx->nodec++;
- {
- struct rx_nfa_edge *from_n = n->edges;
- while (from_n)
- {
- from_n->dest->eclosure_needed = 1;
- from_n = from_n->next;
- }
- }
- goto cont;
- }
- e = e->next;
- }
- n->id = rx->epsnodec--;
- cont:
- n = n->next;
- }
- rx->epsnodec = -rx->epsnodec;
- }
- /* This page: data structures for the static part of the nfa->supernfa
- * translation.
- *
- * There are side effect lists -- lists of side effects occurring
- * along an uninterrupted, acyclic path of side-effect epsilon edges.
- * Such paths are collapsed to single edges in the course of computing
- * epsilon closures. Such single edges are labeled with a list of all
- * the side effects entailed in crossing them. Equal lists of side
- * effects are made pointer-identical (==) by the constructors below.
- *
- * There are also nfa state sets. These are used to hold a list of all
- * states reachable from a starting state for a given type of transition
- * and side effect list. These are also hash-consed.
- */
- /* The next several functions compare, construct, etc. lists of side
- * effects. See ECLOSE_NFA (below) for details.
- */
- /* Ordering of rx_se_list
- * (-1, 0, 1 return value convention).
- */
- #ifdef __STDC__
- static int
- se_list_cmp (void * va, void * vb)
- #else
- static int
- se_list_cmp (va, vb)
- void * va;
- void * vb;
- #endif
- {
- struct rx_se_list * a = (struct rx_se_list *)va;
- struct rx_se_list * b = (struct rx_se_list *)vb;
- return ((va == vb)
- ? 0
- : (!va
- ? -1
- : (!vb
- ? 1
- : ((long)a->car < (long)b->car
- ? 1
- : ((long)a->car > (long)b->car
- ? -1
- : se_list_cmp ((void *)a->cdr, (void *)b->cdr))))));
- }
/* Equality predicate for side-effect lists: nonzero exactly when
 * se_list_cmp reports the two lists as equal.
 */
#ifdef __STDC__
static int
se_list_equal (void * va, void * vb)
#else
static int
se_list_equal (va, vb)
     void * va;
     void * vb;
#endif
{
  int order = se_list_cmp (va, vb);

  return order == 0;
}
- static struct rx_hash_rules se_list_hash_rules = /* rules passed to the hash routines for the side-effect-list memo */
- {
- se_list_equal, /* two items match when se_list_cmp reports 0 */
- compiler_hash_alloc, /* this and the entries below are compiler_* routines defined outside this section */
- compiler_free_hash,
- compiler_hash_item_alloc,
- compiler_free_hash_item
- };
- #ifdef __STDC__
- static struct rx_se_list *
- side_effect_cons (struct rx * rx,
- void * se, struct rx_se_list * list)
- #else
- static struct rx_se_list *
- side_effect_cons (rx, se, list)
- struct rx * rx;
- void * se;
- struct rx_se_list * list;
- #endif
- {
- struct rx_se_list * l;
- l = ((struct rx_se_list *) malloc (sizeof (*l)));
- if (!l)
- return 0;
- l->car = se;
- l->cdr = list;
- return l;
- }
- #ifdef __STDC__
- static struct rx_se_list *
- hash_cons_se_prog (struct rx * rx,
- struct rx_hash * memo,
- void * car, struct rx_se_list * cdr)
- #else
- static struct rx_se_list *
- hash_cons_se_prog (rx, memo, car, cdr)
- struct rx * rx;
- struct rx_hash * memo;
- void * car;
- struct rx_se_list * cdr;
- #endif
- {
- long hash = (long)car ^ (long)cdr;
- struct rx_se_list template;
- template.car = car;
- template.cdr = cdr;
- {
- struct rx_hash_item * it = rx_hash_store (memo, hash,
- (void *)&template,
- &se_list_hash_rules);
- if (!it)
- return 0;
- if (it->data == (void *)&template)
- {
- struct rx_se_list * consed;
- consed = (struct rx_se_list *) malloc (sizeof (*consed));
- *consed = template;
- it->data = (void *)consed;
- }
- return (struct rx_se_list *)it->data;
- }
- }
-
- #ifdef __STDC__
- static struct rx_se_list *
- hash_se_prog (struct rx * rx, struct rx_hash * memo, struct rx_se_list * prog)
- #else
- static struct rx_se_list *
- hash_se_prog (rx, memo, prog)
- struct rx * rx;
- struct rx_hash * memo;
- struct rx_se_list * prog;
- #endif
- {
- struct rx_se_list * answer = 0;
- while (prog)
- {
- answer = hash_cons_se_prog (rx, memo, prog->car, answer);
- if (!answer)
- return 0;
- prog = prog->cdr;
- }
- return answer;
- }
- #ifdef __STDC__
- static int
- nfa_set_cmp (void * va, void * vb)
- #else
- static int
- nfa_set_cmp (va, vb)
- void * va;
- void * vb;
- #endif
- {
- struct rx_nfa_state_set * a = (struct rx_nfa_state_set *)va;
- struct rx_nfa_state_set * b = (struct rx_nfa_state_set *)vb;
- return ((va == vb)
- ? 0
- : (!va
- ? -1
- : (!vb
- ? 1
- : (a->car->id < b->car->id
- ? 1
- : (a->car->id > b->car->id
- ? -1
- : nfa_set_cmp ((void *)a->cdr, (void *)b->cdr))))));
- }
/* Equality predicate for nfa state sets: nonzero exactly when
 * nfa_set_cmp reports the two sets as equal.
 */
#ifdef __STDC__
static int
nfa_set_equal (void * va, void * vb)
#else
static int
nfa_set_equal (va, vb)
     void * va;
     void * vb;
#endif
{
  int order = nfa_set_cmp (va, vb);

  return order == 0;
}
- static struct rx_hash_rules nfa_set_hash_rules = /* rules passed to the hash routines for the nfa-state-set memo */
- {
- nfa_set_equal, /* two items match when nfa_set_cmp reports 0 */
- compiler_hash_alloc, /* this and the entries below are compiler_* routines defined outside this section */
- compiler_free_hash,
- compiler_hash_item_alloc,
- compiler_free_hash_item
- };
- #ifdef __STDC__
- static struct rx_nfa_state_set *
- nfa_set_cons (struct rx * rx,
- struct rx_hash * memo, struct rx_nfa_state * state,
- struct rx_nfa_state_set * set)
- #else
- static struct rx_nfa_state_set *
- nfa_set_cons (rx, memo, state, set)
- struct rx * rx;
- struct rx_hash * memo;
- struct rx_nfa_state * state;
- struct rx_nfa_state_set * set;
- #endif
- {
- struct rx_nfa_state_set template;
- struct rx_hash_item * node;
- template.car = state;
- template.cdr = set;
- node = rx_hash_store (memo,
- (((long)state) >> 8) ^ (long)set,
- &template, &nfa_set_hash_rules);
- if (!node)
- return 0;
- if (node->data == &template)
- {
- struct rx_nfa_state_set * l;
- l = (struct rx_nfa_state_set *) malloc (sizeof (*l));
- node->data = (void *) l;
- if (!l)
- return 0;
- *l = template;
- }
- return (struct rx_nfa_state_set *)node->data;
- }
- #ifdef __STDC__
- static struct rx_nfa_state_set *
- nfa_set_enjoin (struct rx * rx,
- struct rx_hash * memo, struct rx_nfa_state * state,
- struct rx_nfa_state_set * set)
- #else
- static struct rx_nfa_state_set *
- nfa_set_enjoin (rx, memo, state, set)
- struct rx * rx;
- struct rx_hash * memo;
- struct rx_nfa_state * state;
- struct rx_nfa_state_set * set;
- #endif
- {
- if (!set || state->id < set->car->id)
- return nfa_set_cons (rx, memo, state, set);
- if (state->id == set->car->id)
- return set;
- else
- {
- struct rx_nfa_state_set * newcdr
- = nfa_set_enjoin (rx, memo, state, set->cdr);
- if (newcdr != set->cdr)
- set = nfa_set_cons (rx, memo, set->car, newcdr);
- return set;
- }
- }
- /* This page: computing epsilon closures. The closures aren't total.
- * Each node's closures are partitioned according to the side effects entailed
- * along the epsilon edges. Return true on success.
- *
- * struct eclose_frame is the traversal state that eclose_node threads
- * through its recursive calls.
- */
- struct eclose_frame
- {
- struct rx_se_list *prog_backwards; /* side effects crossed on the current path, most recent first; eclose_node pushes and pops it */
- };
- #ifdef __STDC__
- static int
- eclose_node (struct rx *rx, struct rx_nfa_state *outnode,
- struct rx_nfa_state *node, struct eclose_frame *frame)
- #else
- static int
- eclose_node (rx, outnode, node, frame)
- struct rx *rx;
- struct rx_nfa_state *outnode;
- struct rx_nfa_state *node;
- struct eclose_frame *frame;
- #endif
- {
- struct rx_nfa_edge *e = node->edges;
- /* For each node, we follow all epsilon paths to build the closure.
- * The closure omits nodes that have only epsilon edges.
- * The closure is split into partial closures -- all the states in
- * a partial closure are reached by crossing the same list of
- * of side effects (though not necessarily the same path).
- */
- if (node->mark)
- return 1;
- node->mark = 1;
- if (node->id >= 0 || node->is_final)
- {
- struct rx_possible_future **ec;
- struct rx_se_list * prog_in_order
- = ((struct rx_se_list *)hash_se_prog (rx,
- &rx->se_list_memo,
- frame->prog_backwards));
- int cmp;
- ec = &outnode->futures;
- while (*ec)
- {
- cmp = se_list_cmp ((void *)(*ec)->effects, (void *)prog_in_order);
- if (cmp <= 0)
- break;
- ec = &(*ec)->next;
- }
- if (!*ec || (cmp < 0))
- {
- struct rx_possible_future * saved = *ec;
- *ec = rx_possible_future (rx, prog_in_order);
- (*ec)->next = saved;
- if (!*ec)
- return 0;
- }
- if (node->id >= 0)
- {
- (*ec)->destset = nfa_set_enjoin (rx, &rx->set_list_memo,
- node, (*ec)->destset);
- if (!(*ec)->destset)
- return 0;
- }
- }
- while (e)
- {
- switch (e->type)
- {
- case ne_epsilon:
- if (!eclose_node (rx, outnode, e->dest, frame))
- return 0;
- break;
- case ne_side_effect:
- {
- frame->prog_backwards = side_effect_cons (rx,
- e->params.side_effect,
- frame->prog_backwards);
- if (!frame->prog_backwards)
- return 0;
- if (!eclose_node (rx, outnode, e->dest, frame))
- return 0;
- {
- struct rx_se_list * dying = frame->prog_backwards;
- frame->prog_backwards = frame->prog_backwards->cdr;
- free ((char *)dying);
- }
- break;
- }
- default:
- break;
- }
- e = e->next;
- }
- node->mark = 0;
- return 1;
- }
- #ifdef __STDC__
- RX_DECL int
- rx_eclose_nfa (struct rx *rx)
- #else
- RX_DECL int
- rx_eclose_nfa (rx)
- struct rx *rx;
- #endif
- {
- struct rx_nfa_state *n = rx->nfa_states;
- struct eclose_frame frame;
- static int rx_id = 0;
-
- frame.prog_backwards = 0;
- rx->rx_id = rx_id++;
- bzero (&rx->se_list_memo, sizeof (rx->se_list_memo));
- bzero (&rx->set_list_memo, sizeof (rx->set_list_memo));
- while (n)
- {
- n->futures = 0;
- if (n->eclosure_needed && !eclose_node (rx, n, n, &frame))
- return 0;
- /* clear_marks (rx); */
- n = n->next;
- }
- return 1;
- }
- /* This deletes epsilon edges from an NFA. After running eclose_node,
- * we have no more need for these edges. They are removed to simplify
- * further operations on the NFA.
- */
- #ifdef __STDC__
- RX_DECL void
- rx_delete_epsilon_transitions (struct rx *rx)
- #else
- RX_DECL void
- rx_delete_epsilon_transitions (rx)
- struct rx *rx;
- #endif
- {
- struct rx_nfa_state *n = rx->nfa_states;
- struct rx_nfa_edge **e;
- while (n)
- {
- e = &n->edges;
- while (*e)
- {
- struct rx_nfa_edge *t;
- switch ((*e)->type)
- {
- case ne_epsilon:
- case ne_side_effect:
- t = *e;
- *e = t->next;
- rx_free_nfa_edge (t);
- break;
- default:
- e = &(*e)->next;
- break;
- }
- }
- n = n->next;
- }
- }
- /* This page: storing the nfa in a contiguous region of memory for
- * subsequent conversion to a super-nfa.
- */
- /* This is for qsort on an array of nfa_states. The order
- * is based on state ids and goes
- * [0...MAX][MIN..-1] where (MAX>=0) and (MIN<0)
- * This way, positive ids double as array indices.
- */
- #ifdef __STDC__
- static int
- nfacmp (void * va, void * vb)
- #else
- static int
- nfacmp (va, vb)
- void * va;
- void * vb;
- #endif
- {
- struct rx_nfa_state **a = (struct rx_nfa_state **)va;
- struct rx_nfa_state **b = (struct rx_nfa_state **)vb;
- return (*a == *b /* &&&& 3.18 */
- ? 0
- : (((*a)->id < 0) == ((*b)->id < 0)
- ? (((*a)->id < (*b)->id) ? -1 : 1)
- : (((*a)->id < 0)
- ? 1 : -1)));
- }
- #ifdef __STDC__
- static int
- count_hash_nodes (struct rx_hash * st)
- #else
- static int
- count_hash_nodes (st)
- struct rx_hash * st;
- #endif
- {
- int x;
- int count = 0;
- for (x = 0; x < 13; ++x)
- count += ((st->children[x])
- ? count_hash_nodes (st->children[x])
- : st->bucket_size[x]);
-
- return count;
- }
- #ifdef __STDC__
- static void
- se_memo_freer (struct rx_hash_item * node)
- #else
- static void
- se_memo_freer (node)
- struct rx_hash_item * node;
- #endif
- {
- free ((char *)node->data);
- }
- #ifdef __STDC__
- static void
- nfa_set_freer (struct rx_hash_item * node)
- #else
- static void
- nfa_set_freer (node)
- struct rx_hash_item * node;
- #endif
- {
- free ((char *)node->data);
- }
- /* This copies an entire NFA into a single malloced block of memory.
- * Mostly this is for compatability with regex.c, though it is convenient
- * to have the nfa nodes in an array.
- */
- #ifdef __STDC__
- RX_DECL int
- rx_compactify_nfa (struct rx *rx,
- void **mem, unsigned long *size)
- #else
- RX_DECL int
- rx_compactify_nfa (rx, mem, size)
- struct rx *rx;
- void **mem;
- unsigned long *size;
- #endif
- {
- int total_nodec;
- struct rx_nfa_state *n;
- int edgec = 0;
- int eclosec = 0;
- int se_list_consc = count_hash_nodes (&rx->se_list_memo);
- int nfa_setc = count_hash_nodes (&rx->set_list_memo);
- unsigned long total_size;
- /* This takes place in two stages. First, the total size of the
- * nfa is computed, then structures are copied.
- */
- n = rx->nfa_states;
- total_nodec = 0;
- while (n)
- {
- struct rx_nfa_edge *e = n->edges;
- struct rx_possible_future *ec = n->futures;
- ++total_nodec;
- while (e)
- {
- ++edgec;
- e = e->next;
- }
- while (ec)
- {
- ++eclosec;
- ec = ec->next;
- }
- n = n->next;
- }
- total_size = (total_nodec * sizeof (struct rx_nfa_state)
- + edgec * rx_sizeof_bitset (rx->local_cset_size)
- + edgec * sizeof (struct rx_nfa_edge)
- + nfa_setc * sizeof (struct rx_nfa_state_set)
- + eclosec * sizeof (struct rx_possible_future)
- + se_list_consc * sizeof (struct rx_se_list)
- + rx->reserved);
- if (total_size > *size)
- {
- *mem = remalloc (*mem, total_size);
- if (*mem)
- *size = total_size;
- else
- return 0;
- }
- /* Now we've allocated the memory; this copies the NFA. */
- {
- static struct rx_nfa_state **scratch = 0;
- static int scratch_alloc = 0;
- struct rx_nfa_state *state_base = (struct rx_nfa_state *) * mem;
- struct rx_nfa_state *new_state = state_base;
- struct rx_nfa_edge *new_edge =
- (struct rx_nfa_edge *)
- ((char *) state_base + total_nodec * sizeof (struct rx_nfa_state));
- struct rx_se_list * new_se_list =
- (struct rx_se_list *)
- ((char *)new_edge + edgec * sizeof (struct rx_nfa_edge));
- struct rx_possible_future *new_close =
- ((struct rx_possible_future *)
- ((char *) new_se_list
- + se_list_consc * sizeof (struct rx_se_list)));
- struct rx_nfa_state_set * new_nfa_set =
- ((struct rx_nfa_state_set *)
- ((char *)new_close + eclosec * sizeof (struct rx_possible_future)));
- char *new_bitset =
- ((char *) new_nfa_set + nfa_setc * sizeof (struct rx_nfa_state_set));
- int x;
- struct rx_nfa_state *n;
- if (scratch_alloc < total_nodec)
- {
- scratch = ((struct rx_nfa_state **)
- remalloc (scratch, total_nodec * sizeof (*scratch)));
- if (scratch)
- scratch_alloc = total_nodec;
- else
- {
- scratch_alloc = 0;
- return 0;
- }
- }
- for (x = 0, n = rx->nfa_states; n; n = n->next)
- scratch[x++] = n;
- qsort (scratch, total_nodec,
- sizeof (struct rx_nfa_state *), (int (*)())nfacmp);
- for (x = 0; x < total_nodec; ++x)
- {
- struct rx_possible_future *eclose = scratch[x]->futures;
- struct rx_nfa_edge *edge = scratch[x]->edges;
- struct rx_nfa_state *cn = new_state++;
- cn->futures = 0;
- cn->edges = 0;
- cn->next = (x == total_nodec - 1) ? 0 : (cn + 1);
- cn->id = scratch[x]->id;
- cn->is_final = scratch[x]->is_final;
- cn->is_start = scratch[x]->is_start;
- cn->mark = 0;
- while (edge)
- {
- int indx = (edge->dest->id < 0
- ? (total_nodec + edge->dest->id)
- : edge->dest->id);
- struct rx_nfa_edge *e = new_edge++;
- rx_Bitset cset = (rx_Bitset) new_bitset;
- new_bitset += rx_sizeof_bitset (rx->local_cset_size);
- rx_bitset_null (rx->local_cset_size, cset);
- rx_bitset_union (rx->local_cset_size, cset, edge->params.cset);
- e->next = cn->edges;
- cn->edges = e;
- e->type = edge->type;
- e->dest = state_base + indx;
- e->params.cset = cset;
- edge = edge->next;
- }
- while (eclose)
- {
- struct rx_possible_future *ec = new_close++;
- struct rx_hash_item * sp;
- struct rx_se_list ** sepos;
- struct rx_se_list * sesrc;
- struct rx_nfa_state_set * destlst;
- struct rx_nfa_state_set ** destpos;
- ec->next = cn->futures;
- cn->futures = ec;
- for (sepos = &ec->effects, sesrc = eclose->effects;
- sesrc;
- sesrc = sesrc->cdr, sepos = &(*sepos)->cdr)
- {
- sp = rx_hash_find (&rx->se_list_memo,
- (long)sesrc->car ^ (long)sesrc->cdr,
- sesrc, &se_list_hash_rules);
- if (sp->binding)
- {
- sesrc = (struct rx_se_list *)sp->binding;
- break;
- }
- *new_se_list = *sesrc;
- sp->binding = (void *)new_se_list;
- *sepos = new_se_list;
- ++new_se_list;
- }
- *sepos = sesrc;
- for (destpos = &ec->destset, destlst = eclose->destset;
- destlst;
- destpos = &(*destpos)->cdr, destlst = destlst->cdr)
- {
- sp = rx_hash_find (&rx->set_list_memo,
- ((((long)destlst->car) >> 8)
- ^ (long)destlst->cdr),
- destlst, &nfa_set_hash_rules);
- if (sp->binding)
- {
- destlst = (struct rx_nfa_state_set *)sp->binding;
- break;
- }
- *new_nfa_set = *destlst;
- new_nfa_set->car = state_base + destlst->car->id;
- sp->binding = (void *)new_nfa_set;
- *destpos = new_nfa_set;
- ++new_nfa_set;
- }
- *destpos = destlst;
- eclose = eclose->next;
- }
- }
- }
- rx_free_hash_table (&rx->se_list_memo, se_memo_freer, &se_list_hash_rules);
- bzero (&rx->se_list_memo, sizeof (rx->se_list_memo));
- rx_free_hash_table (&rx->set_list_memo, nfa_set_freer, &nfa_set_hash_rules);
- bzero (&rx->set_list_memo, sizeof (rx->set_list_memo));
- rx_free_nfa (rx);
- rx->nfa_states = (struct rx_nfa_state *)*mem;
- return 1;
- }
- /* The functions in the next several pages define the lazy-NFA-conversion used
- * by matchers. The input to this construction is an NFA such as
- * is built by compactify_nfa (rx.c). The output is the superNFA.
- */
- /* Match engines can use arbitrary values for opcodes. So, the parse tree
- * is built using instruction names (enum rx_opcode), but the superstate
- * nfa is populated with mystery opcodes (void *).
- *
- * For convenience, here is an identity table: each entry is the opcode
- * name itself, cast to (void *), so the opcodes are == to their indexes.
- *
- * The labels in re_search_2 would make good values for instructions.
- */
- void * rx_id_instruction_table[rx_num_instructions] =
- {
- (void *) rx_backtrack_point,
- (void *) rx_do_side_effects,
- (void *) rx_cache_miss,
- (void *) rx_next_char,
- (void *) rx_backtrack,
- (void *) rx_error_inx
- };
- /* Memory mgt. for superstate graphs. */
- #ifdef __STDC__
- static char *
- rx_cache_malloc (struct rx_cache * cache, int bytes)
- #else
- static char *
- rx_cache_malloc (cache, bytes)
- struct rx_cache * cache;
- int bytes;
- #endif
- {
- while (cache->bytes_left < bytes)
- {
- if (cache->memory_pos)
- cache->memory_pos = cache->memory_pos->next;
- if (!cache->memory_pos)
- {
- cache->morecore (cache);
- if (!cache->memory_pos)
- return 0;
- }
- cache->bytes_left = cache->memory_pos->bytes;
- cache->memory_addr = ((char *)cache->memory_pos
- + sizeof (struct rx_blocklist));
- }
- cache->bytes_left -= bytes;
- {
- char * addr = cache->memory_addr;
- cache->memory_addr += bytes;
- return addr;
- }
- }
- #ifdef __STDC__
- static void
- rx_cache_free (struct rx_cache * cache,
- struct rx_freelist ** freelist, char * mem)
- #else
- static void
- rx_cache_free (cache, freelist, mem)
- struct rx_cache * cache;
- struct rx_freelist ** freelist;
- char * mem;
- #endif
- {
- struct rx_freelist * it = (struct rx_freelist *)mem;
- it->next = *freelist;
- *freelist = it;
- }
- /* The partially instantiated superstate graph has a transition
- * table at every node. There is one entry for every character.
- * This fills in the transition for a set.
- */
- #ifdef __STDC__
- static void
- install_transition (struct rx_superstate *super,
- struct rx_inx *answer, rx_Bitset trcset)
- #else
- static void
- install_transition (super, answer, trcset)
- struct rx_superstate *super;
- struct rx_inx *answer;
- rx_Bitset trcset;
- #endif
- {
- struct rx_inx * transitions = super->transitions;
- int chr;
- for (chr = 0; chr < 256; )
- if (!*trcset)
- {
- ++trcset;
- chr += 32;
- }
- else
- {
- RX_subset sub = *trcset;
- RX_subset mask = 1;
- int bound = chr + 32;
- while (chr < bound)
- {
- if (sub & mask)
- transitions [chr] = *answer;
- ++chr;
- mask <<= 1;
- }
- ++trcset;
- }
- }
- #ifdef __STDC__
- static int
- qlen (struct rx_superstate * q)
- #else
- static int
- qlen (q)
- struct rx_superstate * q;
- #endif
- {
- int count = 1;
- struct rx_superstate * it;
- if (!q)
- return 0;
- for (it = q->next_recyclable; it != q; it = it->next_recyclable)
- ++count;
- return count;
- }
- #ifdef __STDC__
- static void
- check_cache (struct rx_cache * cache)
- #else
- static void
- check_cache (cache)
- struct rx_cache * cache;
- #endif
- {
- struct rx_cache * you_fucked_up = 0;
- int total = cache->superstates;
- int semi = cache->semifree_superstates;
- if (semi != qlen (cache->semifree_superstate))
- check_cache (you_fucked_up);
- if ((total - semi) != qlen (cache->lru_superstate))
- check_cache (you_fucked_up);
- }
- /* When a superstate is old and neglected, it can enter a
- * semi-free state. A semi-free state is slated to die.
- * Incoming transitions to a semi-free state are re-written
- * to cause an (interpreted) fault when they are taken.
- * The fault handler revives the semi-free state, patches
- * incoming transitions back to normal, and continues.
- *
- * The idea is basicly to free in two stages, aborting
- * between the two if the state turns out to be useful again.
- * When a free is aborted, the rescued superstate is placed
- * in the most-favored slot to maximize the time until it
- * is next semi-freed.
- */
- #ifdef __STDC__
- static void
- semifree_superstate (struct rx_cache * cache)
- #else
- static void
- semifree_superstate (cache)
- struct rx_cache * cache;
- #endif
- {
- int disqualified = cache->semifree_superstates;
- if (disqualified == cache->superstates)
- return;
- while (cache->lru_superstate->locks)
- {
- cache->lru_superstate = cache->lru_superstate->next_recyclable;
- ++disqualified;
- if (disqualified == cache->superstates)
- return;
- }
- {
- struct rx_superstate * it = cache->lru_superstate;
- it->next_recyclable->prev_recyclable = it->prev_recyclable;
- it->prev_recyclable->next_recyclable = it->next_recyclable;
- cache->lru_superstate = (it == it->next_recyclable
- ? 0
- : it->next_recyclable);
- if (!cache->semifree_superstate)
- {
- cache->semifree_superstate = it;
- it->next_recyclable = it;
- it->prev_recyclable = it;
- }
- else
- {
- it->prev_recyclable = cache->semifree_superstate->prev_recyclable;
- it->next_recyclable = cache->semifree_superstate;
- it->prev_recyclable->next_recyclable = it;
- it->next_recyclable->prev_recyclable = it;
- }
- {
- struct rx_distinct_future *df;
- it->is_semifree = 1;
- ++cache->semifree_superstates;
- df = it->transition_refs;
- if (df)
- {
- df->prev_same_dest->next_same_dest = 0;
- for (df = it->transition_refs; df; df = df->next_same_dest)
- {
- df->future_frame.inx = cache->instruction_table[rx_cache_miss];
- df->future_frame.data = 0;
- df->future_frame.data_2 = (void *) df;
- /* If there are any NEXT-CHAR instruction frames that
- * refer to this state, we convert them to CACHE-MISS frames.
- */
- if (!df->effects
- && (df->edge->options->next_same_super_edge[0]
- == df->edge->options))
- install_transition (df->present, &df->future_frame,
- df->edge->cset);
- }
- df = it->transition_refs;
- df->prev_same_dest->next_same_dest = df;
- }
- }
- }
- }
- #ifdef __STDC__
- static void
- refresh_semifree_superstate (struct rx_cache * cache,
- struct rx_superstate * super)
- #else
- static void
- refresh_semifree_superstate (cache, super)
- struct rx_cache * cache;
- struct rx_superstate * super;
- #endif
- {
- struct rx_distinct_future *df;
- if (super->transition_refs)
- {
- super->transition_refs->prev_same_dest->next_same_dest = 0;
- for (df = super->transition_refs; df; df = df->next_same_dest)
- {
- df->future_frame.inx = cache->instruction_table[rx_next_char];
- df->future_frame.data = (void *) super->transitions;
- /* CACHE-MISS instruction frames that refer to this state,
- * must be converted to NEXT-CHAR frames.
- */
- if (!df->effects
- && (df->edge->options->next_same_super_edge[0]
- == df->edge->options))
- install_transition (df->present, &df->future_frame,
- df->edge->cset);
- }
- super->transition_refs->prev_same_dest->next_same_dest
- = super->transition_refs;
- }
- if (cache->semifree_superstate == super)
- cache->semifree_superstate = (super->prev_recyclable == super
- ? 0
- : super->prev_recyclable);
- super->next_recyclable->prev_recyclable = super->prev_recyclable;
- super->prev_recyclable->next_recyclable = super->next_recyclable;
- if (!cache->lru_superstate)
- (cache->lru_superstate
- = super->next_recyclable
- = super->prev_recyclable
- = super);
- else
- {
- super->next_recyclable = cache->lru_superstate;
- super->prev_recyclable = cache->lru_superstate->prev_recyclable;
- super->next_recyclable->prev_recyclable = super;
- super->prev_recyclable->next_recyclable = super;
- }
- super->is_semifree = 0;
- --cache->semifree_superstates;
- }
- #ifdef __STDC__
- static void
- rx_refresh_this_superstate (struct rx_cache * cache, struct rx_superstate * superstate)
- #else
- static void
- rx_refresh_this_superstate (cache, superstate)
- struct rx_cache * cache;
- struct rx_superstate * superstate;
- #endif
- {
- if (superstate->is_semifree)
- refresh_semifree_superstate (cache, superstate);
- else if (cache->lru_superstate == superstate)
- cache->lru_superstate = superstate->next_recyclable;
- else if (superstate != cache->lru_superstate->prev_recyclable)
- {
- superstate->next_recyclable->prev_recyclable
- = superstate->prev_recyclable;
- superstate->prev_recyclable->next_recyclable
- = superstate->next_recyclable;
- superstate->next_recyclable = cache->lru_superstate;
- superstate->prev_recyclable = cache->lru_superstate->prev_recyclable;
- superstate->next_recyclable->prev_recyclable = superstate;
- superstate->prev_recyclable->next_recyclable = superstate;
- }
- }
- #ifdef __STDC__
- static void
- release_superset_low (struct rx_cache * cache,
- struct rx_superset *set)
- #else
- static void
- release_superset_low (cache, set)
- struct rx_cache * cache;
- struct rx_superset *set;
- #endif
- {
- if (!--set->refs)
- {
- if (set->cdr)
- release_superset_low (cache, set->cdr);
- set->starts_for = 0;
- rx_hash_free
- (rx_hash_find
- (&cache->superset_table,
- (unsigned long)set->car ^ set->id ^ (unsigned long)set->cdr,
- (void *)set,
- &cache->superset_hash_rules),
- &cache->superset_hash_rules);
- rx_cache_free (cache, &cache->free_supersets, (char *)set);
- }
- }
- #ifdef __STDC__
- RX_DECL void
- rx_release_superset (struct rx *rx,
- struct rx_superset *set)
- #else
- RX_DECL void
- rx_release_superset (rx, set)
- struct rx *rx;
- struct rx_superset *set;
- #endif
- {
- release_superset_low (rx->cache, set);
- }
- /* This tries to add a new superstate to the superstate freelist.
- * It might, as a result, free some edge pieces or hash tables.
- * If nothing can be freed because too many locks are being held, fail.
- */
- #ifdef __STDC__
- static int
- rx_really_free_superstate (struct rx_cache * cache)
- #else
- static int
- rx_really_free_superstate (cache)
- struct rx_cache * cache;
- #endif
- {
- int locked_superstates = 0;
- struct rx_superstate * it;
- if (!cache->superstates)
- return 0;
- {
- /* This is a total guess. The idea is that we should expect as
- * many misses as we've recently experienced. I.e., cache->misses
- * should be the same as cache->semifree_superstates.
- */
- while ((cache->hits + cache->misses) > cache->superstates_allowed)
- {
- cache->hits >>= 1;
- cache->misses >>= 1;
- }
- if ( ((cache->hits + cache->misses) * cache->semifree_superstates)
- < (cache->superstates * cache->misses))
- {
- semifree_superstate (cache);
- semifree_superstate (cache);
- }
- }
- while (cache->semifree_superstate && cache->semifree_superstate->locks)
- {
- refresh_semifree_superstate (cache, cache->semifree_superstate);
- ++locked_superstates;
- if (locked_superstates == cache->superstates)
- return 0;
- }
- if (cache->semifree_superstate)
- {
- it = cache->semifree_superstate;
- it->next_recyclable->prev_recyclable = it->prev_recyclable;
- it->prev_recyclable->next_recyclable = it->next_recyclable;
- cache->semifree_superstate = ((it == it->next_recyclable)
- ? 0
- : it->next_recyclable);
- --cache->semifree_superstates;
- }
- else
- {
- while (cache->lru_superstate->locks)
- {
- cache->lru_superstate = cache->lru_superstate->next_recyclable;
- ++locked_superstates;
- if (locked_superstates == cache->superstates)
- return 0;
- }
- it = cache->lru_superstate;
- it->next_recyclable->prev_recyclable = it->prev_recyclable;
- it->prev_recyclable->next_recyclable = it->next_recyclable;
- cache->lru_superstate = ((it == it->next_recyclable)
- ? 0
- : it->next_recyclable);
- }
- if (it->transition_refs)
- {
- struct rx_distinct_future *df;
- for (df = it->transition_refs,
- df->prev_same_dest->next_same_dest = 0;
- df;
- df = df->next_same_dest)
- {
- df->future_frame.inx = cache->instruction_table[rx_cache_miss];
- df->future_frame.data = 0;
- df->future_frame.data_2 = (void *) df;
- df->future = 0;
- }
- it->transition_refs->prev_same_dest->next_same_dest =
- it->transition_refs;
- }
- {
- struct rx_super_edge *tc = it->edges;
- while (tc)
- {
- struct rx_distinct_future * df;
- struct rx_super_edge *tct = tc->next;
- df = tc->options;
- df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
- while (df)
- {
- struct rx_distinct_future *dft = df;
- df = df->next_same_super_edge[0];
-
-
- if (dft->future && dft->future->transition_refs == dft)
- {
- dft->future->transition_refs = dft->next_same_dest;
- if (dft->future->transition_refs == dft)
- dft->future->transition_refs = 0;
- }
- dft->next_same_dest->prev_same_dest = dft->prev_same_dest;
- dft->prev_same_dest->next_same_dest = dft->next_same_dest;
- rx_cache_free (cache, &cache->free_discernable_futures,
- (char *)dft);
- }
- rx_cache_free (cache, &cache->free_transition_classes, (char *)tc);
- tc = tct;
- }
- }
-
- if (it->contents->superstate == it)
- it->contents->superstate = 0;
- release_superset_low (cache, it->contents);
- rx_cache_free (cache, &cache->free_superstates, (char *)it);
- --cache->superstates;
- return 1;
- }
- #ifdef __STDC__
- static char *
- rx_cache_get (struct rx_cache * cache,
- struct rx_freelist ** freelist)
- #else
- static char *
- rx_cache_get (cache, freelist)
- struct rx_cache * cache;
- struct rx_freelist ** freelist;
- #endif
- {
- while (!*freelist && rx_really_free_superstate (cache))
- ;
- if (!*freelist)
- return 0;
- {
- struct rx_freelist * it = *freelist;
- *freelist = it->next;
- return (char *)it;
- }
- }
/* Obtain an object of BYTES bytes, preferring the free list.
 *
 * A non-empty *FREELIST is served by rx_cache_get.  With an empty list,
 * fresh cache memory is tried first and rx_cache_get (which may free
 * superstates) is the fallback.  Returns 0 if nothing could be obtained.
 */
#ifdef __STDC__
static char *
rx_cache_malloc_or_get (struct rx_cache * cache,
                        struct rx_freelist ** freelist, int bytes)
#else
static char *
rx_cache_malloc_or_get (cache, freelist, bytes)
     struct rx_cache * cache;
     struct rx_freelist ** freelist;
     int bytes;
#endif
{
  char * fresh;

  if (*freelist)
    return rx_cache_get (cache, freelist);

  fresh = rx_cache_malloc (cache, bytes);
  if (fresh)
    return fresh;
  return rx_cache_get (cache, freelist);
}
- #ifdef __STDC__
- static char *
- rx_cache_get_superstate (struct rx_cache * cache)
- #else
- static char *
- rx_cache_get_superstate (cache)
- struct rx_cache * cache;
- #endif
- {
- char * answer;
- int bytes = ( sizeof (struct rx_superstate)
- + cache->local_cset_size * sizeof (struct rx_inx));
- if (!cache->free_superstates
- && (cache->superstates < cache->superstates_allowed))
- {
- answer = rx_cache_malloc (cache, bytes);
- if (answer)
- {
- ++cache->superstates;
- return answer;
- }
- }
- answer = rx_cache_get (cache, &cache->free_superstates);
- if (!answer)
- {
- answer = rx_cache_malloc (cache, bytes);
- if (answer)
- ++cache->superstates_allowed;
- }
- ++cache->superstates;
- return answer;
- }
- #ifdef __STDC__
- static int
- supersetcmp (void * va, void * vb)
- #else
- static int
- supersetcmp (va, vb)
- void * va;
- void * vb;
- #endif
- {
- struct rx_superset * a = (struct rx_superset *)va;
- struct rx_superset * b = (struct rx_superset *)vb;
- return ( (a == b)
- || (a && b && (a->car == b->car) && (a->cdr == b->cdr)));
- }
- #ifdef __STDC__
- static struct rx_hash_item *
- superset_allocator (struct rx_hash_rules * rules, void * val)
- #else
- static struct rx_hash_item *
- superset_allocator (rules, val)
- struct rx_hash_rules * rules;
- void * val;
- #endif
- {
- struct rx_cache * cache
- = ((struct rx_cache *)
- ((char *)rules
- - (unsigned long)(&((struct rx_cache *)0)->superset_hash_rules)));
- struct rx_superset * template = (struct rx_superset *)val;
- struct rx_superset * newset
- = ((struct rx_superset *)
- rx_cache_malloc_or_get (cache,
- &cache->free_supersets,
- sizeof (*template)));
- if (!newset)
- return 0;
- newset->refs = 0;
- newset->car = template->car;
- newset->id = template->car->id;
- newset->cdr = template->cdr;
- newset->superstate = 0;
- rx_protect_superset (rx, template->cdr);
- newset->hash_item.data = (void *)newset;
- newset->hash_item.binding = 0;
- return &newset->hash_item;
- }
- #ifdef __STDC__
- static struct rx_hash *
- super_hash_allocator (struct rx_hash_rules * rules)
- #else
- static struct rx_hash *
- super_hash_allocator (rules)
- struct rx_hash_rules * rules;
- #endif
- {
- struct rx_cache * cache
- = ((struct rx_cache *)
- ((char *)rules
- - (unsigned long)(&((struct rx_cache *)0)->superset_hash_rules)));
- return ((struct rx_hash *)
- rx_cache_malloc_or_get (cache,
- &cache->free_hash, sizeof (struct rx_hash)));
- }
- #ifdef __STDC__
- static void
- super_hash_liberator (struct rx_hash * hash, struct rx_hash_rules * rules)
- #else
- static void
- super_hash_liberator (hash, rules)
- struct rx_hash * hash;
- struct rx_hash_rules * rules;
- #endif
- {
- struct rx_cache * cache
- = ((struct rx_cache *)
- (char *)rules - (long)(&((struct rx_cache *)0)->superset_hash_rules));
- rx_cache_free (cache, &cache->free_hash, (char *)hash);
- }
/* Hash-item liberator for the superset table.
 *
 * Deliberately does nothing: neither IT nor RULES is touched.
 * NOTE(review): presumably superset storage is reclaimed elsewhere
 * (e.g. when a superset's reference count drops) -- confirm.
 */
#ifdef __STDC__
static void
superset_hash_item_liberator (struct rx_hash_item * it,
                              struct rx_hash_rules * rules)
#else
static void
superset_hash_item_liberator (it, rules)
     struct rx_hash_item * it;
     struct rx_hash_rules * rules;
#endif
{
  (void) it;
  (void) rules;
}
/* Limit consulted by rx_morecore: it is compared against
 * rx_default_cache_got to decide whether the cache may still grow, and
 * it is copied into the cache's superstates_allowed field.
 */
int rx_cache_bound = 128;
/* Running total advanced in steps of 16 by rx_morecore as it grows the
 * cache; growth stops once this reaches rx_cache_bound.
 */
static int rx_default_cache_got = 0;
- #ifdef __STDC__
- static int
- bytes_for_cache_size (int supers, int cset_size)
- #else
- static int
- bytes_for_cache_size (supers, cset_size)
- int supers;
- int cset_size;
- #endif
- {
- /* What the hell is this? !!!*/
- return (int)
- ((float)supers *
- ( (1.03 * (float) ( rx_sizeof_bitset (cset_size)
- + sizeof (struct rx_super_edge)))
- + (1.80 * (float) sizeof (struct rx_possible_future))
- + (float) ( sizeof (struct rx_superstate)
- + cset_size * sizeof (struct rx_inx))));
- }
- #ifdef __STDC__
- static void
- rx_morecore (struct rx_cache * cache)
- #else
- static void
- rx_morecore (cache)
- struct rx_cache * cache;
- #endif
- {
- if (rx_default_cache_got >= rx_cache_bound)
- return;
- rx_default_cache_got += 16;
- cache->superstates_allowed = rx_cache_bound;
- {
- struct rx_blocklist ** pos = &cache->memory;
- int size = bytes_for_cache_size (16, cache->local_cset_size);
- while (*pos)
- pos = &(*pos)->next;
- *pos = ((struct rx_blocklist *)
- malloc (size + sizeof (struct rx_blocklist)));
- if (!*pos)
- return;
- (*pos)->next = 0;
- (*pos)->bytes = size;
- cache->memory_pos = *pos;
- cache->memory_addr = (char *)*pos + sizeof (**pos);
- cache->bytes_left = size;
- }
- }
/* The statically allocated cache.  Members are initialized positionally,
 * so the order below must match the declaration of struct rx_cache,
 * which is not visible from here.
 */
static struct rx_cache default_cache =
{
  /* Hash callbacks for the superset table, in the order: equality test,
   * table allocator, table liberator, item allocator, item liberator.
   * NOTE(review): presumably this is the superset_hash_rules member --
   * confirm against struct rx_cache.
   */
  {
    supersetcmp,
    super_hash_allocator,
    super_hash_liberator,
    superset_allocator,
    superset_hash_item_liberator,
  },
  0,
  0,
  0,
  0,
  rx_morecore,                  /* callback that grows the cache */
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  /* NOTE(review): presumably superstates_allowed (128) and
   * local_cset_size (256) -- confirm against struct rx_cache.
   */
  128,
  256,
  rx_id_instruction_table,
  /* NOTE(review): presumably the initially empty superset_table --
   * confirm.
   */
  {
    0,
    0,
    {0},
    {0},
    {0}
  }
};
- /* This adds an element to a superstate set. These sets are lists, such
- * that lists with == elements are ==. The empty set is returned by
- * superset_cons (rx, 0, 0) and is NOT equivelent to
- * (struct rx_superset)0.
- */
- #ifdef __STDC__
- RX_DECL struct rx_superset *
- rx_superset_cons (struct rx * rx,
- struct rx_nfa_state *car, struct rx_superset *cdr)
- #else
- RX_DECL struct rx_superset *
- rx_superset_cons (rx, car, cdr)
- struct rx * rx;
- struct rx_nfa_state *car;
- struct rx_superset *cdr;
- #endif
- {
- struct rx_cache * cache = rx->cache;
- if (!car && !cdr)
- {
- if (!cache->empty_superset)
- {
- cache->empty_superset
- = ((struct rx_superset *)
- rx_cache_malloc_or_get (cache, &cache->free_supersets,
- sizeof (struct rx_superset)));
- if (!cache->empty_superset)
- return 0;
- bzero (cache->empty_superset, sizeof (struct rx_superset));
- cache->empty_superset->refs = 1000;
- }
- return cache->empty_superset;
- }
- {
- struct rx_superset template;
- struct rx_hash_item * hit;
- template.car = car;
- template.cdr = cdr;
- template.id = car->id;
- hit = rx_hash_store (&cache->superset_table,
- (unsigned long)car ^ car->id ^ (unsigned long)cdr,
- (void *)&template,
- &cache->superset_hash_rules);
- return (hit
- ? (struct rx_superset *)hit->data
- : 0);
- }
- }
- /* This computes a union of two NFA state sets. The sets do not have the
- * same representation though. One is a RX_SUPERSET structure (part
- * of the superstate NFA) and the other is an NFA_STATE_SET (part of the NFA).
- */
- #ifdef __STDC__
- RX_DECL struct rx_superset *
- rx_superstate_eclosure_union
- (struct rx * rx, struct rx_superset *set, struct rx_nfa_state_set *ecl)
- #else
- RX_DECL struct rx_superset *
- rx_superstate_eclosure_union (rx, set, ecl)
- struct rx * rx;
- struct rx_superset *set;
- struct rx_nfa_state_set *ecl;
- #endif
- {
- if (!ecl)
- return set;
- if (!set->car)
- return rx_superset_cons (rx, ecl->car,
- rx_superstate_eclosure_union (rx, set, ecl->cdr));
- if (set->car == ecl->car)
- return rx_superstate_eclosure_union (rx, set, ecl->cdr);
- {
- struct rx_superset * tail;
- struct rx_nfa_state * first;
- if (set->car > ecl->car)
- {
- tail = rx_superstate_eclosure_union (rx, set->cdr, ecl);
- first = set->car;
- }
- else
- {
- tail = rx_superstate_eclosure_union (rx, set, ecl->cdr);
- first = ecl->car;
- }
- if (!tail)
- return 0;
- else
- {
- struct rx_superset * answer;
- answer = rx_superset_cons (rx, first, tail);
- if (!answer)
- {
- rx_protect_superset (rx, tail);
- rx_release_superset (rx, tail);
- return 0;
- }
- else
- return answer;
- }
- }
- }
- /*
- * This makes sure that a list of rx_distinct_futures contains
- * a future for each possible set of side effects in the eclosure
- * of a given state. This is some of the work of filling in a
- * superstate transition.
- */
- #ifdef __STDC__
- static struct rx_distinct_future *
- include_futures (struct rx *rx,
- struct rx_distinct_future *df, struct rx_nfa_state
- *state, struct rx_superstate *superstate)
- #else
- static struct rx_distinct_future *
- include_futures (rx, df, state, superstate)
- struct rx *rx;
- struct rx_distinct_future *df;
- struct rx_nfa_state *state;
- struct rx_superstate *superstate;
- #endif
- {
- struct rx_possible_future *future;
- struct rx_cache * cache = rx->cache;
- for (future = state->futures; future; future = future->next)
- {
- struct rx_distinct_future *dfp;
- struct rx_distinct_future *insert_before = 0;
- if (df)
- df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
- for (dfp = df; dfp; dfp = dfp->next_same_super_edge[0])
- if (dfp->effects == future->effects)
- break;
- else
- {
- int order = rx->se_list_cmp (rx, dfp->effects, future->effects);
- if (order > 0)
- {
- insert_before = dfp;
- dfp = 0;
- break;
- }
- }
- if (df)
- df->next_same_super_edge[1]->next_same_super_edge[0] = df;
- if (!dfp)
- {
- dfp
- = ((struct rx_distinct_future *)
- rx_cache_malloc_or_get (cache, &cache->free_discernable_futures,
- sizeof (struct rx_distinct_future)));
- if (!dfp)
- return 0;
- if (!df)
- {
- df = insert_before = dfp;
- df->next_same_super_edge[0] = df->next_same_super_edge[1] = df;
- }
- else if (!insert_before)
- insert_before = df;
- else if (insert_before == df)
- df = dfp;
- dfp->next_same_super_edge[0] = insert_before;
- dfp->next_same_super_edge[1]
- = insert_before->next_same_super_edge[1];
- dfp->next_same_super_edge[1]->next_same_super_edge[0] = dfp;
- dfp->next_same_super_edge[0]->next_same_super_edge[1] = dfp;
- dfp->next_same_dest = dfp->prev_same_dest = dfp;
- dfp->future = 0;
- dfp->present = superstate;
- dfp->future_frame.inx = rx->instruction_table[rx_cache_miss];
- dfp->future_frame.data = 0;
- dfp->future_frame.data_2 = (void *) dfp;
- dfp->side_effects_frame.inx
- = rx->instruction_table[rx_do_side_effects];
- dfp->side_effects_frame.data = 0;
- dfp->side_effects_frame.data_2 = (void *) dfp;
- dfp->effects = future->effects;
- }
- }
- return df;
- }
- /* This constructs a new superstate from its state set. The only
- * complexity here is memory management.
- */
- #ifdef __STDC__
- RX_DECL struct rx_superstate *
- rx_superstate (struct rx *rx,
- struct rx_superset *set)
- #else
- RX_DECL struct rx_superstate *
- rx_superstate (rx, set)
- struct rx *rx;
- struct rx_superset *set;
- #endif
- {
- struct rx_cache * cache = rx->cache;
- struct rx_superstate * superstate = 0;
- /* Does the superstate already exist in the cache? */
- if (set->superstate)
- {
- if (set->superstate->rx_id != rx->rx_id)
- {
- /* Aha. It is in the cache, but belongs to a superstate
- * that refers to an NFA that no longer exists.
- * (We know it no longer exists because it was evidently
- * stored in the same region of memory as the current nfa
- * yet it has a different id.)
- */
- superstate = set->superstate;
- if (!superstate->is_semifree)
- {
- if (cache->lru_superstate == superstate)
- {
- cache->lru_superstate = superstate->next_recyclable;
- if (cache->lru_superstate == superstate)
- cache->lru_superstate = 0;
- }
- {
- superstate->next_recyclable->prev_recyclable
- = superstate->prev_recyclable;
- superstate->prev_recyclable->next_recyclable
- = superstate->next_recyclable;
- if (!cache->semifree_superstate)
- {
- (cache->semifree_superstate
- = superstate->next_recyclable
- = superstate->prev_recyclable
- = superstate);
- }
- else
- {
- superstate->next_recyclable = cache->semifree_superstate;
- superstate->prev_recyclable
- = cache->semifree_superstate->prev_recyclable;
- superstate->next_recyclable->prev_recyclable
- = superstate;
- superstate->prev_recyclable->next_recyclable
- = superstate;
- cache->semifree_superstate = superstate;
- }
- ++cache->semifree_superstates;
- }
- }
- set->superstate = 0;
- goto handle_cache_miss;
- }
- ++cache->hits;
- superstate = set->superstate;
- rx_refresh_this_superstate (cache, superstate);
- return superstate;
- }
- handle_cache_miss:
- /* This point reached only for cache misses. */
- ++cache->misses;
- #if RX_DEBUG
- if (rx_debug_trace > 1)
- {
- struct rx_superset * setp = set;
- fprintf (stderr, "Building a superstet %d(%d): ", rx->rx_id, set);
- while (setp)
- {
- fprintf (stderr, "%d ", setp->id);
- setp = setp->cdr;
- }
- fprintf (stderr, "(%d)n", set);
- }
- #endif
- superstate = (struct rx_superstate *)rx_cache_get_superstate (cache);
- if (!superstate)
- return 0;
- if (!cache->lru_superstate)
- (cache->lru_superstate
- = superstate->next_recyclable
- = superstate->prev_recyclable
- = superstate);
- else
- {
- superstate->next_recyclable = cache->lru_superstate;
- superstate->prev_recyclable = cache->lru_superstate->prev_recyclable;
- ( superstate->prev_recyclable->next_recyclable
- = superstate->next_recyclable->prev_recyclable
- = superstate);
- }
- superstate->rx_id = rx->rx_id;
- superstate->transition_refs = 0;
- superstate->locks = 0;
- superstate->is_semifree = 0;
- set->superstate = superstate;
- superstate->contents = set;
- rx_protect_superset (rx, set);
- superstate->edges = 0;
- {
- int x;
- /* None of the transitions from this superstate are known yet. */
- for (x = 0; x < rx->local_cset_size; ++x) /* &&&&& 3.8 % */
- {
- struct rx_inx * ifr = &superstate->transitions[x];
- ifr->inx = rx->instruction_table [rx_cache_miss];
- ifr->data = ifr->data_2 = 0;
- }
- }
- return superstate;
- }
- /* This computes the destination set of one edge of the superstate NFA.
- * Note that a RX_DISTINCT_FUTURE is a superstate edge.
- * Returns 0 on an allocation failure.
- */
- #ifdef __STDC__
- static int
- solve_destination (struct rx *rx, struct rx_distinct_future *df)
- #else
- static int
- solve_destination (rx, df)
- struct rx *rx;
- struct rx_distinct_future *df;
- #endif
- {
- struct rx_super_edge *tc = df->edge;
- struct rx_superset *nfa_state;
- struct rx_superset *nil_set = rx_superset_cons (rx, 0, 0);
- struct rx_superset *solution = nil_set;
- struct rx_superstate *dest;
- rx_protect_superset (rx, solution);
- /* Iterate over all NFA states in the state set of this superstate. */
- for (nfa_state = df->present->contents;
- nfa_state->car;
- nfa_state = nfa_state->cdr)
- {
- struct rx_nfa_edge *e;
- /* Iterate over all edges of each NFA state. */
- for (e = nfa_state->car->edges; e; e = e->next)
- /* If we find an edge that is labeled with
- * the characters we are solving for.....
- */
- if (rx_bitset_is_subset (rx->local_cset_size,
- tc->cset, e->params.cset))
- {
- struct rx_nfa_state *n = e->dest;
- struct rx_possible_future *pf;
- /* ....search the partial epsilon closures of the destination
- * of that edge for a path that involves the same set of
- * side effects we are solving for.
- * If we find such a RX_POSSIBLE_FUTURE, we add members to the
- * stateset we are computing.
- */
- for (pf = n->futures; pf; pf = pf->next)
- if (pf->effects == df->effects)
- {
- struct rx_superset * old_sol;
- old_sol = solution;
- solution = rx_superstate_eclosure_union (rx, solution,
- pf->destset);
- if (!solution)
- return 0;
- rx_protect_superset (rx, solution);
- rx_release_superset (rx, old_sol);
- }
- }
- }
- /* It is possible that the RX_DISTINCT_FUTURE we are working on has
- * the empty set of NFA states as its definition. In that case, this
- * is a failure point.
- */
- if (solution == nil_set)
- {
- df->future_frame.inx = (void *) rx_backtrack;
- df->future_frame.data = 0;
- df->future_frame.data_2 = 0;