通讯编程

开发平台：

Visual C++

cweave.w：源码内容

case question: @<Cases for |question|@>; @+break;
case unop: @<Cases for |unop|@>; @+break;
case unorbinop: @<Cases for |unorbinop|@>; @+break;
case binop: @<Cases for |binop|@>; @+break;
case cast: @<Cases for |cast|@>; @+break;
case sizeof_like: @<Cases for |sizeof_like|@>; @+break;
case int_like: @<Cases for |int_like|@>; @+break;
case decl_head: @<Cases for |decl_head|@>; @+break;
case decl: @<Cases for |decl|@>; @+break;
case typedef_like: @<Cases for |typedef_like|@>; @+break;
case struct_like: @<Cases for |struct_like|@>; @+break;
case struct_head: @<Cases for |struct_head|@>; @+break;
case fn_decl: @<Cases for |fn_decl|@>; @+break;
case function: @<Cases for |function|@>; @+break;
case lbrace: @<Cases for |lbrace|@>; @+break;
case do_like: @<Cases for |do_like|@>; @+break;
case if_like: @<Cases for |if_like|@>; @+break;
case for_like: @<Cases for |for_like|@>; @+break;
case else_like: @<Cases for |else_like|@>; @+break;
case if_clause: @<Cases for |if_clause|@>; @+break;
case if_head: @<Cases for |if_head|@>; @+break;
case else_head: @<Cases for |else_head|@>; @+break;
case case_like: @<Cases for |case_like|@>; @+break;
case stmt: @<Cases for |stmt|@>; @+break;
case tag: @<Cases for |tag|@>; @+break;
case semi: @<Cases for |semi|@>; @+break;
case lproc: @<Cases for |lproc|@>; @+break;
case section_scrap: @<Cases for |section_scrap|@>; @+break;
case insert: @<Cases for |insert|@>; @+break;
case prelangle: @<Cases for |prelangle|@>; @+break;
case prerangle: @<Cases for |prerangle|@>; @+break;
case langle: @<Cases for |langle|@>; @+break;
case public_like: @<Cases for |public_like|@>; @+break;
case colcol: @<Cases for |colcol|@>; @+break;
case new_like: @<Cases for |new_like|@>; @+break;
case operator_like: @<Cases for |operator_like|@>; @+break;
case catch_like: @<Cases for |catch_like|@>; @+break;
case base: @<Cases for |base|@>; @+break;
case raw_rpar: @<Cases for |raw_rpar|@>; @+break;
case raw_unorbin: @<Cases for |raw_unorbin|@>; @+break;
case const_like: @<Cases for |const_like|@>; @+break;
case raw_int: @<Cases for |raw_int|@>; @+break;
}
pp++; /* if no match was found, we move to the right */
}
@ In CEE/, new specifier names can be defined via |typedef|, and we want
to make the parser recognize future occurrences of the identifier thus
defined as specifiers. This is done by the procedure |make_reserved|,
which changes the |ilk| of the relevant identifier.
We first need a procedure to recursively seek the first
identifier in a token list, because the identifier might
be enclosed in parentheses, as when one defines a function
returning a pointer.
@d no_ident_found 0 /* distinct from any identifier token */
@c
token_pointer
find_first_ident(p)
text_pointer p;
{
token_pointer q; /* token to be returned */
token_pointer j; /* token being looked at */
sixteen_bits r; /* remainder of token after the flag has been stripped off */
if (p>=text_ptr) confusion("find_first_ident");
for (j=*p; j<*(p+1); j++) {
r=*j%id_flag;
switch (*j/id_flag) {
case 1: case 2: return j;
case 4: case 5: /* |tok_flag| or |inner_tok_flag| */
if ((q=find_first_ident(tok_start+r))!=no_ident_found)
return q;
default: ; /* char, |section_flag|, fall thru: move on to next token */
if (*j==inserted) return no_ident_found; /* ignore inserts */
}
}
return no_ident_found;
}
@ The scraps currently being parsed must be inspected for any
occurrence of the identifier that we're making reserved; hence
the |for| loop below.
@c
void
make_reserved(p) /* make the first identifier in |p->trans| like |int| */
scrap_pointer p;
{
sixteen_bits tok_value; /* the name of this identifier, plus its flag*/
token_pointer tok_loc; /* pointer to |tok_value| */
if ((tok_loc=find_first_ident(p->trans))==no_ident_found)
return; /* this should not happen */
tok_value=*tok_loc;
for (;p<=scrap_ptr; p==lo_ptr? p=hi_ptr: p++) {
if (p->cat==exp) {
if (**(p->trans)==tok_value) {
p->cat=raw_int;
**(p->trans)=tok_value%id_flag+res_flag;
}
}
}
(name_dir+(sixteen_bits)(tok_value%id_flag))->ilk=raw_int;
*tok_loc=tok_value%id_flag+res_flag;
}
@ In the following situations we want to mark the occurrence of
an identifier as a definition: when |make_reserved| is just about to be
used; after a specifier, as in |char **argv|;
before a colon, as in \{found}:; and in the declaration of a function,
as in \{main}()${ldots;}$. This is accomplished by the invocation
of |make_underlined| at appropriate times. Notice that, in the declaration
of a function, we only find out that the identifier is being defined after
it has been swallowed up by an |exp|.
@c
void
make_underlined(p)
/* underline the entry for the first identifier in |p->trans| */
scrap_pointer p;
{
token_pointer tok_loc; /* where the first identifier appears */
if ((tok_loc=find_first_ident(p->trans))==no_ident_found)
return; /* this happens after parsing the |()| in |double f();| */
xref_switch=def_flag;
underline_xref(*tok_loc%id_flag+name_dir);
}
@ We cannot use |new_xref| to underline a cross-reference at this point
because this would just make a new cross-reference at the end of the list.
We actually have to search through the list for the existing
cross-reference.
@<Predecl...@>=
void underline_xref();
@ @c
void
underline_xref(p)
name_pointer p;
{
xref_pointer q=(xref_pointer)p->xref; /* pointer to cross-reference being examined */
xref_pointer r; /* temporary pointer for permuting cross-references */
sixteen_bits m; /* cross-reference value to be installed */
sixteen_bits n; /* cross-reference value being examined */
if (no_xref) return;
m=section_count+xref_switch;
while (q != xmem) {
n=q->num;
if (n==m) return;
else if (m==n+def_flag) {
q->num=m; return;
}
else if (n>=def_flag && n<m) break;
q=q->xlink;
}
@<Insert new cross-reference at |q|, not at beginning of list@>;
}
@ We get to this section only when the identifier is one letter long,
so it didn't get a non-underlined entry during phase one. But it may
have got some explicitly underlined entries in later sections, so in order
to preserve the numerical order of the entries in the index, we have
to insert the new cross-reference not at the beginning of the list
(namely, at |p->xref|), but rather right before |q|.
@<Insert new cross-reference at |q|...@>=
append_xref(0); /* this number doesn't matter */
xref_ptr->xlink=(xref_pointer)p->xref; r=xref_ptr;
p->xref=(char*)xref_ptr;
while (r->xlink!=q) {r->num=r->xlink->num; r=r->xlink;}
r->num=m; /* everything from |q| on is left undisturbed */
@ Now comes the code that tries to match each production starting
with a particular type of scrap. Whenever a match is discovered,
the |squash| or |reduce| macro will cause the appropriate action
to be performed, followed by |goto found|.
@<Cases for |exp|@>=
if (cat1==lbrace || cat1==int_like || cat1==decl) {
make_underlined(pp); big_app1(pp); big_app(indent); app(indent);
reduce(pp,1,fn_decl,0,1);
}
else if (cat1==unop) squash(pp,2,exp,-2,2);
else if ((cat1==binop || cat1==unorbinop) && cat2==exp)
squash(pp,3,exp,-2,3);
else if (cat1==comma && cat2==exp) {
big_app2(pp);
app(opt); app('9'); big_app1(pp+2); reduce(pp,3,exp,-2,4);
}
else if (cat1==exp || cat1==cast) squash(pp,2,exp,-2,5);
else if (cat1==semi) squash(pp,2,stmt,-1,6);
else if (cat1==colon) {
make_underlined (pp); squash(pp,2,tag,0,7);
}
else if (cat1==base) {
if (cat2==int_like && cat3==comma) {
big_app1(pp+1); big_app(' '); big_app2(pp+2);
app(opt); app('9'); reduce(pp+1,3,base,0,8);
}
else if (cat2==int_like && cat3==lbrace) {
big_app1(pp); big_app(' '); big_app1(pp+1); big_app(' '); big_app1(pp+2);
reduce(pp,3,exp,-1,9);
}
}
else if (cat1==rbrace) squash(pp,1,stmt,-1,10);
@ @<Cases for |lpar|@>=
if ((cat1==exp||cat1==unorbinop) && cat2==rpar) squash(pp,3,exp,-2,11);
else if (cat1==rpar) {
big_app1(pp); app('\'); app(','); big_app1(pp+1);
@.\,@>
reduce(pp,2,exp,-2,12);
}
else if (cat1==decl_head || cat1==int_like || cat1==exp) {
if (cat2==rpar) squash(pp,3,cast,-2,13);
else if (cat2==comma) {
big_app3(pp); app(opt); app('9'); reduce(pp,3,lpar,0,14);
}
}
else if (cat1==stmt || cat1==decl) {
big_app2(pp); big_app(' '); reduce(pp,2,lpar,0,15);
}
@ @<Cases for |question|@>=
if (cat1==exp && cat2==colon) squash(pp,3,binop,-2,16);
@ @<Cases for |unop|@>=
if (cat1==exp || cat1==int_like) squash(pp,2,cat1,-2,17);
@ @<Cases for |unorbinop|@>=
if (cat1==exp || cat1==int_like) {
big_app('{'); big_app1(pp); big_app('}'); big_app1(pp+1);
reduce(pp,2,cat1,-2,18);
}
else if (cat1==binop) {
big_app(math_rel); big_app1(pp); big_app('{'); big_app1(pp+1); big_app('}');
big_app('}'); reduce(pp,2,binop,-1,19);
}
@ @<Cases for |binop|@>=
if (cat1==binop) {
big_app(math_rel); big_app('{'); big_app1(pp); big_app('}');
big_app('{'); big_app1(pp+1); big_app('}');
big_app('}'); reduce(pp,2,binop,-1,20);
}
@ @<Cases for |cast|@>=
if (cat1==exp) {
big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,exp,-2,21);
}
else if (cat1==semi) squash(pp,1,exp,-2,22);
@ @<Cases for |sizeof_like|@>=
if (cat1==cast) squash(pp,2,exp,-2,23);
else if (cat1==exp) {
big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,exp,-2,24);
}
@ @<Cases for |int_like|@>=
if (cat1==int_like|| cat1==struct_like) {
big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,cat1,-2,25);
}
else if (cat1==exp && (cat2==raw_int||cat2==struct_like))
squash(pp,2,int_like,-2,26);
else if (cat1==exp || cat1==unorbinop || cat1==semi) {
big_app1(pp);
if (cat1!=semi) big_app(' ');
reduce(pp,1,decl_head,-1,27);
}
else if (cat1==colon) {
big_app1(pp); big_app(' '); reduce(pp,1,decl_head,0,28);
}
else if (cat1==prelangle) squash(pp+1,1,langle,1,29);
else if (cat1==colcol && (cat2==exp||cat2==int_like)) squash(pp,3,cat2,-2,30);
else if (cat1==cast) {
if (cat2==lbrace) {
big_app2(pp); big_app(indent); big_app(indent);
reduce(pp,2,fn_decl,1,31);
}
else squash(pp,2,int_like,-2,32);
}
@ @<Cases for |decl_head|@>=
if (cat1==comma) {
big_app2(pp); big_app(' '); reduce(pp,2,decl_head,-1,33);
}
else if (cat1==unorbinop) {
big_app1(pp); big_app('{'); big_app1(pp+1); big_app('}');
reduce(pp,2,decl_head,-1,34);
}
else if (cat1==exp && cat2!=lpar && cat2!=exp) {
make_underlined(pp+1); squash(pp,2,decl_head,-1,35);
}
else if ((cat1==binop||cat1==colon) && cat2==exp && (cat3==comma ||
cat3==semi || cat3==rpar))
squash(pp,3,decl_head,-1,36);
else if (cat1==cast) squash(pp,2,decl_head,-1,37);
else if (cat1==lbrace || (cat1==int_like&&cat2!=colcol) || cat1==decl) {
big_app1(pp); big_app(indent); app(indent); reduce(pp,1,fn_decl,0,38);
}
else if (cat1==semi) squash(pp,2,decl,-1,39);
@ @<Cases for |decl|@>=
if (cat1==decl) {
big_app1(pp); big_app(force); big_app1(pp+1);
reduce(pp,2,decl,-1,40);
}
else if (cat1==stmt || cat1==function) {
big_app1(pp); big_app(big_force);
big_app1(pp+1); reduce(pp,2,cat1,-1,41);
}
@ @<Cases for |typedef_like|@>=
if (cat1==decl_head)
if ((cat2==exp&&cat3!=lpar&&cat3!=exp)||cat2==int_like) {
make_underlined(pp+2); make_reserved(pp+2);
big_app2(pp+1); reduce(pp+1,2,decl_head,0,42);
}
else if (cat2==semi) {
big_app1(pp); big_app(' '); big_app2(pp+1); reduce(pp,3,decl,-1,43);
}
@ @<Cases for |struct_like|@>=
if (cat1==lbrace) {
big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,struct_head,0,44);
}
else if (cat1==exp||cat1==int_like) {
if (cat2==lbrace || cat2==semi) {
make_underlined(pp+1); make_reserved(pp+1);
big_app1(pp); big_app(' '); big_app1(pp+1);
if (cat2==semi) reduce(pp,2,decl_head,0,45);
else {
big_app(' '); big_app1(pp+2);reduce(pp,3,struct_head,0,46);
}
}
else if (cat2==colon) squash(pp+2,1,base,-1,47);
else if (cat2!=base) {
big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,int_like,-2,48);
}
}
@ @<Cases for |struct_head|@>=
if ((cat1==decl || cat1==stmt || cat1==function) && cat2==rbrace) {
big_app1(pp); big_app(indent); big_app(force); big_app1(pp+1);
big_app(outdent); big_app(force); big_app1(pp+2);
reduce(pp,3,int_like,-2,49);
}
else if (cat1==rbrace) {
big_app1(pp); app_str("\,"); big_app1(pp+1);
@.\,@>
reduce(pp,2,int_like,-2,50);
}
@ @<Cases for |fn_decl|@>=
if (cat1==decl) {
big_app1(pp); big_app(force); big_app1(pp+1); reduce(pp,2,fn_decl,0,51);
}
else if (cat1==stmt) {
big_app1(pp); app(outdent); app(outdent); big_app(force);
big_app1(pp+1); reduce(pp,2,function,-1,52);
}
@ @<Cases for |function|@>=
if (cat1==function || cat1==decl || cat1==stmt) {
big_app1(pp); big_app(big_force); big_app1(pp+1); reduce(pp,2,cat1,-1,53);
}
@ @<Cases for |lbrace|@>=
if (cat1==rbrace) {
big_app1(pp); app('\'); app(','); big_app1(pp+1);
@.\,@>
reduce(pp,2,stmt,-1,54);
}
else if ((cat1==stmt||cat1==decl||cat1==function) && cat2==rbrace) {
big_app(force); big_app1(pp); big_app(indent); big_app(force);
big_app1(pp+1); big_app(force); big_app(backup); big_app1(pp+2);
big_app(outdent); big_app(force); reduce(pp,3,stmt,-1,55);
}
else if (cat1==exp) {
if (cat2==rbrace) squash(pp,3,exp,-2,56);
else if (cat2==comma && cat3==rbrace) squash(pp,4,exp,-2,56);
}
@ @<Cases for |if_like|@>=
if (cat1==exp) {
big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,if_clause,0,57);
}
@ @<Cases for |for_like|@>=
if (cat1==exp) {
big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,else_like,-2,58);
}
@ @<Cases for |else_like|@>=
if (cat1==lbrace) squash(pp,1,else_head,0,59);
else if (cat1==stmt) {
big_app(force); big_app1(pp); big_app(indent); big_app(break_space);
big_app1(pp+1); big_app(outdent); big_app(force);
reduce(pp,2,stmt,-1,60);
}
@ @<Cases for |else_head|@>=
if (cat1==stmt || cat1==exp) {
big_app(force); big_app1(pp); big_app(break_space); app(noop);
big_app(cancel); big_app1(pp+1); big_app(force);
reduce(pp,2,stmt,-1,61);
}
@ @<Cases for |if_clause|@>=
if (cat1==lbrace) squash(pp,1,if_head,0,62);
else if (cat1==stmt) {
if (cat2==else_like) {
big_app(force); big_app1(pp); big_app(indent); big_app(break_space);
big_app1(pp+1); big_app(outdent); big_app(force); big_app1(pp+2);
if (cat3==if_like) {
big_app(' '); big_app1(pp+3); reduce(pp,4,if_like,0,63);
}@+else reduce(pp,3,else_like,0,64);
}
else squash(pp,1,else_like,0,65);
}
@ @<Cases for |if_head|@>=
if (cat1==stmt || cat1==exp) {
if (cat2==else_like) {
big_app(force); big_app1(pp); big_app(break_space); app(noop);
big_app(cancel); big_app1(pp+1); big_app(force); big_app1(pp+2);
if (cat3==if_like) {
big_app(' '); big_app1(pp+3); reduce(pp,4,if_like,0,66);
}@+else reduce(pp,3,else_like,0,67);
}
else squash(pp,1,else_head,0,68);
}
@ @<Cases for |do_like|@>=
if (cat1==stmt && cat2==else_like && cat3==semi) {
big_app1(pp); big_app(break_space); app(noop); big_app(cancel);
big_app1(pp+1); big_app(cancel); app(noop); big_app(break_space);
big_app2(pp+2); reduce(pp,4,stmt,-1,69);
}
@ @<Cases for |case_like|@>=
if (cat1==semi) squash(pp,2,stmt,-1,70);
else if (cat1==colon) squash(pp,2,tag,-1,71);
else if (cat1==exp) {
if (cat2==semi) {
big_app1(pp); big_app(' '); big_app1(pp+1); big_app1(pp+2);
reduce(pp,3,stmt,-1,72);
}
else if (cat2==colon) {
big_app1(pp); big_app(' '); big_app1(pp+1); big_app1(pp+2);
reduce(pp,3,tag,-1,73);
}
}
@ @<Cases for |tag|@>=
if (cat1==tag) {
big_app1(pp); big_app(break_space); big_app1(pp+1); reduce(pp,2,tag,-1,74);
}
else if (cat1==stmt||cat1==decl||cat1==function) {
big_app(force); big_app(backup); big_app1(pp); big_app(break_space);
big_app1(pp+1); reduce(pp,2,cat1,-1,75);
}
@ The user can decide at run-time whether short statements should be
grouped together on the same line.
@d force_lines flags['f'] /* should each statement be on its own line? */
@<Cases for |stmt|@>=
if (cat1==stmt||cat1==decl||cat1==function) {
big_app1(pp);
if (cat1==function) big_app(big_force);
else if (cat1==decl) big_app(big_force);
else if (force_lines) big_app(force);
else big_app(break_space);
big_app1(pp+1); reduce(pp,2,cat1,-1,76);
}
@ @<Cases for |semi|@>=
big_app(' '); big_app1(pp); reduce(pp,1,stmt,-1,77);
@ @<Cases for |lproc|@>=
if (cat1==define_like) make_underlined(pp+2);
if (cat1==else_like || cat1==if_like ||cat1==define_like)
squash(pp,2,lproc,0,78);
else if (cat1==rproc) {
app(inserted); big_app2(pp); reduce(pp,2,insert,-1,79);
} else if (cat1==exp || cat1==function) {
if (cat2==rproc) {
app(inserted); big_app1(pp); big_app(' '); big_app2(pp+1);
reduce(pp,3,insert,-1,80);
}
else if (cat2==exp && cat3==rproc && cat1==exp) {
app(inserted); big_app1(pp); big_app(' '); big_app1(pp+1); app_str(" \5");
@.\5@>
big_app2(pp+2); reduce(pp,4,insert,-1,80);
}
}
@ @<Cases for |section_scrap|@>=
if (cat1==semi) {
big_app2(pp); big_app(force); reduce(pp,2,stmt,-2,81);
}
else squash(pp,1,exp,-2,82);
@ @<Cases for |insert|@>=
if (cat1)
squash(pp,2,cat1,0,83);
@ @<Cases for |prelangle|@>=
init_mathness=cur_mathness=yes_math;
app('<'); reduce(pp,1,binop,-2,84);
@ @<Cases for |prerangle|@>=
init_mathness=cur_mathness=yes_math;
app('>'); reduce(pp,1,binop,-2,85);
@ @<Cases for |langle|@>=
if (cat1==exp && cat2==prerangle) squash(pp,3,cast,-1,86);
else if (cat1==prerangle) {
big_app1(pp); app('\'); app(','); big_app1(pp+1);
@.\,@>
reduce(pp,2,cast,-1,87);
}
else if (cat1==decl_head || cat1==int_like) {
if (cat2==prerangle) squash(pp,3,cast,-1,88);
else if (cat2==comma) {
big_app3(pp); app(opt); app('9'); reduce(pp,3,langle,0,89);
}
}
@ @<Cases for |public_like|@>=
if (cat1==colon) squash(pp,2,tag,-1,90);
else squash(pp,1,int_like,-2,91);
@ @<Cases for |colcol|@>=
if (cat1==exp||cat1==int_like) squash(pp,2,cat1,-2,92);
@ @<Cases for |new_like|@>=
if (cat1==exp || (cat1==raw_int&&cat2!=prelangle&&cat2!=langle)) {
big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,new_like,0,93);
}
else if (cat1==raw_unorbin || cat1==colcol)
squash(pp,2,new_like,0,94);
else if (cat1==cast) squash(pp,2,exp,-2,95);
else if (cat1!=lpar && cat1!=raw_int && cat1!=struct_like)
squash(pp,1,exp,-2,96);
@ @<Cases for |operator_like|@>=
if (cat1==binop || cat1==unop || cat1==unorbinop) {
if (cat2==binop) break;
big_app1(pp); big_app('{'); big_app1(pp+1); big_app('}');
reduce(pp,2,exp,-2,97);
}
else if (cat1==new_like || cat1==sizeof_like) {
big_app1(pp); big_app(' '); big_app1(pp+1); reduce(pp,2,exp,-2,98);
}
else squash(pp,1,new_like,0,99);
@ @<Cases for |catch_like|@>=
if (cat1==cast || cat1==exp) {
big_app2(pp); big_app(indent); big_app(indent);
reduce(pp,2,fn_decl,0,100);
}
@ @<Cases for |base|@>=
if (cat1==public_like && cat2==exp) {
if (cat3==comma) {
big_app2(pp); big_app(' '); big_app2(pp+2);
reduce(pp,4,base,0,101);
} else {
big_app1(pp+1); big_app(' '); big_app1(pp+2);
reduce(pp+1,2,int_like,-1,102);
}
}
@ @<Cases for |raw_rpar|@>=
if (cat1==const_like && @|
(cat2==semi || cat2==lbrace || cat2==comma || cat2==binop
|| cat2==const_like)) {
big_app1(pp); big_app(' ');
big_app1(pp+1); reduce(pp,2,raw_rpar,0,103);
} else squash(pp,1,rpar,-3,104);
@ @<Cases for |raw_unorbin|@>=
if (cat1==const_like) {
big_app2(pp); app_str("\ "); reduce(pp,2,raw_unorbin,0,105);
@.\ @>
} else squash(pp,1,unorbinop,-2,106);
@ @<Cases for |const_like|@>=
squash(pp,1,int_like,-2,107);
@ @<Cases for |raw_int|@>=
if (cat1==lpar) squash(pp,1,exp,-2,108);
else squash(pp,1,int_like,-3,109);
@ The `|freeze_text|' macro is used to give official status to a token list.
Before saying |freeze_text|, items are appended to the current token list,
and we know that the eventual number of this token list will be the current
value of |text_ptr|. But no list of that number really exists as yet,
because no ending point for the current list has been
stored in the |tok_start| array. After saying |freeze_text|, the
old current token list becomes legitimate, and its number is the current
value of |text_ptr-1| since |text_ptr| has been increased. The new
current token list is empty and ready to be appended to.
Note that |freeze_text| does not check to see that |text_ptr| hasn't gotten
too large, since it is assumed that this test was done beforehand.
@d freeze_text *(++text_ptr)=tok_ptr
@ Here's the |reduce| procedure used in our code for productions:
@c
void
reduce(j,k,c,d,n)
scrap_pointer j;
eight_bits c;
short k, d, n;
{
scrap_pointer i, i1; /* pointers into scrap memory */
j->cat=c; j->trans=text_ptr;
j->mathness=4*cur_mathness+init_mathness;
freeze_text;
if (k>1) {
for (i=j+k, i1=j+1; i<=lo_ptr; i++, i1++) {
i1->cat=i->cat; i1->trans=i->trans;
i1->mathness=i->mathness;
}
lo_ptr=lo_ptr-k+1;
}
@<Change |pp| to $max(|scrap_base|,|pp|+d)$@>;
@<Print a snapshot of the scrap list if debugging @>;
pp--; /* we next say |pp++| */
}
@ @<Change |pp| to $max...@>=
if (pp+d>=scrap_base) pp=pp+d;
else pp=scrap_base;
@ Here's the |squash| procedure, which
takes advantage of the simplification that occurs when |k==1|.
@c
void
squash(j,k,c,d,n)
scrap_pointer j;
eight_bits c;
short k, d, n;
{
scrap_pointer i; /* pointers into scrap memory */
if (k==1) {
j->cat=c; @<Change |pp|...@>;
@<Print a snapshot...@>;
pp--; /* we next say |pp++| */
return;
}
for (i=j; i<j+k; i++) big_app1(i);
reduce(j,k,c,d,n);
}
@ Here now is the code that applies productions as long as possible.
Before applying the production mechanism, we must make sure
it has good input (at least four scraps, the length of the lhs of the
longest rules), and that there is enough room in the memory arrays
to hold the appended tokens and texts. Here we use a very
conservative test: it's more important to make sure the program
will still work if we change the production rules (within reason)
than to squeeze the last bit of space from the memory arrays.
@d safe_tok_incr 20
@d safe_text_incr 10
@d safe_scrap_incr 10
@<Reduce the scraps using the productions until no more rules apply@>=
while (1) {
@<Make sure the entries |pp| through |pp+3| of |cat| are defined@>;
if (tok_ptr+safe_tok_incr>tok_mem_end) {
if (tok_ptr>max_tok_ptr) max_tok_ptr=tok_ptr;
overflow("token");
}
if (text_ptr+safe_text_incr>tok_start_end) {
if (text_ptr>max_text_ptr) max_text_ptr=text_ptr;
overflow("text");
}
if (pp>lo_ptr) break;
init_mathness=cur_mathness=maybe_math;
@<Match a production...@>;
}
@ If we get to the end of the scrap list, category codes equal to zero are
stored, since zero does not match anything in a production.
@<Make sure the entries...@>=
if (lo_ptr<pp+3) {
while (hi_ptr<=scrap_ptr && lo_ptr!=pp+3) {
(++lo_ptr)->cat=hi_ptr->cat; lo_ptr->mathness=(hi_ptr)->mathness;
lo_ptr->trans=(hi_ptr++)->trans;
}
for (i=lo_ptr+1;i<=pp+3;i++) i->cat=0;
}
@ If .{CWEAVE} is being run in debugging mode, the production numbers and
current stack categories will be printed out when |tracing| is set to 2;
a sequence of two or more irreducible scraps will be printed out when
|tracing| is set to 1.
@<Global...@>=
int tracing; /* can be used to show parsing details */
@ @<Print a snapsh...@>=
{ scrap_pointer k; /* pointer into |scrap_info| */
if (tracing==2) {
printf("n%d:",n);
for (k=scrap_base; k<=lo_ptr; k++) {
if (k==pp) putxchar('*'); else putxchar(' ');
if (k->mathness %4 == yes_math) putchar('+');
else if (k->mathness %4 == no_math) putchar('-');
print_cat(k->cat);
if (k->mathness /4 == yes_math) putchar('+');
else if (k->mathness /4 == no_math) putchar('-');
}
if (hi_ptr<=scrap_ptr) printf("..."); /* indicate that more is coming */
}
}
@ The |translate| function assumes that scraps have been stored in
positions |scrap_base| through |scrap_ptr| of |cat| and |trans|. It
applies productions as much as
possible. The result is a token list containing the translation of
the given sequence of scraps.
After calling |translate|, we will have |text_ptr+3<=max_texts| and
|tok_ptr+6<=max_toks|, so it will be possible to create up to three token
lists with up to six tokens without checking for overflow. Before calling
|translate|, we should have |text_ptr<max_texts| and |scrap_ptr<max_scraps|,
since |translate| might add a new text and a new scrap before it checks
for overflow.
@c
text_pointer
translate() /* converts a sequence of scraps */
{
scrap_pointer i, /* index into |cat| */
j; /* runs through final scraps */
pp=scrap_base; lo_ptr=pp-1; hi_ptr=pp;
@<If tracing, print an indication of where we are@>;
@<Reduce the scraps...@>;
@<Combine the irreducible scraps that remain@>;
}
@ If the initial sequence of scraps does not reduce to a single scrap,
we concatenate the translations of all remaining scraps, separated by
blank spaces, with dollar signs surrounding the translations of scraps
where appropriate.
@<Combine the irreducible...@>= {
@<If semi-tracing, show the irreducible scraps@>;
for (j=scrap_base; j<=lo_ptr; j++) {
if (j!=scrap_base) app(' ');
if (j->mathness % 4 == yes_math) app('$');
app1(j);
if (j->mathness / 4 == yes_math) app('$');
if (tok_ptr+6>tok_mem_end) overflow("token");
}
freeze_text; return(text_ptr-1);
}
@ @<If semi-tracing, show the irreducible scraps@>=
if (lo_ptr>scrap_base && tracing==1) {
printf("nIrreducible scrap sequence in section %d:",section_count);
@.Irreducible scrap sequence...@>
mark_harmless;
for (j=scrap_base; j<=lo_ptr; j++) {
printf(" "); print_cat(j->cat);
}
}
@ @<If tracing,...@>=
if (tracing==2) {
printf("nTracing after l. %d:n",cur_line); mark_harmless;
@.Tracing after...@>
if (loc>buffer+50) {
printf("...");
term_write(loc-51,51);
}
else term_write(buffer,loc-buffer);
}
@* Initializing the scraps.
If we are going to use the powerful production mechanism just developed, we
must get the scraps set up in the first place, given a CEE/ text. A table
of the initial scraps corresponding to CEE/ tokens appeared above in the
section on parsing; our goal now is to implement that table. We shall do this
by implementing a subroutine called |C_parse| that is analogous to the
|C_xref| routine used during phase one.
Like |C_xref|, the |C_parse| procedure starts with the current
value of |next_control| and it uses the operation |next_control=get_next()|
repeatedly to read CEE/ text until encountering the next `.{v}' or
`.{/*}', or until |next_control>=format_code|. The scraps corresponding to
what it reads are appended into the |cat| and |trans| arrays, and |scrap_ptr|
is advanced.
@c
void
C_parse(spec_ctrl) /* creates scraps from CEE/ tokens */
eight_bits spec_ctrl;
{
int count; /* characters remaining before string break */
while (next_control<format_code || next_control==spec_ctrl) {
@<Append the scrap appropriate to |next_control|@>;
next_control=get_next();
if (next_control=='|' || next_control==begin_comment ||
next_control==begin_short_comment) return;
}
}
@ The following macro is used to append a scrap whose tokens have just
been appended:
@d app_scrap(c,b) {
(++scrap_ptr)->cat=(c); scrap_ptr->trans=text_ptr;
scrap_ptr->mathness=5*(b); /* no no, yes yes, or maybe maybe */
freeze_text;
}
@ @<Append the scr...@>=
@<Make sure that there is room for the new scraps, tokens, and texts@>;
switch (next_control) {
case section_name:
app(section_flag+(int)(cur_section-name_dir));
app_scrap(section_scrap,maybe_math);
app_scrap(exp,yes_math);@+break;
case string: case constant: case verbatim: @<Append a string or constant@>;
@+break;
case identifier: app_cur_id(1);@+break;
case TeX_string: @<Append a TEX/ string, without forming a scrap@>;@+break;
case '/': case '.':
app(next_control); app_scrap(binop,yes_math);@+break;
case '<': app_str("\langle");@+app_scrap(prelangle,yes_math);@+break;
@.\langle@>
case '>': app_str("\rangle");@+app_scrap(prerangle,yes_math);@+break;
@.\rangle@>
case '=': app_str("\K"); app_scrap(binop,yes_math);@+break;
@.\K@>
case '|': app_str("\OR"); app_scrap(binop,yes_math);@+break;
@.\OR@>
case '^': app_str("\XOR"); app_scrap(binop,yes_math);@+break;
@.\XOR@>
case '%': app_str("\MOD"); app_scrap(binop,yes_math);@+break;
@.\MOD@>
case '!': app_str("\R"); app_scrap(unop,yes_math);@+break;
@.\R@>
case '~': app_str("\CM"); app_scrap(unop,yes_math);@+break;
@.\CM@>
case '+': case '-': app(next_control); app_scrap(unorbinop,yes_math);@+break;
case '*': app(next_control); app_scrap(raw_unorbin,yes_math);@+break;
case '&': app_str("\AND"); app_scrap(raw_unorbin,yes_math);@+break;
@.\AND@>
case '?': app_str("\?"); app_scrap(question,yes_math);@+break;
@.\?@>
case '#': app_str("\#"); app_scrap(unorbinop,yes_math);@+break;
@.\#@>
case ignore: case xref_roman: case xref_wildcard:
case xref_typewriter: case noop:@+break;
case '(': case '[': app(next_control); app_scrap(lpar,maybe_math);@+break;
case ')': case ']': app(next_control); app_scrap(raw_rpar,maybe_math);@+break;
case '{': app_str("\{"@q}@>); app_scrap(lbrace,yes_math);@+break;
@.\{@>@q}@>
case '}': app_str(@q{@>"\}"); app_scrap(rbrace,yes_math);@+break;
@q{@>@.\}@>
case ',': app(','); app_scrap(comma,yes_math);@+break;
case ';': app(';'); app_scrap(semi,maybe_math);@+break;
case ':': app(':'); app_scrap(colon,maybe_math);@+break;@/
@t4@> @<Cases involving nonstandard characters@>@;
case thin_space: app_str("\,"); app_scrap(insert,maybe_math);@+break;
@.\,@>
case math_break: app(opt); app_str("0");
app_scrap(insert,maybe_math);@+break;
case line_break: app(force); app_scrap(insert,no_math);@+break;
case left_preproc: app(force); app(preproc_line);
app_str("\#"); app_scrap(lproc,no_math);@+break;
@.\#@>
case right_preproc: app(force); app_scrap(rproc,no_math);@+break;
case big_line_break: app(big_force); app_scrap(insert,no_math);@+break;
case no_line_break: app(big_cancel); app(noop); app(break_space);
app(noop); app(big_cancel);
app_scrap(insert,no_math);@+break;
case pseudo_semi: app_scrap(semi,maybe_math);@+break;
case macro_arg_open: app_scrap(begin_arg,maybe_math);@+break;
case macro_arg_close: app_scrap(end_arg,maybe_math);@+break;
case join: app_str("\J"); app_scrap(insert,no_math);@+break;
@.\J@>
case output_defs_code: app(force); app_str("\ATH"); app(force);
app_scrap(insert,no_math);@+break;
@.\ATH@>
default: app(inserted); app(next_control);
app_scrap(insert,maybe_math);@+break;
}
@ @<Make sure that there is room for the new...@>=
if (scrap_ptr+safe_scrap_incr>scrap_info_end ||
tok_ptr+safe_tok_incr>tok_mem_end @| ||
text_ptr+safe_text_incr>tok_start_end) {
if (scrap_ptr>max_scr_ptr) max_scr_ptr=scrap_ptr;
if (tok_ptr>max_tok_ptr) max_tok_ptr=tok_ptr;
if (text_ptr>max_text_ptr) max_text_ptr=text_ptr;
overflow("scrap/token/text");
}
@ Some nonstandard characters may have entered .{CWEAVE} by means of
standard ones. They are converted to TEX/ control sequences so that it is
possible to keep .{CWEAVE} from outputting unusual |char| codes.
@<Cases involving nonstandard...@>=
case not_eq: app_str("\I");@+app_scrap(binop,yes_math);@+break;
@.\I@>
case lt_eq: app_str("\Z");@+app_scrap(binop,yes_math);@+break;
@.\Z@>
case gt_eq: app_str("\G");@+app_scrap(binop,yes_math);@+break;
@.\G@>
case eq_eq: app_str("\E");@+app_scrap(binop,yes_math);@+break;
@.\E@>
case and_and: app_str("\W");@+app_scrap(binop,yes_math);@+break;
@.\W@>
case or_or: app_str("\V");@+app_scrap(binop,yes_math);@+break;
@.\V@>
case plus_plus: app_str("\PP");@+app_scrap(unop,yes_math);@+break;
@.\PP@>
case minus_minus: app_str("\MM");@+app_scrap(unop,yes_math);@+break;
@.\MM@>
case minus_gt: app_str("\MG");@+app_scrap(binop,yes_math);@+break;
@.\MG@>
case gt_gt: app_str("\GG");@+app_scrap(binop,yes_math);@+break;
@.\GG@>
case lt_lt: app_str("\LL");@+app_scrap(binop,yes_math);@+break;
@.\LL@>
case dot_dot_dot: app_str("\,\ldots\,");@+app_scrap(exp,yes_math);@+break;
@.\,@>
@.\ldots@>
case colon_colon: app_str("\DC");@+app_scrap(colcol,maybe_math);@+break;
@.\DC@>
case period_ast: app_str("\PA");@+app_scrap(binop,yes_math);@+break;
@.\PA@>
case minus_gt_ast: app_str("\MGA");@+app_scrap(binop,yes_math);@+break;
@.\MGA@>
@ The following code must use |app_tok| instead of |app| in order to
protect against overflow. Note that |tok_ptr+1<=max_toks| after |app_tok|
has been used, so another |app| is legitimate before testing again.
Many of the special characters in a string must be prefixed by `.\' so that
TEX/ will print them properly.
@^special string characters@>
@<Append a string or...@>=
count= -1;
if (next_control==constant) app_str("\T{"@q}@>);
@.\T@>
else if (next_control==string) {
count=20; app_str("\.{"@q}@>);
}
@.\.@>
else app_str("\vb{"@q}@>);
@.\vb@>
while (id_first<id_loc) {
if (count==0) { /* insert a discretionary break in a long string */
app_str(@q(@>@q{@>"}\)\.{"@q}@>); count=20;
@q(@>@.\)@>
}
@^high-bit character handling@>
if((eight_bits)(*id_first)>0177) {
app_tok(quoted_char);
app_tok((eight_bits)(*id_first++));
}
else {
switch (*id_first) {
case ' ':case '\':case '#':case '%':case '$':case '^':
case '{': case '}': case '~': case '&': case '_': app('\'); break;
@.\ @>
@.\\@>
@.\#@>
@.\%@>
@.\$@>
@.\^@>
@.\{@>@q}@>
@q{@>@.\}@>
@.\~@>
@.\&@>
@.\_@> @q CWEAVE does quote an underscore! @>
case '@@': if (*(id_first+1)=='@@') id_first++;
else err_print("! Double @@ should be used in strings");
@.Double @@ should be used...@>
}
app_tok(*id_first++);
}
count--;
}
app(@q{@>'}');
app_scrap(exp,maybe_math);
@ We do not make the TEX/ string into a scrap, because there is no
telling what the user will be putting into it; instead we leave it
open, to be picked up by the next scrap. If it comes at the end of a
section, it will be made into a scrap when |finish_C| is called.
There's a known bug here, in cases where an adjacent scrap is
|prelangle| or |prerangle|. Then the TEX/ string can disappear
when the .{\langle} or .{\rangle} becomes .{<} or .{>}.
For example, if the user writes .{v x<@@ty@@>v}, the TEX/ string
.{\hbox{y}} eventually becomes part of an |insert| scrap, which is combined
with a |prelangle| scrap and eventually lost. The best way to work around
this bug is probably to enclose the .{@@t...@@>} in .{@@[...@@]} so that
the TEX/ string is treated as an expression.
@^bug, known@>
@<Append a TEX/ string, without forming a scrap@>=
app_str("\hbox{"@q}@>);
@^high-bit character handling@>
while (id_first<id_loc)
if((eight_bits)(*id_first)>0177) {
app_tok(quoted_char);
app_tok((eight_bits)(*id_first++));
}
else {
if (*id_first=='@@') id_first++;
app_tok(*id_first++);
}
app(@q{@>'}');
@ The function |app_cur_id| appends the current identifier to the
token list; it also builds a new scrap if |scrapping==1|.
@<Predec...@>=
void app_cur_id();
@ @c
void
app_cur_id(scrapping)
boolean scrapping; /* are we making this into a scrap? */
{
name_pointer p=id_lookup(id_first,id_loc,normal);
if (p->ilk<=quoted) { /* not a reserved word */
app(id_flag+(int)(p-name_dir));
if (scrapping) app_scrap(exp,p->ilk>=custom? yes_math: maybe_math);
@.\NULL@>
} else {
app(res_flag+(int)(p-name_dir));
if (scrapping) app_scrap(p->ilk,maybe_math);
}
}
@ When the `.{v}' that introduces CEE/ text is sensed, a call on
|C_translate| will return a pointer to the TEX/ translation of
that text. If scraps exist in |scrap_info|, they are
unaffected by this translation process.
@c
text_pointer
C_translate()
{
text_pointer p; /* points to the translation */
scrap_pointer save_base; /* holds original value of |scrap_base| */
save_base=scrap_base; scrap_base=scrap_ptr+1;
C_parse(section_name); /* get the scraps together */
if (next_control!='|') err_print("! Missing '|' after C text");
@.Missing '|'...@>
app_tok(cancel); app_scrap(insert,maybe_math);
/* place a |cancel| token as a final ``comment'' */
p=translate(); /* make the translation */
if (scrap_ptr>max_scr_ptr) max_scr_ptr=scrap_ptr;
scrap_ptr=scrap_base-1; scrap_base=save_base; /* scrap the scraps */
return(p);
}
@ The |outer_parse| routine is to |C_parse| as |outer_xref|
is to |C_xref|: it constructs a sequence of scraps for CEE/ text
until |next_control>=format_code|. Thus, it takes care of embedded comments.
@c
void
outer_parse() /* makes scraps from CEE/ tokens and comments */
{
int bal; /* brace level in comment */
text_pointer p, q; /* partial comments */
while (next_control<format_code)
if (next_control!=begin_comment && next_control!=begin_short_comment)
C_parse(ignore);
else {
boolean is_long_comment=(next_control==begin_comment);
@<Make sure that there is room for the new...@>;
app(cancel); app(inserted);
if (is_long_comment) app_str("\C{"@q}@>);
@.\C@>
else app_str("\SHC{"@q}@>);
@.\SHC@>
bal=copy_comment(is_long_comment,1); next_control=ignore;
while (bal>0) {
p=text_ptr; freeze_text; q=C_translate();
/* at this point we have |tok_ptr+6<=max_toks| */
app(tok_flag+(int)(p-tok_start));
app_str("\PB{"); app(inner_tok_flag+(int)(q-tok_start)); app_tok('}');
@.\PB@>
if (next_control=='|') {
bal=copy_comment(is_long_comment,bal);
next_control=ignore;
}
else bal=0; /* an error has been reported */
}
app(force); app_scrap(insert,no_math);
/* the full comment becomes a scrap */
}
}
@* Output of tokens.
So far our programs have only built up multi-layered token lists in
.{CWEAVE}'s internal memory; we have to figure out how to get them into
the desired final form. The job of converting token lists to characters in
the TEX/ output file is not difficult, although it is an implicitly
recursive process. Four main considerations had to be kept in mind when
this part of .{CWEAVE} was designed. (a) There are two modes of output:
|outer| mode, which translates tokens like |force| into line-breaking
control sequences, and |inner| mode, which ignores them except that blank
spaces take the place of line breaks. (b) The |cancel| instruction applies
to adjacent token or tokens that are output, and this cuts across levels
of recursion since `|cancel|' occurs at the beginning or end of a token
list on one level. (c) The TEX/ output file will be semi-readable if line
breaks are inserted after the result of tokens like |break_space| and
|force|. (d) The final line break should be suppressed, and there should
be no |force| token output immediately after `.{\Y\B}'.
@ The output process uses a stack to keep track of what is going on at
different ``levels'' as the token lists are being written out. Entries on
this stack have three parts:
yskiphang |end_field| is the |tok_mem| location where the token list of a
particular level will end;
yskiphang |tok_field| is the |tok_mem| location from which the next token
on a particular level will be read;
yskiphang |mode_field| is the current mode, either |inner| or |outer|.
yskipnoindent The current values of these quantities are referred to
quite frequently, so they are stored in a separate place instead of in the
|stack| array. We call the current values |cur_end|, |cur_tok|, and
|cur_mode|.
The global variable |stack_ptr| tells how many levels of output are
currently in progress. The end of output occurs when an |end_translation|
token is found, so the stack is never empty except when we first begin the
output process.
@d inner 0 /* value of |mode| for CEE/ texts within TEX/ texts */
@d outer 1 /* value of |mode| for CEE/ texts in sections */
@<Typed...@>= typedef int mode;
typedef struct {
token_pointer end_field; /* ending location of token list */
token_pointer tok_field; /* present location within token list */
boolean mode_field; /* interpretation of control tokens */
} output_state;
typedef output_state *stack_pointer;
@ @d cur_end cur_state.end_field /* current ending location in |tok_mem| */
@d cur_tok cur_state.tok_field /* location of next output token in |tok_mem| */
@d cur_mode cur_state.mode_field /* current mode of interpretation */
@d init_stack stack_ptr=stack;cur_mode=outer /* initialize the stack */
@<Global...@>=
output_state cur_state; /* |cur_end|, |cur_tok|, |cur_mode| */
output_state stack[stack_size]; /* info for non-current levels */
stack_pointer stack_ptr; /* first unused location in the output state stack */
stack_pointer stack_end=stack+stack_size-1; /* end of |stack| */
stack_pointer max_stack_ptr; /* largest value assumed by |stack_ptr| */
@ @<Set init...@>=
max_stack_ptr=stack;
@ To insert token-list |p| into the output, the |push_level| subroutine
is called; it saves the old level of output and gets a new one going.
The value of |cur_mode| is not changed.
@c
void
push_level(p) /* suspends the current level */
text_pointer p;
{
if (stack_ptr==stack_end) overflow("stack");
if (stack_ptr>stack) { /* save current state */
stack_ptr->end_field=cur_end;
stack_ptr->tok_field=cur_tok;
stack_ptr->mode_field=cur_mode;
}
stack_ptr++;
if (stack_ptr>max_stack_ptr) max_stack_ptr=stack_ptr;
cur_tok=*p; cur_end=*(p+1);
}
@ Conversely, the |pop_level| routine restores the conditions that were in
force when the current level was begun. This subroutine will never be
called when |stack_ptr==1|.
@c
void
pop_level()
{
cur_end=(--stack_ptr)->end_field;
cur_tok=stack_ptr->tok_field; cur_mode=stack_ptr->mode_field;
}
@ The |get_output| function returns the next byte of output that is not a
reference to a token list. It returns the values |identifier| or |res_word|
or |section_code| if the next token is to be an identifier (typeset in
italics), a reserved word (typeset in boldface) or a section name (typeset
by a complex routine that might generate additional levels of output).
In these cases |cur_name| points to the identifier or section name in
question.
@<Global...@>=
name_pointer cur_name;
@ @d res_word 0201 /* returned by |get_output| for reserved words */
@d section_code 0200 /* returned by |get_output| for section names */
@c
eight_bits
get_output() /* returns the next token of output */
{
sixteen_bits a; /* current item read from |tok_mem| */
restart: while (cur_tok==cur_end) pop_level();
a=*(cur_tok++);
if (a>=0400) {
cur_name=a % id_flag + name_dir;
switch (a / id_flag) {
case 2: return(res_word); /* |a==res_flag+cur_name| */
case 3: return(section_code); /* |a==section_flag+cur_name| */
case 4: push_level(a % id_flag + tok_start); goto restart;
/* |a==tok_flag+cur_name| */
case 5: push_level(a % id_flag + tok_start); cur_mode=inner; goto restart;
/* |a==inner_tok_flag+cur_name| */
default: return(identifier); /* |a==id_flag+cur_name| */
}
}
return(a);
}
@ The real work associated with token output is done by |make_output|.
This procedure appends an |end_translation| token to the current token list,
and then it repeatedly calls |get_output| and feeds characters to the output
buffer until reaching the |end_translation| sentinel. It is possible for
|make_output| to be called recursively, since a section name may include
embedded CEE/ text; however, the depth of recursion never exceeds one
level, since section names cannot be inside of section names.
A procedure called |output_C| does the scanning, translation, and
output of CEE/ text within `pb' brackets, and this procedure uses
|make_output| to output the current token list. Thus, the recursive call
of |make_output| actually occurs when |make_output| calls |output_C|
while outputting the name of a section.
@^recursion@>
The token list created from within `pb' brackets is output as an argument
to .{\PB}. Although .{cwebmac} ignores .{\PB}, other macro packages
might use it to localize the special meaning of the macros that mark up
program text.
@c
void
output_C() /* outputs the current token list */
{
token_pointer save_tok_ptr;
text_pointer save_text_ptr;
sixteen_bits save_next_control; /* values to be restored */
text_pointer p; /* translation of the CEE/ text */
save_tok_ptr=tok_ptr; save_text_ptr=text_ptr;
save_next_control=next_control; next_control=ignore; p=C_translate();
app(inner_tok_flag+(int)(p-tok_start));
out_str("\PB{"); make_output(); out('}'); /* output the list */
@.\PB@>
if (text_ptr>max_text_ptr) max_text_ptr=text_ptr;
if (tok_ptr>max_tok_ptr) max_tok_ptr=tok_ptr;
text_ptr=save_text_ptr; tok_ptr=save_tok_ptr; /* forget the tokens */
next_control=save_next_control; /* restore |next_control| to original state */
}
@ Here is .{CWEAVE}'s major output handler.
@<Predecl...@>=
void make_output();
@ @c
void
make_output() /* outputs the equivalents of tokens */
{
eight_bits a, /* current output byte */
b; /* next output byte */
int c; /* count of |indent| and |outdent| tokens */
char scratch[longest_name]; /* scratch area for section names */
char *k, *k_limit; /* indices into |scratch| */
char *j; /* index into |buffer| */
char *p; /* index into |byte_mem| */
char delim; /* first and last character of string being copied */
char *save_loc, *save_limit; /* |loc| and |limit| to be restored */
name_pointer cur_section_name; /* name of section being output */
boolean save_mode; /* value of |cur_mode| before a sequence of breaks */
app(end_translation); /* append a sentinel */
freeze_text; push_level(text_ptr-1);
while (1) {
a=get_output();
reswitch: switch(a) {
case end_translation: return;
case identifier: case res_word: @<Output an identifier@>; break;
case section_code: @<Output a section name@>; break;
case math_rel: out_str("\MRL{"@q}@>);
@.\MRL@>
case noop: case inserted: break;
case cancel: case big_cancel: c=0; b=a;
while (1) {
a=get_output();
if (a==inserted) continue;
if ((a<indent && !(b==big_cancel&&a==' ')) || a>big_force) break;
if (a==indent) c++; else if (a==outdent) c--;
else if (a==opt) a=get_output();
}
@<Output saved |indent| or |outdent| tokens@>;
goto reswitch;
case indent: case outdent: case opt: case backup: case break_space:
case force: case big_force: case preproc_line: @<Output a control,
look ahead in case of line breaks, possibly |goto reswitch|@>; break;
case quoted_char: out(*(cur_tok++)); break;
default: out(a); /* otherwise |a| is an ordinary character */
}
}
}
@ An identifier of length one does not have to be enclosed in braces, and it
looks slightly better if set in a math-italic font instead of a (slightly
narrower) text-italic font. Thus we output `.{\v}.{a}' but
`.{\\{aa}}'.
@<Output an identifier@>=
out('\');
if (a==identifier) {
if (cur_name->ilk>=custom && cur_name->ilk<=quoted && !doing_format) {
for (p=cur_name->byte_start;p<(cur_name+1)->byte_start;p++)
out(isxalpha(*p)? 'x':*p);
break;
} else if (is_tiny(cur_name)) out('|')
@.\|@>
else { delim='.';
for (p=cur_name->byte_start;p<(cur_name+1)->byte_start;p++)
if (xislower(*p)) { /* not entirely uppercase */
delim='\'; break;
}
out(delim);
}
@.\\@>
@.\.@>
}
else out('&') /* |a==res_word| */
@.\&@>
if (is_tiny(cur_name)) {
if (isxalpha((cur_name->byte_start)[0]))
out('\');
out((cur_name->byte_start)[0]);
}
else out_name(cur_name);
@ The current mode does not affect the behavior of .{CWEAVE}'s output routine
except when we are outputting control tokens.
@<Output a control...@>=
if (a<break_space || a==preproc_line) {
if (cur_mode==outer) {
out('\'); out(a-cancel+'0');
@.\1@>
@.\2@>
@.\3@>
@.\4@>
@.\8@>
if (a==opt) {
b=get_output(); /* |opt| is followed by a digit */
if (b!='0' || force_lines==0) out(b)@;
else out_str("{-1}"); /* |force_lines| encourages more .{@@v} breaks */
}
} else if (a==opt) b=get_output(); /* ignore digit following |opt| */
}
else @<Look ahead for strongest line break, |goto reswitch|@>
@ If several of the tokens |break_space|, |force|, |big_force| occur in a
row, possibly mixed with blank spaces (which are ignored),
the largest one is used. A line break also occurs in the output file,
except at the very end of the translation. The very first line break
is suppressed (i.e., a line break that follows `.{\Y\B}').
@<Look ahead for st...@>= {
b=a; save_mode=cur_mode; c=0;
while (1) {
a=get_output();
if (a==inserted) continue;
if (a==cancel || a==big_cancel) {
@<Output saved |indent| or |outdent| tokens@>;
goto reswitch; /* |cancel| overrides everything */
}
if ((a!=' ' && a<indent) || a==backup || a>big_force) {
if (save_mode==outer) {
if (out_ptr>out_buf+3 && strncmp(out_ptr-3,"\Y\B",4)==0)
goto reswitch;
@<Output saved |indent| or |outdent| tokens@>;
out('\'); out(b-cancel+'0');
@.\5@>
@.\6@>
@.\7@>
if (a!=end_translation) finish_line();
}
else if (a!=end_translation && cur_mode==inner) out(' ');
goto reswitch;
}
if (a==indent) c++;
else if (a==outdent) c--;
else if (a==opt) a=get_output();
else if (a>b) b=a; /* if |a==' '| we have |a<b| */
}
}
@ @<Output saved...@>=
for (;c>0;c--) out_str("\1");
@.\1@>
for (;c<0;c++) out_str("\2");
@.\2@>
@ The remaining part of |make_output| is somewhat more complicated. When we
output a section name, we may need to enter the parsing and translation
routines, since the name may contain CEE/ code embedded in
pb constructions. This CEE/ code is placed at the end of the active
input buffer and the translation process uses the end of the active
|tok_mem| area.
@<Output a section name@>= {
out_str("\X");
@.\X@>
cur_xref=(xref_pointer)cur_name->xref;
if (cur_xref->num==file_flag) {an_output=1; cur_xref=cur_xref->xlink;}
else an_output=0;
if (cur_xref->num>=def_flag) {
out_section(cur_xref->num-def_flag);
if (phase==3) {
cur_xref=cur_xref->xlink;
while (cur_xref->num>=def_flag) {
out_str(", ");
out_section(cur_xref->num-def_flag);
cur_xref=cur_xref->xlink;
}
}
}
else out('0'); /* output the section number, or zero if it was undefined */
out(':');
if (an_output) out_str("\.{"@q}@>);
@.\.@>
@<Output the text of the section name@>;
if (an_output) out_str(@q{@>" }");
out_str("\X");
}
@ @<Output the text...@>=
sprint_section_name(scratch,cur_name);
k=scratch;
k_limit=scratch+strlen(scratch);
cur_section_name=cur_name;
while (k<k_limit) {
b=*(k++);
if (b=='@@') @<Skip next character, give error if not `.{@@}'@>;
if (an_output)
switch (b) {
case ' ':case '\':case '#':case '%':case '$':case '^':
case '{': case '}': case '~': case '&': case '_':
out('\'); /* falls through */
@.\ @>
@.\\@>
@.\#@>
@.\%@>
@.\$@>
@.\^@>
@.\{@>@q}@>
@q{@>@.\}@>
@.\~@>
@.\&@>
@.\_@> @q CWEAVE does quote an underscore! @>
default: out(b);
}
else if (b!='|') out(b)
else {
@<Copy the CEE/ text into the |buffer| array@>;
save_loc=loc; save_limit=limit; loc=limit+2; limit=j+1;
*limit='|'; output_C();
loc=save_loc; limit=save_limit;
}
}
@ @<Skip next char...@>=
if (*k++!='@@') {
printf("n! Illegal control code in section name: <");
@.Illegal control code...@>
print_section_name(cur_section_name); printf("> "); mark_error;
}
@ The CEE/ text enclosed in pb should not contain `.{v}' characters,
except within strings. We put a `.{v}' at the front of the buffer, so that an
error message that displays the whole buffer will look a little bit sensible.
The variable |delim| is zero outside of strings, otherwise it
equals the delimiter that began the string being copied.
@<Copy the CEE/ text into...@>=
j=limit+1; *j='|'; delim=0;
while (1) {
if (k>=k_limit) {
printf("n! C text in section name didn't end: <");
@.C text...didn't end@>
print_section_name(cur_section_name); printf("> "); mark_error; break;
}
b=*(k++);
if (b=='@@' || (b=='\' && delim!=0))
@<Copy a quoted character into the buffer@>
else {
if (b==''' || b=='"')
if (delim==0) delim=b;
else if (delim==b) delim=0;
if (b!='|' || delim!=0) {
if (j>buffer+long_buf_size-3) overflow("buffer");
*(++j)=b;
}
else break;
}
}
@ @<Copy a quoted char...@>= {
if (j>buffer+long_buf_size-4) overflow("buffer");
*(++j)=b; *(++j)=*(k++);
}
@** Phase two processing.
We have assembled enough pieces of the puzzle in order to be ready to specify
the processing in .{CWEAVE}'s main pass over the source file. Phase two
is analogous to phase one, except that more work is involved because we must
actually output the TEX/ material instead of merely looking at the
.{CWEB} specifications.
@<Predecl...@>=
void phase_two();
@ @c
void
phase_two() {
reset_input(); if (show_progress) printf("nWriting the output file...");
@.Writing the output file...@>
section_count=0; format_visible=1; copy_limbo();
finish_line(); flush_buffer(out_buf,0,0); /* insert a blank line, it looks nice */
while (!input_has_ended) @<Translate the current section@>;
}
@ The output file will contain the control sequence .{\Y} between non-null
sections of a section, e.g., between the TEX/ and definition parts if both
are nonempty. This puts a little white space between the parts when they are
printed. However, we don't want .{\Y} to occur between two definitions
within a single section. The variables |out_line| or |out_ptr| will
change if a section is non-null, so the following macros `|save_position|'
and `|emit_space_if_needed|' are able to handle the situation:
@d save_position save_line=out_line; save_place=out_ptr
@d emit_space_if_needed if (save_line!=out_line || save_place!=out_ptr)
out_str("\Y");
space_checked=1
@.\Y@>
@<Global...@>=
int save_line; /* former value of |out_line| */
char *save_place; /* former value of |out_ptr| */
int sec_depth; /* the integer, if any, following .{@@*} */
boolean space_checked; /* have we done |emit_space_if_needed|? */
boolean format_visible; /* should the next format declaration be output? */
boolean doing_format=0; /* are we outputting a format declaration? */
boolean group_found=0; /* has a starred section occurred? */
@ @<Translate the current section@>= {
section_count++;
@<Output the code for the beginning of a new section@>;
save_position;
@<Translate the TEX/ part of the current section@>;
@<Translate the definition part of the current section@>;
@<Translate the CEE/ part of the current section@>;
@<Show cross-references to this section@>;
@<Output the code for the end of a section@>;
}
@ Sections beginning with the .{CWEB} control sequence `.{@@ }' start in the
output with the TEX/ control sequence `.{\M}', followed by the section
number. Similarly, `.{@@*}' sections lead to the control sequence `.{\N}'.
In this case there's an additional parameter, representing one plus the
specified depth, immediately after the .{\N}.
If the section has changed, we put .{\*} just after the section number.
@<Output the code for the beginning...@>=
if (*(loc-1)!='*') out_str("\M");
@.\M@>
else {
while (*loc == ' ') loc++;
if (*loc=='*') { /* ``top'' level */
sec_depth = -1;
loc++;
}
else {
for (sec_depth=0; xisdigit(*loc);loc++)
sec_depth = sec_depth*10 + (*loc) -'0';
}
while (*loc == ' ') loc++; /* remove spaces before group title */
group_found=1;
out_str("\N");
@.\N@>
{@+ char s[32];@+sprintf(s,"{%d}",sec_depth+1);@+out_str(s);@+}
if (show_progress)
printf("*%d",section_count); update_terminal; /* print a progress report */
}
out_str("{");out_section(section_count); out_str("}");
@ In the TEX/ part of a section, we simply copy the source text, except that
index entries are not copied and CEE/ text within pb is translated.
@<Translate the T...@>= do {
next_control=copy_TeX();
switch (next_control) {
case '|': init_stack; output_C(); break;
case '@@': out('@@'); break;
case TeX_string: case noop:
case xref_roman: case xref_wildcard: case xref_typewriter:
case section_name: loc-=2; next_control=get_next(); /* skip to .{@@>} */
if (next_control==TeX_string)
err_print("! TeX string should be in C text only"); break;
@.TeX string should be...@>
case thin_space: case math_break: case ord:
case line_break: case big_line_break: case no_line_break: case join:
case pseudo_semi: case macro_arg_open: case macro_arg_close:
case output_defs_code:
err_print("! You can't do that in TeX text"); break;
@.You can't do that...@>
}
} while (next_control<format_code);
@ When we get to the following code we have |next_control>=format_code|, and
the token memory is in its initial empty state.
@<Translate the d...@>=
space_checked=0;
while (next_control<=definition) { /* |format_code| or |definition| */
init_stack;
if (next_control==definition) @<Start a macro definition@>@;
else @<Start a format definition@>;
outer_parse(); finish_C(format_visible); format_visible=1;
doing_format=0;
}
@ The |finish_C| procedure outputs the translation of the current
scraps, preceded by the control sequence `.{\B}' and followed by the
control sequence `.{\par}'. It also restores the token and scrap
memories to their initial empty state.
A |force| token is appended to the current scraps before translation
takes place, so that the translation will normally end with .{\6} or
.{\7} (the TEX/ macros for |force| and |big_force|). This .{\6} or
.{\7} is replaced by the concluding .{\par} or by .{\Y\par}.
@<Predecl...@>=
void finish_C();
@ @c
void
finish_C(visible) /* finishes a definition or a CEE/ part */
boolean visible; /* nonzero if we should produce TEX/ output */
{
text_pointer p; /* translation of the scraps */
if (visible) {
out_str("\B"); app_tok(force); app_scrap(insert,no_math);
p=translate();
@.\B@>
app(tok_flag+(int)(p-tok_start)); make_output(); /* output the list */
if (out_ptr>out_buf+1)
if (*(out_ptr-1)=='\')
@.\6@>
@.\7@>
@.\Y@>
if (*out_ptr=='6') out_ptr-=2;
else if (*out_ptr=='7') *out_ptr='Y';
out_str("\par"); finish_line();
}
if (text_ptr>max_text_ptr) max_text_ptr=text_ptr;
if (tok_ptr>max_tok_ptr) max_tok_ptr=tok_ptr;
if (scrap_ptr>max_scr_ptr) max_scr_ptr=scrap_ptr;
tok_ptr=tok_mem+1; text_ptr=tok_start+1; scrap_ptr=scrap_info;
/* forget the tokens and the scraps */
}
@ Keeping in line with the conventions of the CEE/ preprocessor (and
otherwise contrary to the rules of .{CWEB}) we distinguish here
between the case that `.(' immediately follows an identifier and the
case that the two are separated by a space. In the latter case, and
if the identifier is not followed by `.(' at all, the replacement
text starts immediately after the identifier. In the former case,
it starts after we scan the matching `.)'.
@<Start a macro...@>= {
if (save_line!=out_line || save_place!=out_ptr || space_checked) app(backup);
if(!space_checked){emit_space_if_needed;save_position;}
app_str("\D"); /* this will produce `&{define }' */
@.\D@>
if ((next_control=get_next())!=identifier)
err_print("! Improper macro definition");
@.Improper macro definition@>
else {
app('$'); app_cur_id(0);
if (*loc=='(')
reswitch: switch (next_control=get_next()) {
case '(': case ',': app(next_control); goto reswitch;
case identifier: app_cur_id(0); goto reswitch;
case ')': app(next_control); next_control=get_next(); break;
default: err_print("! Improper macro definition"); break;
}
else next_control=get_next();
app_str("$ "); app(break_space);
app_scrap(dead,no_math); /* scrap won't take part in the parsing */
}
}
@ @<Start a format...@>= {
doing_format=1;
if(*(loc-1)=='s' || *(loc-1)=='S') format_visible=0;
if(!space_checked){emit_space_if_needed;save_position;}
app_str("\F"); /* this will produce `&{format }' */
@.\F@>
next_control=get_next();
if (next_control==identifier) {
app(id_flag+(int)(id_lookup(id_first, id_loc,normal)-name_dir));
app(' ');
app(break_space); /* this is syntactically separate from what follows */
next_control=get_next();
if (next_control==identifier) {
app(id_flag+(int)(id_lookup(id_first, id_loc,normal)-name_dir));
app_scrap(exp,maybe_math); app_scrap(semi,maybe_math);
next_control=get_next();
}
}
if (scrap_ptr!=scrap_info+2) err_print("! Improper format definition");
@.Improper format definition@>
}
@ Finally, when the TEX/ and definition parts have been treated, we have
|next_control>=begin_C|. We will make the global variable |this_section|
point to the current section name, if it has a name.
@<Global...@>=
name_pointer this_section; /* the current section name, or zero */
@ @<Translate the CEE/...@>=
this_section=name_dir;
if (next_control<=section_name) {
emit_space_if_needed; init_stack;
if (next_control==begin_C) next_control=get_next();
else {
this_section=cur_section;
@<Check that '=' or '==' follows this section name, and
emit the scraps to start the section definition@>;
}
while (next_control<=section_name) {
outer_parse();
@<Emit the scrap for a section name if present@>;
}
finish_C(1);
}
@ The title of the section and an $E$ or $mathrel+E$ are made
into a scrap that should not take part in the parsing.
@<Check that '='...@>=
do next_control=get_next();
while (next_control=='+'); /* allow optional `.{+=}' */
if (next_control!='=' && next_control!=eq_eq)
err_print("! You need an = sign after the section name");
@.You need an = sign...@>
else next_control=get_next();
if (out_ptr>out_buf+1 && *out_ptr=='Y' && *(out_ptr-1)=='\') app(backup);
/* the section name will be flush left */
@.\Y@>
app(section_flag+(int)(this_section-name_dir));
cur_xref=(xref_pointer)this_section->xref;
if(cur_xref->num==file_flag) cur_xref=cur_xref->xlink;
app_str("${}");
if (cur_xref->num!=section_count+def_flag) {
app_str("\mathrel+"); /*section name is multiply defined*/
this_section=name_dir; /*so we won't give cross-reference info here*/
}
app_str("\E"); /* output an equivalence sign */
@.\E@>
app_str("{}$");
app(force); app_scrap(dead,no_math);
/* this forces a line break unless `.{@@+}' follows */
@ @<Emit the scrap...@>=
if (next_control<section_name) {
err_print("! You can't do that in C text");
@.You can't do that...@>
next_control=get_next();
}
else if (next_control==section_name) {
app(section_flag+(int)(cur_section-name_dir));
app_scrap(section_scrap,maybe_math);
next_control=get_next();
}
@ Cross references relating to a named section are given
after the section ends.
@<Show cross...@>=
if (this_section>name_dir) {
cur_xref=(xref_pointer)this_section->xref;
if (cur_xref->num==file_flag){an_output=1;cur_xref=cur_xref->xlink;}
else an_output=0;
if (cur_xref->num>def_flag)
cur_xref=cur_xref->xlink; /* bypass current section number */
footnote(def_flag); footnote(cite_flag); footnote(0);
}
@ The |footnote| procedure gives cross-reference information about
multiply defined section names (if the |flag| parameter is
|def_flag|), or about references to a section name
(if |flag==cite_flag|), or to its uses (if |flag==0|). It assumes that
|cur_xref| points to the first cross-reference entry of interest, and it
leaves |cur_xref| pointing to the first element not printed. Typical outputs:
`.{\A101.}'; `.{\Us 370\ET1009.}';
`.{\As 8, 27\*\ETs64.}'.
Note that the output of .{CWEAVE} is not English-specific; users may
supply new definitions for the macros .{\A}, .{\As}, etc.
@<Predecl...@>=
void footnote();
@ @c
void
footnote(flag) /* outputs section cross-references */
sixteen_bits flag;
{
xref_pointer q; /* cross-reference pointer variable */
if (cur_xref->num<=flag) return;
finish_line(); out('\');
@.\A@>
@.\Q@>
@.\U@>
out(flag==0? 'U': flag==cite_flag? 'Q': 'A');
@<Output all the section numbers on the reference list |cur_xref|@>;
out('.');
}
@ The following code distinguishes three cases, according as the number
of cross-references is one, two, or more than two. Variable |q| points
to the first cross-reference, and the last link is a zero.
@<Output all the section numbers...@>=
q=cur_xref; if (q->xlink->num>flag) out('s'); /* plural */
while (1) {
out_section(cur_xref->num-flag);
cur_xref=cur_xref->xlink; /* point to the next cross-reference to output */
if (cur_xref->num<=flag) break;
if (cur_xref->xlink->num>flag) out_str(", "); /* not the last */
else {out_str("\ET"); /* the last */
@.\ET@>
if (cur_xref != q->xlink) out('s'); /* the last of more than two */
}
}
@ @<Output the code for the end of a section@>=
out_str("\fi"); finish_line();
@.\fi@>
flush_buffer(out_buf,0,0); /* insert a blank line, it looks nice */
@** Phase three processing.
We are nearly finished! .{CWEAVE}'s only remaining task is to write out the
index, after sorting the identifiers and index entries.
If the user has set the |no_xref| flag (the .{-x} option on the command line),
just finish off the page, omitting the index, section name list, and table of
contents.
@<Predecl...@>=
void phase_three();
@ @c
void
phase_three() {
if (no_xref) {
finish_line();
out_str("\end");
@.\end@>
finish_line();
}
else {
phase=3; if (show_progress) printf("nWriting the index...");
@.Writing the index...@>
finish_line();
if ((idx_file=fopen(idx_file_name,"w"))==NULL)
fatal("! Cannot open index file ",idx_file_name);
@.Cannot open index file@>
if (change_exists) {
@<Tell about changed sections@>; finish_line(); finish_line();
}
out_str("\inx"); finish_line();
@.\inx@>
active_file=idx_file; /* change active file to the index file */
@<Do the first pass of sorting@>;
@<Sort and output the index@>;
finish_line(); fclose(active_file); /* finished with |idx_file| */
active_file=tex_file; /* switch back to |tex_file| for a tic */
out_str("\fin"); finish_line();
@.\fin@>
if ((scn_file=fopen(scn_file_name,"w"))==NULL)
fatal("! Cannot open section file ",scn_file_name);
@.Cannot open section file@>
active_file=scn_file; /* change active file to section listing file */
@<Output all the section names@>;
finish_line(); fclose(active_file); /* finished with |scn_file| */
active_file=tex_file;
if (group_found) out_str("\con");@+else out_str("\end");
@.\con@>
@.\end@>
finish_line();
fclose(active_file);
}
if (show_happiness) printf("nDone.");
check_complete(); /* was all of the change file used? */
}
@ Just before the index comes a list of all the changed sections, including
the index section itself.
@<Global...@>=
sixteen_bits k_section; /* runs through the sections */
@ @<Tell about changed sections@>= {
/* remember that the index is already marked as changed */
k_section=0;
while (!changed_section[++k_section]);
out_str("\ch ");
@.\ch@>
out_section(k_section);
while (k_section<section_count) {
while (!changed_section[++k_section]);
out_str(", "); out_section(k_section);
}
out('.');
}
@ A left-to-right radix sorting method is used, since this makes it easy to
adjust the collating sequence and since the running time will be at worst
proportional to the total length of all entries in the index. We put the
identifiers into 102 different lists based on their first characters.
(Uppercase letters are put into the same list as the corresponding lowercase
letters, since we want to have `$t<\{TeX}<&{to}$'.) The
list for character |c| begins at location |bucket[c]| and continues through
the |blink| array.
@<Global...@>=
name_pointer bucket[256];
name_pointer next_name; /* successor of |cur_name| when sorting */
name_pointer blink[max_names]; /* links in the buckets */
@ To begin the sorting, we go through all the hash lists and put each entry
having a nonempty cross-reference list into the proper bucket.
@<Do the first pass...@>= {
int c;
for (c=0; c<=255; c++) bucket[c]=NULL;
for (h=hash; h<=hash_end; h++) {
next_name=*h;
while (next_name) {
cur_name=next_name; next_name=cur_name->link;
if (cur_name->xref!=(char*)xmem) {
c=(eight_bits)((cur_name->byte_start)[0]);
if (xisupper(c)) c=tolower(c);
blink[cur_name-name_dir]=bucket[c]; bucket[c]=cur_name;
}
}
}
}
@ During the sorting phase we shall use the |cat| and |trans| arrays from
.{CWEAVE}'s parsing algorithm and rename them |depth| and |head|. They now
represent a stack of identifier lists for all the index entries that have
not yet been output. The variable |sort_ptr| tells how many such lists are
present; the lists are output in reverse order (first |sort_ptr|, then
|sort_ptr-1|, etc.). The |j|th list starts at |head[j]|, and if the first
|k| characters of all entries on this list are known to be equal we have
|depth[j]==k|.
@ @<Rest of |trans_plus| union@>=
name_pointer Head;
@ @d depth cat /* reclaims memory that is no longer needed for parsing */
@d head trans_plus.Head /* ditto */
@f sort_pointer int
@d sort_pointer scrap_pointer /* ditto */
@d sort_ptr scrap_ptr /* ditto */
@d max_sorts max_scraps /* ditto */
@<Global...@>=
eight_bits cur_depth; /* depth of current buckets */
char *cur_byte; /* index into |byte_mem| */
sixteen_bits cur_val; /* current cross-reference number */
sort_pointer max_sort_ptr; /* largest value of |sort_ptr| */
@ @<Set init...@>=
max_sort_ptr=scrap_info;
@ The desired alphabetic order is specified by the |collate| array; namely,
$|collate|[0]<|collate|[1]<cdots<|collate|[100]$.
@<Global...@>=
eight_bits collate[102+128]; /* collation order */
@^high-bit character handling@>
@ We use the order $hbox{null}<. <hbox{other characters}<{}$._${}<
.A=.a<cdots<.Z=.z<.0<cdots<.9.$ Warning: The collation mapping
needs to be changed if ASCII code is not being used.
@^ASCII code dependencies@>
@^high-bit character handling@>
We initialize |collate| by copying a few characters at a time, because
some CEE/ compilers choke on long strings.
@<Set init...@>=
collate[0]=0;
strcpy(collate+1," 12345671011121314151617");
/* 16 characters + 1 = 17 */
strcpy(collate+17,"20212223242526273031323334353637");
/* 16 characters + 17 = 33 */
strcpy(collate+33,"!42#$%&'()*+,-./:;<=>?@@[\]^`{|}~_");
/* 32 characters + 33 = 65 */
strcpy(collate+65,"abcdefghijklmnopqrstuvwxyz0123456789");
/* (26 + 10) characters + 65 = 101 */
strcpy(collate+101,"200201202203204205206207210211212213214215216217");
/* 16 characters + 101 = 117 */
strcpy(collate+117,"220221222223224225226227230231232233234235236237");
/* 16 characters + 117 = 133 */
strcpy(collate+133,"240241242243244245246247250251252253254255256257");
/* 16 characters + 133 = 149 */
strcpy(collate+149,"260261262263264265266267270271272273274275276277");
/* 16 characters + 149 = 165 */
strcpy(collate+165,"300301302303304305306307310311312313314315316317");
/* 16 characters + 165 = 181 */
strcpy(collate+181,"320321322323324325326327330331332333334335336337");
/* 16 characters + 181 = 197 */
strcpy(collate+197,"340341342343344345346347350351352353354355356357");
/* 16 characters + 197 = 213 */
strcpy(collate+213,"360361362363364365366367370371372373374375376377");
/* 16 characters + 213 = 229 */
@ Procedure |unbucket| goes through the buckets and adds nonempty lists
to the stack, using the collating sequence specified in the |collate| array.
The parameter to |unbucket| tells the current depth in the buckets.
Any two sequences that agree in their first 255 character positions are
regarded as identical.
@d infinity 255 /* $infty$ (approximately) */
@<Predecl...@>=
void unbucket();
@ @c
void
unbucket(d) /* empties buckets having depth |d| */
eight_bits d;
{
int c; /* index into |bucket|; cannot be a simple |char| because of sign
comparison below*/
for (c=100+128; c>= 0; c--) if (bucket[collate[c]]) {
@^high-bit character handling@>
if (sort_ptr>=scrap_info_end) overflow("sorting");
sort_ptr++;
if (sort_ptr>max_sort_ptr) max_sort_ptr=sort_ptr;
if (c==0) sort_ptr->depth=infinity;
else sort_ptr->depth=d;
sort_ptr->head=bucket[collate[c]]; bucket[collate[c]]=NULL;
}
}
@ @<Sort and output...@>=
sort_ptr=scrap_info; unbucket(1);
while (sort_ptr>scrap_info) {
cur_depth=sort_ptr->depth;
if (blink[sort_ptr->head-name_dir]==0 || cur_depth==infinity)
@<Output index entries for the list at |sort_ptr|@>@;
else @<Split the list at |sort_ptr| into further lists@>;
}
@ @<Split the list...@>= {
eight_bits c;
next_name=sort_ptr->head;
do {
cur_name=next_name; next_name=blink[cur_name-name_dir];
cur_byte=cur_name->byte_start+cur_depth;
if (cur_byte==(cur_name+1)->byte_start) c=0; /* hit end of the name */
else {
c=(eight_bits) *cur_byte;
if (xisupper(c)) c=tolower(c);
}
blink[cur_name-name_dir]=bucket[c]; bucket[c]=cur_name;
} while (next_name);
--sort_ptr; unbucket(cur_depth+1);
}
@ @<Output index...@>= {
cur_name=sort_ptr->head;
do {
out_str("\I");
@.\I@>
@<Output the name at |cur_name|@>;
@<Output the cross-references at |cur_name|@>;
cur_name=blink[cur_name-name_dir];
} while (cur_name);
--sort_ptr;
}
@ @<Output the name...@>=
switch (cur_name->ilk) {
case normal: if (is_tiny(cur_name)) out_str("\|");
else {char *j;
for (j=cur_name->byte_start;j<(cur_name+1)->byte_start;j++)
if (xislower(*j)) goto lowcase;
out_str("\."); break;
lowcase: out_str("\\");
}
break;
@.\|@>
@.\.@>
@.\\@>
case roman: break;
case wildcard: out_str("\9"); break;
@.\9@>
case typewriter: out_str("\."); break;
@.\.@>
case custom: case quoted: {char *j; out_str("$\");
for (j=cur_name->byte_start;j<(cur_name+1)->byte_start;j++)
out(isxalpha(*j)? 'x' : *j);
out('$');
goto name_done;
}
default: out_str("\&");
@.\&@>
}
out_name(cur_name);
name_done:
@ Section numbers that are to be underlined are enclosed in
`.{\[}$,ldots,$.]'.
@<Output the cross-references...@>=
@<Invert the cross-reference list at |cur_name|, making |cur_xref| the head@>;
do {
out_str(", "); cur_val=cur_xref->num;
if (cur_val<def_flag) out_section(cur_val);
else {out_str("\["); out_section(cur_val-def_flag); out(']');}
@.\[@>
cur_xref=cur_xref->xlink;
} while (cur_xref!=xmem);
out('.'); finish_line();
@ List inversion is best thought of as popping elements off one stack and
pushing them onto another. In this case |cur_xref| will be the head of
the stack that we push things onto.
@<Global...@>=
xref_pointer next_xref, this_xref;
/* pointer variables for rearranging a list */
@ @<Invert the cross-reference list at |cur_name|, making |cur_xref| the head@>=
this_xref=(xref_pointer)cur_name->xref; cur_xref=xmem;
do {
next_xref=this_xref->xlink; this_xref->xlink=cur_xref;
cur_xref=this_xref; this_xref=next_xref;
} while (this_xref!=xmem);
@ The following recursive procedure walks through the tree of section names and
prints them.
@^recursion@>
@<Predecl...@>=
void section_print();
@ @c
void
section_print(p) /* print all section names in subtree |p| */
name_pointer p;
{
if (p) {
section_print(p->llink); out_str("\I");
@.\I@>
tok_ptr=tok_mem+1; text_ptr=tok_start+1; scrap_ptr=scrap_info; init_stack;
app(p-name_dir+section_flag); make_output();
footnote(cite_flag);
footnote(0); /* |cur_xref| was set by |make_output| */
finish_line();@/
section_print(p->rlink);
}
}
@ @<Output all the section names@>=section_print(root)
@ Because on some systems the difference between two pointers is a |long|
rather than an |int|, we use .{%ld} to print these quantities.
@c
void
print_stats() {
printf("nMemory usage statistics:n");
@.Memory usage statistics:@>
printf("%ld names (out of %ld)n",
(long)(name_ptr-name_dir),(long)max_names);
printf("%ld cross-references (out of %ld)n",
(long)(xref_ptr-xmem),(long)max_refs);
printf("%ld bytes (out of %ld)n",
(long)(byte_ptr-byte_mem),(long)max_bytes);
printf("Parsing:n");
printf("%ld scraps (out of %ld)n",
(long)(max_scr_ptr-scrap_info),(long)max_scraps);
printf("%ld texts (out of %ld)n",
(long)(max_text_ptr-tok_start),(long)max_texts);
printf("%ld tokens (out of %ld)n",
(long)(max_tok_ptr-tok_mem),(long)max_toks);
printf("%ld levels (out of %ld)n",
(long)(max_stack_ptr-stack),(long)stack_size);
printf("Sorting:n");
printf("%ld levels (out of %ld)n",
(long)(max_sort_ptr-scrap_info),(long)max_scraps);
}
@** Index.
If you have read and understood the code for Phase III above, you know what
is in this index and how it got here. All sections in which an identifier is
used are listed with that identifier, except that reserved words are
indexed only when they appear in format definitions, and the appearances
of identifiers in section names are not indexed. Underlined entries
correspond to where the identifier was declared. Error messages, control
sequences put into the output, and a few
other things like ``recursion'' are indexed here too.