cweave.w
上传用户:rrhhcc
上传日期:2015-12-11
资源大小:54129k
文件大小:159k
- % This file is part of CWEB.
- % This program by Silvio Levy and Donald E. Knuth
- % is based on a program by Knuth.
- % It is distributed WITHOUT ANY WARRANTY, express or implied.
- % Version 3.4 --- April 1995
- % Copyright (C) 1987,1990,1993 Silvio Levy and Donald E. Knuth
- % Permission is granted to make and distribute verbatim copies of this
- % document provided that the copyright notice and this permission notice
- % are preserved on all copies.
- % Permission is granted to copy and distribute modified versions of this
- % document under the conditions for verbatim copying, provided that the
- % entire resulting derived work is given a different name and distributed
- % under the terms of a permission notice identical to this one.
- % Here is TeX material that gets inserted after input cwebmac
- \def\hang{\hangindent 3em\indent\ignorespaces}
- \def\pb{$\.|\ldots\.|$} % C brackets (|...|)
- \def\v{\char'174} % vertical (|) in typewriter font
- \def\dleft{[\![} \def\dright{]\!]} % double brackets
- \mathchardef\RA="3221 % right arrow
- \mathchardef\BA="3224 % double arrow
- \def\({} % ) kludge for alphabetizing certain section names
- \def\TeXxstring{\\{\TEX/\_string}}
- \def\skipxTeX{\\{skip\_\TEX/}}
- \def\copyxTeX{\\{copy\_\TEX/}}
- \def\title{CWEAVE (Version 3.4)}
- \def\topofcontents{\null\vfill
- \centerline{\titlefont The {\ttitlefont CWEAVE} processor}
- \vskip 15pt
- \centerline{(Version 3.4)}
- \vfill}
- \def\botofcontents{\vfill
- \noindent
- Copyright \copyright\ 1987, 1990, 1993 Silvio Levy and Donald E. Knuth
- \bigskip\noindent
- Permission is granted to make and distribute verbatim copies of this
- document provided that the copyright notice and this permission notice
- are preserved on all copies.
- \smallskip\noindent
- Permission is granted to copy and distribute modified versions of this
- document under the conditions for verbatim copying, provided that the
- entire resulting derived work is given a different name and distributed
- under the terms of a permission notice identical to this one.
- }
- \pageno=\contentspagenumber \advance\pageno by 1
- \let\maybe=\iftrue
- @** Introduction.
- This is the \.{CWEAVE} program by Silvio Levy and Donald E. Knuth,
- based on \.{WEAVE} by Knuth.
- We are thankful to Steve Avery,
- Nelson Beebe, Hans-Hermann Bode (to whom the \CPLUSPLUS/ adaptation is due),
- Klaus Guntermann, Norman Ramsey, Tomas Rokicki, Joachim Schnitter,
- Joachim Schrod, Lee Wittenberg, and others who have contributed improvements.
- The ``banner line'' defined here should be changed whenever \.{CWEAVE}
- is modified.
- @d banner "This is CWEAVE (Version 3.4)\n" /* printed at startup when |show_banner| is set */
- @c @<Include files@>@/
- @h
- @<Common code for .{CWEAVE} and .{CTANGLE}@>@/
- @<Typedef declarations@>@/
- @<Global variables@>@/
- @<Predeclaration of procedures@>
- @ We predeclare several standard system functions here instead of including
- their system header files, because the names of the header files are not as
- standard as the names of the functions. (For example, some \CEE/ environments
- have \.{<string.h>} where others have \.{<strings.h>}.)
- @<Predecl...@>=
- /* NOTE(review): these old-style declarations give no parameter types and
- declare |strlen| as returning |int|; confirm that this agrees with the
- library of the target system */
- extern int strlen(); /* length of string */
- extern int strcmp(); /* compare strings lexicographically */
- extern char* strcpy(); /* copy one string to another */
- extern int strncmp(); /* compare up to $n$ string characters */
- extern char* strncpy(); /* copy up to $n$ string characters */
- @ \.{CWEAVE} has a fairly straightforward outline. It operates in
- three phases: First it inputs the source file and stores cross-reference
- data, then it inputs the source once again and produces the \TEX/ output
- file, finally it sorts and outputs the index.
- Please read the documentation for \.{common}, the set of routines common
- to \.{CTANGLE} and \.{CWEAVE}, before proceeding further.
- @c
- int main (ac, av)
- int ac; /* argument count */
- char **av; /* argument values */
- {
- argc=ac; argv=av; /* publish the command line through the globals |argc| and |argv| */
- program=cweave;
- make_xrefs=force_lines=1; /* controlled by command-line options */
- common_init();
- @<Set initial values@>;
- if (show_banner) printf("%s",banner); /* print a ``banner line''; the banner
- is passed as data, never as a format, so it may safely be edited */
- @<Store all the reserved words@>;
- phase_one(); /* read all the user's text and store the cross-references */
- phase_two(); /* read all the text again and translate it to \TEX/ form */
- phase_three(); /* output the cross-reference index */
- return wrap_up(); /* and exit gracefully */
- }
- @ The following parameters were sufficient in the original \.{WEAVE} to
- handle \TEX/, so they should be sufficient for most applications of \.{CWEAVE}.
- If you change |max_bytes|, |max_names|, |hash_size| or |buf_size|
- you have to change them also in the file |"common.w"|.
- @d max_bytes 90000 /* the number of bytes in identifiers,
- index entries, and section names */
- @d max_names 4000 /* number of identifiers, strings, section names;
- must be less than 10240; used in |"common.w"| */
- @d max_sections 2000 /* greater than the total number of sections */
- @d hash_size 353 /* should be prime */
- @d buf_size 100 /* maximum length of input line, plus one */
- @d longest_name 1000 /* section names and strings shouldn't be longer than this */
- @d long_buf_size (buf_size+longest_name)
- @d line_length 80 /* lines of \TEX/ output have at most this many characters;
- should be less than 256 */
- @d max_refs 20000 /* number of cross-references; must be less than 65536 */
- @d max_toks 20000 /* number of symbols in \CEE/ texts being parsed;
- must be less than 65536 */
- @d max_texts 4000 /* number of phrases in \CEE/ texts being parsed;
- must be less than 10240 */
- @d max_scraps 2000 /* number of tokens in \CEE/ texts being parsed */
- @d stack_size 400 /* number of simultaneous output levels */
- @ The next few sections contain stuff from the file |"common.w"| that must
- be included in both |"ctangle.w"| and |"cweave.w"|. It appears in
- file |"common.h"|, which needs to be updated when |"common.w"| changes.
- @i common.h
- @* Data structures exclusive to {\tt CWEAVE}.
- As explained in \.{common.w}, the field of a |name_info| structure
- that contains the |rlink| of a section name is used for a completely
- different purpose in the case of identifiers. It is then called the
- |ilk| of the identifier, and it is used to
- distinguish between various types of identifiers, as follows:
- \yskip\hang |normal| identifiers are part of the \CEE/ program and
- will appear in italic type.
- \yskip\hang |roman| identifiers are index entries that appear after
- \.{@@^} in the \.{CWEB} file.
- \yskip\hang |wildcard| identifiers are index entries that appear after
- \.{@@:} in the \.{CWEB} file.
- \yskip\hang |typewriter| identifiers are index entries that appear after
- \.{@@.} in the \.{CWEB} file.
- \yskip\hang |else_like|, \dots, |typedef_like|
- identifiers are \CEE/ reserved words whose |ilk| explains how they are
- to be treated when \CEE/ code is being formatted.
- @d ilk dummy.Ilk
- @d normal 0 /* ordinary identifiers have |normal| ilk */
- @d roman 1 /* normal index entries have |roman| ilk */
- @d wildcard 2 /* user-formatted index entries have |wildcard| ilk */
- @d typewriter 3 /* `typewriter type' entries have |typewriter| ilk */
- @d abnormal(a) (a->ilk>typewriter) /* tells if a name is special */
- @d custom 4 /* identifiers with user-given control sequence */
- @d unindexed(a) (a->ilk>custom) /* tells if uses of a name are {\it not\/} to be indexed */
- @d quoted 5 /* \.{NULL} */
- @d else_like 26 /* \&{else} */
- @d public_like 40 /* \&{public}, \&{private}, \&{protected} */
- @d operator_like 41 /* \&{operator} */
- @d new_like 42 /* \&{new} */
- @d catch_like 43 /* \&{catch} */
- @d for_like 45 /* \&{for}, \&{switch}, \&{while} */
- @d do_like 46 /* \&{do} */
- @d if_like 47 /* \&{if}, \&{ifdef}, \&{endif}, \&{pragma}, \dots */
- @d raw_rpar 48 /* `\.)' or `\.]' when looking for \&{const} following */
- @d raw_unorbin 49 /* `\.\&' or `\.*' when looking for \&{const} following */
- @d const_like 50 /* \&{const}, \&{volatile} */
- @d raw_int 51 /* \&{int}, \&{char}, \&{extern}, \dots */
- @d int_like 52 /* same, when not followed by left parenthesis */
- @d case_like 53 /* \&{case}, \&{return}, \&{goto}, \&{break}, \&{continue} */
- @d sizeof_like 54 /* \&{sizeof} */
- @d struct_like 55 /* \&{struct}, \&{union}, \&{enum}, \&{class} */
- @d typedef_like 56 /* \&{typedef} */
- @d define_like 57 /* \&{define} */
- @ We keep track of the current section number in |section_count|, which
- is the total number of sections that have started. Sections which have
- been altered by a change file entry have their |changed_section| flag
- turned on during the first phase.
- @<Global...@>=
- boolean change_exists; /* has any section changed? */
- @ The other large memory area in \.{CWEAVE} keeps the cross-reference data.
- All uses of the name |p| are recorded in a linked list beginning at
- |p->xref|, which points into the |xmem| array. The elements of |xmem|
- are structures consisting of an integer, |num|, and a pointer |xlink|
- to another element of |xmem|. If |x=p->xref| is a pointer into |xmem|,
- the value of |x->num| is either a section number where |p| is used,
- or |cite_flag| plus a section number where |p| is mentioned,
- or |def_flag| plus a section number where |p| is defined;
- and |x->xlink| points to the next such cross-reference for |p|,
- if any. This list of cross-references is in decreasing order by
- section number. The next unused slot in |xmem| is |xref_ptr|.
- The linked list ends at |&xmem[0]|.
- The global variable |xref_switch| is set either to |def_flag| or to zero,
- depending on whether the next cross-reference to an identifier is to be
- underlined or not in the index. This switch is set to |def_flag| when
- \.{@@!} or \.{@@d} is scanned, and it is cleared to zero when
- the next identifier or index entry cross-reference has been made.
- Similarly, the global variable |section_xref_switch| is either
- |def_flag| or |cite_flag| or zero, depending
- on whether a section name is being defined, cited or used in \CEE/ text.
- @<Type...@>=
- typedef struct xref_info {
- sixteen_bits num; /* section number, possibly plus |cite_flag| or |def_flag|;
- or the special value |file_flag| */
- struct xref_info *xlink; /* pointer to the previous cross-reference */
- } xref_info;
- typedef xref_info *xref_pointer; /* a location within |xmem| */
- @ @<Global...@>=
- xref_info xmem[max_refs]; /* contains cross-reference information */
- xref_pointer xmem_end = xmem+max_refs-1; /* the last slot of |xmem| */
- xref_pointer xref_ptr; /* the largest occupied position in |xmem| */
- sixteen_bits xref_switch,section_xref_switch; /* |xref_switch| is zero or
- |def_flag|; |section_xref_switch| may also be |cite_flag| */
- @ A section that is used for multi-file output (with the \.{@@(} feature)
- has a special first cross-reference whose |num| field is |file_flag|.
- @d file_flag (3*cite_flag) /* |num| of the special first entry of a multi-file section */
- @d def_flag (2*cite_flag) /* added to the number of a section where a name is defined */
- @d cite_flag 10240 /* must be strictly larger than |max_sections| */
- @d xref equiv_or_xref /* the cross-reference list lives in the |equiv_or_xref| field */
- @<Set init...@>=
- xref_ptr=xmem; name_dir->xref=(char*)xmem; xref_switch=0; section_xref_switch=0;
- xmem->num=0; /* sentinel value */
- @ A new cross-reference for an identifier is formed by calling |new_xref|,
- which discards duplicate entries and ignores non-underlined references
- to one-letter identifiers or \CEE/'s reserved words.
- If the user has set the |no_xref| flag (the \.{-x} option of the command line),
- it is unnecessary to keep track of cross-references for identifiers.
- If one were careful, one could probably make more changes around section
- 100 to avoid a lot of identifier looking up.
- @d append_xref(c) if (xref_ptr==xmem_end) overflow("cross-reference");
- else (++xref_ptr)->num=(c) /* claim the next slot of |xmem| and store |c| in it */
- @d no_xref (flags['x']==0) /* were cross-references switched off? */
- @d make_xrefs flags['x'] /* should cross references be output? */
- @d is_tiny(p) (((p)+1)->byte_start==(p)->byte_start+1) /* is the name one byte long? */
- @c
- void
- new_xref(p)
- name_pointer p;
- {
- xref_pointer prev; /* newest cross-reference already recorded for |p| */
- sixteen_bits cur, old; /* the value to record and the value found at |prev| */
- if (no_xref) return; /* cross-referencing has been switched off */
- if (xref_switch==0 && (unindexed(p) || is_tiny(p)))
- return; /* reserved words and one-letter names are recorded only when underlined */
- cur=section_count+xref_switch; xref_switch=0; /* the switch applies to one reference only */
- prev=(xref_pointer)p->xref;
- if (prev != xmem) { /* the list is not empty */
- old=prev->num;
- if (old==cur || old==cur+def_flag) return; /* nothing new to say */
- if (cur==old+def_flag) { /* upgrade a plain reference to a defining one */
- prev->num=cur; return;
- }
- }
- append_xref(cur); xref_ptr->xlink=prev; p->xref=(char*)xref_ptr;
- }
- @ The cross-reference lists for section names are slightly different.
- Suppose that a section name is defined in sections $m_1$, \dots,
- $m_k$, cited in sections $n_1$, \dots, $n_l$, and used in sections
- $p_1$, \dots, $p_j$. Then its list will contain $m_1+|def_flag|$,
- \dots, $m_k+|def_flag|$, $n_1+|cite_flag|$, \dots,
- $n_l+|cite_flag|$, $p_1$, \dots, $p_j$, in this order.
- Although this method of storage takes quadratic time in the length of
- the list, under foreseeable uses of \.{CWEAVE} this inefficiency is
- insignificant.
- @c
- void
- new_section_xref(p)
- name_pointer p;
- {
- xref_pointer after=(xref_pointer)p->xref; /* entry that will follow the new one */
- xref_pointer before=xmem; /* entry that will precede it, or |xmem| if there is none */
- if (after>xmem) /* walk past every entry whose |num| exceeds the current switch */
- for (; after->num>section_xref_switch; after=after->xlink) before=after;
- if (before->num==section_count+section_xref_switch)
- return; /* this section is already recorded with the same flag */
- append_xref(section_count+section_xref_switch);
- xref_ptr->xlink=after; section_xref_switch=0;
- if (before!=xmem) before->xlink=xref_ptr; /* splice into the middle of the list */
- else p->xref=(char*)xref_ptr; /* the new entry becomes the head of the list */
- }
- @ The cross-reference list for a section name may also begin with
- |file_flag|. Here's how that flag gets put~in.
- @c
- void
- set_file_flag(p)
- name_pointer p;
- {
- xref_pointer head=(xref_pointer)p->xref; /* current first entry of the list of |p| */
- if (head->num!=file_flag) { /* the flag has not been put in yet */
- append_xref(file_flag);
- xref_ptr->xlink=head; p->xref=(char *)xref_ptr; /* the flag entry goes in front */
- }
- }
- @ A third large area of memory is used for sixteen-bit `tokens', which appear
- in short lists similar to the strings of characters in |byte_mem|. Token lists
- are used to contain the result of \CEE/ code translated into \TEX/ form;
- further details about them will be explained later. A |text_pointer| variable
- is an index into |tok_start|.
- @<Typed...@>=
- typedef sixteen_bits token; /* one sixteen-bit entry of a token list */
- typedef token *token_pointer; /* the address of a |token| */
- typedef token_pointer *text_pointer; /* the address of a |token_pointer|,
- such as an entry of |tok_start| */
- @ The first position of |tok_mem|
- that is unoccupied by replacement text is called |tok_ptr|, and the first
- unused location of |tok_start| is called |text_ptr|.
- Thus, we usually have |*text_ptr==tok_ptr|.
- @<Global...@>=
- token tok_mem[max_toks]; /* tokens */
- token_pointer tok_mem_end = tok_mem+max_toks-1; /* end of |tok_mem| */
- token_pointer tok_start[max_texts]; /* directory into |tok_mem| */
- token_pointer tok_ptr; /* first unused position in |tok_mem| */
- text_pointer text_ptr; /* first unused position in |tok_start| */
- text_pointer tok_start_end = tok_start+max_texts-1; /* end of |tok_start| */
- token_pointer max_tok_ptr; /* largest value of |tok_ptr| */
- text_pointer max_text_ptr; /* largest value of |text_ptr| */
- @ @<Set init...@>=
- tok_ptr=tok_mem+1; text_ptr=tok_start+1; tok_start[0]=tok_mem+1;
- tok_start[1]=tok_mem+1; /* hence |*text_ptr==tok_ptr| holds at the outset */
- max_tok_ptr=tok_mem+1; max_text_ptr=tok_start+1; /* the recorded maxima begin at the same places */
- @ Here are the three procedures needed to complete |id_lookup|:
- @c
- int names_match(p,first,l,t)
- name_pointer p; /* the candidate entry */
- char *first; /* where the string being looked up begins */
- int l; /* how many characters it has */
- eight_bits t; /* the ilk being asked for */
- {
- if (length(p)!=l) return 0; /* different lengths can never match */
- if (p->ilk!=t && (t!=normal || !abnormal(p)))
- return 0; /* wrong ilk, and not a |normal| request hitting a special name */
- return strncmp(first,p->byte_start,l)==0; /* finally compare the characters */
- }
- void
- init_p(p,t)
- name_pointer p;
- eight_bits t;
- {
- p->xref=(char*)xmem; /* start with an empty cross-reference list */
- p->ilk=t; /* and remember the requested ilk */
- }
- void
- init_node(p)
- name_pointer p;
- {
- p->xref = (char *) xmem; /* the cross-reference list of a new node is empty */
- }
- @ We have to get \CEE/'s
- reserved words into the hash table, and the simplest way to do this is
- to insert them every time \.{CWEAVE} is run. Fortunately there are relatively
- few reserved words. (Some of these are not strictly ``reserved,'' but
- are defined in header files of the ISO Standard \CEE/ Library.)
- @^reserved words@>
- @<Store all the reserved words@>=
- /* each call enters one word together with the ilk that governs its formatting;
- the second argument is |NULL| in every call */
- id_lookup("asm",NULL,sizeof_like);
- id_lookup("auto",NULL,int_like);
- id_lookup("break",NULL,case_like);
- id_lookup("case",NULL,case_like);
- id_lookup("catch",NULL,catch_like);
- id_lookup("char",NULL,raw_int);
- id_lookup("class",NULL,struct_like);
- id_lookup("clock_t",NULL,raw_int);
- id_lookup("const",NULL,const_like);
- id_lookup("continue",NULL,case_like);
- id_lookup("default",NULL,case_like);
- id_lookup("define",NULL,define_like);
- id_lookup("defined",NULL,sizeof_like);
- id_lookup("delete",NULL,sizeof_like);
- id_lookup("div_t",NULL,raw_int);
- id_lookup("do",NULL,do_like);
- id_lookup("double",NULL,raw_int);
- id_lookup("elif",NULL,if_like);
- id_lookup("else",NULL,else_like);
- id_lookup("endif",NULL,if_like);
- id_lookup("enum",NULL,struct_like);
- id_lookup("error",NULL,if_like);
- id_lookup("extern",NULL,int_like);
- id_lookup("FILE",NULL,raw_int);
- id_lookup("float",NULL,raw_int);
- id_lookup("for",NULL,for_like);
- id_lookup("fpos_t",NULL,raw_int);
- id_lookup("friend",NULL,int_like);
- id_lookup("goto",NULL,case_like);
- id_lookup("if",NULL,if_like);
- id_lookup("ifdef",NULL,if_like);
- id_lookup("ifndef",NULL,if_like);
- id_lookup("include",NULL,if_like);
- id_lookup("inline",NULL,int_like);
- id_lookup("int",NULL,raw_int);
- id_lookup("jmp_buf",NULL,raw_int);
- id_lookup("ldiv_t",NULL,raw_int);
- id_lookup("line",NULL,if_like);
- id_lookup("long",NULL,raw_int);
- id_lookup("new",NULL,new_like);
- id_lookup("NULL",NULL,quoted);
- id_lookup("offsetof",NULL,sizeof_like);
- id_lookup("operator",NULL,operator_like);
- id_lookup("pragma",NULL,if_like);
- id_lookup("private",NULL,public_like);
- id_lookup("protected",NULL,public_like);
- id_lookup("ptrdiff_t",NULL,raw_int);
- id_lookup("public",NULL,public_like);
- id_lookup("register",NULL,int_like);
- id_lookup("return",NULL,case_like);
- id_lookup("short",NULL,raw_int);
- id_lookup("sig_atomic_t",NULL,raw_int);
- id_lookup("signed",NULL,raw_int);
- id_lookup("size_t",NULL,raw_int);
- id_lookup("sizeof",NULL,sizeof_like);
- id_lookup("static",NULL,int_like);
- id_lookup("struct",NULL,struct_like);
- id_lookup("switch",NULL,for_like);
- id_lookup("template",NULL,int_like);
- id_lookup("TeX",NULL,custom); /* an identifier with a user-given control sequence */
- id_lookup("this",NULL,quoted);
- id_lookup("throw",NULL,case_like);
- id_lookup("time_t",NULL,raw_int);
- id_lookup("try",NULL,else_like);
- id_lookup("typedef",NULL,typedef_like);
- id_lookup("undef",NULL,if_like);
- id_lookup("union",NULL,struct_like);
- id_lookup("unsigned",NULL,raw_int);
- id_lookup("va_dcl",NULL,decl); /* Berkeley's variable-arg-list convention;
- NOTE(review): |decl| is not among the ilk codes defined with |normal|,
- \dots, |define_like|; presumably it is defined elsewhere in this file */
- id_lookup("va_list",NULL,raw_int); /* ditto */
- id_lookup("virtual",NULL,int_like);
- id_lookup("void",NULL,raw_int);
- id_lookup("volatile",NULL,const_like);
- id_lookup("wchar_t",NULL,raw_int);
- id_lookup("while",NULL,for_like);
- @* Lexical scanning.
- Let us now consider the subroutines that read the \.{CWEB} source file
- and break it into meaningful units. There are four such procedures:
- One simply skips to the next `\.{@@\ }' or `\.{@@*}' that begins a
- section; another passes over the \TEX/ text at the beginning of a
- section; the third passes over the \TEX/ text in a \CEE/ comment;
- and the last, which is the most interesting, gets the next token of
- a \CEE/ text. They all use the pointers |limit| and |loc| into
- the line of input currently being studied.
- @ Control codes in \.{CWEB}, which begin with `\.{@@}', are converted
- into a numeric code designed to simplify \.{CWEAVE}'s logic; for example,
- larger numbers are given to the control codes that denote more significant
- milestones, and the code of |new_section| should be the largest of
- all. Some of these numeric control codes take the place of |char|
- control codes that will not otherwise appear in the output of the
- scanning routines.
- @^ASCII code dependencies@>
- @d ignore 00 /* control code of no interest to \.{CWEAVE} */
- @d verbatim 02 /* takes the place of extended ASCII \.{\char2} */
- @d begin_short_comment 03 /* \CPLUSPLUS/ short comment */
- @d begin_comment '\t' /* tab marks will not appear */
- @d underline '\n' /* this code will be intercepted without confusion */
- @d noop 0177 /* takes the place of ASCII delete */
- @d xref_roman 0203 /* control code for `\.{@@^}' */
- @d xref_wildcard 0204 /* control code for `\.{@@:}' */
- @d xref_typewriter 0205 /* control code for `\.{@@.}' */
- @d TeX_string 0206 /* control code for `\.{@@t}' */
- @f TeX_string TeX
- @d ord 0207 /* control code for `\.{@@'}' */
- @d join 0210 /* control code for `\.{@@\&}' */
- @d thin_space 0211 /* control code for `\.{@@,}' */
- @d math_break 0212 /* control code for `\.{@@\v}' */
- @d line_break 0213 /* control code for `\.{@@/}' */
- @d big_line_break 0214 /* control code for `\.{@@\#}' */
- @d no_line_break 0215 /* control code for `\.{@@+}' */
- @d pseudo_semi 0216 /* control code for `\.{@@;}' */
- @d macro_arg_open 0220 /* control code for `\.{@@[}' */
- @d macro_arg_close 0221 /* control code for `\.{@@]}' */
- @d trace 0222 /* control code for `\.{@@0}', `\.{@@1}' and `\.{@@2}' */
- @d translit_code 0223 /* control code for `\.{@@l}' */
- @d output_defs_code 0224 /* control code for `\.{@@h}' */
- @d format_code 0225 /* control code for `\.{@@f}' and `\.{@@s}' */
- @d definition 0226 /* control code for `\.{@@d}' */
- @d begin_C 0227 /* control code for `\.{@@c}' */
- @d section_name 0230 /* control code for `\.{@@<}' */
- @d new_section 0231 /* control code for `\.{@@\ }' and `\.{@@*}' */
- @ Control codes are converted to .{CWEAVE}'s internal
- representation by means of the table |ccode|.
- @<Global...@>=
- eight_bits ccode[256]; /* meaning of a char following .{@@} */
- @ @<Set ini...@>=
- {int c; for (c=0; c<256; c++) ccode[c]=0;} /* every character starts out as |ignore| */
- ccode[' ']=ccode['\t']=ccode['\n']=ccode['\v']=ccode['\r']=ccode['\f']
- =ccode['*']=new_section; /* white space or a star after the at-sign begins a section */
- ccode['@@']='@@'; /* `quoted' at sign */
- ccode['=']=verbatim;
- ccode['d']=ccode['D']=definition;
- ccode['f']=ccode['F']=ccode['s']=ccode['S']=format_code;
- ccode['c']=ccode['C']=ccode['p']=ccode['P']=begin_C;
- ccode['t']=ccode['T']=TeX_string;
- ccode['l']=ccode['L']=translit_code;
- ccode['q']=ccode['Q']=noop;
- ccode['h']=ccode['H']=output_defs_code;
- ccode['&']=join; ccode['<']=ccode['(']=section_name;
- ccode['!']=underline; ccode['^']=xref_roman;
- ccode[':']=xref_wildcard; ccode['.']=xref_typewriter; ccode[',']=thin_space;
- ccode['|']=math_break; ccode['/']=line_break; ccode['#']=big_line_break;
- ccode['+']=no_line_break; ccode[';']=pseudo_semi;
- ccode['[']=macro_arg_open; ccode[']']=macro_arg_close;
- ccode['\'']=ord; /* the quote character must be escaped inside a character constant */
- @<Special control codes for debugging@>@;
- @ Users can write
- \.{@@2}, \.{@@1}, and \.{@@0} to turn tracing fully on, partly on,
- and off, respectively.
- @<Special control codes...@>=
- ccode['0']=ccode['1']=ccode['2']=trace;
- @ The |skip_limbo| routine is used on the first pass to skip through
- portions of the input that are not in any sections, i.e., that precede
- the first section. After this procedure has been called, the value of
- |input_has_ended| will tell whether or not a section has actually been found.
- There's a complication that we will postpone until later: If the \.{@@s}
- operation appears in limbo, we want to use it to adjust the default
- interpretation of identifiers.
- @<Predec...@>=
- void skip_limbo();
- @ @c
- void
- skip_limbo() {
- for (;;) {
- int c; /* internal code of the character that follows an at-sign */
- if (loc>limit && get_line()==0) return; /* the input is exhausted */
- limit[1]='@@'; /* a sentinel that stops the scan below */
- while (*loc!='@@') loc++; /* advance to the next at-sign */
- if (loc++ >limit) continue; /* only the sentinel was found; try the next line */
- c=ccode[(eight_bits)*loc++]; /* both characters have now been passed */
- if (c==new_section) return;
- if (c==noop) skip_restricted();
- else if (c==format_code) @<Process simple format in limbo@>;
- }
- }
- @ The |skip_TeX| routine is used on the first pass to skip through
- the \TEX/ code at the beginning of a section. It returns the next
- control code or `\.{\v}' found in the input. A |new_section| is
- assumed to exist at the very end of the file.
- @f skip_TeX TeX
- @c
- unsigned
- skip_TeX() /* pass over pure \TEX/ material */
- {
- for (;;) {
- if (loc>limit && get_line()==0) return(new_section); /* end of input */
- limit[1]='@@'; /* a sentinel that stops the scan below */
- while (!(*loc=='@@' || *loc=='|')) loc++; /* find an at-sign or a vertical bar */
- if (*loc++ =='|') return('|');
- if (loc<=limit) return(ccode[(eight_bits)*(loc++)]); /* a genuine control code */
- }
- }
- @*1 Inputting the next token.
- As stated above, \.{CWEAVE}'s most interesting lexical scanning routine is the
- |get_next| function that inputs the next token of \CEE/ input. However,
- |get_next| is not especially complicated.
- The result of |get_next| is either a |char| code for some special character,
- or it is a special code representing a pair of characters (e.g., `.{!=}'),
- or it is the numeric value computed by the |ccode|
- table, or it is one of the following special codes:
- \yskip\hang |identifier|: In this case the global variables |id_first| and
- |id_loc| will have been set to the beginning and ending-plus-one locations
- in the buffer, as required by the |id_lookup| routine.
- \yskip\hang |string|: The string will have been copied into the array
- |section_text|; |id_first| and |id_loc| are set as above (now they are
- pointers into |section_text|).
- \yskip\hang |constant|: The constant is copied into |section_text|, with
- slight modifications; |id_first| and |id_loc| are set.
- \yskip\noindent Furthermore, some of the control codes cause
- |get_next| to take additional actions:
- \yskip\hang |xref_roman|, |xref_wildcard|, |xref_typewriter|, |TeX_string|,
- |verbatim|: The values of |id_first| and |id_loc| will have been set to
- the beginning and ending-plus-one locations in the buffer.
- \yskip\hang |section_name|: In this case the global variable |cur_section| will
- point to the |byte_start| entry for the section name that has just been scanned.
- The value of |cur_section_char| will be |'('| if the section name was
- preceded by \.{@@(} instead of \.{@@<}.
- \yskip\noindent If |get_next| sees `\.{@@!}'
- it sets |xref_switch| to |def_flag| and goes on to the next token.
- @d constant 0200 /* \CEE/ constant */
- @d string 0201 /* \CEE/ string */
- @d identifier 0202 /* \CEE/ identifier or reserved word */
- @<Global...@>=
- name_pointer cur_section; /* name of section just scanned */
- char cur_section_char; /* the character just before that name */
- @ @<Include...@>=
- #include <ctype.h> /* definition of |isalpha|, |isdigit| and so on */
- #include <stdlib.h> /* definition of |exit| */
- @ As one might expect, |get_next| consists mostly of a big switch
- that branches to the various special cases that can arise.
- @d isxalpha(c) ((c)=='_') /* non-alpha character allowed in identifier */
- @d ishigh(c) ((eight_bits)(c)>0177)
- @^high-bit character handling@>
- @<Predecl...@>=
- eight_bits get_next();
- @ @c
- eight_bits
- get_next() /* produces the next input token */
- {@+eight_bits c; /* the current character */
- while (1) {
- @<Check if we're at the end of a preprocessor command@>;
- if (loc>limit && get_line()==0) return(new_section); /* end of input */
- c=*(loc++);
- if (xisdigit(c) || c=='\\' || c=='.') @<Get a constant@>@;
- else if (c=='\'' || c=='"' || (c=='L'&&(*loc=='\'' || *loc=='"'))@|
- || (c=='<' && sharp_include_line==1))
- @<Get a string@>@;
- else if (xisalpha(c) || isxalpha(c) || ishigh(c))
- @<Get an identifier@>@;
- else if (c=='@@') @<Get control code and possible section name@>@;
- else if (xisspace(c)) continue; /* ignore spaces and tabs */
- if (c=='#' && loc==buffer+1) @<Raise preprocessor flag@>; /* a sharp sign in
- the first column of the line */
- mistake: @<Compress two-symbol operator@>@;
- return(c); /* anything else is returned as it stands */
- }
- }
- @ Because preprocessor commands do not fit in with the rest of the syntax
- of \CEE/,
- we have to deal with them separately. One solution is to enclose such
- commands between special markers. Thus, when a \.\# is seen as the
- first character of a line, |get_next| returns a special code
- |left_preproc| and raises a flag |preprocessing|.
- We can use the same internal code number for |left_preproc| as we do
- for |ord|, since |get_next| changes |ord| into a string.
- @d left_preproc ord /* begins a preprocessor command */
- @d right_preproc 0217 /* ends a preprocessor command */
- @<Glob...@>=
- boolean preprocessing=0; /* are we scanning a preprocessor command? */
- @ @<Raise prep...@>= {
- preprocessing=1; /* the flag stays up until the end-of-line check lowers it */
- @<Check if next token is |include|@>;
- return (left_preproc); /* this marker opens the preprocessor command */
- }
- @ An additional complication is the freakish use of \.< and \.> to delimit
- a file name in lines that start with \.{\#include}. We must treat this file
- name as a string.
- @<Glob...@>=
- boolean sharp_include_line=0; /* are we scanning a |#include| line? */
- @ @<Check if next token is |include|@>=
- while (loc<=buffer_end-7 && xisspace(*loc)) loc++; /* pass over white space first */
- if (loc<=buffer_end-6 && strncmp(loc,"include",7)==0) sharp_include_line=1;
- /* the seven characters compared end no later than |buffer_end| */
- @ When we get to the end of a preprocessor line,
- we lower the flag and send a code |right_preproc|, unless
- the last character was a \.\\.
- @<Check if we're at...@>=
- while (loc==limit-1 && preprocessing && *loc=='\\') /* the line ends with a backslash */
- if (get_line()==0) return(new_section); /* still in preprocessor mode */
- if (loc>=limit && preprocessing) { /* the command ends with this line */
- preprocessing=sharp_include_line=0;
- return(right_preproc);
- }
- @ The following code assigns values to the combinations \.{++},
- \.{--}, \.{->}, \.{>=}, \.{<=}, \.{==}, \.{<<}, \.{>>}, \.{!=}, \.{\v\v}, and
- \.{\&\&}, and to the \CPLUSPLUS/
- combinations \.{...}, \.{::}, \.{.*} and \.{->*}.
- The compound assignment operators (e.g., \.{+=}) are
- treated as separate tokens.
- @d compress(c) if (loc++<=limit) return(c) /* |loc| steps past the second
- character in any case; |c| is returned unless |loc| was already beyond |limit| */
- @<Compress tw...@>=
- switch(c) { /* |c| is the first character and |*loc| the one after it */
- case '/': if (*loc=='*') {compress(begin_comment);}
- else if (*loc=='/') compress(begin_short_comment); break;
- case '+': if (*loc=='+') compress(plus_plus); break;
- case '-': if (*loc=='-') {compress(minus_minus);}
- else if (*loc=='>') if (*(loc+1)=='*') {loc++; compress(minus_gt_ast);}
- else compress(minus_gt); break; /* this |else| pairs with the test for the star */
- case '.': if (*loc=='*') {compress(period_ast);}
- else if (*loc=='.' && *(loc+1)=='.') {
- loc++; compress(dot_dot_dot); /* three characters in all */
- }
- break;
- case ':': if (*loc==':') compress(colon_colon); break;
- case '=': if (*loc=='=') compress(eq_eq); break;
- case '>': if (*loc=='=') {compress(gt_eq);}
- else if (*loc=='>') compress(gt_gt); break;
- case '<': if (*loc=='=') {compress(lt_eq);}
- else if (*loc=='<') compress(lt_lt); break;
- case '&': if (*loc=='&') compress(and_and); break;
- case '|': if (*loc=='|') compress(or_or); break;
- case '!': if (*loc=='=') compress(not_eq); break;
- }
- @ @<Get an identifier@>= {
- id_first=--loc; /* back up to the first character of the identifier */
- while (isalpha((eight_bits)*++loc) || isdigit((eight_bits)*loc)
- || isxalpha(*loc) || ishigh(*loc)); /* the casts keep characters with
- the high bit set from reaching the library routines as negative values */
- id_loc=loc; return(identifier); /* |id_loc| is one past the last character */
- }
- @ Different conventions are followed by \TEX/ and \CEE/ to express octal
- and hexadecimal numbers; it is reasonable to stick to each convention
- within its realm. Thus the \CEE/ part of a \.{CWEB} file has octals
- introduced by \.0 and hexadecimals by \.{0x}, but \.{CWEAVE} will print
- in italics or typewriter font, respectively, and introduced by single
- or double quotes. In order to simplify the \TEX/ macro used to print
- such constants, we replace some of the characters.
- Notice that in this section and the next, |id_first| and |id_loc|
- are pointers into the array |section_text|, not into |buffer|.
- @<Get a constant@>= {
- id_first=id_loc=section_text+1; /* the constant is assembled in |section_text| */
- if (*(loc-1)=='\\') {*id_loc++='~';
- while (xisdigit(*loc)) *id_loc++=*loc++;} /* octal constant */
- else if (*(loc-1)=='0') {
- if (*loc=='x' || *loc=='X') {*id_loc++='^'; loc++;
- while (xisxdigit(*loc)) *id_loc++=*loc++;} /* hex constant */
- else if (xisdigit(*loc)) {*id_loc++='~';
- while (xisdigit(*loc)) *id_loc++=*loc++;} /* octal constant */
- else goto dec; /* decimal constant */
- }
- else { /* decimal constant */
- if (*(loc-1)=='.' && !xisdigit(*loc)) goto mistake; /* not a constant */
- dec: *id_loc++=*(loc-1);
- while (xisdigit(*loc) || *loc=='.') *id_loc++=*loc++;
- if (*loc=='e' || *loc=='E') { /* float constant */
- *id_loc++='_'; loc++; /* an underscore stands for the exponent marker */
- if (*loc=='+' || *loc=='-') *id_loc++=*loc++;
- while (xisdigit(*loc)) *id_loc++=*loc++;
- }
- }
- while (*loc=='u' || *loc=='U' || *loc=='l' || *loc=='L'
- || *loc=='f' || *loc=='F') { /* each suffix letter is stored in upper case
- after a dollar sign */
- *id_loc++='$'; *id_loc++=toupper(*loc); loc++;
- }
- return(constant);
- }
- @ CEE/ strings and character constants, delimited by double and single
- quotes, respectively, can contain newlines or instances of their own
- delimiters if they are protected by a backslash. We follow this
- convention, but do not allow the string to be longer than |longest_name|.
- @<Get a string@>= {
- char delim = c; /* what started the string */
- id_first = section_text+1;
- id_loc = section_text;
- if (delim=='\'' && *(loc-2)=='@@') {*++id_loc='@@'; *++id_loc='@@';} /* the quote came from a control code; record a doubled at-sign first */
- *++id_loc=delim;
- if (delim=='L') { /* wide character constant */
- delim=*loc++; *++id_loc=delim;
- }
- if (delim=='<') delim='>'; /* for file names in |#include| lines */
- while (1) {
- if (loc>=limit) {
- if(*(limit-1)!='\\') { /* only a trailing backslash lets the string continue on the next line */
- err_print("! String didn't end"); loc=limit; break;
- @.String didn't end@>
- }
- if(get_line()==0) {
- err_print("! Input ended in middle of string"); loc=buffer; break;
- @.Input ended in middle of string@>
- }
- }
- if ((c=*loc++)==delim) {
- if (++id_loc<=section_text_end) *id_loc=c; /* store only while there is room */
- break;
- }
- if (c=='\\') if (loc>=limit) continue; /* backslash at end of line: fetch the next line */
- else if (++id_loc<=section_text_end) {
- *id_loc = '\\'; c=*loc++; /* keep the backslash and take the escaped character verbatim */
- }
- if (++id_loc<=section_text_end) *id_loc=c;
- }
- if (id_loc>=section_text_end) {
- printf("\n! String too long: ");
- @.String too long@>
- term_write(section_text+1,25);
- printf("..."); mark_error;
- }
- id_loc++;
- return(string);
- }
- @ After an .{@@} sign has been scanned, the next character tells us
- whether there is more work to do.
- @<Get control code and possible section name@>= {
- c=*loc++; /* the character after the at-sign selects the control code */
- switch(ccode[(eight_bits)c]) {
- case translit_code: err_print("! Use @@l in limbo only"); continue;
- @.Use @@l in limbo...@>
- case underline: xref_switch=def_flag; continue;
- case trace: tracing=c-'0'; continue; /* the character read becomes the tracing level */
- case xref_roman: case xref_wildcard: case xref_typewriter:
- case noop: case TeX_string: c=ccode[c]; skip_restricted(); return(c); /* |skip_restricted| sets |id_first| and |id_loc| */
- case section_name:
- @<Scan the section name and make |cur_section| point to it@>;
- case verbatim: @<Scan a verbatim string@>;
- case ord: @<Get a string@>;
- default: return(ccode[(eight_bits)c]);
- }
- }
- @ The occurrence of a section name sets |xref_switch| to zero,
- because the section name might (for example) follow &{int}.
- @<Scan the section name...@>= {
- char *k; /* pointer into |section_text| */
- cur_section_char=*(loc-1); /* remember the character just scanned */
- @<Put section name into |section_text|@>;
- if (k-section_text>3 && strncmp(k-2,"...",3)==0) /* does the name end with three dots? */
- cur_section=section_lookup(section_text+1,k-3,1); /* 1 indicates a prefix */
- else cur_section=section_lookup(section_text+1,k,0);
- xref_switch=0; return(section_name);
- }
- @ Section names are placed into the |section_text| array with consecutive spaces,
- tabs, and carriage-returns replaced by single spaces. There will be no
- spaces at the beginning or the end. (We set |section_text[0]=' '| to facilitate
- this, since the |section_lookup| routine uses |section_text[1]| as the first
- character of the name.)
- @<Set init...@>=section_text[0]=' ';
- @ @<Put section name...@>=
- k=section_text;
- while (1) {
- if (loc>limit && get_line()==0) {
- err_print("! Input ended in section name");
- @.Input ended in section name@>
- loc=buffer+1; break;
- }
- c=*loc;
- @<If end of name or erroneous control code, |break|@>;
- loc++; if (k<section_text_end) k++; /* |k| stops advancing once the buffer is full */
- if (xisspace(c)) {
- c=' '; if (*(k-1)==' ') k--; /* collapse a run of white space into one blank */
- }
- *k=c;
- }
- if (k>=section_text_end) {
- printf("\n! Section name too long: ");
- @.Section name too long@>
- term_write(section_text+1,25);
- printf("..."); mark_harmless;
- }
- if (*k==' ' && k>section_text) k--; /* drop a trailing blank */
- @ @<If end of name...@>=
- if (c=='@@') {
- c=*(loc+1); /* look at the character after the at-sign */
- if (c=='>') {
- loc+=2; break; /* the two-character terminator ends the name */
- }
- if (ccode[(eight_bits)c]==new_section) {
- err_print("! Section name didn't end"); break;
- @.Section name didn't end@>
- }
- if (c!='@@') {
- err_print("! Control codes are forbidden in section name"); break;
- @.Control codes are forbidden...@>
- }
- *(++k)='@@'; loc++; /* now |c==*loc| again */
- }
- @ This function skips over a restricted context at relatively high speed.
- @<Predecl...@>=
- void skip_restricted();
- @ @c
- void
- skip_restricted()
- {
- id_first=loc; *(limit+1)='@@'; /* the sentinel guarantees that the scan below stops */
- while (1) {
- while (*loc!='@@') loc++;
- id_loc=loc; /* tentative end of the control text */
- if (loc++>limit) { /* we stopped at the sentinel */
- err_print("! Control text didn't end"); loc=limit;
- @.Control text didn't end@>
- break;
- }
- if (*loc=='@@'&&loc<=limit) {loc++; continue;} /* a doubled at-sign is ordinary text; keep scanning */
- if (*loc++!='>')
- err_print("! Control codes are forbidden in control text");
- @.Control codes are forbidden...@>
- break;
- }
- }
- @ At the present point in the program we
- have |*(loc-1)==verbatim|; we set |id_first| to the beginning
- of the string itself, and |id_loc| to its ending-plus-one location in the
- buffer. We also set |loc| to the position just after the ending delimiter.
- @<Scan a verbatim string@>= {
- id_first=loc++; *(limit+1)='@@'; *(limit+2)='>'; /* plant a sentinel terminator just past |limit| */
- while (*loc!='@@' || *(loc+1)!='>') loc++; /* stop at the first at-sign that is followed by a greater-than sign */
- if (loc>=limit) err_print("! Verbatim string didn't end");
- @.Verbatim string didn't end@>
- id_loc=loc; loc+=2; /* step over the two-character terminator */
- return (verbatim);
- }
- @** Phase one processing.
- We now have accumulated enough subroutines to make it possible to carry out
- .{CWEAVE}'s first pass over the source file. If everything works right,
- both phase one and phase two of .{CWEAVE} will assign the same numbers to
- sections, and these numbers will agree with what .{CTANGLE} does.
- The global variable |next_control| often contains the most recent output of
- |get_next|; in interesting cases, this will be the control code that
- ended a section or part of a section.
- @<Global...@>=
- eight_bits next_control; /* control code waiting to be acting upon */
- @ The overall processing strategy in phase one has the following
- straightforward outline.
- @<Predecl...@>=
- void phase_one();
- @ @c
- void
- phase_one() {
- phase=1; reset_input(); section_count=0; /* no sections have been counted yet */
- skip_limbo(); change_exists=0;
- while (!input_has_ended)
- @<Store cross-reference data for the current section@>;
- changed_section[section_count]=change_exists;
- /* the index changes if anything does */
- phase=2; /* prepare for second phase */
- @<Print error messages about unused or undefined section names@>;
- }
- @ @<Store cross-reference data...@>=
- {
- if (++section_count==max_sections) overflow("section number");
- changed_section[section_count]=changing;
- /* it will become 1 if any line changes */
- if (*(loc-1)=='*' && show_progress) { /* the last character scanned was a star */
- printf("*%d",section_count);
- update_terminal; /* print a progress report */
- }
- @<Store cross-references in the TEX/ part of a section@>;
- @<Store cross-references in the definition part of a section@>;
- @<Store cross-references in the CEE/ part of a section@>;
- if (changed_section[section_count]) change_exists=1; /* remember that at least one section changed */
- }
- @ The |C_xref| subroutine stores references to identifiers in
- \CEE/ text material beginning with the current value of |next_control|
- and continuing until |next_control| is `\.\{' or `\.{\v}', or until the next
- ``milestone'' is passed (i.e., |next_control>=format_code|). If
- |next_control>=format_code| when |C_xref| is called, nothing will happen;
- but if |next_control=='|'| upon entry, the procedure assumes that this is
- the `\.{\v}' preceding \CEE/ text that is to be processed.
- The parameter |spec_ctrl| is used to change this behavior. In most cases
- |C_xref| is called with |spec_ctrl==ignore|, which triggers the default
- processing described above. If |spec_ctrl==section_name|, section names will
- be gobbled. This is used when \CEE/ text in the \TEX/ part or inside comments
- is parsed: It allows for section names to appear in \pb, but these
- strings will not be entered into the cross reference lists since they are not
- definitions of section names.
- The program uses the fact that our internal code numbers satisfy
- the relations |xref_roman==identifier+roman| and |xref_wildcard==identifier
- +wildcard| and |xref_typewriter==identifier+typewriter|,
- as well as |normal==0|.
- @<Predecl...@>=
- void C_xref();
- @ @c
- void
- C_xref( spec_ctrl ) /* makes cross-references for CEE/ identifiers */
- eight_bits spec_ctrl;
- {
- name_pointer p; /* a referenced name */
- while (next_control<format_code || next_control==spec_ctrl) {
- if (next_control>=identifier && next_control<=xref_typewriter) {
- if (next_control>identifier) @<Replace |"@@@@"| by |"@@"| @>@;
- p=id_lookup(id_first, id_loc,next_control-identifier); new_xref(p); /* the offset from |identifier| is passed as the third argument */
- }
- if (next_control==section_name) {
- section_xref_switch=cite_flag;
- new_section_xref(cur_section);
- }
- next_control=get_next();
- if (next_control=='|' || next_control==begin_comment ||
- next_control==begin_short_comment) return; /* the caller deals with bars and comments */
- }
- }
- @ The |outer_xref| subroutine is like |C_xref| except that it begins
- with |next_control!='|'| and ends with |next_control>=format_code|. Thus, it
- handles CEE/ text with embedded comments.
- @<Predecl...@>=
- void outer_xref();
- @ @c
- void
- outer_xref() /* extension of |C_xref| */
- {
- int bal; /* brace level in comment */
- while (next_control<format_code)
- if (next_control!=begin_comment && next_control!=begin_short_comment)
- C_xref(ignore);
- else {
- boolean is_long_comment=(next_control==begin_comment);
- bal=copy_comment(is_long_comment,1); next_control='|'; /* pretend that a bar was just seen */
- while (bal>0) { /* positive while the comment is still open */
- C_xref(section_name); /* do not reference section names in comments */
- if (next_control=='|') bal=copy_comment(is_long_comment,bal); /* resume the comment after the closing bar */
- else bal=0; /* an error message will occur in phase two */
- }
- }
- }
- @ In the \TEX/ part of a section, cross-reference entries are made only for
- the identifiers in \CEE/ texts enclosed in \pb, or for control texts
- enclosed in \.{@@\^}$\,\ldots\,$\.{@@>} or \.{@@.}$\,\ldots\,$\.{@@>}
- or \.{@@:}$\,\ldots\,$\.{@@>}.
- @<Store cross-references in the T...@>=
- while (1) {
- switch (next_control=skip_TeX()) {
- case translit_code: err_print("! Use @@l in limbo only"); continue;
- @.Use @@l in limbo...@>
- case underline: xref_switch=def_flag; continue;
- case trace: tracing=*(loc-1)-'0'; continue;
- case '|': C_xref(section_name); break;
- case xref_roman: case xref_wildcard: case xref_typewriter:
- case noop: case section_name:
- loc-=2; next_control=get_next(); /* scan to .{@@>} */
- if (next_control>=xref_roman && next_control<=xref_typewriter) {
- @<Replace |"@@@@"| by |"@@"| @>@;
- new_xref(id_lookup(id_first, id_loc,next_control-identifier));
- }
- break;
- }
- if (next_control>=format_code) break; /* leave the loop at the first code that is |format_code| or greater */
- }
- @ @<Replace |"@@@@"| by |"@@"| @>=
- {
- char *from; /* next character to read */
- char *to=id_first; /* next position to write */
- for (from=id_first; from<id_loc; from++) {
- if (*from=='@@') from++; /* skip the first at-sign of a pair */
- *to++=*from;
- }
- id_loc=to; /* the text is now shorter */
- for (; to<from; to++) *to=' '; /* clean up in case of error message display */
- }
- @ During the definition and CEE/ parts of a section, cross-references
- are made for all identifiers except reserved words. However, the right
- identifier in a format definition is not referenced, and the left
- identifier is referenced only if it has been explicitly
- underlined (preceded by .{@@!}).
- The TEX/ code in comments is, of course, ignored, except for
- CEE/ portions enclosed in pb; the text of a section name is skipped
- entirely, even if it contains pb constructions.
- The variables |lhs| and |rhs| point to the respective identifiers involved
- in a format definition.
- @<Global...@>=
- name_pointer lhs, rhs; /* pointers to |byte_start| for format identifiers */
- @ When we get to the following code we have |next_control>=format_code|.
- @<Store cross-references in the d...@>=
- while (next_control<=definition) { /* |format_code| or |definition| */
- if (next_control==definition) {
- xref_switch=def_flag; /* implied .{@@!} */
- next_control=get_next();
- } else @<Process a format definition@>;
- outer_xref(); /* runs until |next_control>=format_code| */
- }
- @ Error messages for improper format definitions will be issued in phase
- two. Our job in phase one is to define the |ilk| of a properly formatted
- identifier, and to remove cross-references to identifiers that we now
- discover should be unindexed.
- @<Process a form...@>= {
- next_control=get_next();
- if (next_control==identifier) {
- lhs=id_lookup(id_first, id_loc,normal); lhs->ilk=normal;
- if (xref_switch) new_xref(lhs); /* only when |xref_switch| is nonzero */
- next_control=get_next();
- if (next_control==identifier) {
- rhs=id_lookup(id_first, id_loc,normal);
- lhs->ilk=rhs->ilk; /* the left identifier takes on the ilk of the right one */
- if (unindexed(lhs)) { /* retain only underlined entries */
- xref_pointer q,r=NULL; /* |r| is the most recent entry that was kept */
- for (q=(xref_pointer)lhs->xref;q>xmem;q=q->xlink)
- if (q->num<def_flag) /* entries below |def_flag| are unlinked */
- if (r) r->xlink=q->xlink;
- else lhs->xref=(char*)q->xlink; /* nothing kept so far, so move the list head */
- else r=q;
- }
- next_control=get_next();
- }
- }
- }
- @ A much simpler processing of format definitions occurs when the
- definition is found in limbo.
- @<Process simple format in limbo@>=
- {
- if (get_next()!=identifier)
- err_print("! Missing left identifier of @@s");
- @.Missing left identifier...@>
- else {
- lhs=id_lookup(id_first,id_loc,normal);
- if (get_next()!=identifier)
- err_print("! Missing right identifier of @@s");
- @.Missing right identifier...@>
- else {
- rhs=id_lookup(id_first,id_loc,normal);
- lhs->ilk=rhs->ilk; /* the left identifier takes on the ilk of the right one */
- }
- }
- }
- @ Finally, when the TEX/ and definition parts have been treated, we have
- |next_control>=begin_C|.
- @<Store cross-references in the CEE/...@>=
- if (next_control<=section_name) { /* |begin_C| or |section_name| */
- if (next_control==begin_C) section_xref_switch=0;
- else {
- section_xref_switch=def_flag; /* this occurrence of the section name is a definition */
- if(cur_section_char=='(' && cur_section!=name_dir)
- set_file_flag(cur_section); /* the name was introduced with a left parenthesis */
- }
- do {
- if (next_control==section_name && cur_section!=name_dir)
- new_section_xref(cur_section);
- next_control=get_next(); outer_xref();
- } while ( next_control<=section_name);
- }
- @ After phase one has looked at everything, we want to check that each
- section name was both defined and used. The variable |cur_xref| will point
- to cross-references for the current section name of interest.
- @<Global...@>=
- xref_pointer cur_xref; /* temporary cross-reference pointer */
- boolean an_output; /* did |file_flag| precede |cur_xref|? */
- @ The following recursive procedure
- walks through the tree of section names and prints out anomalies.
- @^recursion@>
- @<Predecl...@>=
- void section_check();
- @ @c
- void
- section_check(p)
- name_pointer p; /* print anomalies in subtree |p| */
- {
- if (p) {
- section_check(p->llink); /* left subtree first */
- cur_xref=(xref_pointer)p->xref;
- if (cur_xref->num==file_flag) {an_output=1; cur_xref=cur_xref->xlink;} /* step past the file flag */
- else an_output=0;
- if (cur_xref->num <def_flag) { /* the first remaining entry is not a definition */
- printf("\n! Never defined: <"); print_section_name(p); putchar('>'); mark_harmless;
- @.Never defined: <section name>@>
- }
- while (cur_xref->num >=cite_flag) cur_xref=cur_xref->xlink; /* skip entries at or above |cite_flag| */
- if (cur_xref==xmem && !an_output) { /* nothing is left, so the name has no uses */
- printf("\n! Never used: <"); print_section_name(p); putchar('>'); mark_harmless;
- @.Never used: <section name>@>
- }
- section_check(p->rlink); /* then the right subtree */
- }
- }
- @ @<Print error messages about un...@>=section_check(root)
- @* Low-level output routines.
- The TEX/ output is supposed to appear in lines at most |line_length|
- characters long, so we place it into an output buffer. During the output
- process, |out_line| will hold the current line number of the line about to
- be output.
- @<Global...@>=
- char out_buf[line_length+1]; /* assembled characters */
- char *out_ptr; /* just after last character in |out_buf| */
- char *out_buf_end = out_buf+line_length; /* end of |out_buf| */
- int out_line; /* number of next line to be output */
- @ The |flush_buffer| routine empties the buffer up to a given breakpoint,
- and moves any remaining characters to the beginning of the next line.
- If the |per_cent| parameter is 1 a |'%'| is appended to the line
- that is being output; in this case the breakpoint |b| should be strictly
- less than |out_buf_end|. If the |per_cent| parameter is |0|,
- trailing blanks are suppressed.
- The characters emptied from the buffer form a new line of output;
- if the |carryover| parameter is true, a |"%"| in that line will be
- carried over to the next line (so that \TEX/ will ignore the completion
- of commented-out text).
- @d c_line_write(c) fflush(active_file),fwrite(out_buf+1,sizeof(char),c,active_file)
- @d tex_putc(c) putc(c,active_file)
- @d tex_new_line putc('\n',active_file)
- @d tex_printf(c) fprintf(active_file,c)
- @c
- void
- flush_buffer(b,per_cent,carryover)
- char *b; /* outputs from |out_buf+1| to |b|,where |b<=out_ptr| */
- boolean per_cent,carryover;
- {
- char *j; j=b; /* pointer into |out_buf| */
- if (! per_cent) /* remove trailing blanks */
- while (j>out_buf && *j==' ') j--;
- c_line_write(j-out_buf);
- if (per_cent) tex_putc('%');
- tex_new_line; out_line++;
- if (carryover)
- while (j>out_buf)
- if (*j--=='%' && (j==out_buf || *j!='\\')) { /* the line just written contains an unescaped percent sign */
- *b--='%'; break; /* so the leftover text will begin with one too */
- }
- if (b<out_ptr) memmove(out_buf+1,b+1,out_ptr-b); /* both regions lie in |out_buf| and may overlap */
- out_ptr-=b-out_buf;
- }
- @ When we are copying TEX/ source material, we retain line breaks
- that occur in the input, except that an empty line is not
- output when the TEX/ source line was nonempty. For example, a line
- of the TEX/ file that contains only an index cross-reference entry
- will not be copied. The |finish_line| routine is called just before
- |get_line| inputs a new line, and just after a line break token has
- been emitted during the output of translated CEE/ text.
- @c
- void
- finish_line() /* do this at the end of a line */
- {
- char *scan; /* pointer into |buffer| */
- if (out_ptr>out_buf) { /* output is pending, so emit it as a line */
- flush_buffer(out_ptr,0,0);
- return;
- }
- scan=buffer;
- while (scan<=limit) {
- if (!(xisspace(*scan))) return; /* the input line was not blank: emit nothing */
- scan++;
- }
- flush_buffer(out_buf,0,0); /* a blank input line yields an empty output line */
- }
- @ In particular, the |finish_line| procedure is called near the very
- beginning of phase two. We initialize the output variables in a slightly
- tricky way so that the first line of the output file will be
- `\.{\\input cwebmac}'.
- @<Set init...@>=
- out_ptr=out_buf+1; out_line=1; active_file=tex_file;
- *out_ptr='c'; tex_printf("\\input cwebma"); /* the final letter waits in |out_buf| and goes out with the first flush */
- @ When we wish to append one character |c| to the output buffer, we write
- `|out(c)|'; this will cause the buffer to be emptied if it was already
- full. If we want to append more than one character at once, we say
- |out_str(s)|, where |s| is a string containing the characters.
- A line break will occur at a space or after a single-nonletter
- TEX/ control sequence.
- @d out(c) {if (out_ptr>=out_buf_end) break_out(); *(++out_ptr)=c;}
- @c
- void
- out_str(s) /* output characters from |s| to end of string */
- char *s;
- {
- for (; *s; s++) out(*s); /* one |out| per character, stopping at the terminating null */
- }
- @ The |break_out| routine is called just before the output buffer is about
- to overflow. To make this routine a little faster, we initialize position
- 0 of the output buffer to `\.\\'; this character isn't really output.
- @<Set init...@>=
- out_buf[0]='\\'; /* position 0 is never output; it only stops the backward scan in |break_out| */
- @ A long line is broken at a blank space or just before a backslash that isn't
- preceded by another backslash. In the latter case, a |'%'| is output at
- the break.
- @<Predecl...@>=
- void break_out();
- @ @c
- void
- break_out() /* finds a way to break the output line */
- {
- char *k=out_ptr; /* pointer into |out_buf| */
- while (1) { /* scan backward from the end of the buffer */
- if (k==out_buf) @<Print warning message, break the line, |return|@>;
- if (*k==' ') {
- flush_buffer(k,0,1); return; /* break at the blank */
- }
- if (*(k--)=='\\' && *k!='\\') { /* we've decreased |k| */
- flush_buffer(k,1,1); return; /* break before the backslash and append a percent sign */
- }
- }
- }
- @ We get to this section only in the unusual case that the entire output line
- consists of a string of backslashes followed by a string of nonblank
- non-backslashes. In such cases it is almost always safe to break the
- line by putting a |'%'| just before the last character.
- @<Print warning message...@>=
- {
- printf("\n! Line had to be broken (output l. %d):\n",out_line);
- @.Line had to be broken@>
- term_write(out_buf+1, out_ptr-out_buf-1); /* show the offending line */
- new_line; mark_harmless;
- flush_buffer(out_ptr-1,1,1); return; /* break just before the last character */
- }
- @ Here is a macro that outputs a section number in decimal notation.
- The number to be converted by |out_section| is known to be less than
- |def_flag|, so it cannot have more than five decimal digits. If
- the section is changed, we output `\.{\\*}' just after the number.
- @c
- void
- out_section(n)
- sixteen_bits n;
- {
- char s[6]; /* room for five digits and the terminating null */
- sprintf(s,"%d",n); out_str(s);
- if(changed_section[n]) out_str ("\\*"); /* mark a changed section */
- @.\\*@>
- }
- @ The |out_name| procedure is used to output an identifier or index
- entry, enclosing it in braces.
- @c
- void
- out_name(p)
- name_pointer p;
- {
- char *k, *k_end=(p+1)->byte_start; /* pointers into |byte_mem| */
- out('{');
- for (k=p->byte_start; k<k_end; k++) {
- if (isxalpha(*k)) out('\\'); /* characters accepted by |isxalpha| are preceded by a backslash */
- out(*k);
- }
- out('}');
- }
- @* Routines that copy TEX/ material.
- During phase two, we use subroutines |copy_limbo|, |copy_TeX|, and
- |copy_comment| in place of the analogous |skip_limbo|, |skip_TeX|, and
- |skip_comment| that were used in phase one. (Well, |copy_comment|
- was actually written in such a way that it functions as |skip_comment|
- in phase one.)
- The |copy_limbo| routine, for example, takes \TEX/ material that is not
- part of any section and transcribes it almost verbatim to the output file.
- The use of `\.{@@}' signs is severely restricted in such material:
- `\.{@@@@}' pairs are replaced by singletons; `\.{@@l}' and `\.{@@q}' and
- `\.{@@s}' are interpreted.
- @c
- void
- copy_limbo()
- {
- char c;
- while (1) {
- if (loc>limit && (finish_line(), get_line()==0)) return;
- *(limit+1)='@@'; /* the sentinel stops the copy loop at the end of the line */
- while (*loc!='@@') out(*(loc++));
- if (loc++<=limit) { /* a real at-sign, not the sentinel */
- c=*loc++;
- if (ccode[(eight_bits)c]==new_section) break;
- switch (ccode[(eight_bits)c]) {
- case translit_code: out_str("\\ATL"); break;
- @.\\ATL@>
- case '@@': out('@@'); break;
- case noop: skip_restricted(); break;
- case format_code: if (get_next()==identifier) get_next();
- if (loc>=limit) get_line(); /* avoid blank lines in output */
- break; /* the operands of \.{@@s} are ignored on this pass */
- default: err_print("! Double @@ should be used in limbo");
- @.Double @@ should be used...@>
- out('@@');
- }
- }
- }
- }
- @ The |copy_TeX| routine processes the \TEX/ code at the beginning of a
- section; for example, the words you are now reading were copied in this
- way. It returns the next control code or `\.{\v}' found in the input.
- We don't copy spaces or tab marks into the beginning of a line. This
- makes the test for empty lines in |finish_line| work.
- @ @f copy_TeX TeX
- @c
- eight_bits
- copy_TeX()
- {
- char c; /* current character being copied */
- while (1) {
- if (loc>limit && (finish_line(), get_line()==0)) return(new_section);
- *(limit+1)='@@'; /* the sentinel stops the copy loop at the end of the line */
- while ((c=*(loc++))!='|' && c!='@@') {
- out(c);
- if (out_ptr==out_buf+1 && (xisspace(c))) out_ptr--; /* drop white space at the start of an output line */
- }
- if (c=='|') return('|');
- if (loc<=limit) return(ccode[(eight_bits)*(loc++)]); /* a real at-sign, not the sentinel */
- }
- }
- @ The |copy_comment| function issues a warning if more braces are opened than
- closed, and in the case of a more serious error it supplies enough
- braces to keep TEX/ from complaining about unbalanced braces.
- Instead of copying the TEX/ material
- into the output buffer, this function copies it into the token memory
- (in phase two only).
- The abbreviation |app_tok(t)| is used to append token |t| to the current
- token list, and it also makes sure that it is possible to append at least
- one further token without overflow.
- @d app_tok(c) {if (tok_ptr+2>tok_mem_end) overflow("token"); *(tok_ptr++)=c;}
- @<Predec...@>=
- int copy_comment();
- @ @c
- int copy_comment(is_long_comment,bal) /* copies TEX/ code in comments */
- boolean is_long_comment; /* is this a traditional CEE/ comment? */
- int bal; /* brace balance */
- {
- char c; /* current character being copied */
- while (1) {
- if (loc>limit) {
- if (is_long_comment) { /* a long comment may continue on the next line */
- if (get_line()==0) {
- err_print("! Input ended in mid-comment");
- @.Input ended in mid-comment@>
- loc=buffer+1; goto done;
- }
- }
- else { /* a short comment ends with its line */
- if (bal>1) err_print("! Missing } in comment");
- @.Missing } in comment@>
- goto done;
- }
- }
- c=*(loc++);
- if (c=='|') return(bal); /* hand the current balance back to the caller */
- if (is_long_comment) @<Check for end of comment@>;
- if (phase==2) { /* tokens are stored in phase two only */
- if (ishigh(c)) app_tok(quoted_char);
- app_tok(c);
- }
- @<Copy special things when |c=='@@', '\'|@>;
- if (c=='{') bal++;
- else if (c=='}') {
- if(bal>1) bal--;
- else {err_print("! Extra } in comment");
- @.Extra } in comment@>
- if (phase==2) tok_ptr--; /* discard the token just appended */
- }
- }
- }
- done:@<Clear |bal| and |return|@>;
- }
- @ @<Check for end of comment@>=
- if (c=='*' && *loc=='/') {
- loc++; /* step over the slash */
- if (bal>1) err_print("! Missing } in comment");
- @.Missing } in comment@>
- goto done;
- }
- @ @<Copy special things when |c=='@@'...@>=
- if (c=='@@') {
- if (*(loc++)!='@@') { /* only a doubled at-sign is allowed here */
- err_print("! Illegal use of @@ in comment");
- @.Illegal use of @@...@>
- loc-=2; if (phase==2) *(tok_ptr-1)=' '; goto done; /* back up to the at-sign and blank out its token */
- }
- }
- else if (c=='\\' && *loc!='@@')
- if (phase==2) app_tok(*(loc++)) else loc++; /* take the character after a backslash without examining it */
- @ We output
- enough right braces to keep TEX/ happy.
- @<Clear |bal|...@>=
- if (phase==2) while (bal-- >0) app_tok('}'); /* one closing brace for each unit of |bal| */
- return(0);
- @** Parsing.
- The most intricate part of .{CWEAVE} is its mechanism for converting
- CEE/-like code into TEX/ code, and we might as well plunge into this
- aspect of the program now. A ``bottom up'' approach is used to parse the
- CEE/-like material, since .{CWEAVE} must deal with fragmentary
- constructions whose overall ``part of speech'' is not known.
- At the lowest level, the input is represented as a sequence of entities
- that we shall call {it scraps}, where each scrap of information consists
- of two parts, its {it category} and its {it translation}. The category
- is essentially a syntactic class, and the translation is a token list that
- represents TEX/ code. Rules of syntax and semantics tell us how to
- combine adjacent scraps into larger ones, and if we are lucky an entire
- CEE/ text that starts out as hundreds of small scraps will join
- together into one gigantic scrap whose translation is the desired TEX/
- code. If we are unlucky, we will be left with several scraps that don't
- combine; their translations will simply be output, one by one.
- The combination rules are given as context-sensitive productions that are
- applied from left to right. Suppose that we are currently working on the
- sequence of scraps $s_1\,s_2\ldots s_n$. We try first to find the longest
- production that applies to an initial substring $s_1\,s_2\ldots\,$; but if
- no such productions exist, we try to find the longest production
- applicable to the next substring $s_2\,s_3\ldots\,$; and if that fails, we
- try to match $s_3\,s_4\ldots\,$, etc.
- A production applies if the category codes have a given pattern. For
- example, one of the productions (see rule~3) is
- $$\hbox{|exp| }\left\{\matrix{\hbox{|binop|}\cr\hbox{|unorbinop|}}\right\}
- \hbox{ |exp| }\RA\hbox{ |exp|}$$
- and it means that three consecutive scraps whose respective categories are
- |exp|, |binop| (or |unorbinop|),
- and |exp| are converted to one scrap whose category
- is |exp|. The translations of the original
- scraps are simply concatenated. The case of
- $$\hbox{|exp| |comma| |exp| $\RA$ |exp|} \hskip4emE_1C\,\\{opt}9\,E_2$$
- (rule 4) is only slightly more complicated:
- Here the resulting |exp| translation
- consists not only of the three original translations, but also of the
- tokens |opt| and 9 between the translations of the
- |comma| and the following |exp|.
- In the \TEX/ file, this will specify an optional line break after the
- comma, with penalty 90.
- At each opportunity the longest possible production is applied. For
- example, if the current sequence of scraps is |int_like| |cast|
- |lbrace|, rule 31 is applied; but if the sequence is |int_like| |cast|
- followed by anything other than |lbrace|, rule 32 takes effect.
- Translation rules such as `$E_1C\,\\{opt}9\,E_2$' above use subscripts
- to distinguish between translations of scraps whose categories have the
- same initial letter; these subscripts are assigned from left to right.
- @ Here is a list of the category codes that scraps can have.
- (A few others, like |int_like|, have already been defined; the
- |cat_name| array contains a complete list.)
- @d exp 1 /* denotes an expression, including perhaps a single identifier */
- @d unop 2 /* denotes a unary operator */
- @d binop 3 /* denotes a binary operator */
- @d unorbinop 4
- /* denotes an operator that can be unary or binary, depending on context */
- @d cast 5 /* denotes a cast */
- @d question 6 /* denotes a question mark and possibly the expressions flanking it */
- @d lbrace 7 /* denotes a left brace */
- @d rbrace 8 /* denotes a right brace */
- @d decl_head 9 /* denotes an incomplete declaration */
- @d comma 10 /* denotes a comma */
- @d lpar 11 /* denotes a left parenthesis or left bracket */
- @d rpar 12 /* denotes a right parenthesis or right bracket */
- @d prelangle 13 /* denotes `$<$' before we know what it is */
- @d prerangle 14 /* denotes `$>$' before we know what it is */
- @d langle 15 /* denotes `$<$' when it's used as angle bracket in a template */
- @d colcol 18 /* denotes `::' */
- @d base 19 /* denotes a colon that introduces a base specifier */
- @d decl 20 /* denotes a complete declaration */
- @d struct_head 21 /* denotes the beginning of a structure specifier */
- @d stmt 23 /* denotes a complete statement */
- @d function 24 /* denotes a complete function */
- @d fn_decl 25 /* denotes a function declarator */
- @d semi 27 /* denotes a semicolon */
- @d colon 28 /* denotes a colon */
- @d tag 29 /* denotes a statement label */
- @d if_head 30 /* denotes the beginning of a compound conditional */
- @d else_head 31 /* denotes a prefix for a compound statement */
- @d if_clause 32 /* pending .{if} together with a condition */
- @d lproc 35 /* begins a preprocessor command */
- @d rproc 36 /* ends a preprocessor command */
- @d insert 37 /* a scrap that gets combined with its neighbor */
- @d section_scrap 38 /* section name */
- @d dead 39 /* scrap that won't combine */
- @d begin_arg 58 /* .{@@[} */
- @d end_arg 59 /* .{@@]} */
- @<Glo...@>=
- char cat_name[256][12];
- eight_bits cat_index;
- @ @<Set in...@>=
- for (cat_index=0;cat_index<255;cat_index++) /* entries 0 through 254 get a default name */
- strcpy(cat_name[cat_index],"UNKNOWN");
- strcpy(cat_name[exp],"exp");
- strcpy(cat_name[unop],"unop");
- strcpy(cat_name[binop],"binop");
- strcpy(cat_name[unorbinop],"unorbinop");
- strcpy(cat_name[cast],"cast");
- strcpy(cat_name[question],"?");
- strcpy(cat_name[lbrace],"{"@q}@>);
- strcpy(cat_name[rbrace],@q{@>"}");
- strcpy(cat_name[decl_head],"decl_head");
- strcpy(cat_name[comma],",");
- strcpy(cat_name[lpar],"(");
- strcpy(cat_name[rpar],")");
- strcpy(cat_name[prelangle],"<");
- strcpy(cat_name[prerangle],">");
- strcpy(cat_name[langle],"\\<");
- strcpy(cat_name[colcol],"::");
- strcpy(cat_name[base],"\\:");
- strcpy(cat_name[decl],"decl");
- strcpy(cat_name[struct_head],"struct_head");
- strcpy(cat_name[stmt],"stmt");
- strcpy(cat_name[function],"function");
- strcpy(cat_name[fn_decl],"fn_decl");
- strcpy(cat_name[else_like],"else_like");
- strcpy(cat_name[semi],";");
- strcpy(cat_name[colon],":");
- strcpy(cat_name[tag],"tag");
- strcpy(cat_name[if_head],"if_head");
- strcpy(cat_name[else_head],"else_head");
- strcpy(cat_name[if_clause],"if()");
- strcpy(cat_name[lproc],"#{"@q}@>);
- strcpy(cat_name[rproc],@q{@>"#}");
- strcpy(cat_name[insert],"insert");
- strcpy(cat_name[section_scrap],"section");
- strcpy(cat_name[dead],"@@d");
- strcpy(cat_name[public_like],"public");
- strcpy(cat_name[operator_like],"operator");
- strcpy(cat_name[new_like],"new");
- strcpy(cat_name[catch_like],"catch");
- strcpy(cat_name[for_like],"for");
- strcpy(cat_name[do_like],"do");
- strcpy(cat_name[if_like],"if");
- strcpy(cat_name[raw_rpar],")?");
- strcpy(cat_name[raw_unorbin],"unorbinop?");
- strcpy(cat_name[const_like],"const");
- strcpy(cat_name[raw_int],"raw");
- strcpy(cat_name[int_like],"int");
- strcpy(cat_name[case_like],"case");
- strcpy(cat_name[sizeof_like],"sizeof");
- strcpy(cat_name[struct_like],"struct");
- strcpy(cat_name[typedef_like],"typedef");
- strcpy(cat_name[define_like],"define");
- strcpy(cat_name[begin_arg],"@@["@q]@>);
- strcpy(cat_name[end_arg],@q[@>"@@]");
- strcpy(cat_name[0],"zero");
- @ This code allows \.{CWEAVE} to display its parsing steps.
- @c
- void
- print_cat(c) /* symbolic printout of a category */
- eight_bits c; /* the category code, used as an index into |cat_name| */
- {
-   fputs(cat_name[c],stdout); /* the name is data, never a |printf| format */
- }
- @ The token lists for translated \TEX/ output contain some special control
- symbols as well as ordinary characters. These control symbols are
- interpreted by \.{CWEAVE} before they are written to the output file.
- \yskip\hang |break_space| denotes an optional line break or an en space;
- \yskip\hang |force| denotes a line break;
- \yskip\hang |big_force| denotes a line break with additional vertical space;
- \yskip\hang |preproc_line| denotes that the line will be printed flush left;
- \yskip\hang |opt| denotes an optional line break (with the continuation
- line indented two ems with respect to the normal starting position)---this
- code is followed by an integer |n|, and the break will occur with penalty
- $10n$;
- \yskip\hang |backup| denotes a backspace of one em;
- \yskip\hang |cancel| obliterates any |break_space|, |opt|, |force|, or
- |big_force| tokens that immediately precede or follow it and also cancels any
- |backup| tokens that follow it;
- \yskip\hang |indent| causes future lines to be indented one more em;
- \yskip\hang |outdent| causes future lines to be indented one less em.
- \yskip\noindent All of these tokens are removed from the \TEX/ output that
- comes from \CEE/ text between \pb\ signs; |break_space| and |force| and
- |big_force| become single spaces in this mode. The translation of other
- \CEE/ texts results in \TEX/ control sequences \.{\\1}, \.{\\2},
- \.{\\3}, \.{\\4}, \.{\\5}, \.{\\6}, \.{\\7}, \.{\\8}
- corresponding respectively to
- |indent|, |outdent|, |opt|, |backup|, |break_space|, |force|,
- |big_force| and |preproc_line|.
- However, a sequence of consecutive `\.\ ', |break_space|,
- |force|, and/or |big_force| tokens is first replaced by a single token
- (the maximum of the given ones).
- The token |math_rel| will be translated into
- \.{\\MRL\{}, and it will get a matching \.\} later.
- Other control sequences in the \TEX/ output will be
- `\.{\\\\\{}$\,\ldots\,$\.\}'
- surrounding identifiers, `\.{\\\&\{}$\,\ldots\,$\.\}' surrounding
- reserved words, `\.{\\.\{}$\,\ldots\,$\.\}' surrounding strings,
- `\.{\\C\{}$\,\ldots\,$\.\}$\,$|force|' surrounding comments, and
- `\.{\\X$n$:}$\,\ldots\,$\.{\\X}' surrounding section names, where
- |n| is the section number.
- @d math_rel 0206 /* starts a math relation (\.{\\MRL}) */
- @d big_cancel 0210 /* like |cancel|, also overrides spaces */
- @d cancel 0211 /* overrides |backup|, |break_space|, |force|, |big_force| */
- @d indent 0212 /* one more tab (\.{\\1}) */
- @d outdent 0213 /* one less tab (\.{\\2}) */
- @d opt 0214 /* optional break in mid-statement (\.{\\3}) */
- @d backup 0215 /* stick out one unit to the left (\.{\\4}) */
- @d break_space 0216 /* optional break between statements (\.{\\5}) */
- @d force 0217 /* forced break between statements (\.{\\6}) */
- @d big_force 0220 /* forced break with additional space (\.{\\7}) */
- @d preproc_line 0221 /* begin line without indentation (\.{\\8}) */
- @^high-bit character handling@>
- @d quoted_char 0222
- /* introduces a character token in the range |0200|--|0377| */
- @d end_translation 0223 /* special sentinel token at end of list */
- @d inserted 0224 /* sentinel to mark translations of inserts */
- @ The raw input is converted into scraps according to the following table,
- which gives category codes followed by the translations.
- \def\stars {\.{**}}%
- The symbol `\stars' stands for `\.{\\\&\{{\rm identifier}\}}',
- i.e., the identifier itself treated as a reserved word.
- The right-hand column is the so-called |mathness|, which is explained
- further below.
- An identifier |c| of length 1 is translated as \.{\\\v c} instead of
- as \.{\\\\\{c\}}. An identifier \.{CAPS} in all caps is translated as
- \.{\\.\{CAPS\}} instead of as \.{\\\\\{CAPS\}}. An identifier that has
- become a reserved word via |typedef| is translated with \.{\\\&} replacing
- \.{\\\\} and |raw_int| replacing |exp|.
- A string of length greater than 20 is broken into pieces of size at most~20
- with discretionary breaks in between.
- \yskip\halign{\quad#\hfil&\quad#\hfil&\quad\hfil#\hfil\cr
- \.{!=}&|binop|: \.{\\I}&yes\cr
- \.{<=}&|binop|: \.{\\Z}&yes\cr
- \.{>=}&|binop|: \.{\\G}&yes\cr
- \.{==}&|binop|: \.{\\E}&yes\cr
- \.{\&\&}&|binop|: \.{\\W}&yes\cr
- \.{\v\v}&|binop|: \.{\\V}&yes\cr
- \.{++}&|binop|: \.{\\PP}&yes\cr
- \.{--}&|binop|: \.{\\MM}&yes\cr
- \.{->}&|binop|: \.{\\MG}&yes\cr
- \.{>>}&|binop|: \.{\\GG}&yes\cr
- \.{<<}&|binop|: \.{\\LL}&yes\cr
- \.{::}&|colcol|: \.{\\DC}&maybe\cr
- \.{.*}&|binop|: \.{\\PA}&yes\cr
- \.{->*}&|binop|: \.{\\MGA}&yes\cr
- \.{...}&|exp|: \.{\\,\\ldots\\,}&yes\cr
- \."string\."&|exp|: \.{\\.\{}string with special characters quoted\.\}&maybe\cr
- \.{@@=}string\.{@@>}&|exp|: \.{\\vb\{}string with special characters
- quoted\.\}&maybe\cr
- \.{@@'7'}&|exp|: \.{\\.\{@@'7'\}}&maybe\cr
- \.{077} or \.{\\77}&|exp|: \.{\\T\{\\\~77\}}&maybe\cr
- \.{0x7f}&|exp|: \.{\\T\{\\\^7f\}}&maybe\cr
- \.{77}&|exp|: \.{\\T\{77\}}&maybe\cr
- \.{77L}&|exp|: \.{\\T\{77\\\$L\}}&maybe\cr
- \.{0.1E5}&|exp|: \.{\\T\{0.1\\\_5\}}&maybe\cr
- \.+&|unorbinop|: \.+&yes\cr
- \.-&|unorbinop|: \.-&yes\cr
- \.*&|raw_unorbin|: \.*&yes\cr
- \./&|binop|: \./&yes\cr
- \.<&|prelangle|: \.{\\langle}&yes\cr
- \.=&|binop|: \.{\\K}&yes\cr
- \.>&|prerangle|: \.{\\rangle}&yes\cr
- \..&|binop|: \..&yes\cr
- \.{\v}&|binop|: \.{\\OR}&yes\cr
- \.\^&|binop|: \.{\\XOR}&yes\cr
- \.\%&|binop|: \.{\\MOD}&yes\cr
- \.?&|question|: \.{\\?}&yes\cr
- \.!&|unop|: \.{\\R}&yes\cr
- \.\~&|unop|: \.{\\CM}&yes\cr
- \.\&&|raw_unorbin|: \.{\\AND}&yes\cr
- \.(&|lpar|: \.(&maybe\cr
- \.[&|lpar|: \.[&maybe\cr
- \.)&|raw_rpar|: \.)&maybe\cr
- \.]&|raw_rpar|: \.]&maybe\cr
- \.\{&|lbrace|: \.\{&yes\cr
- \.\}&|rbrace|: \.\}&yes\cr
- \.,&|comma|: \.,&yes\cr
- \.;&|semi|: \.;&maybe\cr
- \.:&|colon|: \.:&maybe\cr
- \.\# (within line)&|unorbinop|: \.{\\\#}&yes\cr
- \.\# (at beginning)&|lproc|: |force| |preproc_line| \.{\\\#}&no\cr
- end of \.\# line&|rproc|: |force|&no\cr
- identifier&|exp|: \.{\\\\\{}identifier with underlines quoted\.\}&maybe\cr
- \.{asm}&|sizeof_like|: \stars&maybe\cr
- \.{auto}&|int_like|: \stars&maybe\cr
- \.{break}&|case_like|: \stars&maybe\cr
- \.{case}&|case_like|: \stars&maybe\cr
- \.{catch}&|catch_like|: \stars&maybe\cr
- \.{char}&|raw_int|: \stars&maybe\cr
- \.{class}&|struct_like|: \stars&maybe\cr
- \.{clock\_t}&|raw_int|: \stars&maybe\cr
- \.{const}&|const_like|: \stars&maybe\cr
- \.{continue}&|case_like|: \stars&maybe\cr
- \.{default}&|case_like|: \stars&maybe\cr
- \.{define}&|define_like|: \stars&maybe\cr
- \.{defined}&|sizeof_like|: \stars&maybe\cr
- \.{delete}&|sizeof_like|: \stars&maybe\cr
- \.{div\_t}&|raw_int|: \stars&maybe\cr
- \.{do}&|do_like|: \stars&maybe\cr
- \.{double}&|raw_int|: \stars&maybe\cr
- \.{elif}&|if_like|: \stars&maybe\cr
- \.{else}&|else_like|: \stars&maybe\cr
- \.{endif}&|if_like|: \stars&maybe\cr
- \.{enum}&|struct_like|: \stars&maybe\cr
- \.{error}&|if_like|: \stars&maybe\cr
- \.{extern}&|int_like|: \stars&maybe\cr
- \.{FILE}&|raw_int|: \stars&maybe\cr
- \.{float}&|raw_int|: \stars&maybe\cr
- \.{for}&|for_like|: \stars&maybe\cr
- \.{fpos\_t}&|raw_int|: \stars&maybe\cr
- \.{friend}&|int_like|: \stars&maybe\cr
- \.{goto}&|case_like|: \stars&maybe\cr
- \.{if}&|if_like|: \stars&maybe\cr
- \.{ifdef}&|if_like|: \stars&maybe\cr
- \.{ifndef}&|if_like|: \stars&maybe\cr
- \.{include}&|if_like|: \stars&maybe\cr
- \.{inline}&|int_like|: \stars&maybe\cr
- \.{int}&|raw_int|: \stars&maybe\cr
- \.{jmp\_buf}&|raw_int|: \stars&maybe\cr
- \.{ldiv\_t}&|raw_int|: \stars&maybe\cr
- \.{line}&|if_like|: \stars&maybe\cr
- \.{long}&|raw_int|: \stars&maybe\cr
- \.{new}&|new_like|: \stars&maybe\cr
- \.{NULL}&|exp|: \.{\\NULL}&yes\cr
- \.{offsetof}&|sizeof_like|: \stars&maybe\cr
- \.{operator}&|operator_like|: \stars&maybe\cr
- \.{pragma}&|if_like|: \stars&maybe\cr
- \.{private}&|public_like|: \stars&maybe\cr
- \.{protected}&|public_like|: \stars&maybe\cr
- \.{ptrdiff\_t}&|raw_int|: \stars&maybe\cr
- \.{public}&|public_like|: \stars&maybe\cr
- \.{register}&|int_like|: \stars&maybe\cr
- \.{return}&|case_like|: \stars&maybe\cr
- \.{short}&|raw_int|: \stars&maybe\cr
- \.{sig\_atomic\_t}&|raw_int|: \stars&maybe\cr
- \.{signed}&|raw_int|: \stars&maybe\cr
- \.{size\_t}&|raw_int|: \stars&maybe\cr
- \.{sizeof}&|sizeof_like|: \stars&maybe\cr
- \.{static}&|int_like|: \stars&maybe\cr
- \.{struct}&|struct_like|: \stars&maybe\cr
- \.{switch}&|if_like|: \stars&maybe\cr
- \.{template}&|int_like|: \stars&maybe\cr
- \.{TeX}&|exp|: \.{\\TeX}&yes\cr
- \.{this}&|exp|: \.{\\this}&yes\cr
- \.{throw}&|case_like|: \stars&maybe\cr
- \.{time\_t}&|raw_int|: \stars&maybe\cr
- \.{try}&|else_like|: \stars&maybe\cr
- \.{typedef}&|typedef_like|: \stars&maybe\cr
- \.{undef}&|if_like|: \stars&maybe\cr
- \.{union}&|struct_like|: \stars&maybe\cr
- \.{unsigned}&|raw_int|: \stars&maybe\cr
- \.{va\_dcl}&|decl|: \stars&maybe\cr
- \.{va\_list}&|raw_int|: \stars&maybe\cr
- \.{virtual}&|int_like|: \stars&maybe\cr
- \.{void}&|raw_int|: \stars&maybe\cr
- \.{volatile}&|const_like|: \stars&maybe\cr
- \.{wchar\_t}&|raw_int|: \stars&maybe\cr
- \.{while}&|if_like|: \stars&maybe\cr
- \.{@@,}&|insert|: \.{\\,}&maybe\cr
- \.{@@\v}&|insert|: |opt| \.0&maybe\cr
- \.{@@/}&|insert|: |force|&no\cr
- \.{@@\#}&|insert|: |big_force|&no\cr
- \.{@@+}&|insert|: |big_cancel| \.{\{\}} |break_space|
- \.{\{\}} |big_cancel|&no\cr
- \.{@@;}&|semi|: &maybe\cr
- \.{@@[@q]@>}&|begin_arg|: &maybe\cr
- \.{@q[@>@@]}&|end_arg|: &maybe\cr
- \.{@@\&}&|insert|: \.{\\J}&maybe\cr
- \.{@@h}&|insert|: |force| \.{\\ATH} |force|&no\cr
- \.{@@<}\thinspace section name\thinspace\.{@@>}&|section_scrap|:
- \.{\\X}$n$\.:translated section name\.{\\X}&maybe\cr
- \.{@@(@q)@>}\thinspace section name\thinspace\.{@@>}&|section_scrap|:
- \.{\\X}$n$\.{:\\.\{}section name with special characters
- quoted\.{\ \}\\X}&maybe\cr
- \.{/*}comment\.{*/}&|insert|: |cancel|
- \.{\\C\{}translated comment\.\} |force|&no\cr
- \.{//}comment&|insert|: |cancel|
- \.{\\SHC\{}translated comment\.\} |force|&no\cr
- }
- The construction \.{@@t}\thinspace stuff/\thinspace\.{@@>} contributes
- \.{\\hbox\{}\thinspace stuff/\thinspace\.\} to the following scrap.
- @i prod.w
- @* Implementing the productions.
- More specifically, a scrap is a structure consisting of a category
- |cat| and a |text_pointer| |trans|, which points to the translation in
- |tok_start|. When \CEE/ text is to be processed with the grammar above,
- we form an array |scrap_info| containing the initial scraps.
- Our production rules have the nice property that the right-hand side is never
- longer than the left-hand side. Therefore it is convenient to use sequential
- allocation for the current sequence of scraps. Five pointers are used to
- manage the parsing:
- \yskip\hang |pp| is a pointer into |scrap_info|. We will try to match
- the category codes |pp->cat,@,@,(pp+1)->cat|$,\,\,\ldots\,$
- to the left-hand sides of productions.
- \yskip\hang |scrap_base|, |lo_ptr|, |hi_ptr|, and |scrap_ptr| are such that
- the current sequence of scraps appears in positions |scrap_base| through
- |lo_ptr| and |hi_ptr| through |scrap_ptr|, inclusive, in the |cat| and
- |trans| arrays. Scraps located between |scrap_base| and |lo_ptr| have
- been examined, while those in positions |>=hi_ptr| have not yet been
- looked at by the parsing process.
- \yskip\noindent Initially |scrap_ptr| is set to the position of the final
- scrap to be parsed, and it doesn't change its value. The parsing process
- makes sure that |lo_ptr>=pp+3|, since productions have as many as four terms,
- by moving scraps from |hi_ptr| to |lo_ptr|. If there are
- fewer than |pp+3| scraps left, the positions up to |pp+3| are filled with
- blanks that will not match in any productions. Parsing stops when
- |pp==lo_ptr+1| and |hi_ptr==scrap_ptr+1|.
- Since the |scrap| structure will later be used for other purposes, we
- declare its last element as a union.
- @<Type...@>=
- typedef struct {
- eight_bits cat; /* category code of the scrap */
- eight_bits mathness; /* low two bits: left boundary; next two bits: right boundary */
- union {
- text_pointer Trans; /* the translation; reached through the |trans| macro */
- @<Rest of |trans_plus| union@>@;
- } trans_plus;
- } scrap;
- typedef scrap *scrap_pointer; /* pointer to a |scrap| */
- @ @d trans trans_plus.Trans /* translation texts of scraps */
- @<Global...@>=
- scrap scrap_info[max_scraps]; /* memory array for scraps */
- scrap_pointer scrap_info_end=scrap_info+max_scraps -1; /* last element of |scrap_info| */
- scrap_pointer pp; /* current position for reducing productions */
- scrap_pointer scrap_base; /* beginning of the current scrap sequence */
- scrap_pointer scrap_ptr; /* ending of the current scrap sequence */
- scrap_pointer lo_ptr; /* last scrap that has been examined */
- scrap_pointer hi_ptr; /* first scrap that has not been examined */
- scrap_pointer max_scr_ptr; /* largest value assumed by |scrap_ptr| */
- @ @<Set init...@>=
- scrap_base=scrap_info+1; /* scrap sequences begin at position 1 of |scrap_info| */
- max_scr_ptr=scrap_ptr=scrap_info; /* |scrap_ptr| precedes |scrap_base|: the sequence is empty */
- @ Token lists in |@!tok_mem| are composed of the following kinds of
- items for \TEX/ output.
- \yskip\item{$\bullet$}Character codes and special codes like |force| and
- |math_rel| represent themselves;
- \item{$\bullet$}|id_flag+p| represents \.{\\\\\{{\rm identifier $p$}\}};
- \item{$\bullet$}|res_flag+p| represents \.{\\\&\{{\rm identifier $p$}\}};
- \item{$\bullet$}|section_flag+p| represents section name |p|;
- \item{$\bullet$}|tok_flag+p| represents token list number |p|;
- \item{$\bullet$}|inner_tok_flag+p| represents token list number |p|, to be
- translated without line-break controls.
- @d id_flag 10240 /* signifies an identifier; the other flags are multiples of it */
- @d res_flag 2*id_flag /* signifies a reserved word */
- @d section_flag 3*id_flag /* signifies a section name */
- @d tok_flag 4*id_flag /* signifies a token list */
- @d inner_tok_flag 5*id_flag /* signifies a token list in `\pb' */
- /* NOTE(review): the products above are unparenthesized; confirm that no use
- places them next to an operator binding tighter than |*| */
- @c
- void
- print_text(p) /* prints a token list for debugging; not used in |main| */
- text_pointer p; /* the text whose tokens are to be shown */
- {
-   token_pointer j; /* index into |tok_mem| */
-   sixteen_bits r; /* remainder of token after the flag has been stripped off */
-   if (p>=text_ptr) printf("BAD"); /* |p| lies at or beyond |text_ptr| */
-   else for (j=*p; j<*(p+1); j++) { /* the tokens of |p| stop where the next text starts */
-     r=*j%id_flag;
-     switch (*j/id_flag) { /* the quotient tells which flag was added */
-       case 1: printf("\\\\{"@q}@>); print_id((name_dir+r)); printf(@q{@>"}");
-         break; /* |id_flag|: two backslashes and a brace */
-       case 2: printf("\\&{"@q}@>); print_id((name_dir+r)); printf(@q{@>"}");
-         break; /* |res_flag|: backslash, ampersand, brace */
-       case 3: printf("<"); print_section_name((name_dir+r)); printf(">");
-         break; /* |section_flag| */
-       case 4: printf("[[%d]]",r); break; /* |tok_flag| */
-       case 5: printf("|[[%d]]|",r); break; /* |inner_tok_flag| */
-       default: @<Print token |r| in symbolic form@>;
-     }
-   }
-   fflush(stdout);
- }
- @ @<Print token |r|...@>=
- switch (r) { /* |r| is a plain character or one of the special control codes */
-   case math_rel: printf("\\mathrel{"@q}@>); break; /* backslash doubled for \CEE/ */
-   case big_cancel: printf("[ccancel]"); break;
-   case cancel: printf("[cancel]"); break;
-   case indent: printf("[indent]"); break;
-   case outdent: printf("[outdent]"); break;
-   case backup: printf("[backup]"); break;
-   case opt: printf("[opt]"); break;
-   case break_space: printf("[break]"); break;
-   case force: printf("[force]"); break;
-   case big_force: printf("[fforce]"); break;
-   case preproc_line: printf("[preproc]"); break;
-   case quoted_char: j++; printf("[%o]",(unsigned)*j); break;
-     /* the following token is consumed here and shown in octal */
-   case end_translation: printf("[quit]"); break;
-   case inserted: printf("[inserted]"); break;
-   default: putxchar(r);
- }
- @ The production rules listed above are embedded directly into \.{CWEAVE},
- since it is easier to do this than to write an interpretive system
- that would handle production systems in general. Several macros are defined
- here so that the program for each production is fairly short.
- All of our productions conform to the general notion that some |k|
- consecutive scraps starting at some position |j| are to be replaced by a
- single scrap of some category |c| whose translation is composed from the
- translations of the disappearing scraps. After this production has been
- applied, the production pointer |pp| should change by an amount |d|. Such
- a production can be represented by the quadruple |(j,k,c,d)|. For example,
- the production `|exp@,comma@,exp| $\RA$ |exp|' would be represented by
- `|(pp,3,exp,-2)|'; in this case the pointer |pp| should decrease by 2
- after the production has been applied, because some productions with
- |exp| in their second or third positions might now match,
- but no productions have
- |exp| in the fourth position of their left-hand sides. Note that
- the value of |d| is determined by the whole collection of productions, not
- by an individual one.
- The determination of |d| has been
- done by hand in each case, based on the full set of productions but not on
- the grammar of \CEE/ or on the rules for constructing the initial
- scraps.
- We also attach a serial number to each production, so that additional
- information is available when debugging. For example, the program below
- contains the statement `|reduce(pp,3,exp,-2,4)|' when it implements
- the production just mentioned.
- Before calling |reduce|, the program should have appended the tokens of
- the new translation to the |tok_mem| array. We commonly want to append
- copies of several existing translations, and macros are defined to
- simplify these common cases. For example, \\{app2}|(pp)| will append the
- translations of two consecutive scraps, |pp->trans| and |(pp+1)->trans|, to
- the current token list. If the entire new translation is formed in this
- way, we write `|squash(j,k,c,d,n)|' instead of `|reduce(j,k,c,d,n)|'. For
- example, `|squash(pp,3,exp,-2,3)|' is an abbreviation for `\\{app3}|(pp);
- reduce(pp,3,exp,-2,3)|'.
- A couple more words of explanation:
- Both |big_app| and |app| append a token (while |big_app1| to |big_app4|
- append the specified number of scrap translations) to the current token list.
- The difference between |big_app| and |app| is simply that |big_app|
- checks whether there can be a conflict between math and non-math
- tokens, and intercalates a `\.{\$}' token if necessary. When in
- doubt what to use, use |big_app|.
- The |mathness| is an attribute of scraps that says whether they are
- to be printed in a math mode context or not. It is separate from the
- ``part of speech'' (the |cat|) because to make each |cat| have
- a fixed |mathness| (as in the original \.{WEAVE}) would multiply the
- number of necessary production rules.
- The low two bits (i.e. |mathness % 4|) control the left boundary.
- (We need two bits because we allow cases |yes_math|, |no_math| and
- |maybe_math|, which can go either way.)
- The next two bits (i.e. |mathness / 4|) control the right boundary.
- If we combine two scraps and the right boundary of the first has
- a different mathness from the left boundary of the second, we
- insert a \.{\$} in between. Similarly, if at printing time some
- irreducible scrap has a |yes_math| boundary the scrap gets preceded
- or followed by a \.{\$}. The left boundary is |maybe_math| if and
- only if the right boundary is.
- The code below is an exact translation of the production rules into
- \CEE/, using such macros, and the reader should have no difficulty
- understanding the format by comparing the code with the symbolic
- productions as they were listed earlier.
- @d no_math 2 /* should be in horizontal mode */
- @d yes_math 1 /* should be in math mode */
- @d maybe_math 0 /* works in either horizontal or math mode */
- @d big_app2(a) big_app1(a);big_app1(a+1) /* append scraps |a| and |a+1| */
- @d big_app3(a) big_app2(a);big_app1(a+2) /* append three consecutive scraps */
- @d big_app4(a) big_app3(a);big_app1(a+3) /* append four consecutive scraps */
- @d app(a) *(tok_ptr++)=a /* store token |a| and advance |tok_ptr|; no mathness check */
- @d app1(a) *(tok_ptr++)=tok_flag+(int)((a)->trans-tok_start)
- /* store a |tok_flag| reference to the translation of scrap |a| */
- /* NOTE(review): |big_app2| to |big_app4| expand to several statements, so
- they must be enclosed in braces when used as the body of an |if| or |else| */
- @<Global...@>=
- int cur_mathness, init_mathness;
- /* |cur_mathness| is updated by |big_app| and |big_app1| as tokens are appended;
- |init_mathness| is set by them while |cur_mathness| is still |maybe_math| */
- @ @c
- void
- app_str(s) /* append every character of the string |s| as a token */
- char *s;
- {
-   char *cursor; /* the character about to be appended */
-   for (cursor=s; *cursor!='\0'; cursor++) app_tok(*cursor);
- }
- void
- big_app(a) /* append token |a|, switching math mode beforehand if needed */
- token a;
- {
-   int wanted; /* the mathness that |a| calls for */
-   int clash; /* the current mathness that conflicts with |wanted| */
-   char *switcher; /* what to append when the two conflict */
-   if (a!=' ' && (a<big_cancel || a>big_force)) { /* math token */
-     wanted=yes_math; clash=no_math; switcher="${}";
-   }
-   else { /* non-math token */
-     wanted=no_math; clash=yes_math; switcher="{}$";
-   }
-   if (cur_mathness==maybe_math) init_mathness=wanted;
-   else if (cur_mathness==clash) app_str(switcher);
-   cur_mathness=wanted;
-   app(a);
- }
- void
- big_app1(a) /* append the translation of scrap |a|, reconciling mathness */
- scrap_pointer a;
- {
-   int left=a->mathness % 4; /* left boundary */
-   if (left==no_math || left==yes_math) {
-     if (cur_mathness==maybe_math) init_mathness=left;
-     else if (left==no_math) {
-       if (cur_mathness==yes_math) app_str("{}$");
-     }
-     else if (cur_mathness==no_math) app_str("${}");
-     cur_mathness=a->mathness / 4; /* right boundary */
-   } /* any other left boundary, in particular |maybe_math|, changes nothing */
-   app1(a); /* the same token that |app| would store for this scrap */
- }
- @ Let us consider the big switch for productions now, before looking
- at its context. We want to design the program so that this switch
- works, so we might as well not keep ourselves in suspense about exactly what
- code needs to be provided with a proper environment.
- @d cat1 (pp+1)->cat /* category of the scrap just after |pp| */
- @d cat2 (pp+2)->cat /* category two scraps after |pp| */
- @d cat3 (pp+3)->cat /* category three scraps after |pp| */
- @d lhs_not_simple (pp->cat!=semi && pp->cat!=raw_int && pp->cat!=raw_unorbin
- && pp->cat!=raw_rpar && pp->cat!=const_like)
- /* true unless the scrap at |pp| has one of the five categories listed */
- @<Match a production at |pp|, or increase |pp| if there is no match@>= {
- if (cat1==end_arg && lhs_not_simple)
- if (pp->cat==begin_arg) squash(pp,2,exp,-2,110);
- else squash(pp,2,end_arg,-1,111);
- else if (cat1==insert) squash(pp,2,pp->cat,-2,0);
- else if (cat2==insert) squash(pp+1,2,(pp+1)->cat,-1,0);
- else if (cat3==insert) squash(pp+2,2,(pp+2)->cat,0,0);
- else
- switch (pp->cat) {
- case exp: @<Cases for |exp|@>; @+break;
- case lpar: @<Cases for |lpar|@>; @+break;