preproc.c
资源名称:nasm-0.98.zip [点击查看]
上传用户:yuppie_zhu
上传日期:2007-01-08
资源大小:535k
文件大小:83k
源码类别:
编译器/解释器
开发平台:
C/C++
- /* preproc.c macro preprocessor for the Netwide Assembler
- *
- * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
- * Julian Hall. All rights reserved. The software is
- * redistributable under the licence given in the file "Licence"
- * distributed in the NASM archive.
- *
- * initial version 18/iii/97 by Simon Tatham
- */
- /* Typical flow of text through preproc
- *
- * pp_getline gets tokenised lines, either
- *
- * from a macro expansion
- *
- * or
- * {
- * read_line gets raw text from stdmacpos, or predef, or current input file
- * tokenise converts to tokens
- * }
- *
- * expand_mmac_params is used to expand %1 etc., unless a macro is being
- * defined or a false conditional is being processed
- * (%0, %1, %+1, %-1, %%foo)
- *
- * do_directive checks for directives
- *
- * expand_smacro is used to expand single line macros
- *
- * expand_mmacro is used to expand multi-line macros
- *
- * detoken is used to convert the line back to text
- */
- #include <stdio.h>
- #include <stdlib.h>
- #include <stddef.h>
- #include <string.h>
- #include <ctype.h>
- #include <limits.h>
- #include "nasm.h"
- #include "nasmlib.h"
/*
 * Short type names for the preprocessor's record types. They are
 * declared up front so that the structure definitions which follow
 * can point at one another regardless of definition order.
 */
typedef struct SMacro SMacro;
typedef struct MMacro MMacro;
typedef struct Context Context;
typedef struct Token Token;
typedef struct Line Line;
typedef struct Include Include;
typedef struct Cond Cond;
typedef struct IncPath IncPath;
- /*
- * Store the definition of a single-line macro.
- */
- struct SMacro {
- SMacro *next;
- char *name;
- int casesense;
- int nparam;
- int in_progress;
- Token *expansion;
- };
- /*
- * Store the definition of a multi-line macro. This is also used to
- * store the interiors of `%rep...%endrep' blocks, which are
- * effectively self-re-invoking multi-line macros which simply
- * don't have a name or bother to appear in the hash tables. %rep
- * blocks are signified by having a NULL `name' field.
- *
- * In a MMacro describing a `%rep' block, the `in_progress' field
- * isn't merely boolean, but gives the number of repeats left to
- * run.
- *
- * The `next' field is used for storing MMacros in hash tables; the
- * `next_active' field is for stacking them on istk entries.
- *
- * When a MMacro is being expanded, `params', `iline', `nparam',
- * `paramlen', `rotate' and `unique' are local to the invocation.
- */
- struct MMacro {
- MMacro *next;
- char *name;
- int casesense;
- int nparam_min, nparam_max;
- int plus; /* is the last parameter greedy? */
- int nolist; /* is this macro listing-inhibited? */
- int in_progress;
- Token *dlist; /* All defaults as one list */
- Token **defaults; /* Parameter default pointers */
- int ndefs; /* number of default parameters */
- Line *expansion;
- MMacro *next_active;
- MMacro *rep_nest; /* used for nesting %rep */
- Token **params; /* actual parameters */
- Token *iline; /* invocation line */
- int nparam, rotate, *paramlen;
- unsigned long unique;
- };
- /*
- * The context stack is composed of a linked list of these.
- */
- struct Context {
- Context *next;
- SMacro *localmac;
- char *name;
- unsigned long number;
- };
- /*
- * This is the internal form which we break input lines up into.
- * Typically stored in linked lists.
- *
- * Note that `type' serves a double meaning: TOK_SMAC_PARAM is not
- * necessarily used as-is, but is intended to denote the number of
- * the substituted parameter. So in the definition
- *
- * %define a(x,y) ( (x) & ~(y) )
- *
- * the token representing `x' will have its type changed to
- * TOK_SMAC_PARAM, but the one representing `y' will be
- * TOK_SMAC_PARAM+1.
- *
- * TOK_INTERNAL_STRING is a dirty hack: it's a single string token
- * which doesn't need quotes around it. Used in the pre-include
- * mechanism as an alternative to trying to find a sensible type of
- * quote to use on the filename we were passed.
- */
- struct Token {
- Token *next;
- char *text;
- SMacro *mac; /* associated macro for TOK_SMAC_END */
- int type;
- };
/*
 * Token types. Numbering starts at 1, so no valid type is zero.
 * Values of TOK_SMAC_PARAM and above may also encode a parameter
 * number (TOK_SMAC_PARAM + n) when used in a Token's `type' field.
 */
enum {
    TOK_WHITESPACE = 1, TOK_COMMENT, TOK_ID, TOK_PREPROC_ID, TOK_STRING,
    TOK_NUMBER, TOK_SMAC_END, TOK_OTHER, TOK_SMAC_PARAM,
    TOK_INTERNAL_STRING
};
- /*
- * Multi-line macro definitions are stored as a linked list of
- * these, which is essentially a container to allow several linked
- * lists of Tokens.
- *
- * Note that in this module, linked lists are treated as stacks
- * wherever possible. For this reason, Lines are _pushed_ on to the
- * `expansion' field in MMacro structures, so that the linked list,
- * if walked, would give the macro lines in reverse order; this
- * means that we can walk the list when expanding a macro, and thus
- * push the lines on to the `expansion' field in _istk_ in reverse
- * order (so that when popped back off they are in the right
- * order). It may seem cockeyed, and it relies on my design having
- * an even number of steps in, but it works...
- *
- * Some of these structures, rather than being actual lines, are
- * markers delimiting the end of the expansion of a given macro.
- * This is for use in the cycle-tracking and %rep-handling code.
- * Such structures have `finishes' non-NULL, and `first' NULL. All
- * others have `finishes' NULL, but `first' may still be NULL if
- * the line is blank.
- */
- struct Line {
- Line *next;
- MMacro *finishes;
- Token *first;
- };
- /*
- * To handle an arbitrary level of file inclusion, we maintain a
- * stack (ie linked list) of these things.
- */
- struct Include {
- Include *next;
- FILE *fp;
- Cond *conds;
- Line *expansion;
- char *fname;
- int lineno, lineinc;
- MMacro *mstk; /* stack of active macros/reps */
- };
- /*
- * Include search path. This is simply a list of strings which get
- * prepended, in turn, to the name of an include file, in an
- * attempt to find the file if it's not in the current directory.
- */
- struct IncPath {
- IncPath *next;
- char *path;
- };
- /*
- * Conditional assembly: we maintain a separate stack of these for
- * each level of file inclusion. (The only reason we keep the
- * stacks separate is to ensure that a stray `%endif' in a file
- * included from within the true branch of a `%if' won't terminate
- * it and cause confusion: instead, rightly, it'll cause an error.)
- */
- struct Cond {
- Cond *next;
- int state;
- };
enum {
    /*
     * These states are for use just after %if or %elif: IF_TRUE
     * means the condition has evaluated to truth so we are
     * currently emitting, whereas IF_FALSE means we are not
     * currently emitting but will start doing so if a %else comes
     * up. In these states, all directives are admissible: %elif,
     * %else and %endif. (And of course %if.)
     */
    COND_IF_TRUE, COND_IF_FALSE,
    /*
     * These states come up after a %else: ELSE_TRUE means we're
     * emitting, and ELSE_FALSE means we're not. In ELSE_* states,
     * any %elif or %else will cause an error.
     */
    COND_ELSE_TRUE, COND_ELSE_FALSE,
    /*
     * This state means that we're not emitting now, and also that
     * nothing until %endif will be emitted at all. It's for use in
     * two circumstances: (i) when we've had our moment of emission
     * and have now started seeing %elifs, and (ii) when the
     * condition construct in question is contained within a
     * non-emitting branch of a larger condition construct.
     */
    COND_NEVER
};
/*
 * True when conditional state `x' is one in which lines are being
 * emitted. Note that `x' is evaluated twice, so it must not have
 * side effects.
 */
#define emitting(x) ( (x) == COND_IF_TRUE || (x) == COND_ELSE_TRUE )
/*
 * Condition codes. Note that we use c_ prefix not C_ because C_ is
 * used in nasm.h for the "real" condition codes. At _this_ level,
 * we treat CXZ and ECXZ as condition codes, albeit non-invertible
 * ones, so we need a different enum...
 *
 * The three tables below are parallel: conditions[c] is the
 * spelling of code `c', and inverse_ccs[c] is the code of the
 * opposite condition, or -1 where there is none (cxz and ecxz).
 * Any change must be made to all three together.
 */
static char *conditions[] = {
    "a", "ae", "b", "be", "c", "cxz", "e", "ecxz", "g", "ge", "l", "le",
    "na", "nae", "nb", "nbe", "nc", "ne", "ng", "nge", "nl", "nle", "no",
    "np", "ns", "nz", "o", "p", "pe", "po", "s", "z"
};
enum {
    c_A, c_AE, c_B, c_BE, c_C, c_CXZ, c_E, c_ECXZ, c_G, c_GE, c_L, c_LE,
    c_NA, c_NAE, c_NB, c_NBE, c_NC, c_NE, c_NG, c_NGE, c_NL, c_NLE, c_NO,
    c_NP, c_NS, c_NZ, c_O, c_P, c_PE, c_PO, c_S, c_Z
};
static int inverse_ccs[] = {
    c_NA, c_NAE, c_NB, c_NBE, c_NC, -1, c_NE, -1, c_NG, c_NGE, c_NL, c_NLE,
    c_A, c_AE, c_B, c_BE, c_C, c_E, c_G, c_GE, c_L, c_LE, c_O, c_P, c_S,
    c_Z, c_NO, c_NP, c_PO, c_PE, c_NS, c_NZ
};
/*
 * Directive names.
 *
 * The strings are listed in ascending strcmp() order, and the enum
 * that follows gives each one its index: directives[PP_xxx] is the
 * spelling of directive PP_xxx. Keep both in step (and, presumably
 * because lookups rely on it, keep the list sorted - confirm
 * against the lookup code).
 */
static char *directives[] = {
    "%assign", "%clear", "%define", "%elif", "%elifctx", "%elifdef",
    "%elifid", "%elifidn", "%elifidni", "%elifnctx", "%elifndef",
    "%elifnid", "%elifnidn", "%elifnidni", "%elifnnum", "%elifnstr",
    "%elifnum", "%elifstr", "%else", "%endif", "%endm", "%endmacro",
    "%endrep", "%error", "%exitrep", "%iassign", "%idefine", "%if",
    "%ifctx", "%ifdef", "%ifid", "%ifidn", "%ifidni", "%ifnctx",
    "%ifndef", "%ifnid", "%ifnidn", "%ifnidni", "%ifnnum",
    "%ifnstr", "%ifnum", "%ifstr", "%imacro", "%include", "%line",
    "%macro", "%pop", "%push", "%rep", "%repl", "%rotate", "%undef"
};
enum {
    PP_ASSIGN, PP_CLEAR, PP_DEFINE, PP_ELIF, PP_ELIFCTX, PP_ELIFDEF,
    PP_ELIFID, PP_ELIFIDN, PP_ELIFIDNI, PP_ELIFNCTX, PP_ELIFNDEF,
    PP_ELIFNID, PP_ELIFNIDN, PP_ELIFNIDNI, PP_ELIFNNUM, PP_ELIFNSTR,
    PP_ELIFNUM, PP_ELIFSTR, PP_ELSE, PP_ENDIF, PP_ENDM, PP_ENDMACRO,
    PP_ENDREP, PP_ERROR, PP_EXITREP, PP_IASSIGN, PP_IDEFINE, PP_IF,
    PP_IFCTX, PP_IFDEF, PP_IFID, PP_IFIDN, PP_IFIDNI, PP_IFNCTX,
    PP_IFNDEF, PP_IFNID, PP_IFNIDN, PP_IFNIDNI, PP_IFNNUM,
    PP_IFNSTR, PP_IFNUM, PP_IFSTR, PP_IMACRO, PP_INCLUDE, PP_LINE,
    PP_MACRO, PP_POP, PP_PUSH, PP_REP, PP_REPL, PP_ROTATE, PP_UNDEF
};
- static Context *cstk;
- static Include *istk;
- static IncPath *ipath = NULL;
- static efunc error;
- static evalfunc evaluate;
- static int pass; /* HACK: pass 0 = generate dependencies only */
- static unsigned long unique; /* unique identifier numbers */
- static Line *predef = NULL;
- static ListGen *list;
- /*
- * The number of hash values we use for the macro lookup tables.
- * FIXME: We should *really* be able to configure this at run time,
- * or even have the hash table automatically expanding when necessary.
- */
- #define NHASH 31
- /*
- * The current set of multi-line macros we have defined.
- */
- static MMacro *mmacros[NHASH];
- /*
- * The current set of single-line macros we have defined.
- */
- static SMacro *smacros[NHASH];
- /*
- * The multi-line macro we are currently defining, or the %rep
- * block we are currently reading, if any.
- */
- static MMacro *defining;
/*
 * The number of macro parameters to allocate space for at a time.
 */
#define PARAM_DELTA 16
- /*
- * The standard macro set: defined as `static char *stdmac[]'. Also
- * gives our position in the macro set, when we're processing it.
- */
- #include "macros.c"
/*
 * Our position in the standard macro set while it is being read;
 * NULL once there is nothing left to read from it.
 */
static char **stdmacpos;
/*
 * The extra standard macros that come from the object format, if
 * any.
 */
static char **extrastdmac = NULL;
int any_extrastdmac;            /* set while `extrastdmac' has yet to be
                                 * read; note this one is not static */
- /*
- * Forward declarations.
- */
- static Token *expand_mmac_params (Token *tline);
- static Token *expand_smacro (Token *tline);
- static void make_tok_num(Token *tok, long val);
/*
 * Macros for safe checking of token pointers, avoid *(NULL)
 *
 *   tok_type_(x,t)   x is non-NULL and has type t
 *   skip_white_(x)   advance x past one whitespace token, if present
 *   tok_is_(x,v)     x is a TOK_OTHER token whose text equals v
 *   tok_isnt_(x,v)   x is non-NULL and is not such a token
 *
 * All of them evaluate `x' more than once. skip_white_ expands to a
 * bare `if' statement, so use it only as a complete statement of
 * its own: never as the body of an enclosing if/else.
 */
#define tok_type_(x,t) ((x) && (x)->type == (t))
#define skip_white_(x) if (tok_type_((x), TOK_WHITESPACE)) (x)=(x)->next
#define tok_is_(x,v) (tok_type_((x), TOK_OTHER) && !strcmp((x)->text,(v)))
#define tok_isnt_(x,v) ((x) && ((x)->type!=TOK_OTHER || strcmp((x)->text,(v))))
/*
 * The pre-preprocessing stage... This function translates line
 * number indications as they emerge from GNU cpp (`# lineno "file"
 * flags') into NASM preprocessor line number indications (`%line
 * lineno file').
 *
 * `line' must have been allocated with the nasm allocator. If it
 * starts with "# " it is freed and a newly allocated replacement is
 * returned; otherwise it is returned unchanged. Either way the
 * caller owns the result.
 */
static char *prepreproc(char *line)
{
    int lineno, fnlen;
    char *fname, *oldline;

    if (line[0] == '#' && line[1] == ' ') {
        oldline = line;
        fname = oldline + 2;
        lineno = atoi(fname);
        /* step over the line number and the blanks around it */
        fname += strspn(fname, "0123456789 ");
        if (*fname == '"')
            fname++;
        /* the file name runs up to the closing quote (or end of line) */
        fnlen = strcspn(fname, "\"");
        /*
         * 20 bytes cover "%line ", the decimal line number, the
         * separating space and the terminating NUL, assuming an
         * int prints in at most 11 characters.
         */
        line = nasm_malloc(20 + fnlen);
        sprintf(line, "%%line %d %.*s", lineno, fnlen, fname);
        nasm_free(oldline);
    }
    return line;
}
- /*
- * The hash function for macro lookups. Note that due to some
- * macros having case-insensitive names, the hash function must be
- * invariant under case changes. We implement this by applying a
- * perfectly normal hash function to the uppercase of the string.
- */
- static int hash(char *s)
- {
- unsigned int h = 0;
- int i = 0;
- /*
- * Powers of three, mod 31.
- */
- static const int multipliers[] = {
- 1, 3, 9, 27, 19, 26, 16, 17, 20, 29, 25, 13, 8, 24, 10,
- 30, 28, 22, 4, 12, 5, 15, 14, 11, 2, 6, 18, 23, 7, 21
- };
- while (*s) {
- h += multipliers[i] * (unsigned char) (toupper(*s));
- s++;
- if (++i >= sizeof(multipliers)/sizeof(*multipliers))
- i = 0;
- }
- h %= NHASH;
- return h;
- }
- /*
- * Free a linked list of tokens.
- */
- static void free_tlist (Token *list)
- {
- Token *t;
- while (list) {
- t = list;
- list = list->next;
- nasm_free (t->text);
- nasm_free (t);
- }
- }
- /*
- * Free a linked list of lines.
- */
- static void free_llist (Line *list)
- {
- Line *l;
- while (list) {
- l = list;
- list = list->next;
- free_tlist (l->first);
- nasm_free (l);
- }
- }
- /*
- * Free an MMacro
- */
- static void free_mmacro (MMacro *m)
- {
- nasm_free (m->name);
- free_tlist (m->dlist);
- nasm_free (m->defaults);
- free_llist (m->expansion);
- nasm_free (m);
- }
- /*
- * Pop the context stack.
- */
- static void ctx_pop (void)
- {
- Context *c = cstk;
- SMacro *smac, *s;
- cstk = cstk->next;
- smac = c->localmac;
- while (smac) {
- s = smac;
- smac = smac->next;
- nasm_free (s->name);
- free_tlist (s->expansion);
- nasm_free (s);
- }
- nasm_free (c->name);
- nasm_free (c);
- }
/*
 * Initial size of the line buffer used by read_line, and the amount
 * by which it grows each time it is about to fill up.
 */
#define BUF_DELTA 512
- /*
- * Read a line from the top file in istk, handling multiple CR/LFs
- * at the end of the line read, and handling spurious ^Zs. Will
- * return lines from the standard macro set if this has not already
- * been done.
- */
- static char *read_line (void)
- {
- char *buffer, *p, *q;
- int bufsize;
- if (stdmacpos) {
- if (*stdmacpos) {
- char *ret = nasm_strdup(*stdmacpos++);
- if (!*stdmacpos && any_extrastdmac)
- {
- stdmacpos = extrastdmac;
- any_extrastdmac = FALSE;
- return ret;
- }
- /*
- * Nasty hack: here we push the contents of `predef' on
- * to the top-level expansion stack, since this is the
- * most convenient way to implement the pre-include and
- * pre-define features.
- */
- if (!*stdmacpos)
- {
- Line *pd, *l;
- Token *head, **tail, *t, *tt;
- for (pd = predef; pd; pd = pd->next) {
- head = NULL;
- tail = &head;
- for (t = pd->first; t; t = t->next) {
- tt = *tail = nasm_malloc(sizeof(Token));
- tt->next = NULL;
- tail = &tt->next;
- tt->type = t->type;
- tt->text = nasm_strdup(t->text);
- tt->mac = t->mac; /* always NULL here, in fact */
- }
- l = nasm_malloc(sizeof(Line));
- l->next = istk->expansion;
- l->first = head;
- l->finishes = FALSE;
- istk->expansion = l;
- }
- }
- return ret;
- }
- else {
- stdmacpos = NULL;
- }
- }
- bufsize = BUF_DELTA;
- buffer = nasm_malloc(BUF_DELTA);
- p = buffer;
- while (1) {
- q = fgets(p, bufsize-(p-buffer), istk->fp);
- if (!q)
- break;
- p += strlen(p);
- if (p > buffer && p[-1] == 'n') {
- break;
- }
- if (p-buffer > bufsize-10) {
- long offset = p-buffer;
- bufsize += BUF_DELTA;
- buffer = nasm_realloc(buffer, bufsize);
- p = buffer+offset; /* prevent stale-pointer problems */
- }
- }
- if (!q && p == buffer) {
- nasm_free (buffer);
- return NULL;
- }
- src_set_linnum(src_get_linnum() + istk->lineinc);
- /*
- * Play safe: remove CRs as well as LFs, if any of either are
- * present at the end of the line.
- */
- while (--p >= buffer && (*p == 'n' || *p == 'r'))
- *p = ' ';
- /*
- * Handle spurious ^Z, which may be inserted into source files
- * by some file transfer utilities.
- */
- buffer[strcspn(buffer, "