LLkAnalyzer.java
上传用户:afrynkmhm
上传日期:2007-01-06
资源大小:1262k
文件大小:36k
- package antlr;
- /* ANTLR Translator Generator
- * Project led by Terence Parr at http://www.jGuru.com
- * Software rights: http://www.antlr.org/RIGHTS.html
- *
- * $Id: //depot/code/org.antlr/release/antlr-2.7.0/antlr/LLkAnalyzer.java#1 $
- */
- import antlr.collections.impl.BitSet;
- import antlr.collections.impl.Vector;
- /**A linear-approximate LL(k) grammar analzyer.
- *
- * All lookahead elements are sets of token types.
- *
- * @author Terence Parr, John Lilley
- * @version 2.00a
- * @see antlr.Grammar
- * @see antlr.Lookahead
- */
- public class LLkAnalyzer implements LLkGrammarAnalyzer {
- // Set "analyzerDebug" to true
- public boolean DEBUG_ANALYZER = false;
- private AlternativeBlock currentBlock;
- protected Tool tool = null;
- protected Grammar grammar = null;
- // True if analyzing a lexical grammar
- protected boolean lexicalAnalysis = false;
- // Used for formatting bit sets in default (Java) format
- CharFormatter charFormatter = new JavaCharFormatter();
- /** Create an LLk analyzer */
- public LLkAnalyzer(Tool tool_) {
- tool = tool_;
- }
- /** Return true if someone used the '.' wildcard default idiom.
- * Either #(. children) or '.' as an alt by itself.
- */
- protected boolean altUsesWildcardDefault(Alternative alt) {
- AlternativeElement head = alt.head;
- // if element is #(. blah) then check to see if el is root
- if (head instanceof TreeElement &&
- ((TreeElement) head).root instanceof WildcardElement ) {
- return true;
- }
- if (head instanceof WildcardElement && head. next instanceof BlockEndElement) {
- return true;
- }
- return false;
- }
- /**Is this block of alternatives LL(k)? Fill in alternative cache for this block.
- * @return true if the block is deterministic
- */
- public boolean deterministic(AlternativeBlock blk) {
- /** The lookahead depth for this decision */
- int k=1; // start at k=1
- if ( DEBUG_ANALYZER ) System.out.println("deterministic("+blk+")");
- boolean det = true;
- int nalts = blk.alternatives.size();
- AlternativeBlock saveCurrentBlock = currentBlock;
- Alternative wildcardAlt = null;
- currentBlock = blk;
- /* don't allow nongreedy (...) blocks */
- if ( blk.greedy==false && !(blk instanceof OneOrMoreBlock) && !(blk instanceof ZeroOrMoreBlock) ) {
- tool.warning("Being nongreedy only makes sense for (...)+ and (...)*", grammar.getFilename(), blk.getLine());
- }
- // SPECIAL CASE: only one alternative. We don't need to check the
- // determinism, but other code expects the lookahead cache to be
- // set for the single alt.
- if ( nalts==1 ) {
- AlternativeElement e = blk.getAlternativeAt(0).head;
- currentBlock.alti = 0;
- blk.getAlternativeAt(0).cache[1] = e.look(1);
- blk.getAlternativeAt(0).lookaheadDepth = 1; // set lookahead to LL(1)
- currentBlock = saveCurrentBlock;
- return true; // always deterministic for one alt
- }
- outer:
- for (int i=0; i<nalts-1; i++) {
- currentBlock.alti = i;
- currentBlock.analysisAlt = i; // which alt are we analyzing?
- currentBlock.altj = i+1; // reset this alt. Haven't computed yet,
- // but we need the alt number.
- inner:
- // compare against other alternatives with lookahead depth k
- for (int j=i+1; j<nalts; j++) {
- currentBlock.altj = j;
- if ( DEBUG_ANALYZER ) System.out.println("comparing "+i+" against alt "+j);
- currentBlock.analysisAlt = j; // which alt are we analyzing?
- k = 1; // always attempt minimum lookahead possible.
-
- // check to see if there is a lookahead depth that distinguishes
- // between alternatives i and j.
- Lookahead[] r = new Lookahead[grammar.maxk+1];
- boolean haveAmbiguity;
- do {
- haveAmbiguity = false;
- if ( DEBUG_ANALYZER ) System.out.println("checking depth "+k+"<="+grammar.maxk);
- Lookahead p,q;
- p = getAltLookahead(blk, i, k);
- q = getAltLookahead(blk, j, k);
- // compare LOOK(alt i) with LOOK(alt j). Is there an intersection?
- // Lookahead must be disjoint.
- if ( DEBUG_ANALYZER ) System.out.println("p is "+p.toString(",", charFormatter, grammar));
- if ( DEBUG_ANALYZER ) System.out.println("q is "+q.toString(",", charFormatter, grammar));
- // r[i] = p.fset.and(q.fset);
- r[k] = p.intersection(q);
- if ( DEBUG_ANALYZER ) System.out.println("intersection at depth "+k+" is "+r[k].toString());
- if ( !r[k].nil() ) {
- haveAmbiguity = true;
- k++;
- }
- // go until no more lookahead to use or no intersection
- } while ( haveAmbiguity && k <= grammar.maxk );
- Alternative ai = blk.getAlternativeAt(i);
- Alternative aj = blk.getAlternativeAt(j);
- if ( haveAmbiguity ) {
- det = false;
- ai.lookaheadDepth = NONDETERMINISTIC;
- aj.lookaheadDepth = NONDETERMINISTIC;
- /* if ith alt starts with a syntactic predicate, computing the
- * lookahead is still done for code generation, but messages
- * should not be generated when comparing against alt j.
- * Alternatives with syn preds that are unnecessary do
- * not result in syn pred try-blocks.
- */
- if ( ai.synPred != null ) {
- if ( DEBUG_ANALYZER ) {
- System.out.println("alt "+i+" has a syn pred");
- }
- // The alt with the (...)=> block is nondeterministic for sure.
- // If the (...)=> conflicts with alt j, j is nondeterministic.
- // This prevents alt j from being in any switch statements.
- // move on to next alternative=>no possible ambiguity!
- // continue inner;
- }
- /* if ith alt starts with a semantic predicate, computing the
- * lookahead is still done for code generation, but messages
- * should not be generated when comparing against alt j.
- */
- else if ( ai.semPred != null ) {
- if ( DEBUG_ANALYZER ) {
- System.out.println("alt "+i+" has a sem pred");
- }
- }
- /* if jth alt is exactly the wildcard or wildcard root of tree,
- * then remove elements from alt i lookahead from alt j's lookahead.
- * Don't do an ambiguity warning.
- */
- else if ( altUsesWildcardDefault(aj) ) {
- // System.out.println("removing pred sets");
- // removeCompetingPredictionSetsFromWildcard(aj.cache, aj.head, grammar.maxk);
- wildcardAlt = aj;
- }
-
- /* If the user specified warnWhenFollowAmbig=false, then we
- * can turn off this warning IFF one of the alts is empty;
- * that is, it points immediately at the end block.
- */
- else if ( !blk.warnWhenFollowAmbig &&
- (ai.head instanceof BlockEndElement ||
- aj.head instanceof BlockEndElement) )
- {
- // System.out.println("ai.head pts to "+ai.head.getClass());
- // System.out.println("aj.head pts to "+aj.head.getClass());
- }
- /* If they have the generateAmbigWarnings option off for the block
- * then don't generate a warning.
- */
- else if ( !blk.generateAmbigWarnings ) {
- }
- /* If greedy=true and *one* empty alt shut off warning. */
- else if ( blk.greedySet && blk.greedy &&
- ((ai.head instanceof BlockEndElement &&
- !(aj.head instanceof BlockEndElement)) ||
- (aj.head instanceof BlockEndElement &&
- !(ai.head instanceof BlockEndElement))) ) {
- // System.out.println("greedy set to true; one alt empty");
- }
-
-
- /* We have no choice, but to report a nondetermism */
- else {
- tool.errorHandler.warnAltAmbiguity(
- grammar,
- blk, // the block
- lexicalAnalysis,// true if lexical
- grammar.maxk, // depth of ambiguity
- r, // set of linear ambiguities
- i, // first ambiguous alternative
- j // second ambiguous alternative
- );
- }
- }
- else {
- // a lookahead depth, k, was found where i and j do not conflict
- ai.lookaheadDepth = Math.max(ai.lookaheadDepth,k);
- aj.lookaheadDepth = Math.max(aj.lookaheadDepth,k);
- }
- }
- }
- // finished with block.
- // If had wildcard default clause idiom, remove competing lookahead
- /*
- if ( wildcardAlt!=null ) {
- removeCompetingPredictionSetsFromWildcard(wildcardAlt.cache, wildcardAlt.head, grammar.maxk);
- }
- */
- currentBlock = saveCurrentBlock;
- return det;
- }
- /**Is (...)+ block LL(1)? Fill in alternative cache for this block.
- * @return true if the block is deterministic
- */
- public boolean deterministic(OneOrMoreBlock blk) {
- if ( DEBUG_ANALYZER ) System.out.println("deterministic(...)+("+blk+")");
- AlternativeBlock saveCurrentBlock = currentBlock;
- currentBlock = blk;
- boolean blkOk = deterministic((AlternativeBlock)blk);
- // block has been checked, now check that what follows does not conflict
- // with the lookahead of the (...)+ block.
- boolean det = deterministicImpliedPath(blk);
- currentBlock = saveCurrentBlock;
- return det&&blkOk;
- }
- /**Is (...)* block LL(1)? Fill in alternative cache for this block.
- * @return true if the block is deterministic
- */
- public boolean deterministic(ZeroOrMoreBlock blk) {
- if ( DEBUG_ANALYZER ) System.out.println("deterministic(...)*("+blk+")");
- AlternativeBlock saveCurrentBlock = currentBlock;
- currentBlock = blk;
- boolean blkOk = deterministic((AlternativeBlock)blk);
- // block has been checked, now check that what follows does not conflict
- // with the lookahead of the (...)* block.
- boolean det = deterministicImpliedPath(blk);
- currentBlock = saveCurrentBlock;
- return det&&blkOk;
- }
- /**Is this (...)* or (...)+ block LL(k)?
- * @return true if the block is deterministic
- */
- public boolean deterministicImpliedPath(BlockWithImpliedExitPath blk) {
- /** The lookahead depth for this decision considering implied exit path */
- int k;
- boolean det = true;
- Vector alts = blk.getAlternatives();
- int nalts = alts.size();
- currentBlock.altj = -1; // comparing against implicit optional/exit alt
- if ( DEBUG_ANALYZER ) System.out.println("deterministicImpliedPath");
- for (int i=0; i<nalts; i++) { // check follow against all alts
- Alternative alt = blk.getAlternativeAt(i);
-
- if ( alt.head instanceof BlockEndElement ) {
- tool.warning("empty alternative makes no sense in (...)* or (...)+", grammar.getFilename(), blk.getLine());
- }
-
- k = 1; // assume eac alt is LL(1) with exit branch
- // check to see if there is a lookahead depth that distinguishes
- // between alternative i and the exit branch.
- Lookahead[] r = new Lookahead[grammar.maxk+1];
- boolean haveAmbiguity;
- do {
- haveAmbiguity = false;
- if ( DEBUG_ANALYZER ) System.out.println("checking depth "+k+"<="+grammar.maxk);
- Lookahead p;
- Lookahead follow = blk.next.look(k);
- blk.exitCache[k] = follow;
- currentBlock.alti = i;
- p = getAltLookahead(blk, i, k);
- if ( DEBUG_ANALYZER ) System.out.println("follow is "+follow.toString(",", charFormatter, grammar));
- if ( DEBUG_ANALYZER ) System.out.println("p is "+p.toString(",", charFormatter, grammar));
- //r[k] = follow.fset.and(p.fset);
- r[k] = follow.intersection(p);
- if ( DEBUG_ANALYZER ) System.out.println("intersection at depth "+k+" is "+r[k]);
- if ( !r[k].nil() ) {
- haveAmbiguity = true;
- k++;
- }
- // go until no more lookahead to use or no intersection
- } while ( haveAmbiguity && k <= grammar.maxk );
- if ( haveAmbiguity )
- {
- det = false;
- alt.lookaheadDepth = NONDETERMINISTIC;
- blk.exitLookaheadDepth = NONDETERMINISTIC;
- Alternative ambigAlt = blk.getAlternativeAt(currentBlock.alti);
- /* If the user specified warnWhenFollowAmbig=false, then we
- * can turn off this warning.
- */
- if ( !blk.warnWhenFollowAmbig ) {
- }
- /* If they have the generateAmbigWarnings option off for the block
- * then don't generate a warning.
- */
- else if ( !blk.generateAmbigWarnings ) {
- }
- /* If greedy=true and alt not empty, shut off warning */
- else if ( blk.greedy==true && blk.greedySet &&
- !(ambigAlt.head instanceof BlockEndElement) ) {
- if ( DEBUG_ANALYZER ) System.out.println("greedy loop");
- }
- /* If greedy=false then shut off warning...will have
- * to add "if FOLLOW break"
- * block during code gen to compensate for removal of warning.
- */
- else if( blk.greedy==false &&
- !(ambigAlt.head instanceof BlockEndElement) ) {
- if ( DEBUG_ANALYZER ) System.out.println("nongreedy loop");
- // if FOLLOW not single k-string (|set[k]| can
- // be > 1 actually) then must warn them that
- // loop may terminate incorrectly.
- // For example, ('a'..'d')+ ("ad"|"cb")
- if ( !lookaheadEquivForApproxAndFullAnalysis(blk.exitCache, grammar.maxk) ) {
- tool.warning(new String[] {
- "nongreedy block may exit incorrectly due",
- "tto limitations of linear approximate lookahead (first k-1 sets",
- "tin lookahead not singleton)."},
- grammar.getFilename(), blk.getLine());
- }
- }
- // no choice but to generate a warning
- else {
- tool.errorHandler.warnAltExitAmbiguity(
- grammar,
- blk, // the block
- lexicalAnalysis, // true if lexical
- grammar.maxk, // depth of ambiguity
- r, // set of linear ambiguities
- i // ambiguous alternative
- );
- }
- }
- else {
- alt.lookaheadDepth = Math.max(alt.lookaheadDepth,k);
- blk.exitLookaheadDepth = Math.max(blk.exitLookaheadDepth,k);
- }
- }
- return det;
- }
- /**Compute the lookahead set of whatever follows references to
- * the rule associated witht the FOLLOW block.
- */
- public Lookahead FOLLOW(int k, RuleEndElement end) {
- // what rule are we trying to compute FOLLOW of?
- RuleBlock rb = (RuleBlock)end.block;
- // rule name is different in lexer
- String rule;
- if (lexicalAnalysis) {
- rule = CodeGenerator.lexerRuleName(rb.getRuleName());
- } else {
- rule = rb.getRuleName();
- }
- if ( DEBUG_ANALYZER ) System.out.println("FOLLOW("+k+","+rule+")");
- // are we in the midst of computing this FOLLOW already?
- if ( end.lock[k] ) {
- if ( DEBUG_ANALYZER ) System.out.println("FOLLOW cycle to "+rule);
- return new Lookahead(rule);
- }
- // Check to see if there is cached value
- if ( end.cache[k]!=null ) {
- if ( DEBUG_ANALYZER ) {
- System.out.println("cache entry FOLLOW("+k+") for "+rule+": "+end.cache[k].toString(",", charFormatter, grammar));
- }
- // if the cache is a complete computation then simply return entry
- if ( end.cache[k].cycle==null ) {
- return (Lookahead)end.cache[k].clone();
- }
- // A cache entry exists, but it is a reference to a cyclic computation.
- RuleSymbol rs = (RuleSymbol)grammar.getSymbol(end.cache[k].cycle);
- RuleEndElement re = rs.getBlock().endNode;
- // The other entry may not exist because it is still being
- // computed when this cycle cache entry was found here.
- if ( re.cache[k]==null ) {
- // return the cycle...that's all we can do at the moment.
- return (Lookahead)end.cache[k].clone();
- }
- else {
- // replace this cache entry with the entry from the referenced computation.
- // Eventually, this percolates a complete (no cycle reference) cache entry
- // to this node (or at least gets it closer and closer). This is not
- // crucial, but makes cache lookup faster as we might have to look up
- // lots of cycle references before finding a complete reference.
- end.cache[k] = re.cache[k];
- // Return the cache entry associated with the cycle reference.
- return (Lookahead)re.cache[k].clone();
- }
- }
- end.lock[k] = true; // prevent FOLLOW computation cycles
- Lookahead p = new Lookahead();
- RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rule);
- // Walk list of references to this rule to compute FOLLOW
- for (int i=0; i<rs.numReferences(); i++) {
- RuleRefElement rr = rs.getReference(i);
- if ( DEBUG_ANALYZER ) System.out.println("next["+rule+"] is "+rr.next.toString());
- Lookahead q = rr.next.look(k);
- if ( DEBUG_ANALYZER ) System.out.println("FIRST of next["+rule+"] ptr is "+q.toString());
- /* If there is a cycle then if the cycle is to the rule for
- * this end block, you have a cycle to yourself. Remove the
- * cycle indication--the lookahead is complete.
- */
- if ( q.cycle!=null && q.cycle.equals(rule) ) {
- q.cycle = null; // don't want cycle to yourself!
- }
- // add the lookahead into the current FOLLOW computation set
- p.combineWith(q);
- if ( DEBUG_ANALYZER ) System.out.println("combined FOLLOW["+rule+"] is "+p.toString());
- }
- end.lock[k] = false; // we're not doing FOLLOW anymore
- // if no rules follow this, it can be a start symbol or called by a start sym.
- // set the follow to be end of file.
- if ( p.fset.nil() && p.cycle==null ) {
- if ( grammar instanceof TreeWalkerGrammar ) {
- // Tree grammars don't see EOF, they see end of sibling list or
- // "NULL TREE LOOKAHEAD".
- p.fset.add(Token.NULL_TREE_LOOKAHEAD);
- }
- else if ( grammar instanceof LexerGrammar ) {
- // Lexical grammars use Epsilon to indicate that the end of rule has been hit
- // EOF would be misleading; any character can follow a token rule not just EOF
- // as in a grammar (where a start symbol is followed by EOF). There is no
- // sequence info in a lexer between tokens to indicate what is the last token
- // to be seen.
- // p.fset.add(EPSILON_TYPE);
- p.setEpsilon();
- }
- else {
- p.fset.add(Token.EOF_TYPE);
- }
- }
- // Cache the result of the FOLLOW computation
- if ( DEBUG_ANALYZER ) {
- System.out.println("saving FOLLOW("+k+") for "+rule+": "+p.toString(",", charFormatter, grammar));
- }
- end.cache[k] = (Lookahead)p.clone();
- return p;
- }
- private Lookahead getAltLookahead(AlternativeBlock blk, int alt, int k) {
- Lookahead p;
- Alternative a = blk.getAlternativeAt(alt);
- AlternativeElement e = a.head;
- //System.out.println("getAltLookahead("+k+","+e+"), cache size is "+a.cache.length);
- if ( a.cache[k]==null ) {
- p = e.look(k);
- a.cache[k] = p;
- }
- else {
- p = a.cache[k];
- }
- return p;
- }
- /**Actions are ignored */
- public Lookahead look(int k, ActionElement action) {
- if ( DEBUG_ANALYZER ) System.out.println("lookAction("+k+","+action+")");
- return action.next.look(k);
- }
- /**Combine the lookahead computed for each alternative */
- public Lookahead look(int k, AlternativeBlock blk) {
- if ( DEBUG_ANALYZER ) System.out.println("lookAltBlk(" + k + "," + blk + ")");
- AlternativeBlock saveCurrentBlock = currentBlock;
- currentBlock = blk;
- Lookahead p = new Lookahead();
- for (int i=0; i<blk.alternatives.size(); i++) {
- if ( DEBUG_ANALYZER ) System.out.println("alt " + i);
- // must set analysis alt
- currentBlock.analysisAlt = i;
- AlternativeElement elem = blk.getAlternativeAt(i).head;
- Lookahead q = elem.look(k);
- p.combineWith(q);
- }
- if (k == 1 && blk.not && subruleCanBeInverted(blk, lexicalAnalysis)) {
- // Invert the lookahead set
- if (lexicalAnalysis) {
- BitSet b = (BitSet)((LexerGrammar)grammar).charVocabulary.clone();
- int[] elems = p.fset.toArray();
- for (int j = 0; j < elems.length; j++) {
- b.remove(elems[j]);
- }
- p.fset = b;
- } else {
- p.fset.notInPlace(Token.MIN_USER_TYPE, grammar.tokenManager.maxTokenType());
- }
- }
- currentBlock = saveCurrentBlock;
- return p;
- }
- /**Compute what follows this place-holder node and possibly
- * what begins the associated loop unless the
- * node is locked.
- * <p>
- * if we hit the end of a loop, we have to include
- * what tokens can begin the loop as well. If the start
- * node is locked, then we simply found an empty path
- * through this subrule while analyzing it. If the
- * start node is not locked, then this node was hit
- * during a FOLLOW operation and the FIRST of this
- * block must be included in that lookahead computation.
- */
- public Lookahead look(int k, BlockEndElement end) {
- if ( DEBUG_ANALYZER ) System.out.println("lookBlockEnd("+k+")");
- if ( end.lock[k] ) {
- // computation in progress => the tokens we would have
- // computed (had we not been locked) will be included
- // in the set by that computation with the lock on this
- // node.
- return new Lookahead();
- }
- end.lock[k] = true;
- Lookahead p;
-
- /* Hitting the end of a loop means you can see what begins the loop */
- if ( end.block instanceof ZeroOrMoreBlock ||
- end.block instanceof OneOrMoreBlock ) {
- // compute what can start the block
- p = look(k, end.block);
- }
- else {
- p = new Lookahead();
- }
- /* Tree blocks do not have any follow because they are children
- * of what surrounds them. For example, A #(B C) D results in
- * a look() for the TreeElement end of NULL_TREE_LOOKAHEAD, which
- * indicates that nothing can follow the last node of tree #(B C)
- */
- if (end.block instanceof TreeElement) {
- p.combineWith(Lookahead.of(Token.NULL_TREE_LOOKAHEAD));
- }
-
- /* Syntactic predicates such as ( (A)? )=> have no follow per se.
- * We cannot accurately say what would be matched following a
- * syntactic predicate (you MIGHT be ok if you said it was whatever
- * followed the alternative predicted by the predicate). Hence,
- * (like end-of-token) we return Epsilon to indicate "unknown
- * lookahead."
- */
- else if ( end.block instanceof SynPredBlock ) {
- p.setEpsilon();
- }
- // compute what can follow the block
- else {
- Lookahead q = end.block.next.look(k);
- p.combineWith(q);
- }
-
- end.lock[k] = false;
- return p;
- }
- /**Return this char as the lookahead if k=1.
- * <p>### Doesn't work for ( 'a' 'b' | 'a' ~'b' ) yet!!!
- * <p>
- * If the atom has the <tt>not</tt> flag on, then
- * create the set complement of the tokenType
- * which is the set of all characters referenced
- * in the grammar with this char turned off.
- * Also remove characters from the set that
- * are currently allocated for predicting
- * previous alternatives. This avoids ambiguity
- * messages and is more properly what is meant.
- * ( 'a' | ~'a' ) implies that the ~'a' is the
- * "else" clause.
- * <p>
- * NOTE: we do <b>NOT</b> include exit path in
- * the exclusion set. E.g.,
- * ( 'a' | ~'a' )* 'b'
- * should exit upon seeing a 'b' during the loop.
- */
- public Lookahead look(int k, CharLiteralElement atom) {
- if ( DEBUG_ANALYZER ) System.out.println("lookCharLiteral("+k+","+atom+")");
- // Skip until analysis hits k==1
- if ( k>1 ) {
- return atom.next.look(k-1);
- }
- if ( lexicalAnalysis) {
- if (atom.not) {
- BitSet b = (BitSet)((LexerGrammar)grammar).charVocabulary.clone();
- if ( DEBUG_ANALYZER ) System.out.println("charVocab is "+b.toString());
- // remove stuff predicted by preceding alts and follow of block
- removeCompetingPredictionSets(b, atom);
- if ( DEBUG_ANALYZER ) System.out.println("charVocab after removal of prior alt lookahead "+b.toString());
- // now remove element that is stated not to be in the set
- b.clear(atom.getType());
- return new Lookahead(b);
- } else {
- return Lookahead.of(atom.getType());
- }
- }
- else {
- // Should have been avoided by MakeGrammar
- tool.panic("Character literal reference found in parser");
- // ... so we make the compiler happy
- return Lookahead.of(atom.getType());
- }
- }
- public Lookahead look(int k, CharRangeElement r) {
- if ( DEBUG_ANALYZER ) System.out.println("lookCharRange("+k+","+r+")");
- // Skip until analysis hits k==1
- if ( k>1 ) {
- return r.next.look(k-1);
- }
- BitSet p = BitSet.of(r.begin);
- for (int i=r.begin+1; i<=r.end; i++) {
- p.add(i);
- }
- return new Lookahead(p);
- }
- public Lookahead look(int k, GrammarAtom atom) {
- if ( DEBUG_ANALYZER ) System.out.println("look("+k+","+atom+"["+atom.getType()+"])");
-
- if ( lexicalAnalysis ) {
- // MakeGrammar should have created a rule reference instead
- tool.panic("token reference found in lexer");
- }
- // Skip until analysis hits k==1
- if ( k>1 ) {
- return atom.next.look(k-1);
- }
- Lookahead l = Lookahead.of(atom.getType());
- if (atom.not) {
- // Invert the lookahead set against the token vocabulary
- int maxToken = grammar.tokenManager.maxTokenType();
- l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
- // remove stuff predicted by preceding alts and follow of block
- removeCompetingPredictionSets(l.fset, atom);
- }
- return l;
- }
- /**The lookahead of a (...)+ block is the combined lookahead of
- * all alternatives and, if an empty path is found, the lookahead
- * of what follows the block.
- */
- public Lookahead look(int k, OneOrMoreBlock blk) {
- if ( DEBUG_ANALYZER ) System.out.println("look+"+k+","+blk+")");
- Lookahead p = look(k, (AlternativeBlock)blk);
- return p;
- }
- /**Combine the lookahead computed for each alternative.
- * Lock the node so that no other computation may come back
- * on itself--infinite loop. This also implies infinite left-recursion
- * in the grammar (or an error in this algorithm ;)).
- */
- public Lookahead look(int k, RuleBlock blk) {
- if ( DEBUG_ANALYZER ) System.out.println("lookRuleBlk("+k+","+blk+")");
- Lookahead p = look(k, (AlternativeBlock)blk);
- return p;
- }
- /**If not locked or noFOLLOW set, compute FOLLOW of a rule.
- * <p>
- * TJP says 8/12/99: not true anymore:
- * Lexical rules never compute follow. They set epsilon and
- * the code generator gens code to check for any character.
- * The code generator must remove the tokens used to predict
- * any previous alts in the same block.
- * <p>
- * When the last node of a rule is reached and noFOLLOW,
- * it implies that a "local" FOLLOW will be computed
- * after this call. I.e.,
- * <pre>
- * a : b A;
- * b : B | ;
- * c : b C;
- * </pre>
- * Here, when computing the look of rule b from rule a,
- * we want only {B,EPSILON_TYPE} so that look(b A) will
- * be {B,A} not {B,A,C}.
- * <p>
- * if the end block is not locked and the FOLLOW is
- * wanted, the algorithm must compute the lookahead
- * of what follows references to this rule. If
- * end block is locked, FOLLOW will return an empty set
- * with a cycle to the rule associated with this end block.
- */
- public Lookahead look(int k, RuleEndElement end) {
- if ( DEBUG_ANALYZER )
- System.out.println("lookRuleBlockEnd("+k+"); noFOLLOW="+
- end.noFOLLOW+"; lock is "+end.lock[k]);
- if ( /*lexicalAnalysis ||*/ end.noFOLLOW ) {
- Lookahead p = new Lookahead();
- p.setEpsilon();
- p.epsilonDepth = BitSet.of(k);
- return p;
- }
- Lookahead p = FOLLOW(k,end);
- return p;
- }
- /**Compute the lookahead contributed by a rule reference.
- *
- * <p>
- * When computing ruleref lookahead, we don't want the FOLLOW
- * computation done if an empty path exists for the rule.
- * The FOLLOW is too loose of a set...we want only to
- * include the "local" FOLLOW or what can follow this
- * particular ref to the node. In other words, we use
- * context information to reduce the complexity of the
- * analysis and strengthen the parser.
- *
- * The noFOLLOW flag is used as a means of restricting
- * the FOLLOW to a "local" FOLLOW. This variable is
- * orthogonal to the <tt>lock</tt> variable that prevents
- * infinite recursion. noFOLLOW does not care about what k is.
- */
- public Lookahead look(int k, RuleRefElement rr) {
- if ( DEBUG_ANALYZER ) System.out.println("lookRuleRef("+k+","+rr+")");
- RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rr.targetRule);
- if ( rs==null || !rs.defined ) {
- tool.error("no definition of rule "+rr.targetRule, grammar.getFilename(), rr.getLine());
- return new Lookahead();
- }
- RuleBlock rb = rs.getBlock();
- RuleEndElement end = rb.endNode;
- boolean saveEnd = end.noFOLLOW;
- end.noFOLLOW = true;
- // go off to the rule and get the lookahead (w/o FOLLOW)
- Lookahead p = look(k, rr.targetRule);
- if ( DEBUG_ANALYZER ) System.out.println("back from rule ref to "+rr.targetRule);
- // restore state of end block
- end.noFOLLOW = saveEnd;
-
- // check for infinite recursion. If a cycle is returned: trouble!
- if ( p.cycle!=null ) {
- tool.error("infinite recursion to rule "+p.cycle+" from rule "+
- rr.enclosingRuleName, grammar.getFilename(), rr.getLine());
- }
- // is the local FOLLOW required?
- if ( p.containsEpsilon() ) {
- if ( DEBUG_ANALYZER )
- System.out.println("rule ref to "+
- rr.targetRule+" has eps, depth: "+p.epsilonDepth);
-
- // remove epsilon
- p.resetEpsilon();
- // fset.clear(EPSILON_TYPE);
-
- // for each lookahead depth that saw epsilon
- int[] depths = p.epsilonDepth.toArray();
- p.epsilonDepth = null; // clear all epsilon stuff
- for (int i=0; i<depths.length; i++) {
- int rk = k - (k-depths[i]);
- Lookahead q = rr.next.look(rk); // see comments in Lookahead
- p.combineWith(q);
- }
- // note: any of these look() computations for local follow can
- // set EPSILON in the set again if the end of this rule is found.
- }
- return p;
- }
- public Lookahead look(int k, StringLiteralElement atom) {
- if ( DEBUG_ANALYZER ) System.out.println("lookStringLiteral("+k+","+atom+")");
- if ( lexicalAnalysis ) {
- // need more lookahead than string can provide?
- if ( k > atom.processedAtomText.length() ) {
- return atom.next.look(k - atom.processedAtomText.length());
- }
- else {
- // get char at lookahead depth k, from the processed literal text
- return Lookahead.of(atom.processedAtomText.charAt(k-1));
- }
- }
- else {
- // Skip until analysis hits k==1
- if ( k>1 ) {
- return atom.next.look(k-1);
- }
- Lookahead l = Lookahead.of(atom.getType());
- if (atom.not) {
- // Invert the lookahead set against the token vocabulary
- int maxToken = grammar.tokenManager.maxTokenType();
- l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
- }
- return l;
- }
- }
- /**The lookahead of a (...)=> block is the lookahead of
- * what follows the block. By definition, the syntactic
- * predicate block defies static analysis (you want to try it
- * out at run-time). The LOOK of (a)=>A B is A for LL(1)
- * ### is this even called?
- */
- public Lookahead look(int k, SynPredBlock blk) {
- if ( DEBUG_ANALYZER ) System.out.println("look=>("+k+","+blk+")");
- return blk.next.look(k);
- }
- public Lookahead look(int k, TokenRangeElement r) {
- if ( DEBUG_ANALYZER ) System.out.println("lookTokenRange("+k+","+r+")");
- // Skip until analysis hits k==1
- if ( k>1 ) {
- return r.next.look(k-1);
- }
- BitSet p = BitSet.of(r.begin);
- for (int i=r.begin+1; i<=r.end; i++) {
- p.add(i);
- }
- return new Lookahead(p);
- }
- public Lookahead look(int k, TreeElement t) {
- if (DEBUG_ANALYZER)
- System.out.println("look(" + k + "," + t.root + "[" + t.root.getType() + "])");
- if (k > 1) {
- return t.next.look(k - 1);
- }
- Lookahead l = null;
- if (t.root instanceof WildcardElement) {
- l = t.root.look(1); // compute FIRST set minus previous rows
- } else {
- l = Lookahead.of(t.root.getType());
- if (t.root.not) {
- // Invert the lookahead set against the token vocabulary
- int maxToken = grammar.tokenManager.maxTokenType();
- l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
- }
- }
- return l;
- }
- public Lookahead look(int k, WildcardElement wc) {
- if ( DEBUG_ANALYZER ) System.out.println("look(" + k + "," + wc + ")");
-
- // Skip until analysis hits k==1
- if ( k>1 ) {
- return wc.next.look(k-1);
- }
- BitSet b;
- if ( lexicalAnalysis ) {
- // Copy the character vocabulary
- b = (BitSet)((LexerGrammar)grammar).charVocabulary.clone();
- }
- else {
- b = new BitSet(1);
- // Invert the lookahead set against the token vocabulary
- int maxToken = grammar.tokenManager.maxTokenType();
- b.notInPlace(Token.MIN_USER_TYPE, maxToken);
- if ( DEBUG_ANALYZER ) System.out.println("look(" + k + "," + wc + ") after not: "+b);
- }
- // Remove prediction sets from competing alternatives
- // removeCompetingPredictionSets(b, wc);
- return new Lookahead(b);
- }
- /** The (...)* element is the combined lookahead of the alternatives and what can
- * follow the loop.
- */
- public Lookahead look(int k, ZeroOrMoreBlock blk) {
- if ( DEBUG_ANALYZER ) System.out.println("look*("+k+","+blk+")");
- Lookahead p = look(k, (AlternativeBlock)blk);
- Lookahead q = blk.next.look(k);
- p.combineWith(q);
- return p;
- }
- /**Compute the combined lookahead for all productions of a rule.
- * If the lookahead returns with epsilon, at least one epsilon
- * path exists (one that consumes no tokens). The noFOLLOW
- * flag being set for this endruleblk, indicates that the
- * a rule ref invoked this rule.
- *
- * Currently only look(RuleRef) calls this. There is no need
- * for the code generator to call this.
- */
- public Lookahead look(int k, String rule) {
- if ( DEBUG_ANALYZER ) System.out.println("lookRuleName("+k+","+rule+")");
- RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rule);
- RuleBlock rb = rs.getBlock();
-
- if ( rb.lock[k] ) {
- if ( DEBUG_ANALYZER )
- System.out.println("infinite recursion to rule "+rb.getRuleName());
- return new Lookahead(rule);
- }
- // have we computed it before?
- if ( rb.cache[k]!=null ) {
- if ( DEBUG_ANALYZER ) {
- System.out.println("found depth "+k+" result in FIRST "+rule+" cache: "+
- rb.cache[k].toString(",", charFormatter, grammar));
- }
- return (Lookahead)rb.cache[k].clone();
- }
- rb.lock[k] = true;
- Lookahead p = look(k, (RuleBlock)rb);
- rb.lock[k] = false;
- // cache results
- rb.cache[k] = (Lookahead)p.clone();
- if ( DEBUG_ANALYZER ) {
- System.out.println("saving depth "+k+" result in FIRST "+rule+" cache: "+
- rb.cache[k].toString(",", charFormatter, grammar));
- }
- return p;
- }
- /** If the first k-1 sets are singleton sets, the appoximate
- * lookahead analysis is equivalent to full lookahead analysis.
- */
- public static boolean lookaheadEquivForApproxAndFullAnalysis(Lookahead[] bset, int k) {
- // first k-1 sets degree 1?
- for (int i=1; i<=k-1; i++) {
- BitSet look = bset[i].fset;
- if ( look.degree()>1 ) {
- return false;
- }
- }
- return true;
- }
- /** Remove the prediction sets from preceding alternatives
- * and follow set, but *only* if this element is the first element
- * of the alternative. The class members currenBlock and
- * currentBlock.analysisAlt must be set correctly.
- * @param b The prediction bitset to be modified
- * @el The element of interest
- */
- private void removeCompetingPredictionSets(BitSet b, AlternativeElement el) {
- // Only do this if the element is the first element of the alt,
- // because we are making an implicit assumption that k==1.
- GrammarElement head = currentBlock.getAlternativeAt(currentBlock.analysisAlt).head;
- // if element is #(. blah) then check to see if el is root
- if ( head instanceof TreeElement ) {
- if ( ((TreeElement) head).root != el ) {
- return;
- }
- }
- else if ( el != head ) {
- return;
- }
- for (int i = 0; i < currentBlock.analysisAlt; i++) {
- AlternativeElement e = currentBlock.getAlternativeAt(i).head;
- b.subtractInPlace(e.look(1).fset);
- }
- }
- /** Remove the prediction sets from preceding alternatives
- * The class members currenBlock must be set correctly.
- * Remove prediction sets from 1..k.
- * @param look The prediction lookahead to be modified
- * @el The element of interest
- * @k How deep into lookahead to modify
- */
- private void removeCompetingPredictionSetsFromWildcard(Lookahead[] look, AlternativeElement el, int k) {
- for (int d = 1; d <= k; d++) {
- for (int i = 0; i < currentBlock.analysisAlt; i++) {
- AlternativeElement e = currentBlock.getAlternativeAt(i).head;
- look[d].fset.subtractInPlace(e.look(d).fset);
- }
- }
- }
- /** reset the analyzer so it looks like a new one */
- private void reset() {
- grammar = null;
- DEBUG_ANALYZER = false;
- currentBlock = null;
- lexicalAnalysis = false;
- }
- /** Set the grammar for the analyzer */
- public void setGrammar(Grammar g) {
- if (grammar != null) {
- reset();
- }
- grammar = g;
- // Is this lexical?
- lexicalAnalysis = (grammar instanceof LexerGrammar);
- DEBUG_ANALYZER = grammar.analyzerDebug;
- }
- public boolean subruleCanBeInverted(AlternativeBlock blk, boolean forLexer)
- {
- if (
- blk instanceof ZeroOrMoreBlock ||
- blk instanceof OneOrMoreBlock ||
- blk instanceof SynPredBlock
- ) {
- return false;
- }
- // Cannot invert an empty subrule
- if (blk.alternatives.size() == 0) {
- return false;
- }
- // The block must only contain alternatives with a single element,
- // where each element is a char, token, char range, or token range.
- for (int i = 0; i < blk.alternatives.size(); i++) {
- Alternative alt = blk.getAlternativeAt(i);
- // Cannot have anything interesting in the alternative ...
- if (alt.synPred != null || alt.semPred != null || alt.exceptionSpec != null) {
- return false;
- }
- // ... and there must be one simple element
- AlternativeElement elt = alt.head;
- if (
- !(
- elt instanceof CharLiteralElement ||
- elt instanceof TokenRefElement ||
- elt instanceof CharRangeElement ||
- elt instanceof TokenRangeElement ||
- (elt instanceof StringLiteralElement && !forLexer)
- ) ||
- !(elt.next instanceof BlockEndElement) ||
- elt.getAutoGenType() != GrammarElement.AUTO_GEN_NONE
- ) {
- return false;
- }
- }
- return true;
- }
- }