CharScanner.cpp
上传用户:afrynkmhm
上传日期:2007-01-06
资源大小:1262k
文件大小:9k
- /**
- * <b>SOFTWARE RIGHTS</b>
- * <p>
- * ANTLR 2.6.0 MageLang Insitute, 1998
- * <p>
- * We reserve no legal rights to the ANTLR--it is fully in the
- * public domain. An individual or company may do whatever
- * they wish with source code distributed with ANTLR or the
- * code generated by ANTLR, including the incorporation of
- * ANTLR, or its output, into commerical software.
- * <p>
- * We encourage users to develop software with ANTLR. However,
- * we do ask that credit is given to us for developing
- * ANTLR. By "credit", we mean that if you use ANTLR or
- * incorporate any source code into one of your programs
- * (commercial product, research project, or otherwise) that
- * you acknowledge this fact somewhere in the documentation,
- * research report, etc... If you like ANTLR and have
- * developed a nice tool with the output, please mention that
- * you developed it using ANTLR. In addition, we ask that the
- * headers remain intact in our source code. As long as these
- * guidelines are kept, we expect to continue enhancing this
- * system and expect to make other tools available as they are
- * completed.
- * <p>
- * The ANTLR gang:
- * @version ANTLR 2.6.0 MageLang Insitute, 1998
- * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a>
- * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a>
- * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a>
- */
- #include "antlr/CharScanner.hpp"
- #include "antlr/CommonToken.hpp"
- #include "antlr/MismatchedCharException.hpp"
- #include "antlr/TokenStream.hpp"
- #include <map>
- #ifdef HAS_NOT_CCTYPE_H
- #include <ctype.h>
- #else
- #include <cctype>
- #endif
- #include <iostream>
- #ifdef HAS_NOT_CSTRING_H
- #include <string>
- #else
- #include <cstring>
- #endif
- ANTLR_BEGIN_NAMESPACE(antlr)
- CharScannerLiteralsLess::CharScannerLiteralsLess(const CharScanner* theScanner)
- : scanner(theScanner)
- {}
- bool CharScannerLiteralsLess::operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const
- {
- if (scanner->getCaseSensitiveLiterals()) {
- return ANTLR_USE_NAMESPACE(std)less<ANTLR_USE_NAMESPACE(std)string>()(x,y);
- } else {
- #ifdef NO_STRCASECMP
- return (stricmp(x.c_str(),y.c_str())<0);
- #else
- return (strcasecmp(x.c_str(),y.c_str())<0);
- #endif
- }
- }
- CharScanner::CharScanner(InputBuffer& cb)
- : saveConsumedInput(true) //, caseSensitiveLiterals(true)
- , literals(CharScannerLiteralsLess(this))
- , inputState(new LexerInputState(cb))
- , commitToPath(false)
- {
- setTokenObjectFactory(&CommonToken::factory);
- }
- CharScanner::CharScanner(InputBuffer* cb)
- : saveConsumedInput(true) //, caseSensitiveLiterals(true)
- , literals(CharScannerLiteralsLess(this))
- , inputState(new LexerInputState(cb))
- , commitToPath(false)
- {
- setTokenObjectFactory(&CommonToken::factory);
- }
- CharScanner::CharScanner(const LexerSharedInputState& state)
- : saveConsumedInput(true) //, caseSensitiveLiterals(true)
- , literals(CharScannerLiteralsLess(this))
- , inputState(state)
- , commitToPath(false)
- {
- setTokenObjectFactory(&CommonToken::factory);
- }
- CharScanner::~CharScanner()
- {
- }
- void CharScanner::append(char c)
- {
- if (saveConsumedInput)
- text+=c;
- }
- void CharScanner::append(const ANTLR_USE_NAMESPACE(std)string& s)
- {
- if (saveConsumedInput)
- text+=s;
- }
- void CharScanner::commit()
- {
- inputState->getInput().commit();
- }
- void CharScanner::consume()
- {
- if (inputState->guessing == 0) {
- if (caseSensitive) {
- append(LA(1));
- } else {
- // use input.LA(), not LA(), to get original case
- // CharScanner.LA() would toLower it.
- append(inputState->getInput().LA(1));
- }
- }
- inputState->getInput().consume();
- }
- /** Consume chars until one matches the given char */
- void CharScanner::consumeUntil(int c)
- {
- while (LA(1) != EOF_CHAR && LA(1) != c)
- {
- consume();
- }
- }
- /** Consume chars until one matches the given set */
- void CharScanner::consumeUntil(const BitSet& set)
- {
- while (LA(1) != EOF_CHAR && !set.member(LA(1))) {
- consume();
- }
- }
- bool CharScanner::getCaseSensitive() const
- { return caseSensitive; }
- //bool CharScanner::getCaseSensitiveLiterals() const
- //{ return caseSensitiveLiterals; }
- int CharScanner::getColumn() const
- { return inputState->column; }
- bool CharScanner::getCommitToPath() const
- { return commitToPath; }
- const ANTLR_USE_NAMESPACE(std)string& CharScanner::getFilename() const
- { return inputState->filename; }
- InputBuffer& CharScanner::getInputBuffer()
- { return inputState->getInput(); }
- LexerSharedInputState CharScanner::getInputState()
- { return inputState; }
- int CharScanner::getLine() const
- { return inputState->line; }
- // return a copy of the current text buffer
- const ANTLR_USE_NAMESPACE(std)string& CharScanner::getText() const
- { return text; }
- RefToken CharScanner::getTokenObject() const
- { return _returnToken; }
- int CharScanner::LA(int i)
- {
- if ( caseSensitive ) {
- return inputState->getInput().LA(i);
- } else {
- return toLower(inputState->getInput().LA(i));
- }
- }
- RefToken CharScanner::makeToken(int t)
- {
- RefToken tok=tokenFactory();
- tok->setType(t);
- tok->setLine(inputState->line);
- return tok;
- }
- int CharScanner::mark()
- {
- return inputState->getInput().mark();
- }
- void CharScanner::match(int c)
- {
- if ( LA(1) != c ) {
- throw MismatchedCharException(LA(1),c,false,this);
- }
- consume();
- }
- void CharScanner::match(const BitSet& b)
- {
- if (!b.member(LA(1))) {
- throw MismatchedCharException(LA(1),b,false,this);
- }
- consume();
- }
- void CharScanner::match(const ANTLR_USE_NAMESPACE(std)string& s)
- {
- int len = s.length();
- for (int i=0; i<len; i++) {
- if ( LA(1) != s[i] ) {
- throw MismatchedCharException(LA(1),s[i],false,this);
- }
- consume();
- }
- }
- void CharScanner::matchNot(int c)
- {
- if ( LA(1) == c ) {
- throw MismatchedCharException(LA(1),c,true,this);
- }
- consume();
- }
- void CharScanner::matchRange(int c1, int c2)
- {
- if (LA(1)<c1 || LA(1)>c2) {
- throw MismatchedCharException(LA(1),c1,c2,false,this);
- }
- consume();
- }
- void CharScanner::newline()
- { ++inputState->line; }
- void CharScanner::panic()
- {
- ANTLR_USE_NAMESPACE(std)cerr << "CharScanner: panic" << ANTLR_USE_NAMESPACE(std)endl;
- exit(1);
- }
- void CharScanner::panic(const ANTLR_USE_NAMESPACE(std)string& s)
- {
- ANTLR_USE_NAMESPACE(std)cerr << "CharScanner: panic: " << s.c_str() << ANTLR_USE_NAMESPACE(std)endl;
- exit(1);
- }
- /** Report exception errors caught in nextToken() */
- void CharScanner::reportError(const RecognitionException& ex)
- {
- ANTLR_USE_NAMESPACE(std)cerr << ex.toString().c_str() << ANTLR_USE_NAMESPACE(std)endl;
- }
- /** Parser error-reporting function can be overridden in subclass */
- void CharScanner::reportError(const ANTLR_USE_NAMESPACE(std)string& s)
- {
- if (getFilename() == "")
- ANTLR_USE_NAMESPACE(std)cerr << "error: " << s.c_str() << ANTLR_USE_NAMESPACE(std)endl;
- else
- ANTLR_USE_NAMESPACE(std)cerr << getFilename().c_str() << ": error: " << s.c_str() << ANTLR_USE_NAMESPACE(std)endl;
- }
- /** Parser warning-reporting function can be overridden in subclass */
- void CharScanner::reportWarning(const ANTLR_USE_NAMESPACE(std)string& s)
- {
- if (getFilename() == "")
- ANTLR_USE_NAMESPACE(std)cerr << "warning: " << s.c_str() << ANTLR_USE_NAMESPACE(std)endl;
- else
- ANTLR_USE_NAMESPACE(std)cerr << getFilename().c_str() << ": warning: " << s.c_str() << ANTLR_USE_NAMESPACE(std)endl;
- }
- void CharScanner::resetText()
- { text=""; }
- void CharScanner::rewind(int pos)
- {
- inputState->getInput().rewind(pos);
- }
- void CharScanner::setCaseSensitive(bool t)
- {
- caseSensitive = t;
- }
- void CharScanner::setCommitToPath(bool commit)
- {
- commitToPath = commit;
- }
- void CharScanner::setFilename(const ANTLR_USE_NAMESPACE(std)string& f)
- { inputState->filename=f; }
- void CharScanner::setInputState(LexerSharedInputState state)
- { inputState = state; }
- void CharScanner::setLine(int l)
- { inputState->line=l; }
- void CharScanner::setText(const ANTLR_USE_NAMESPACE(std)string& s)
- { text=s; }
- void CharScanner::setTokenObjectFactory(factory_type factory)
- { tokenFactory=factory; }
- // Test the token text against the literals table
- // Override this method to perform a different literals test
- int CharScanner::testLiteralsTable(int ttype) const
- {
- ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text);
- if (i != literals.end())
- ttype = (*i).second;
- return ttype;
- }
- // Test the text passed in against the literals table
- // Override this method to perform a different literals test
- // This is used primarily when you want to test a portion of
- // a token.
- int CharScanner::testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& text_, int ttype) const
- {
- ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text_);
- if (i != literals.end())
- ttype = (*i).second;
- return ttype;
- }
- // Override this method to get more specific case handling
- char CharScanner::toLower(char c) const
- {
- return tolower(c);
- }
- void CharScanner::traceIn(const ANTLR_USE_NAMESPACE(std)string& rname)
- {
- ANTLR_USE_NAMESPACE(std)cout << "enter lexer " << rname.c_str() << "; c==" << LA(1) << ANTLR_USE_NAMESPACE(std)endl;
- }
- void CharScanner::traceOut(const ANTLR_USE_NAMESPACE(std)string& rname)
- {
- ANTLR_USE_NAMESPACE(std)cout << "exit lexer " << rname.c_str() << "; c==" << LA(1) << ANTLR_USE_NAMESPACE(std)endl;
- }
- void CharScanner::uponEOF()
- {
- }
- #ifndef NO_STATIC_CONSTS
- const int CharScanner::NO_CHAR;
- const int CharScanner::EOF_CHAR;
- #endif
- ANTLR_END_NAMESPACE