// scanner.cpp
// Uploaded by: kalinte
// Upload date: 2013-04-07
// Archive size: 127k
// File size: 12k
/****************************************************/
/* File: scanner.cpp */
/* The scanner implementation for the C- compiler */
/* Xiang Cui (sean) */
/* 230030782 */
/****************************************************/
#include "scanner.h"

#include <cctype>
#include <cstdlib>
#include <iostream>

#include "map.h"
- map<string,TokenType> keywords;
- Myscanner::Myscanner(const char *FileName)
- {
- keywords["else"]=ELSE;
- keywords["if"]=IF;
- keywords["int"]=INT;
- keywords["return"]=RETURN;
- keywords["void"]=VOID;
- keywords["while"]=WHILE;
- sf.open(FileName);
- if (!sf)
- {
- cout<<"File "<<FileName<<" not found"<<endl;
- exit(1);
- }
- save=true;
- state=START;
- tokens="";
- currentToken=ID;
- lineno=1;
- }
- TokenType Myscanner::scan() //sf is a ifstream object,C- sourcefile.
- {
- tokens="";
- state=START;
- bool flag=true;
- //ofstream listingfile("listing.mns");
- //cout<<endl<<"C- COMPILATION: "<<sfn<<endl;
- //cout<<"COMPILATION START..."<<endl;
- while (state != DONE)
- {
- /* if (sf.eof())
- {
- currentToken=ENDFILE;
- cout<<"COMPILATION OVER..."<<endl;
- tokens="EOF";
- state = DONE;
- return currentToken;
- }
- else
- { */
- sf.get(c); //get a character from sourcefile
- if (sf.eof())
- {
- currentToken=ENDFILE;
- tokens="EOF";
- state = DONE;
- return currentToken;
- }
- if (c=='n') //if current character is 'n',# of Line+1
- {lineno++;}
- save = true;
- switch (state)
- {
- case START:
- if (isdigit(c))//if current char is a digit,change state to INNUM
- state = INNUM;
- else if (isalpha(c))//if current char is a letter,change state to INID
- state = INID;
- //if current char is '=',change state to INEQ,need further recognization to
- //determine if it is a EQ token or a ASSIGN token
- else if (c == '=')
- { // either ASSIGN or EQ
- tokens="=";
- sf.get(c);
- if ( c == '=' )
- {
- currentToken = EQ;
- }
- else
- {
- if(c=='n') lineno--;
- currentToken = ASSIGN;
- // backup in the input
- sf.unget();
- c=' ';
- }
- state = DONE;
- }
- //if current char is '<',change state to INLT,need further recognization to
- //determine if it is a LT token or a LTEQ token
- else if (c == '<')
- state = INLT;
- //if current char is '>',change state to INGT,need further recognization to
- //determine if it is a GT token or a GTEQ token
- else if (c == '>')
- state = INGT;
- //if current char is '!',change state to INNEQ,need further recognization to
- //determine if it is a NEQ token or a ERROR token
- else if (c == '!')
- state = INNEQ;
- //if current char is '/',change state to INSLASH,need further recognization
- //to determine if it will be a LCMNT token or a DIV token
- else if (c == '/')
- state = INSLASH;
- //consider the whitespace characters
- else if ((c == ' ') || (c == 't') || (c == 'n')||(c==13))
- save = false;
- else
- {
- //if not a multicharaters token,it's must be a single character one
- state = DONE;
- //recognize various single character tokens
- switch (c)
- {
- case EOF:
- save = false;
- currentToken = ENDFILE;
- break;
- case '+':
- currentToken = PLUS;
- break;
- case '-':
- currentToken = MINUS;
- break;
- case '*':
- currentToken = TIMES;
- break;
- case '(':
- currentToken = LPAREN;
- break;
- case ')':
- currentToken = RPAREN;
- break;
- case ';':
- currentToken = SEMI;
- break;
- case ',':
- currentToken = COMMA;
- break;
- case '[':
- currentToken = LSQR;
- break;
- case ']':
- currentToken = RSQR;
- break;
- case '{':
- currentToken = LCRLY;
- break;
- case '}':
- currentToken = RCRLY;
- break;
- default:
- currentToken = ERROR;
- break;
- }
- }
- break;
- //deal with double characters tokens
- case INLCMNT:
- save = false;
- if (c == EOF)
- {
- state = DONE;
- currentToken = ENDFILE;
- }
- //if current state is in INLCMNT and input character
- //is '*',change state to INRCMNT,
- else if (c == '*') {state = INRCMNT;/* if(flag) {cout<<"/* ";flag=false;}*/}
- break;
- case INRCMNT:
- save = false;
- if (c == EOF)
- {
- state = DONE;
- currentToken = ENDFILE;
- }
- //if current state is in INRCMNT and input character
- //is '/',change state to START(comments is end),
- else if (c == '/') {save=false;state = DONE;currentToken = RCMNT;flag=false;}
- else state=INLCMNT;
- break;
- case INEQ:
- state = DONE;
- //if current state is INEQ and input character
- //is '=',currentToken =EQ
- if (c == '=')
- currentToken =EQ;
- else
- { if(c=='n') lineno--;
- sf.unget();
- c=' ';
- currentToken =ASSIGN;//otherwise it's a sort of assignment
- }
- break;
- case INNEQ:
- state = DONE;
- //if current state is INNEQ and input character is '=',
- //currentToken =NEQ
- if (c == '=')
- currentToken =NEQ;
- else
- { if(c=='n') lineno--;
- sf.unget();
- c=' ';
- currentToken =ERROR; //otherwise EORROR occured
- }
- break;
- //if current state is INLT and input character
- //is '=',currentToken =LTEQ
- case INLT:
- state = DONE;
- if (c == '=')
- currentToken =LTEQ;
- else
- { if(c=='n') lineno--;
- sf.unget();
- c=' ';
- currentToken =LT; //otherwise it's a lessthan token
- }
- break;
- //if current state is INGT and input character is '=',currentToken =GTEQ
- case INGT:
- state = DONE;
- if (c == '=')
- currentToken =GTEQ;
- else
- { if(c=='n') lineno--;
- sf.unget();
- c=' ';
- currentToken = GT; //otherwise it's a ">=" token
- }
- break;
- //if current state is INGT and input character
- //is '*',currentToken ='/*' ,and change state to INLCMNT
- case INSLASH:
- //!!!!!state = DONE;
- if (c == '*')
- {
- currentToken =LCMNT;
- state= INLCMNT;
- save=false;
- }
- else
- { if(c=='n') lineno--;
- sf.unget();
- c=' ';
- currentToken=DIV;//otherwise it's a "/" token
- state=DONE;
- }
- break;
- //if current state is INNUM and input character isn't
- //a digit,currentToken is a number ,change state to
- //DONE,back up current char as well
- case INNUM:
- if(isalpha(c))
- {
- currentToken = ERROR;
- state = ERR;
- }
- if (!isalpha(c)&&!isdigit(c))
- { if(c=='n') lineno--;
- sf.unget();
- c=' ';
- save = false;
- state = DONE;
- currentToken = NUM;
- }
- break;
- //if current state is INID and input character
- //isn't a letter,currentToken is a ID ,change
- //state to DONE,back up current char as well
- case INID:
- if(isdigit(c))
- {
- currentToken = ERROR;
- state = ERR;
- }
- if (!isalpha(c)&&!isdigit(c))
- { if(c=='n') lineno--;
- sf.unget();
- c=' ';
- save = false;
- state = DONE;
- currentToken = ID;
- }
- break;
- case ERR:
- if (!isalpha(c)&&!isdigit(c))
- {
- sf.unget();
- c=' ';
- state = DONE;
- }
- break;
- case DONE:
- default:
- state = DONE;
- currentToken = ERROR;
- break;
- }
- if (save)
- tokens =tokens+c;
- if ((state==DONE))
- {
- if (currentToken == ID)
- currentToken = reservedLookup(tokens);
- flag=false;
- cout<<"Line:"<<lineno<<" ";
- //printToken(currentToken,tokens);
- //state=START;
- if(currentToken== RCMNT){scan();}
- flag=true;
- }
- }
- //}
- return currentToken;
- }
- void Myscanner::printToken(TokenType token, string tokenString)
- {
- switch (token)
- {
- case ELSE:
- case INT:
- case IF:
- case RETURN:
- case VOID:
- case WHILE:
- cout<<"reserved word: "<<tokenString<<endl;break;
- case ASSIGN: cout<<"="<<endl; break;
- case LT: cout<<"<"<<endl; break;
- case EQ: cout<<"=="<<endl; break;
- case LPAREN: cout<<"("<<endl; break;
- case RPAREN: cout<<")"<<endl; break;
- case SEMI: cout<<";"<<endl; break;
- case PLUS: cout<<"+"<<endl; break;
- case MINUS:cout<<"-"<<endl; break;
- case TIMES: cout<<"*"<<endl; break;
- case GT: cout<<">"<<endl; break;
- case DIV: cout<<"/"<<endl; break;
- case LTEQ: cout<<"<="<<endl; break;
- case GTEQ: cout<<">="<<endl; break;
- case NEQ: cout<<"!="<<endl; break;
- case LSQR: cout<<"["<<endl; break;
- case RSQR: cout<<"]"<<endl; break;
- case LCRLY: cout<<"{"<<endl; break;
- case RCRLY: cout<<"}"<<endl; break;
- case LCMNT: cout<<"/*"<<endl; break;
- case RCMNT: cout<<"*/"<<endl; break;
- case COMMA: cout<<","<<endl; break;
- case ENDFILE: cout<<"EOF"<<endl; break;
- case NUM:
- cout<<"NUM, val="<<tokenString<<endl;
- break;
- case ID:
- cout<<"ID, name="<<tokenString<<endl;
- break;
- case ERROR:
- cout<<"ERROR: "<<tokenString<<endl;
- break;
- default: /* should never happen */
- cout<<"ERROR: should never happen"<<endl;
- }
- }
- TokenType Myscanner::reservedLookup (string str)
- {
- if (keywords.find(str)!=keywords.end())
- {
- return keywords[str];
- }
- return ID;
- }