scanner.cpp
上传用户:kalinte
上传日期:2013-04-07
资源大小:127k
文件大小:12k
源码类别:

DNA

开发平台:

C++ Builder

  1. /****************************************************/
  2. /* File: scanner.cpp                                */
  3. /* The scanner implementation for the C- compiler   */
  4. /* Xiang Cui (sean)                                 */
  5. /* 230030782                                        */
  6. /****************************************************/
  7. #include "scanner.h"
  8. #include "map.h"
// Reserved-word table: maps a keyword's spelling to its token type.
// It is filled by the Myscanner constructor and consulted by
// Myscanner::reservedLookup().
map<string,TokenType> keywords;
  10. Myscanner::Myscanner(const char *FileName)
  11. {
  12.     keywords["else"]=ELSE;
  13.     keywords["if"]=IF;
  14.     keywords["int"]=INT;
  15.     keywords["return"]=RETURN;
  16.     keywords["void"]=VOID;
  17.     keywords["while"]=WHILE;
  18.     sf.open(FileName);
  19.     if (!sf)
  20.     {
  21.         cout<<"File "<<FileName<<" not found"<<endl;
  22.         exit(1);
  23.     }
  24.     save=true;
  25.     state=START;
  26.     tokens="";
  27.     currentToken=ID;
  28.     lineno=1;
  29. }
  30. TokenType Myscanner::scan() //sf is a ifstream object,C- sourcefile.
  31. {
  32.     tokens="";
  33.     state=START;
  34.     bool flag=true;
  35.     //ofstream listingfile("listing.mns");
  36.     //cout<<endl<<"C- COMPILATION: "<<sfn<<endl;
  37.     //cout<<"COMPILATION START..."<<endl;
  38.     while (state != DONE)
  39.     {
  40.         /* if (sf.eof())
  41.          {
  42.              currentToken=ENDFILE;
  43.              cout<<"COMPILATION OVER..."<<endl;
  44.              tokens="EOF";
  45.              state = DONE;
  46.              return currentToken;
  47.          }
  48.          else
  49.          {  */
  50.         sf.get(c);   //get a character from sourcefile
  51.         if (sf.eof())
  52.         {
  53.             currentToken=ENDFILE;
  54.             tokens="EOF";
  55.             state = DONE;
  56.             return currentToken;
  57.         }
  58.         if (c=='n') //if current character is 'n',# of Line+1
  59.         {lineno++;}
  60.         save = true;
  61.         switch (state)
  62.         {
  63.         case START:
  64.             if (isdigit(c))//if current char is a digit,change state to INNUM
  65.                 state = INNUM;
  66.             else if (isalpha(c))//if current char is a letter,change state to INID
  67.                 state = INID;
  68.             //if current char is '=',change state to INEQ,need further recognization to
  69.             //determine if it is a EQ token or a ASSIGN token
  70.             else if (c == '=')
  71.             {   // either ASSIGN or EQ
  72.                 tokens="=";
  73.                 sf.get(c);
  74.                 if ( c == '=' )
  75.                 {
  76.                     currentToken = EQ;
  77.                 }
  78.                 else
  79.                 {
  80.                     if(c=='n') lineno--;
  81.                     currentToken = ASSIGN;
  82.                     // backup in the input
  83.                     sf.unget();
  84.                     c='';
  85.                 }
  86.                 state = DONE;
  87.             }
  88.             //if current char is '<',change state to INLT,need further recognization to
  89.             //determine if it is a LT token or a LTEQ token
  90.             else if (c == '<')
  91.                 state = INLT;
  92.             //if current char is '>',change state to INGT,need further recognization to
  93.             //determine if it is a GT token or a GTEQ token
  94.             else if (c == '>')
  95.                 state = INGT;
  96.             //if current char is '!',change state to INNEQ,need further recognization to
  97.             //determine if it is a NEQ token or a ERROR token
  98.             else if (c == '!')
  99.                 state = INNEQ;
  100.             //if current char is '/',change state to INSLASH,need further recognization
  101.             //to determine if it will be a LCMNT token or a DIV token
  102.             else if (c == '/')
  103.                 state = INSLASH;
  104.             //consider the whitespace characters
  105.             else if ((c == ' ') || (c == 't') || (c == 'n')||(c==13))
  106.                 save = false;
  107.             else
  108.             {
  109.                 //if not a multicharaters token,it's must be a single character one
  110.                 state = DONE;
  111.                 //recognize various single character tokens
  112.                 switch (c)
  113.                 {
  114.                 case EOF:
  115.                     save = false;
  116.                     currentToken = ENDFILE;
  117.                     break;
  118.                 case '+':
  119.                     currentToken = PLUS;
  120.                     break;
  121.                 case '-':
  122.                     currentToken = MINUS;
  123.                     break;
  124.                 case '*':
  125.                     currentToken = TIMES;
  126.                     break;
  127.                 case '(':
  128.                     currentToken = LPAREN;
  129.                     break;
  130.                 case ')':
  131.                     currentToken = RPAREN;
  132.                     break;
  133.                 case ';':
  134.                     currentToken = SEMI;
  135.                     break;
  136.                 case ',':
  137.                     currentToken = COMMA;
  138.                     break;
  139.                 case '[':
  140.                     currentToken = LSQR;
  141.                     break;
  142.                 case ']':
  143.                     currentToken = RSQR;
  144.                     break;
  145.                 case '{':
  146.                     currentToken = LCRLY;
  147.                     break;
  148.                 case '}':
  149.                     currentToken = RCRLY;
  150.                     break;
  151.                 default:
  152.                     currentToken = ERROR;
  153.                     break;
  154.                 }
  155.             }
  156.             break;
  157.             //deal with double characters tokens
  158.         case INLCMNT:
  159.             save = false;
  160.             if (c == EOF)
  161.             {
  162.                 state = DONE;
  163.                 currentToken = ENDFILE;
  164.             }
  165.             //if current state is in INLCMNT and input character
  166.             //is '*',change state to INRCMNT,
  167.             else if (c == '*') {state = INRCMNT;/* if(flag) {cout<<"/* ";flag=false;}*/}
  168.             break;
  169.         case INRCMNT:
  170.             save = false;
  171.             if (c == EOF)
  172.             {
  173.                 state = DONE;
  174.                 currentToken = ENDFILE;
  175.             }
  176.             //if current state is in INRCMNT and input character
  177.             //is '/',change state to START(comments is end),
  178.             else if (c == '/') {save=false;state = DONE;currentToken = RCMNT;flag=false;}
  179.             else state=INLCMNT;
  180.             break;
  181.         case INEQ:
  182.             state = DONE;
  183.             //if current state is INEQ and input character
  184.             //is '=',currentToken =EQ
  185.             if (c == '=')
  186.                 currentToken =EQ;
  187.             else
  188.             {   if(c=='n') lineno--;
  189.                 sf.unget();
  190.                 c='';
  191.                 currentToken =ASSIGN;//otherwise it's a sort of assignment
  192.             }
  193.             break;
  194.         case INNEQ:
  195.             state = DONE;
  196.             //if current state is INNEQ and input character is '=',
  197.             //currentToken =NEQ
  198.             if (c == '=')
  199.                 currentToken =NEQ;
  200.             else
  201.             {   if(c=='n') lineno--;
  202.                 sf.unget();
  203.                 c='';
  204.                 currentToken =ERROR; //otherwise EORROR occured
  205.             }
  206.             break;
  207.             //if current state is INLT and input character
  208.             //is '=',currentToken =LTEQ
  209.         case INLT:
  210.             state = DONE;
  211.             if (c == '=')
  212.                 currentToken =LTEQ;
  213.             else
  214.             {   if(c=='n') lineno--;
  215.                 sf.unget();
  216.                 c='';
  217.                 currentToken =LT; //otherwise it's a lessthan token
  218.             }
  219.             break;
  220.             //if current state is INGT and input character is '=',currentToken =GTEQ
  221.         case INGT:
  222.             state = DONE;
  223.             if (c == '=')
  224.                 currentToken =GTEQ;
  225.             else
  226.             {   if(c=='n') lineno--;
  227.                 sf.unget();
  228.                 c='';
  229.                 currentToken = GT; //otherwise it's a ">=" token
  230.             }
  231.             break;
  232.             //if current state is INGT and input character
  233.             //is '*',currentToken ='/*' ,and change state to INLCMNT
  234.         case INSLASH:
  235.             //!!!!!state = DONE;
  236.             if (c == '*')
  237.             {
  238.                 currentToken =LCMNT;
  239.                 state= INLCMNT;
  240.                 save=false;
  241.             }
  242.             else
  243.             {   if(c=='n') lineno--;
  244.                 sf.unget();
  245.                 c='';
  246.                 currentToken=DIV;//otherwise it's a "/" token
  247.                 state=DONE;
  248.             }
  249.             break;
  250.             //if current state is INNUM and input character isn't
  251.             //a digit,currentToken is a number ,change state to
  252.             //DONE,back up current char as well
  253.         case INNUM:
  254.             if(isalpha(c))
  255.             {
  256.                 currentToken = ERROR;
  257.                 state = ERR;
  258.             }
  259.             if (!isalpha(c)&&!isdigit(c))
  260.             {   if(c=='n') lineno--;
  261.                 sf.unget();
  262.                 c='';
  263.                 save = false;
  264.                 state = DONE;
  265.                 currentToken = NUM;
  266.             }
  267.             break;
  268.             //if current state is INID and input character
  269.             //isn't a letter,currentToken is a ID ,change
  270.             //state to DONE,back up current char as well
  271.         case INID:
  272.             if(isdigit(c))
  273.             {
  274.                 currentToken = ERROR;
  275.                 state = ERR;
  276.             }
  277.             if (!isalpha(c)&&!isdigit(c))
  278.             {   if(c=='n') lineno--;
  279.                 sf.unget();
  280.                 c='';
  281.                 save = false;
  282.                 state = DONE;
  283.                 currentToken = ID;
  284.             }
  285.             break;
  286.         case ERR:
  287.             if (!isalpha(c)&&!isdigit(c))
  288.             {
  289.                 sf.unget();
  290.                 c='';
  291.                 state = DONE;
  292.             }
  293.             break;
  294.         case DONE:
  295.         default:
  296.             state = DONE;
  297.             currentToken = ERROR;
  298.             break;
  299.         }
  300.         if (save)
  301.             tokens =tokens+c;
  302.         if ((state==DONE))
  303.         {
  304.             if (currentToken == ID)
  305.                 currentToken = reservedLookup(tokens);
  306.             flag=false;
  307.             cout<<"Line:"<<lineno<<" ";
  308.             //printToken(currentToken,tokens);
  309.             //state=START;
  310.         if(currentToken== RCMNT){scan();}
  311.             flag=true;
  312.         }
  313.     }
  314.     //}
  315.     return currentToken;
  316. }
  317. void Myscanner::printToken(TokenType token, string tokenString)
  318. {
  319.     switch (token)
  320.     {
  321.     case ELSE:
  322.     case INT:
  323.     case IF:
  324.     case RETURN:
  325.     case VOID:
  326.     case WHILE:
  327.         cout<<"reserved word: "<<tokenString<<endl;break;
  328.     case ASSIGN: cout<<"="<<endl; break;
  329.     case LT: cout<<"<"<<endl; break;
  330.     case EQ: cout<<"=="<<endl; break;
  331.     case LPAREN: cout<<"("<<endl; break;
  332.     case RPAREN: cout<<")"<<endl; break;
  333.     case SEMI: cout<<";"<<endl; break;
  334.     case PLUS: cout<<"+"<<endl; break;
  335.     case MINUS:cout<<"-"<<endl; break;
  336.     case TIMES: cout<<"*"<<endl; break;
  337.     case GT: cout<<">"<<endl; break;
  338.     case DIV: cout<<"/"<<endl; break;
  339.     case LTEQ: cout<<"<="<<endl; break;
  340.     case GTEQ: cout<<">="<<endl; break;
  341.     case NEQ: cout<<"!="<<endl; break;
  342.     case LSQR: cout<<"["<<endl; break;
  343.     case RSQR: cout<<"]"<<endl; break;
  344.     case LCRLY: cout<<"{"<<endl; break;
  345.     case RCRLY: cout<<"}"<<endl; break;
  346.     case LCMNT: cout<<"/*"<<endl; break;
  347.     case RCMNT: cout<<"*/"<<endl; break;
  348.     case COMMA: cout<<","<<endl; break;
  349.     case ENDFILE: cout<<"EOF"<<endl; break;
  350.     case NUM:
  351.         cout<<"NUM, val="<<tokenString<<endl;
  352.         break;
  353.     case ID:
  354.         cout<<"ID, name="<<tokenString<<endl;
  355.         break;
  356.     case ERROR:
  357.         cout<<"ERROR: "<<tokenString<<endl;
  358.         break;
  359.     default: /* should never happen */
  360.         cout<<"ERROR: should never happen"<<endl;
  361.     }
  362. }
  363. TokenType Myscanner::reservedLookup (string str)
  364. {
  365.     if (keywords.find(str)!=keywords.end())
  366.     {
  367.         return keywords[str];
  368.     }
  369.     return ID;
  370. }