StandardTokenizer.cs
/*
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* Generated By:JavaCC: Do not edit this line. StandardTokenizer.java */

using System;

namespace Lucene.Net.Analysis.Standard
{
    /// <summary>A grammar-based tokenizer constructed with JavaCC.
    ///
    /// <p>This should be a good tokenizer for most European-language documents:</p>
    ///
    /// <ul>
    /// <li>Splits words at punctuation characters, removing punctuation. However, a
    /// dot that's not followed by whitespace is considered part of a token.</li>
    /// <li>Splits words at hyphens, unless there's a number in the token, in which case
    /// the whole token is interpreted as a product number and is not split.</li>
    /// <li>Recognizes email addresses and internet hostnames as one token.</li>
    /// </ul>
    ///
    /// <p>Many applications have specific tokenizer needs. If this tokenizer does
    /// not suit your application, please consider copying this source code
    /// directory to your project and maintaining your own grammar-based tokenizer.</p>
    /// </summary>
    public class StandardTokenizer : Lucene.Net.Analysis.Tokenizer
    {
        /// <summary>Constructs a tokenizer for this Reader.</summary>
        public StandardTokenizer(System.IO.TextReader reader) : this(new FastCharStream(reader))
        {
            this.input = reader;
        }
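        // A minimal usage sketch (not part of the generated file): it assumes the
        // classic Lucene.Net API in which Next() returns a Token, or null at end of
        // stream, and in which Token exposes TermText() and Type(). The identifiers
        // below are illustrative only.
        //
        //     var reader = new System.IO.StringReader("jane.doe@example.com visited lucene.apache.org");
        //     var tokenizer = new StandardTokenizer(reader);
        //     for (Lucene.Net.Analysis.Token t = tokenizer.Next(); t != null; t = tokenizer.Next())
        //     {
        //         System.Console.WriteLine(t.TermText() + " [" + t.Type() + "]");
        //     }
        //     tokenizer.Close();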

        /// <summary>Returns the next token in the stream, or null at EOS.
        /// <p>The returned token's type is set to an element of {@link
        /// StandardTokenizerConstants#tokenImage}.</p>
        /// </summary>
        public override Lucene.Net.Analysis.Token Next()
        {
            Token token = null;
            switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk)
            {
                case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.ALPHANUM:
                    token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.ALPHANUM);
                    break;

                case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.APOSTROPHE:
                    token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.APOSTROPHE);
                    break;

                case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.ACRONYM:
                    token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.ACRONYM);
                    break;

                case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.COMPANY:
                    token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.COMPANY);
                    break;

                case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.EMAIL:
                    token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.EMAIL);
                    break;

                case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.HOST:
                    token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.HOST);
                    break;

                case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.NUM:
                    token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.NUM);
                    break;

                case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.CJ:
                    token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.CJ);
                    break;

                case 0:
                    token = Jj_consume_token(0);
                    break;

                default:
                    jj_la1[0] = jj_gen;
                    Jj_consume_token(-1);
                    throw new ParseException();
            }
            if (token.kind == Lucene.Net.Analysis.Standard.StandardTokenizerConstants.EOF)
            {
                return null;
            }
            else
            {
                return new Lucene.Net.Analysis.Token(token.image, token.beginColumn, token.endColumn, Lucene.Net.Analysis.Standard.StandardTokenizerConstants.tokenImage[token.kind]);
            }
        }
        /// <summary>By default, closes the input Reader.</summary>
        public override void Close()
        {
            token_source.Close();
            base.Close();
        }

        // JavaCC-generated parser state: the token manager, the current and lookahead
        // tokens, and the bookkeeping used for error reporting.
        public StandardTokenizerTokenManager token_source;
        public Token token, jj_nt;
        private int jj_ntk;
        private int jj_gen;
        private int[] jj_la1 = new int[1];
        private static int[] jj_la1_0_Renamed_Field;
        private static void jj_la1_0()
        {
            // Bitmask of the token kinds expected at the choice point in Next();
            // consulted by GenerateParseException().
            jj_la1_0_Renamed_Field = new int[]{0x10ff};
        }

        public StandardTokenizer(CharStream stream)
        {
            token_source = new StandardTokenizerTokenManager(stream);
            token = new Token();
            jj_ntk = -1;
            jj_gen = 0;
            for (int i = 0; i < 1; i++)
                jj_la1[i] = -1;
        }

        public virtual void ReInit(CharStream stream)
        {
            token_source.ReInit(stream);
            token = new Token();
            jj_ntk = -1;
            jj_gen = 0;
            for (int i = 0; i < 1; i++)
                jj_la1[i] = -1;
        }

        public StandardTokenizer(StandardTokenizerTokenManager tm)
        {
            token_source = tm;
            token = new Token();
            jj_ntk = -1;
            jj_gen = 0;
            for (int i = 0; i < 1; i++)
                jj_la1[i] = -1;
        }

        public virtual void ReInit(StandardTokenizerTokenManager tm)
        {
            token_source = tm;
            token = new Token();
            jj_ntk = -1;
            jj_gen = 0;
            for (int i = 0; i < 1; i++)
                jj_la1[i] = -1;
        }

        // Consumes the next token from the token manager and checks that it has the
        // expected kind; on a mismatch the token is pushed back and a ParseException
        // is raised.
        private Token Jj_consume_token(int kind)
        {
            Token oldToken;
            if ((oldToken = token).next != null)
                token = token.next;
            else
                token = token.next = token_source.GetNextToken();
            jj_ntk = -1;
            if (token.kind == kind)
            {
                jj_gen++;
                return token;
            }
            token = oldToken;
            jj_kind = kind;
            throw GenerateParseException();
        }

        public Token GetNextToken()
        {
            if (token.next != null)
                token = token.next;
            else
                token = token.next = token_source.GetNextToken();
            jj_ntk = -1;
            jj_gen++;
            return token;
        }

        // Returns the token 'index' positions ahead of the current one, pulling
        // further tokens from the token manager as needed.
        public Token GetToken(int index)
        {
            Token t = token;
            for (int i = 0; i < index; i++)
            {
                if (t.next != null)
                    t = t.next;
                else
                    t = t.next = token_source.GetNextToken();
            }
            return t;
        }

        // Peeks at the kind of the next token without consuming it.
        private int Jj_ntk()
        {
            if ((jj_nt = token.next) == null)
                return (jj_ntk = (token.next = token_source.GetNextToken()).kind);
            else
                return (jj_ntk = jj_nt.kind);
        }

        private System.Collections.ArrayList jj_expentries = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
        private int[] jj_expentry;
        private int jj_kind = -1;

        public virtual ParseException GenerateParseException()
        {
            jj_expentries.Clear();
            bool[] la1tokens = new bool[16];
            for (int i = 0; i < 16; i++)
            {
                la1tokens[i] = false;
            }
            if (jj_kind >= 0)
            {
                la1tokens[jj_kind] = true;
                jj_kind = -1;
            }
            for (int i = 0; i < 1; i++)
            {
                if (jj_la1[i] == jj_gen)
                {
                    for (int j = 0; j < 32; j++)
                    {
                        if ((jj_la1_0_Renamed_Field[i] & (1 << j)) != 0)
                        {
                            la1tokens[j] = true;
                        }
                    }
                }
            }
            for (int i = 0; i < 16; i++)
            {
                if (la1tokens[i])
                {
                    jj_expentry = new int[1];
                    jj_expentry[0] = i;
                    jj_expentries.Add(jj_expentry);
                }
            }
            int[][] exptokseq = new int[jj_expentries.Count][];
            for (int i = 0; i < jj_expentries.Count; i++)
            {
                exptokseq[i] = (int[]) jj_expentries[i];
            }
            return new ParseException(token, exptokseq, Lucene.Net.Analysis.Standard.StandardTokenizerConstants.tokenImage);
        }

        public void Enable_tracing()
        {
        }

        public void Disable_tracing()
        {
        }

        static StandardTokenizer()
        {
            jj_la1_0();
        }
    }
}
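// A hedged sketch of how this tokenizer is typically combined with token filters.
// It mirrors the classic StandardAnalyzer chain, but the analyzer class below is
// a hypothetical helper and the exact filter names and signatures are assumptions
// about the surrounding Lucene.Net version, not part of this generated file.
//
//     public class MyStandardAnalyzer : Lucene.Net.Analysis.Analyzer   // hypothetical
//     {
//         public override Lucene.Net.Analysis.TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
//         {
//             Lucene.Net.Analysis.TokenStream result = new StandardTokenizer(reader);
//             result = new StandardFilter(result);
//             result = new Lucene.Net.Analysis.LowerCaseFilter(result);
//             result = new Lucene.Net.Analysis.StopFilter(result, Lucene.Net.Analysis.StopAnalyzer.ENGLISH_STOP_WORDS);
//             return result;
//         }
//     }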