Analyzer.cs
上传用户:zhangkuixh
上传日期:2013-09-30
资源大小:5473k
文件大小:3k
- /*
- * Copyright 2004 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- using System;
namespace Lucene.Net.Analysis
{

    /// <summary>
    /// An Analyzer builds TokenStreams, which analyze text. It therefore
    /// represents a policy for extracting index terms from text.
    /// </summary>
    /// <remarks>
    /// <para>
    /// Typical implementations first build a Tokenizer, which splits the
    /// characters supplied by the reader into raw Tokens, and then apply one
    /// or more TokenFilters to the Tokenizer's output.
    /// </para>
    /// <para>
    /// WARNING: the two <c>TokenStream</c> overloads below are implemented in
    /// terms of each other. A subclass must override at least one of them;
    /// otherwise a call to either overload recurses without end.
    /// </para>
    /// </remarks>
    public abstract class Analyzer
    {
        /// <summary>
        /// Creates a TokenStream that tokenizes all the text in the provided
        /// reader.
        /// </summary>
        /// <remarks>
        /// The default implementation ignores <paramref name="fieldName"/> and
        /// forwards to <see cref="TokenStream(System.IO.TextReader)"/> for
        /// compatibility with older versions. Override this overload to let
        /// the analyzer choose a strategy based on the document and/or field.
        /// Overrides must tolerate a null field name, because the
        /// single-argument overload passes null here.
        /// </remarks>
        /// <param name="fieldName">Name of the field being analyzed; may be null.</param>
        /// <param name="reader">Source of the text to tokenize.</param>
        /// <returns>The token stream produced for the text.</returns>
        public virtual TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
        {
            // Backward compatibility: delegate to the legacy reader-only overload.
            return this.TokenStream(reader);
        }

        /// <summary>
        /// Creates a TokenStream that tokenizes all the text in the provided
        /// reader. Provided for backward compatibility only.
        /// </summary>
        /// <remarks>
        /// Deprecated: use
        /// <see cref="TokenStream(System.String, System.IO.TextReader)"/> instead.
        /// The default implementation forwards to that overload with a null
        /// field name.
        /// </remarks>
        /// <param name="reader">Source of the text to tokenize.</param>
        /// <returns>The token stream produced for the text.</returns>
        public virtual TokenStream TokenStream(System.IO.TextReader reader)
        {
            // No field name is available on this legacy path, so pass null.
            return this.TokenStream(null, reader);
        }

        /// <summary>
        /// Invoked before indexing a Field instance if terms have already been
        /// added to that field. This lets custom analyzers place an automatic
        /// position increment gap between Field instances that share the same
        /// field name.
        /// </summary>
        /// <remarks>
        /// The default gap is 0. With a gap of 0 and the typical default token
        /// position increment of 1, all terms in a field, including across
        /// Field instances, occupy successive positions, which allows, for
        /// instance, exact PhraseQuery matches across Field instance
        /// boundaries.
        /// </remarks>
        /// <param name="fieldName">Field name being indexed.</param>
        /// <returns>
        /// The position increment gap, added to the next token emitted from
        /// <see cref="TokenStream(System.String, System.IO.TextReader)"/>.
        /// </returns>
        public virtual int GetPositionIncrementGap(System.String fieldName)
        {
            // Default policy: no extra gap between Field instances.
            return 0;
        }
    }
}