搜索引擎

开发平台：
C#

Analyzer.cs：源码内容
							/*
 * Copyright 2004 The Apache Software Foundation
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
using System;
namespace Lucene.Net.Analysis
{
	
	/// <summary>An Analyzer builds TokenStreams, which analyze text.  It thus represents a
	/// policy for extracting index terms from text.
	/// <para>
	/// Typical implementations first build a Tokenizer, which breaks the stream of
	/// characters from the Reader into raw Tokens.  One or more TokenFilters may
	/// then be applied to the output of the Tokenizer.
	/// </para>
	/// <para>
	/// WARNING: You must override one of the TokenStream overloads in your subclass.
	/// The two default implementations forward to each other, so a subclass that
	/// overrides neither cannot produce a stream; such a call fails with a
	/// <see cref="System.NotSupportedException"/>.
	/// </para>
	/// </summary>
	public abstract class Analyzer
	{
		/// <summary>Creates a TokenStream which tokenizes all the text in the provided
		/// reader.  The default implementation ignores <paramref name="fieldName"/> and
		/// forwards to <see cref="Analyzer.TokenStream(System.IO.TextReader)"/> for
		/// compatibility with older versions.  Override to allow the Analyzer to choose
		/// a strategy based on document and/or field.  Overrides must be able to handle
		/// a null field name for backward compatibility.
		/// </summary>
		/// <param name="fieldName">Name of the field being analyzed; may be null.</param>
		/// <param name="reader">Source of the text to tokenize.</param>
		/// <returns>Whatever the single-argument overload returns for <paramref name="reader"/>.</returns>
		public virtual TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
		{
			// implemented for backward compatibility
			return TokenStream(reader);
		}
		
		/// <summary>Creates a TokenStream which tokenizes all the text in the provided
		/// reader.  Provided for backward compatibility only.  The default
		/// implementation forwards to
		/// <see cref="Analyzer.TokenStream(System.String, System.IO.TextReader)"/> with a
		/// null field name.
		/// </summary>
		/// <remarks>Deprecated: use
		/// <see cref="Analyzer.TokenStream(System.String, System.IO.TextReader)"/> instead.
		/// </remarks>
		/// <param name="reader">Source of the text to tokenize.</param>
		/// <returns>The stream produced by the two-argument overload.</returns>
		/// <exception cref="System.NotSupportedException">The runtime type overrides neither
		/// TokenStream overload, so forwarding would recurse without end.</exception>
		/// <seealso cref="Analyzer.TokenStream(System.String, System.IO.TextReader)"/>
		public virtual TokenStream TokenStream(System.IO.TextReader reader)
		{
			// Execution only reaches this default when the subclass left this overload
			// alone.  If the two-argument overload is also still the one declared on
			// Analyzer, forwarding would bounce between the two defaults until the
			// stack overflows, which cannot be caught and terminates the process.
			// Fail with a catchable exception instead.
			System.Reflection.MethodInfo fieldAware = GetType().GetMethod(
				"TokenStream",
				new System.Type[] { typeof(System.String), typeof(System.IO.TextReader) });
			if (fieldAware != null && fieldAware.DeclaringType == typeof(Analyzer))
			{
				throw new System.NotSupportedException(
					GetType().FullName + " must override one of the Analyzer.TokenStream overloads.");
			}
			return TokenStream(null, reader);
		}
		
		/// <summary>Invoked before indexing a Field instance if
		/// terms have already been added to that field.  This allows custom
		/// analyzers to place an automatic position increment gap between
		/// Field instances using the same field name.  The default position
		/// increment gap is 0.  With a 0 position increment gap and
		/// the typical default token position increment of 1, all terms in a field,
		/// including across Field instances, are in successive positions, allowing
		/// exact PhraseQuery matches, for instance, across Field instance boundaries.
		/// </summary>
		/// <param name="fieldName">Field name being indexed.</param>
		/// <returns>Position increment gap, added to the next token emitted from
		/// <see cref="Analyzer.TokenStream(System.String, System.IO.TextReader)"/>.
		/// This default implementation always returns 0.
		/// </returns>
		public virtual int GetPositionIncrementGap(System.String fieldName)
		{
			return 0;
		}
	}
}