搜索引擎

开发平台：
C#

MultiSearcher.cs：源码内容
							/*
 * Copyright 2004 The Apache Software Foundation
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
using System;
using Document = Lucene.Net.Documents.Document;
using Term = Lucene.Net.Index.Term;
namespace Lucene.Net.Search
{
	
	/// <summary>Implements search over a set of <code>Searchables</code>.
	/// 
	/// <p>Applications usually need only call the inherited {@link #Search(Query)}
	/// or {@link #Search(Query,Filter)} methods.
	/// </summary>
	public class MultiSearcher : Searcher
	{
		private class AnonymousClassHitCollector : HitCollector
		{
			public AnonymousClassHitCollector(Lucene.Net.Search.HitCollector results, int start, MultiSearcher enclosingInstance)
			{
				InitBlock(results, start, enclosingInstance);
			}
			private void  InitBlock(Lucene.Net.Search.HitCollector results, int start, MultiSearcher enclosingInstance)
			{
				this.results = results;
				this.start = start;
				this.enclosingInstance = enclosingInstance;
			}
			private Lucene.Net.Search.HitCollector results;
			private int start;
			private MultiSearcher enclosingInstance;
			public MultiSearcher Enclosing_Instance
			{
				get
				{
					return enclosingInstance;
				}
				
			}
			public override void  Collect(int doc, float score)
			{
				results.Collect(doc + start, score);
			}
		}
		/// <summary> Document Frequency cache acting as a Dummy-Searcher.
		/// This class is no full-fledged Searcher, but only supports
		/// the methods necessary to initialize Weights.
		/// </summary>
		private class CachedDfSource:Searcher
		{
			private System.Collections.IDictionary dfMap; // Map from Terms to corresponding doc freqs
			private int maxDoc; // document count
			
			public CachedDfSource(System.Collections.IDictionary dfMap, int maxDoc)
			{
				this.dfMap = dfMap;
				this.maxDoc = maxDoc;
			}
			
			public override int DocFreq(Term term)
			{
				int df;
				try
				{
					df = ((System.Int32) dfMap[term]);
				}
				catch (System.NullReferenceException)
				{
					throw new System.ArgumentException("df for term " + term.Text() + " not available");
				}
				return df;
			}
			
			public override int[] DocFreqs(Term[] terms)
			{
				int[] result = new int[terms.Length];
				for (int i = 0; i < terms.Length; i++)
				{
					result[i] = DocFreq(terms[i]);
				}
				return result;
			}
			
			public override int MaxDoc()
			{
				return maxDoc;
			}
			
			public override Query Rewrite(Query query)
			{
				// this is a bit of a hack. We know that a query which
				// creates a Weight based on this Dummy-Searcher is
				// always already rewritten (see preparedWeight()).
				// Therefore we just return the unmodified query here
				return query;
			}
			
			public override void  Close()
			{
				throw new System.NotSupportedException();
			}
			
			public override Document Doc(int i)
			{
				throw new System.NotSupportedException();
			}
			
			public override Explanation Explain(Weight weight, int doc)
			{
				throw new System.NotSupportedException();
			}
			
			public override void  Search(Weight weight, Filter filter, HitCollector results)
			{
				throw new System.NotSupportedException();
			}
			
			public override TopDocs Search(Weight weight, Filter filter, int n)
			{
				throw new System.NotSupportedException();
			}
			
			public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort)
			{
				throw new System.NotSupportedException();
			}
		}
		
		
		
		private Lucene.Net.Search.Searchable[] searchables;
		private int[] starts;
		private int maxDoc = 0;
		
		/// <summary>Creates a searcher which searches <i>searchables</i>. </summary>
		public MultiSearcher(Lucene.Net.Search.Searchable[] searchables)
		{
			this.searchables = searchables;
			
			starts = new int[searchables.Length + 1]; // build starts array
			for (int i = 0; i < searchables.Length; i++)
			{
				starts[i] = maxDoc;
				maxDoc += searchables[i].MaxDoc(); // compute maxDocs
			}
			starts[searchables.Length] = maxDoc;
		}
		
		/// <summary>Return the array of {@link Searchable}s this searches. </summary>
		public virtual Lucene.Net.Search.Searchable[] GetSearchables()
		{
			return searchables;
		}
		
		protected internal virtual int[] GetStarts()
		{
			return starts;
		}
		
		// inherit javadoc
		public override void  Close()
		{
			for (int i = 0; i < searchables.Length; i++)
				searchables[i].Close();
		}
		
		public override int DocFreq(Term term)
		{
			int docFreq = 0;
			for (int i = 0; i < searchables.Length; i++)
				docFreq += searchables[i].DocFreq(term);
			return docFreq;
		}
		
		// inherit javadoc
		public override Document Doc(int n)
		{
			int i = SubSearcher(n); // find searcher index
			return searchables[i].Doc(n - starts[i]); // dispatch to searcher
		}
		
		/// <summary>Call {@link #subSearcher} instead.</summary>
		/// <deprecated>
		/// </deprecated>
		public virtual int SearcherIndex(int n)
		{
			return SubSearcher(n);
		}
		
		/// <summary>Returns index of the searcher for document <code>n</code> in the array
		/// used to construct this searcher. 
		/// </summary>
		public virtual int SubSearcher(int n)
		{
			// find searcher for doc n:
			// replace w/ call to Arrays.binarySearch in Java 1.2
			int lo = 0; // search starts array
			int hi = searchables.Length - 1; // for first element less
			// than n, return its index
			while (hi >= lo)
			{
				int mid = (lo + hi) >> 1;
				int midValue = starts[mid];
				if (n < midValue)
					hi = mid - 1;
				else if (n > midValue)
					lo = mid + 1;
				else
				{
					// found a match
					while (mid + 1 < searchables.Length && starts[mid + 1] == midValue)
					{
						mid++; // scan to last match
					}
					return mid;
				}
			}
			return hi;
		}
		
		/// <summary>Returns the document number of document <code>n</code> within its
		/// sub-index. 
		/// </summary>
		public virtual int SubDoc(int n)
		{
			return n - starts[SubSearcher(n)];
		}
		
		public override int MaxDoc()
		{
			return maxDoc;
		}
		
		public override TopDocs Search(Weight weight, Filter filter, int nDocs)
		{
			
			HitQueue hq = new HitQueue(nDocs);
			int totalHits = 0;
			
			for (int i = 0; i < searchables.Length; i++)
			{
				// search each searcher
				TopDocs docs = searchables[i].Search(weight, filter, nDocs);
				totalHits += docs.totalHits; // update totalHits
				ScoreDoc[] scoreDocs = docs.scoreDocs;
				for (int j = 0; j < scoreDocs.Length; j++)
				{
					// merge scoreDocs into hq
					ScoreDoc scoreDoc = scoreDocs[j];
					scoreDoc.doc += starts[i]; // convert doc
					if (!hq.Insert(scoreDoc))
						break; // no more scores > minScore
				}
			}
			
			ScoreDoc[] scoreDocs2 = new ScoreDoc[hq.Size()];
			for (int i = hq.Size() - 1; i >= 0; i--)
			    // put docs in array
				scoreDocs2[i] = (ScoreDoc) hq.Pop();
			
			float maxScore = (totalHits == 0) ? System.Single.NegativeInfinity : scoreDocs2[0].score;
			
			return new TopDocs(totalHits, scoreDocs2, maxScore);
		}
		
		public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort)
		{
			FieldDocSortedHitQueue hq = null;
			int totalHits = 0;
			
			float maxScore = System.Single.NegativeInfinity;
			
			for (int i = 0; i < searchables.Length; i++)
			{
				// search each searcher
				TopFieldDocs docs = searchables[i].Search(weight, filter, n, sort);
				
				if (hq == null)
					hq = new FieldDocSortedHitQueue(docs.fields, n);
				totalHits += docs.totalHits; // update totalHits
				maxScore = System.Math.Max(maxScore, docs.GetMaxScore());
				ScoreDoc[] scoreDocs = docs.scoreDocs;
				for (int j = 0; j < scoreDocs.Length; j++)
				{
					// merge scoreDocs into hq
					ScoreDoc scoreDoc = scoreDocs[j];
					scoreDoc.doc += starts[i]; // convert doc
					if (!hq.Insert(scoreDoc))
						break; // no more scores > minScore
				}
			}
			
			ScoreDoc[] scoreDocs2 = new ScoreDoc[hq.Size()];
			for (int i = hq.Size() - 1; i >= 0; i--)
			    // put docs in array
				scoreDocs2[i] = (ScoreDoc) hq.Pop();
			
			return new TopFieldDocs(totalHits, scoreDocs2, hq.GetFields(), maxScore);
		}
		
		
		// inherit javadoc
		public override void  Search(Weight weight, Filter filter, HitCollector results)
		{
			for (int i = 0; i < searchables.Length; i++)
			{
				
				int start = starts[i];
				
				searchables[i].Search(weight, filter, new AnonymousClassHitCollector(results, start, this));
			}
		}
		
		public override Query Rewrite(Query original)
		{
			Query[] queries = new Query[searchables.Length];
			for (int i = 0; i < searchables.Length; i++)
			{
				queries[i] = searchables[i].Rewrite(original);
			}
			return queries[0].Combine(queries);
		}
		
		public override Explanation Explain(Weight weight, int doc)
		{
			int i = SubSearcher(doc); // find searcher index
			return searchables[i].Explain(weight, doc - starts[i]); // dispatch to searcher
		}
		
		/// <summary> Create weight in multiple index scenario.
		/// 
		/// Distributed query processing is done in the following steps:
		/// 1. rewrite query
		/// 2. extract necessary terms
		/// 3. collect dfs for these terms from the Searchables
		/// 4. create query weight using aggregate dfs.
		/// 5. distribute that weight to Searchables
		/// 6. merge results
		/// 
		/// Steps 1-4 are done here, 5+6 in the search() methods
		/// 
		/// </summary>
		/// <returns> rewritten queries
		/// </returns>
		protected internal override Weight CreateWeight(Query original)
		{
			// step 1
			Query rewrittenQuery = Rewrite(original);
			
			// step 2
			System.Collections.Hashtable terms = new System.Collections.Hashtable();
			rewrittenQuery.ExtractTerms(terms);
			
			// step3
			Term[] allTermsArray = new Term[terms.Count];
            int index = 0;
            System.Collections.IEnumerator e = terms.GetEnumerator();
            while (e.MoveNext())
                allTermsArray[index++] = e.Current as Term;
			int[] aggregatedDfs = new int[terms.Count];
			for (int i = 0; i < searchables.Length; i++)
			{
				int[] dfs = searchables[i].DocFreqs(allTermsArray);
				for (int j = 0; j < aggregatedDfs.Length; j++)
				{
					aggregatedDfs[j] += dfs[j];
				}
			}
			
			System.Collections.Hashtable dfMap = new System.Collections.Hashtable();
			for (int i = 0; i < allTermsArray.Length; i++)
			{
				dfMap[allTermsArray[i]] = (System.Int32) aggregatedDfs[i];
			}
			
			// step4
			int numDocs = MaxDoc();
			CachedDfSource cacheSim = new CachedDfSource(dfMap, numDocs);
			
			return rewrittenQuery.Weight(cacheSim);
		}
	}
}