MultiSearcher.cs
上传用户:zhangkuixh
上传日期:2013-09-30
资源大小:5473k
文件大小:11k
- /*
- * Copyright 2004 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- using System;
- using Document = Lucene.Net.Documents.Document;
- using Term = Lucene.Net.Index.Term;
- namespace Lucene.Net.Search
- {
-
- /// <summary>Implements search over a set of <code>Searchables</code>.
- ///
- /// <p>Applications usually need only call the inherited {@link #Search(Query)}
- /// or {@link #Search(Query,Filter)} methods.
- /// </summary>
- public class MultiSearcher : Searcher
- {
/// <summary>
/// Hit collector that adds a fixed offset to every document number it receives
/// and then forwards the hit, with its score untouched, to another collector.
/// </summary>
private class AnonymousClassHitCollector : HitCollector
{
    // Collector that receives the shifted hits.
    private Lucene.Net.Search.HitCollector results;

    // Offset added to every incoming document number.
    private int start;

    // The MultiSearcher that created this collector; only surfaced through Enclosing_Instance.
    private MultiSearcher enclosingInstance;

    /// <summary>Creates a collector that forwards to <c>results</c> after adding <c>start</c>.</summary>
    /// <param name="results">Collector that receives the shifted hits.</param>
    /// <param name="start">Offset to add to each document number.</param>
    /// <param name="enclosingInstance">The searcher on whose behalf this collector runs.</param>
    public AnonymousClassHitCollector(Lucene.Net.Search.HitCollector results, int start, MultiSearcher enclosingInstance)
    {
        this.results = results;
        this.start = start;
        this.enclosingInstance = enclosingInstance;
    }

    /// <summary>The searcher that created this collector.</summary>
    public MultiSearcher Enclosing_Instance
    {
        get { return enclosingInstance; }
    }

    /// <summary>Forwards the hit to the wrapped collector using the shifted document number.</summary>
    /// <param name="doc">Document number as reported by the caller of this collector.</param>
    /// <param name="score">Score of the hit; passed through unchanged.</param>
    public override void Collect(int doc, float score)
    {
        int shiftedDoc = doc + start;
        results.Collect(shiftedDoc, score);
    }
}
/// <summary>
/// Document-frequency cache that poses as a Searcher.
/// Only the members needed to initialize a Weight are implemented
/// (DocFreq, DocFreqs, MaxDoc and Rewrite); every other member throws
/// <see cref="System.NotSupportedException"/>.
/// </summary>
private class CachedDfSource : Searcher
{
    // Maps each Term to its (boxed Int32) document frequency.
    private System.Collections.IDictionary dfMap;

    // Document count reported by MaxDoc().
    private int maxDoc;

    /// <summary>Creates a cache over the given term-to-frequency map.</summary>
    /// <param name="dfMap">Map from Term to its document frequency.</param>
    /// <param name="maxDoc">Value to report from <see cref="MaxDoc"/>.</param>
    public CachedDfSource(System.Collections.IDictionary dfMap, int maxDoc)
    {
        this.dfMap = dfMap;
        this.maxDoc = maxDoc;
    }

    /// <summary>Returns the cached document frequency of <c>term</c>.</summary>
    /// <param name="term">Term to look up.</param>
    /// <returns>The frequency stored for the term.</returns>
    /// <exception cref="System.ArgumentException">No frequency is cached for the term.</exception>
    public override int DocFreq(Term term)
    {
        // Look the entry up explicitly instead of unboxing blindly and catching
        // NullReferenceException: the old catch also swallowed unrelated NREs
        // raised during the lookup. A null map is still reported as a missing
        // entry, as before.
        object cached = (dfMap == null) ? null : dfMap[term];
        if (cached == null)
        {
            throw new System.ArgumentException("df for term " + term.Text() + " not available");
        }
        return (System.Int32) cached;
    }

    /// <summary>Returns the cached document frequency of each term, in input order.</summary>
    /// <param name="terms">Terms to look up.</param>
    /// <returns>An array parallel to <c>terms</c>.</returns>
    /// <exception cref="System.ArgumentException">A term has no cached frequency.</exception>
    public override int[] DocFreqs(Term[] terms)
    {
        int[] result = new int[terms.Length];
        for (int i = 0; i < terms.Length; i++)
        {
            result[i] = DocFreq(terms[i]);
        }
        return result;
    }

    /// <summary>Returns the document count supplied to the constructor.</summary>
    public override int MaxDoc()
    {
        return maxDoc;
    }

    /// <summary>Returns <c>query</c> unchanged.</summary>
    public override Query Rewrite(Query query)
    {
        // This is a bit of a hack. MultiSearcher.CreateWeight rewrites the query
        // before asking it for a Weight based on this dummy searcher, so the
        // query arriving here is already rewritten and is returned as-is.
        return query;
    }

    /// <summary>Not supported by this dummy searcher.</summary>
    public override void Close()
    {
        throw new System.NotSupportedException();
    }

    /// <summary>Not supported by this dummy searcher.</summary>
    public override Document Doc(int i)
    {
        throw new System.NotSupportedException();
    }

    /// <summary>Not supported by this dummy searcher.</summary>
    public override Explanation Explain(Weight weight, int doc)
    {
        throw new System.NotSupportedException();
    }

    /// <summary>Not supported by this dummy searcher.</summary>
    public override void Search(Weight weight, Filter filter, HitCollector results)
    {
        throw new System.NotSupportedException();
    }

    /// <summary>Not supported by this dummy searcher.</summary>
    public override TopDocs Search(Weight weight, Filter filter, int n)
    {
        throw new System.NotSupportedException();
    }

    /// <summary>Not supported by this dummy searcher.</summary>
    public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort)
    {
        throw new System.NotSupportedException();
    }
}
-
-
-
// Sub-searchers, in the order they were handed to the constructor.
private readonly Lucene.Net.Search.Searchable[] searchables;

// starts[i] is the first combined document number owned by searchables[i];
// the extra trailing slot holds the total document count.
private readonly int[] starts;

// Sum of MaxDoc() over all sub-searchers, computed once in the constructor.
private readonly int maxDoc = 0;

/// <summary>Creates a searcher which searches <i>searchables</i>.</summary>
/// <param name="searchables">
/// Sub-searchers to combine. The array is stored as-is (not copied) and each
/// element's MaxDoc() is queried once here to lay out the document-number ranges.
/// </param>
public MultiSearcher(Lucene.Net.Search.Searchable[] searchables)
{
    this.searchables = searchables;

    int count = searchables.Length;
    int[] offsets = new int[count + 1];
    int runningTotal = 0;
    for (int idx = 0; idx < count; idx++)
    {
        offsets[idx] = runningTotal;                  // first doc number of this sub-searcher
        runningTotal += searchables[idx].MaxDoc();    // advance past its documents
    }
    offsets[count] = runningTotal;                    // sentinel: one past the last document

    starts = offsets;
    maxDoc = runningTotal;
}
-
/// <summary>Returns the array of <see cref="Searchable"/>s this searcher delegates to.</summary>
/// <returns>The internally held array itself, not a copy.</returns>
public virtual Lucene.Net.Search.Searchable[] GetSearchables()
{
    Lucene.Net.Search.Searchable[] subSearchers = this.searchables;
    return subSearchers;
}
-
/// <summary>
/// Returns the offsets array: entry <c>i</c> is the first combined document
/// number of sub-searcher <c>i</c>, and the final entry is the total document count.
/// </summary>
/// <returns>The internally held array itself, not a copy.</returns>
protected internal virtual int[] GetStarts()
{
    int[] offsets = this.starts;
    return offsets;
}
-
/// <summary>Closes every sub-searcher, in array order.</summary>
/// <remarks>An exception from one sub-searcher propagates immediately; later ones are then not closed.</remarks>
public override void Close()
{
    foreach (Lucene.Net.Search.Searchable searchable in searchables)
    {
        searchable.Close();
    }
}
-
/// <summary>Returns the document frequency of <c>term</c> summed over all sub-searchers.</summary>
/// <param name="term">Term whose frequency is requested.</param>
/// <returns>The sum of each sub-searcher's DocFreq(term).</returns>
public override int DocFreq(Term term)
{
    int total = 0;
    foreach (Lucene.Net.Search.Searchable searchable in searchables)
    {
        total += searchable.DocFreq(term);
    }
    return total;
}
-
/// <summary>Fetches document <c>n</c> from the sub-searcher that owns it.</summary>
/// <param name="n">Document number in the combined numbering of this searcher.</param>
/// <returns>The document returned by the owning sub-searcher.</returns>
public override Document Doc(int n)
{
    int owner = SubSearcher(n);            // which sub-searcher holds n
    int localDoc = n - starts[owner];      // number relative to that sub-searcher
    return searchables[owner].Doc(localDoc);
}
-
/// <summary>
/// Returns the index of the sub-searcher that holds document <c>n</c>.
/// Deprecated: this simply forwards to <see cref="SubSearcher"/>; call that method instead.
/// </summary>
/// <param name="n">Document number in the combined numbering of this searcher.</param>
/// <returns>Whatever <see cref="SubSearcher"/> returns for <c>n</c>.</returns>
[System.Obsolete("Call SubSearcher(int) instead.")]
public virtual int SearcherIndex(int n)
{
    // The attribute makes the deprecation visible to the compiler (warning only);
    // previously it lived solely in a Javadoc-style <deprecated> tag.
    return SubSearcher(n);
}
-
/// <summary>
/// Returns the index, in the array used to construct this searcher, of the
/// sub-searcher that holds document <c>n</c>.
/// </summary>
/// <param name="n">Document number in the combined numbering of this searcher.</param>
/// <returns>
/// The index of the last sub-searcher whose start offset is not greater than
/// <c>n</c>. No range check is performed: if <c>n</c> is below the first
/// offset the result is -1.
/// </returns>
public virtual int SubSearcher(int n)
{
    // Binary search over the start offsets of the sub-searchers
    // (the trailing sentinel entry of starts is not examined).
    int count = searchables.Length;
    int low = 0;
    int high = count - 1;

    while (low <= high)
    {
        int probe = (low + high) >> 1;
        int probeStart = starts[probe];

        if (n == probeStart)
        {
            // Consecutive entries are equal when a sub-searcher reports zero
            // documents; advance to the last entry sharing this offset.
            int last = probe;
            while (last + 1 < count && starts[last + 1] == probeStart)
            {
                last++;
            }
            return last;
        }

        if (n < probeStart)
        {
            high = probe - 1;
        }
        else
        {
            low = probe + 1;
        }
    }

    // No exact match: high is now the last entry whose offset is below n.
    return high;
}
-
/// <summary>
/// Returns the document number of document <c>n</c> within its sub-index.
/// </summary>
/// <param name="n">Document number in the combined numbering of this searcher.</param>
/// <returns><c>n</c> minus the start offset of the sub-searcher that holds it.</returns>
public virtual int SubDoc(int n)
{
    int owner = SubSearcher(n);
    return n - starts[owner];
}
-
/// <summary>Returns the combined document count computed in the constructor.</summary>
/// <returns>The sum of MaxDoc() over all sub-searchers at construction time.</returns>
public override int MaxDoc()
{
    int total = this.maxDoc;
    return total;
}
-
/// <summary>
/// Runs the search on every sub-searcher and merges the per-searcher results
/// into a single list of at most <c>nDocs</c> hits.
/// </summary>
/// <param name="weight">Weight handed to each sub-searcher.</param>
/// <param name="filter">Filter handed to each sub-searcher.</param>
/// <param name="nDocs">Maximum number of hits requested, also passed to each sub-searcher.</param>
/// <returns>
/// The merged hits with the summed total hit count. The max score is the
/// score of the first merged hit, or negative infinity when no hit was collected.
/// </returns>
/// <remarks>
/// The ScoreDoc objects returned by the sub-searchers are modified in place:
/// their <c>doc</c> field is shifted into the combined numbering.
/// </remarks>
public override TopDocs Search(Weight weight, Filter filter, int nDocs)
{
    HitQueue hq = new HitQueue(nDocs);
    int totalHits = 0;

    for (int i = 0; i < searchables.Length; i++)
    {
        // search each searcher
        TopDocs docs = searchables[i].Search(weight, filter, nDocs);
        totalHits += docs.totalHits; // update totalHits
        ScoreDoc[] scoreDocs = docs.scoreDocs;
        for (int j = 0; j < scoreDocs.Length; j++)
        {
            // merge scoreDocs into hq
            ScoreDoc scoreDoc = scoreDocs[j];
            scoreDoc.doc += starts[i]; // convert doc to the combined numbering
            if (!hq.Insert(scoreDoc))
            {
                // The queue rejected this hit; the remaining hits of this
                // sub-searcher are skipped (presumably they are ordered
                // best-first, so none could be accepted - confirm against HitQueue).
                break;
            }
        }
    }

    // Drain the queue into the array back to front.
    ScoreDoc[] merged = new ScoreDoc[hq.Size()];
    for (int i = hq.Size() - 1; i >= 0; i--)
    {
        merged[i] = (ScoreDoc) hq.Pop();
    }

    // Guard on the merged array rather than on totalHits: hits can be counted
    // without any being collected (e.g. nDocs == 0), and indexing merged[0]
    // would then throw IndexOutOfRangeException.
    float maxScore = (merged.Length == 0) ? System.Single.NegativeInfinity : merged[0].score;

    return new TopDocs(totalHits, merged, maxScore);
}
-
/// <summary>
/// Runs the sorted search on every sub-searcher and merges the per-searcher
/// results into a single list of at most <c>n</c> hits.
/// </summary>
/// <param name="weight">Weight handed to each sub-searcher.</param>
/// <param name="filter">Filter handed to each sub-searcher.</param>
/// <param name="n">Maximum number of hits requested, also passed to each sub-searcher.</param>
/// <param name="sort">Sort criteria handed to each sub-searcher.</param>
/// <returns>
/// The merged hits with the summed total hit count, the sort fields reported by
/// the merge queue, and the largest max score reported by any sub-searcher.
/// </returns>
/// <remarks>
/// The ScoreDoc objects returned by the sub-searchers are modified in place:
/// their <c>doc</c> field is shifted into the combined numbering.
/// </remarks>
public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort)
{
    // Created lazily because it needs the sort fields of the first result.
    // NOTE(review): stays null when there are no sub-searchers, in which case
    // the Size() call below throws NullReferenceException.
    FieldDocSortedHitQueue mergeQueue = null;
    int hitCount = 0;
    float bestScore = System.Single.NegativeInfinity;

    for (int s = 0; s < searchables.Length; s++)
    {
        TopFieldDocs partial = searchables[s].Search(weight, filter, n, sort);

        if (mergeQueue == null)
        {
            mergeQueue = new FieldDocSortedHitQueue(partial.fields, n);
        }

        hitCount += partial.totalHits;
        bestScore = System.Math.Max(bestScore, partial.GetMaxScore());

        ScoreDoc[] partialHits = partial.scoreDocs;
        for (int h = 0; h < partialHits.Length; h++)
        {
            ScoreDoc hit = partialHits[h];
            hit.doc += starts[s]; // shift into the combined numbering
            if (!mergeQueue.Insert(hit))
            {
                // Rejected by the queue: skip the rest of this sub-searcher's hits.
                break;
            }
        }
    }

    // Drain the queue into the array back to front.
    int size = mergeQueue.Size();
    ScoreDoc[] merged = new ScoreDoc[size];
    for (int slot = size - 1; slot >= 0; slot--)
    {
        merged[slot] = (ScoreDoc) mergeQueue.Pop();
    }

    return new TopFieldDocs(hitCount, merged, mergeQueue.GetFields(), bestScore);
}
-
-
/// <summary>
/// Runs the search on every sub-searcher in array order, reporting each hit to
/// <c>results</c> with its document number shifted into the combined numbering.
/// </summary>
/// <param name="weight">Weight handed to each sub-searcher.</param>
/// <param name="filter">Filter handed to each sub-searcher.</param>
/// <param name="results">Collector that receives all hits.</param>
public override void Search(Weight weight, Filter filter, HitCollector results)
{
    for (int s = 0; s < searchables.Length; s++)
    {
        // One wrapper per sub-searcher, carrying that sub-searcher's offset.
        HitCollector shifting = new AnonymousClassHitCollector(results, starts[s], this);
        searchables[s].Search(weight, filter, shifting);
    }
}
-
/// <summary>
/// Rewrites <c>original</c> against every sub-searcher and combines the results
/// into a single query.
/// </summary>
/// <param name="original">Query to rewrite.</param>
/// <returns>The result of calling Combine on the first rewritten query with all rewritten queries.</returns>
public override Query Rewrite(Query original)
{
    int count = searchables.Length;
    Query[] rewritten = new Query[count];
    for (int idx = 0; idx < count; idx++)
    {
        rewritten[idx] = searchables[idx].Rewrite(original);
    }

    // NOTE(review): with zero sub-searchers this index access throws
    // IndexOutOfRangeException.
    Query first = rewritten[0];
    return first.Combine(rewritten);
}
-
/// <summary>Asks the sub-searcher that owns <c>doc</c> to explain its score.</summary>
/// <param name="weight">Weight to explain.</param>
/// <param name="doc">Document number in the combined numbering of this searcher.</param>
/// <returns>The explanation produced by the owning sub-searcher.</returns>
public override Explanation Explain(Weight weight, int doc)
{
    int owner = SubSearcher(doc);          // which sub-searcher holds doc
    int localDoc = doc - starts[owner];    // number relative to that sub-searcher
    return searchables[owner].Explain(weight, localDoc);
}
-
/// <summary> Create weight in multiple index scenario.
///
/// Distributed query processing is done in the following steps:
/// 1. rewrite query
/// 2. extract necessary terms
/// 3. collect dfs for these terms from the Searchables
/// 4. create query weight using aggregate dfs.
/// 5. distribute that weight to Searchables
/// 6. merge results
///
/// Steps 1-4 are done here, 5+6 in the Search() methods.
/// </summary>
/// <param name="original">Query to create a weight for.</param>
/// <returns>
/// The weight of the rewritten query, built from document frequencies summed
/// over all sub-searchers.
/// </returns>
protected internal override Weight CreateWeight(Query original)
{
    // step 1: rewrite
    Query rewrittenQuery = Rewrite(original);

    // step 2: extract the terms of the rewritten query
    System.Collections.Hashtable terms = new System.Collections.Hashtable();
    rewrittenQuery.ExtractTerms(terms);

    // step 3: sum the document frequencies of those terms over all sub-searchers.
    // Copy the keys: enumerating the Hashtable itself yields DictionaryEntry
    // values, so the former "e.Current as Term" always produced null.
    // Assumes ExtractTerms stores each Term as a key of the table - confirm
    // against the Query implementations.
    Term[] allTermsArray = new Term[terms.Count];
    terms.Keys.CopyTo(allTermsArray, 0);

    int[] aggregatedDfs = new int[allTermsArray.Length];
    for (int i = 0; i < searchables.Length; i++)
    {
        int[] dfs = searchables[i].DocFreqs(allTermsArray);
        for (int j = 0; j < aggregatedDfs.Length; j++)
        {
            aggregatedDfs[j] += dfs[j];
        }
    }

    System.Collections.Hashtable dfMap = new System.Collections.Hashtable();
    for (int i = 0; i < allTermsArray.Length; i++)
    {
        dfMap[allTermsArray[i]] = aggregatedDfs[i];
    }

    // step 4: build the weight against a dummy searcher that serves the
    // aggregated frequencies and the combined document count
    int numDocs = MaxDoc();
    CachedDfSource cacheSim = new CachedDfSource(dfMap, numDocs);

    return rewrittenQuery.Weight(cacheSim);
}
- }
- }