PhraseQuery.cs
上传用户:zhangkuixh
上传日期:2013-09-30
资源大小:5473k
文件大小:11k
- /*
- * Copyright 2004 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- using System;
- using IndexReader = Lucene.Net.Index.IndexReader;
- using Term = Lucene.Net.Index.Term;
- using TermPositions = Lucene.Net.Index.TermPositions;
- using ToStringUtils = Lucene.Net.Util.ToStringUtils;
- namespace Lucene.Net.Search
- {
-
/// <summary>A Query that matches documents containing a particular sequence of terms.
/// A PhraseQuery is built by QueryParser for input like <c>"new york"</c>.
/// <para>This query may be combined with other terms or queries with a
/// <c>BooleanQuery</c>.</para>
/// </summary>
[Serializable]
public class PhraseQuery : Query
{
    // Field shared by every term of the phrase; stays null until the first term is added.
    private System.String field;
    // Terms in insertion order.
    private System.Collections.ArrayList terms = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
    // Boxed int positions, index-aligned with "terms".
    private System.Collections.ArrayList positions = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
    private int slop = 0;

    /// <summary>Constructs an empty phrase query. </summary>
    public PhraseQuery()
    {
    }

    /// <summary>Sets the number of other words permitted between words in query phrase.
    /// If zero, then this is an exact phrase search. For larger values this works
    /// like a <c>WITHIN</c> or <c>NEAR</c> operator.
    /// <para>The slop is in fact an edit-distance, where the units correspond to
    /// moves of terms in the query phrase out of position. For example, to switch
    /// the order of two words requires two moves (the first move places the words
    /// atop one another), so to permit re-orderings of phrases, the slop must be
    /// at least two.</para>
    /// <para>More exact matches are scored higher than sloppier matches, thus search
    /// results are sorted by exactness.</para>
    /// <para>The slop is zero by default, requiring exact matches.</para>
    /// </summary>
    /// <param name="s">The new slop value.</param>
    public virtual void SetSlop(int s)
    {
        slop = s;
    }

    /// <summary>Returns the slop. See <see cref="SetSlop(int)"/>. </summary>
    /// <returns>The current slop value.</returns>
    public virtual int GetSlop()
    {
        return slop;
    }

    /// <summary> Adds a term to the end of the query phrase.
    /// The relative position of the term is the one immediately after the last term added.
    /// </summary>
    /// <param name="term">The term to append.</param>
    public virtual void Add(Term term)
    {
        int position = 0;
        if (positions.Count > 0)
        {
            position = ((System.Int32) positions[positions.Count - 1]) + 1;
        }

        Add(term, position);
    }

    /// <summary> Adds a term to the end of the query phrase.
    /// The relative position of the term within the phrase is specified explicitly.
    /// This allows e.g. phrases with more than one term at the same position
    /// or phrases with gaps (e.g. in connection with stopwords).
    /// </summary>
    /// <param name="term">The term to append.</param>
    /// <param name="position">The relative position of the term within the phrase.</param>
    /// <exception cref="System.ArgumentException">If the term's field differs from the
    /// field of the terms already added.</exception>
    public virtual void Add(Term term, int position)
    {
        if (terms.Count == 0)
        {
            // The first term fixes the field for the whole phrase.
            field = term.Field();
        }
        else if (term.Field() != field)
        {
            throw new System.ArgumentException("All phrase terms must be in the same field: " + term);
        }

        terms.Add(term);
        positions.Add((System.Int32) position);
    }

    /// <summary>Returns the set of terms in this phrase. </summary>
    /// <returns>A new array holding the terms in the order they were added.</returns>
    public virtual Term[] GetTerms()
    {
        return (Term[]) terms.ToArray(typeof(Term));
    }

    /// <summary> Returns the relative positions of terms in this phrase.</summary>
    /// <returns>A new array, index-aligned with <see cref="GetTerms"/>.</returns>
    public virtual int[] GetPositions()
    {
        int[] result = new int[positions.Count];
        for (int i = 0; i < positions.Count; i++)
        {
            result[i] = ((System.Int32) positions[i]);
        }
        return result;
    }

    /// <summary>Weight implementation for a phrase query: holds the idf, the query
    /// weight and the normalization factor, and creates the phrase scorers.</summary>
    [Serializable]
    private class PhraseWeight : Weight
    {
        private PhraseQuery enclosingInstance;

        /// <summary>The phrase query this weight was created for.</summary>
        public PhraseQuery Enclosing_Instance
        {
            get
            {
                return enclosingInstance;
            }
        }

        private Similarity similarity;
        private float value_Renamed;
        private float idf;
        private float queryNorm;
        private float queryWeight;

        /// <summary>Creates the weight and computes the idf of the phrase terms.</summary>
        /// <param name="enclosingInstance">The owning phrase query.</param>
        /// <param name="searcher">The searcher supplying the similarity and term statistics.</param>
        public PhraseWeight(PhraseQuery enclosingInstance, Searcher searcher)
        {
            this.enclosingInstance = enclosingInstance;
            this.similarity = Enclosing_Instance.GetSimilarity(searcher);

            idf = similarity.Idf(Enclosing_Instance.terms, searcher);
        }

        /// <summary>Returns "weight(" + query + ")".</summary>
        public override System.String ToString()
        {
            return "weight(" + Enclosing_Instance + ")";
        }

        /// <summary>Returns the owning phrase query.</summary>
        public virtual Query GetQuery()
        {
            return Enclosing_Instance;
        }

        /// <summary>Returns the value computed by the last call to <c>Normalize</c>.</summary>
        public virtual float GetValue()
        {
            return value_Renamed;
        }

        /// <summary>Computes the query weight (idf * boost) and returns its square.</summary>
        public virtual float SumOfSquaredWeights()
        {
            queryWeight = idf * Enclosing_Instance.GetBoost(); // compute query weight
            return queryWeight * queryWeight; // square it
        }

        /// <summary>Stores the normalization factor and applies it to the query weight.</summary>
        /// <param name="queryNorm">The normalization factor.</param>
        public virtual void Normalize(float queryNorm)
        {
            this.queryNorm = queryNorm;
            queryWeight *= queryNorm; // normalize query weight
            value_Renamed = queryWeight * idf; // idf for document
        }

        /// <summary>Creates the scorer for this phrase over the given reader.</summary>
        /// <param name="reader">The index reader to score against.</param>
        /// <returns>An exact scorer when the slop is zero, a sloppy scorer otherwise,
        /// or null when the phrase has no terms or the reader returns null positions
        /// for one of them.</returns>
        public virtual Scorer Scorer(IndexReader reader)
        {
            PhraseQuery outer = Enclosing_Instance;
            int termCount = outer.terms.Count;

            if (termCount == 0)
            {
                // optimize zero-term case
                return null;
            }

            TermPositions[] tps = new TermPositions[termCount];
            for (int i = 0; i < termCount; i++)
            {
                TermPositions p = reader.TermPositions((Term) outer.terms[i]);
                if (p == null)
                {
                    return null;
                }
                tps[i] = p;
            }

            if (outer.slop == 0)
            {
                // optimize exact case
                return new ExactPhraseScorer(this, tps, outer.GetPositions(), similarity, reader.Norms(outer.field));
            }

            return new SloppyPhraseScorer(this, tps, outer.GetPositions(), similarity, outer.slop, reader.Norms(outer.field));
        }

        /// <summary>Builds an explanation of the score of <paramref name="doc"/>:
        /// the product of the query weight and the field weight.</summary>
        /// <param name="reader">The index reader to read statistics from.</param>
        /// <param name="doc">The document number to explain.</param>
        /// <returns>The combined explanation; only the field-weight explanation when the
        /// query weight is exactly 1; a zero-valued explanation when no scorer can be
        /// created for this phrase.</returns>
        public virtual Explanation Explain(IndexReader reader, int doc)
        {
            PhraseQuery outer = Enclosing_Instance;

            Explanation result = new Explanation();
            result.SetDescription("weight(" + GetQuery() + " in " + doc + "), product of:");

            System.Text.StringBuilder docFreqs = new System.Text.StringBuilder();
            System.Text.StringBuilder query = new System.Text.StringBuilder();
            query.Append('"');
            for (int i = 0; i < outer.terms.Count; i++)
            {
                if (i != 0)
                {
                    docFreqs.Append(" ");
                    query.Append(" ");
                }

                Term term = (Term) outer.terms[i];

                docFreqs.Append(term.Text());
                docFreqs.Append("=");
                docFreqs.Append(reader.DocFreq(term));

                query.Append(term.Text());
            }
            query.Append('"');

            Explanation idfExpl = new Explanation(idf, "idf(" + outer.field + ": " + docFreqs + ")");

            // explain query weight
            Explanation queryExpl = new Explanation();
            queryExpl.SetDescription("queryWeight(" + GetQuery() + "), product of:");

            Explanation boostExpl = new Explanation(outer.GetBoost(), "boost");
            if (outer.GetBoost() != 1.0f)
            {
                queryExpl.AddDetail(boostExpl);
            }
            queryExpl.AddDetail(idfExpl);

            Explanation queryNormExpl = new Explanation(queryNorm, "queryNorm");
            queryExpl.AddDetail(queryNormExpl);

            queryExpl.SetValue(boostExpl.GetValue() * idfExpl.GetValue() * queryNormExpl.GetValue());

            result.AddDetail(queryExpl);

            // explain field weight
            Explanation fieldExpl = new Explanation();
            fieldExpl.SetDescription("fieldWeight(" + outer.field + ":" + query + " in " + doc + "), product of:");

            // Scorer() returns null for an empty phrase or missing term positions;
            // report that instead of dereferencing null.
            Scorer scorer = Scorer(reader);
            if (scorer == null)
            {
                return new Explanation(0.0f, "no matching docs");
            }

            Explanation tfExpl = scorer.Explain(doc);
            fieldExpl.AddDetail(tfExpl);
            fieldExpl.AddDetail(idfExpl);

            Explanation fieldNormExpl = new Explanation();
            byte[] fieldNorms = reader.Norms(outer.field);
            float fieldNorm = fieldNorms != null ? Similarity.DecodeNorm(fieldNorms[doc]) : 0.0f;
            fieldNormExpl.SetValue(fieldNorm);
            fieldNormExpl.SetDescription("fieldNorm(field=" + outer.field + ", doc=" + doc + ")");
            fieldExpl.AddDetail(fieldNormExpl);

            fieldExpl.SetValue(tfExpl.GetValue() * idfExpl.GetValue() * fieldNormExpl.GetValue());

            result.AddDetail(fieldExpl);

            // combine them
            result.SetValue(queryExpl.GetValue() * fieldExpl.GetValue());

            if (queryExpl.GetValue() == 1.0f)
            {
                return fieldExpl;
            }

            return result;
        }
    }

    /// <summary>Creates the weight for this query. A phrase with exactly one term is
    /// delegated to a <c>TermQuery</c> carrying the same boost.</summary>
    /// <param name="searcher">The searcher the weight is created for.</param>
    protected internal override Weight CreateWeight(Searcher searcher)
    {
        if (terms.Count == 1)
        {
            // optimize one-term case
            Term term = (Term) terms[0];
            Query termQuery = new TermQuery(term);
            termQuery.SetBoost(GetBoost());
            return termQuery.CreateWeight(searcher);
        }
        return new PhraseWeight(this, searcher);
    }

    /// <summary>Adds every term of this phrase to <paramref name="queryTerms"/>,
    /// keyed by the term itself. Terms already present are left untouched.</summary>
    /// <param name="queryTerms">The table collecting the terms.</param>
    /// <seealso cref="Query.ExtractTerms(System.Collections.Hashtable)"/>
    public override void ExtractTerms(System.Collections.Hashtable queryTerms)
    {
        foreach (Term term in terms)
        {
            // Hashtable.Add throws on an existing key, so a repeated term in the
            // phrase (or one contributed earlier) must be skipped, not re-added.
            if (!queryTerms.Contains(term))
            {
                queryTerms.Add(term, term);
            }
        }
    }

    /// <summary>Prints a user-readable version of this query. </summary>
    /// <param name="f">The default field; the field prefix is omitted when this
    /// query's field equals it.</param>
    public override System.String ToString(System.String f)
    {
        System.Text.StringBuilder buffer = new System.Text.StringBuilder();

        // "field" is null while the phrase is still empty; print no prefix then.
        if (field != null && !field.Equals(f))
        {
            buffer.Append(field);
            buffer.Append(":");
        }

        buffer.Append("\"");
        for (int i = 0; i < terms.Count; i++)
        {
            buffer.Append(((Term) terms[i]).Text());
            if (i != terms.Count - 1)
            {
                buffer.Append(" ");
            }
        }
        buffer.Append("\"");

        if (slop != 0)
        {
            buffer.Append("~");
            buffer.Append(slop);
        }

        buffer.Append(ToStringUtils.Boost(GetBoost()));

        return buffer.ToString();
    }

    /// <summary>Returns true iff <c>o</c> is a PhraseQuery with the same boost,
    /// slop, terms and positions as this one. </summary>
    public override bool Equals(System.Object o)
    {
        if (!(o is PhraseQuery))
        {
            return false;
        }

        PhraseQuery other = (PhraseQuery) o;
        return (this.GetBoost() == other.GetBoost())
            && (this.slop == other.slop)
            && ListsEqual(this.terms, other.terms)
            && ListsEqual(this.positions, other.positions);
    }

    /// <summary>Returns a hash code value for this object, derived from the boost,
    /// the slop and the elements of the term and position lists.</summary>
    public override int GetHashCode()
    {
        return BitConverter.ToInt32(BitConverter.GetBytes(GetBoost()), 0) ^ slop ^ ListHashCode(terms) ^ ListHashCode(positions);
    }

    /// <summary>Returns a copy of this query that owns its own term and position
    /// lists, so adding terms to the copy does not affect this instance.</summary>
    public override System.Object Clone()
    {
        PhraseQuery copy = (PhraseQuery) MemberwiseClone();
        copy.terms = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(terms));
        copy.positions = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(positions));
        return copy;
    }

    // Element-wise comparison. ArrayList.Equals is reference equality, so it cannot
    // be used to compare the contents of two lists.
    // NOTE(review): for the term list this relies on Term overriding Equals with
    // value semantics - confirm against Lucene.Net.Index.Term.
    private static bool ListsEqual(System.Collections.ArrayList left, System.Collections.ArrayList right)
    {
        System.Object[] a = left.ToArray();
        System.Object[] b = right.ToArray();
        if (a.Length != b.Length)
        {
            return false;
        }

        for (int i = 0; i < a.Length; i++)
        {
            if (!System.Object.Equals(a[i], b[i]))
            {
                return false;
            }
        }
        return true;
    }

    // Order-sensitive hash over the list elements, kept consistent with ListsEqual.
    private static int ListHashCode(System.Collections.ArrayList list)
    {
        System.Object[] items = list.ToArray();
        int hash = 1;
        for (int i = 0; i < items.Length; i++)
        {
            int itemHash = items[i] == null ? 0 : items[i].GetHashCode();
            hash = unchecked(31 * hash + itemHash);
        }
        return hash;
    }
}
- }