Field.cs
上传用户:zhangkuixh
上传日期:2013-09-30
资源大小:5473k
文件大小:26k
- /*
- * Copyright 2004 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- using System;
- using IndexReader = Lucene.Net.Index.IndexReader;
- using Hits = Lucene.Net.Search.Hits;
- using Similarity = Lucene.Net.Search.Similarity;
- using Parameter = Lucene.Net.Util.Parameter;
- namespace Lucene.Net.Documents
- {
-
- /// <summary>A field is a section of a Document. Each field has two parts, a name and a
- /// value. Values may be free text, provided as a String or as a Reader, or they
- /// may be atomic keywords, which are not further processed. Such keywords may
- /// be used to represent dates, urls, etc. Fields are optionally stored in the
- /// index, so that they may be returned with hits on the document.
- /// </summary>
-
- [Serializable]
- public sealed class Field
- {
// Field name. "body" is only the initializer default: every constructor
// of this class replaces it with the interned name it was given.
private string name = "body";

// The one and only data slot for all kinds of field values. It holds a
// string, a System.IO.TextReader or a byte[]; see StringValue(),
// ReaderValue() and BinaryValue().
private object fieldsData;

// Term vector options. Bool fields without an initializer start out false.
private bool storeTermVector;
private bool storeOffsetWithTermVector;
private bool storePositionWithTermVector;

// Storage and indexing flags.
private bool omitNorms;
private bool isStored;
private bool isIndexed = true;
private bool isTokenized = true;
private bool isBinary;
private bool isCompressed;

// Boost factor for hits on this field; see SetBoost(float).
private float boost = 1.0f;
-
/// <summary>Specifies whether and how a field's original value is stored
/// in the index. Instances are only created inside this assembly; use the
/// predefined constants.</summary>
[Serializable]
public sealed class Store : Parameter
{
    /// <summary>Store the original field value in the index in a compressed
    /// form. This is useful for long documents and for binary valued fields.
    /// </summary>
    public static readonly Store COMPRESS = new Store("COMPRESS");

    /// <summary>Store the original field value in the index. This is useful
    /// for short texts like a document's title which should be displayed with
    /// the results. The value is stored in its original form, i.e. no
    /// analyzer is used before it is stored.
    /// </summary>
    public static readonly Store YES = new Store("YES");

    /// <summary>Do not store the field value in the index.</summary>
    public static readonly Store NO = new Store("NO");

    /// <summary>Creates a store option with the given name, which is handed
    /// to the <c>Parameter</c> base class.</summary>
    /// <param name="name">Name of the option.</param>
    internal Store(System.String name) : base(name)
    {
    }
}
-
/// <summary>Specifies whether and how a field's value is indexed.
/// Instances are only created inside this assembly; use the predefined
/// constants.</summary>
[Serializable]
public sealed class Index : Parameter
{
    /// <summary>Do not index the field value. This field can thus not be
    /// searched, but one can still access its contents provided it is
    /// stored (see <c>Field.Store</c>).
    /// </summary>
    public static readonly Index NO = new Index("NO");

    /// <summary>Index the field's value so it can be searched. An Analyzer
    /// will be used to tokenize and possibly further normalize the text
    /// before its terms will be stored in the index. This is useful for
    /// common text.
    /// </summary>
    public static readonly Index TOKENIZED = new Index("TOKENIZED");

    /// <summary>Index the field's value without using an Analyzer, so it can
    /// be searched. As no analyzer is used the value will be stored as a
    /// single term. This is useful for unique Ids like product numbers.
    /// </summary>
    public static readonly Index UN_TOKENIZED = new Index("UN_TOKENIZED");

    /// <summary>Index the field's value without an Analyzer, and disable the
    /// storing of norms. No norms means that index-time boosting and field
    /// length normalization will be disabled. The benefit is less memory
    /// usage as norms take up one byte per indexed field for every document
    /// in the index.
    /// </summary>
    public static readonly Index NO_NORMS = new Index("NO_NORMS");

    /// <summary>Creates an index option with the given name, which is handed
    /// to the <c>Parameter</c> base class.</summary>
    /// <param name="name">Name of the option.</param>
    internal Index(System.String name) : base(name)
    {
    }
}
-
/// <summary>Specifies whether and how a field's term vectors are stored.
/// Instances are only created inside this assembly; use the predefined
/// constants.</summary>
[Serializable]
public sealed class TermVector : Parameter
{
    /// <summary>Do not store term vectors.</summary>
    public static readonly TermVector NO = new TermVector("NO");

    /// <summary>Store the term vectors of each document. A term vector is a
    /// list of the document's terms and their number of occurrences in that
    /// document.
    /// </summary>
    public static readonly TermVector YES = new TermVector("YES");

    /// <summary>Store the term vector plus token position information.</summary>
    /// <seealso cref="YES"/>
    public static readonly TermVector WITH_POSITIONS = new TermVector("WITH_POSITIONS");

    /// <summary>Store the term vector plus token offset information.</summary>
    /// <seealso cref="YES"/>
    public static readonly TermVector WITH_OFFSETS = new TermVector("WITH_OFFSETS");

    /// <summary>Store the term vector plus token position and offset
    /// information.</summary>
    /// <seealso cref="YES"/>
    /// <seealso cref="WITH_POSITIONS"/>
    /// <seealso cref="WITH_OFFSETS"/>
    public static readonly TermVector WITH_POSITIONS_OFFSETS = new TermVector("WITH_POSITIONS_OFFSETS");

    /// <summary>Creates a term vector option with the given name, which is
    /// handed to the <c>Parameter</c> base class.</summary>
    /// <param name="name">Name of the option.</param>
    internal TermVector(System.String name) : base(name)
    {
    }
}
-
/// <summary>Sets the boost factor for hits on this field. This value will be
/// multiplied into the score of all hits on this field of this document.
/// <para>The boost is multiplied by <c>Document.GetBoost()</c> of the
/// document containing this field. If a document has multiple fields with
/// the same name, all such values are multiplied together. This product is
/// then multiplied by the value of <c>Similarity.LengthNorm(String, int)</c>,
/// and rounded by <c>Similarity.EncodeNorm(float)</c> before it is stored in
/// the index. One should attempt to ensure that this product does not
/// overflow the range of that encoding.</para>
/// </summary>
/// <param name="boost">The boost factor to remember for this field.</param>
/// <seealso cref="Document.SetBoost(float)"/>
/// <seealso cref="Similarity.LengthNorm(String, int)"/>
/// <seealso cref="Similarity.EncodeNorm(float)"/>
public void SetBoost(float boost)
{
    // The parameter shadows the field, hence the explicit "this.".
    this.boost = boost;
}
-
/// <summary>Returns the boost factor for hits on this field.
/// <para>The default value is 1.0.</para>
/// <para>Note: this value is not stored directly with the document in the
/// index. Documents returned from <c>IndexReader.Document(int)</c> and
/// <c>Hits.Doc(int)</c> may thus not have the same value present as when
/// this field was indexed.</para>
/// </summary>
/// <returns>The boost factor currently held by this instance.</returns>
/// <seealso cref="SetBoost(float)"/>
public float GetBoost()
{
    return this.boost;
}
-
/// <summary>Constructs a String-valued Field that is not tokenized, but is
/// indexed and stored. Useful for non-text fields, e.g. date or url.
/// </summary>
/// <param name="name">The name of the field.</param>
/// <param name="value_Renamed">The string value of the field.</param>
/// <returns>The newly created field.</returns>
/// <remarks>Deprecated: use the
/// <c>Field(String, String, Field.Store, Field.Index)</c> constructor with
/// <c>Field.Store.YES</c> and <c>Field.Index.UN_TOKENIZED</c> instead.
/// </remarks>
public static Field Keyword(System.String name, System.String value_Renamed)
{
    const bool store = true;
    const bool index = true;
    const bool token = false;
    return new Field(name, value_Renamed, store, index, token);
}
-
/// <summary>Constructs a String-valued Field that is neither tokenized nor
/// indexed, but is stored in the index, for return with hits.
/// </summary>
/// <param name="name">The name of the field.</param>
/// <param name="value_Renamed">The string value of the field.</param>
/// <returns>The newly created field.</returns>
/// <remarks>Deprecated: use the
/// <c>Field(String, String, Field.Store, Field.Index)</c> constructor with
/// <c>Field.Store.YES</c> and <c>Field.Index.NO</c> instead.
/// </remarks>
public static Field UnIndexed(System.String name, System.String value_Renamed)
{
    const bool store = true;
    const bool index = false;
    const bool token = false;
    return new Field(name, value_Renamed, store, index, token);
}
-
/// <summary>Constructs a String-valued Field that is tokenized and indexed,
/// and is stored in the index, for return with hits. Useful for short text
/// fields, like "title" or "subject". Term vector will not be stored for
/// this field.
/// </summary>
/// <param name="name">The name of the field.</param>
/// <param name="value_Renamed">The string value of the field.</param>
/// <returns>The newly created field.</returns>
/// <remarks>Deprecated: use the
/// <c>Field(String, String, Field.Store, Field.Index)</c> constructor with
/// <c>Field.Store.YES</c> and <c>Field.Index.TOKENIZED</c> instead.
/// </remarks>
public static Field Text(System.String name, System.String value_Renamed)
{
    // Delegates to the three-argument overload with term vectors switched off.
    const bool storeTermVector = false;
    return Text(name, value_Renamed, storeTermVector);
}
-
/// <summary>Constructs a Date-valued Field that is not tokenized and is
/// indexed, and stored in the index, for return with hits. The date is
/// converted to a string with <c>DateField.DateToString</c>.
/// </summary>
/// <param name="name">The name of the field.</param>
/// <param name="value_Renamed">The date value of the field.</param>
/// <returns>The newly created field.</returns>
/// <remarks>Deprecated: use the
/// <c>Field(String, String, Field.Store, Field.Index)</c> constructor with
/// <c>Field.Store.YES</c> and <c>Field.Index.UN_TOKENIZED</c> instead.
/// </remarks>
public static Field Keyword(System.String name, System.DateTime value_Renamed)
{
    System.String dateText = DateField.DateToString(value_Renamed);

    const bool store = true;
    const bool index = true;
    const bool token = false;
    return new Field(name, dateText, store, index, token);
}
-
/// <summary>Constructs a String-valued Field that is tokenized and indexed,
/// and is stored in the index, for return with hits. Useful for short text
/// fields, like "title" or "subject".
/// </summary>
/// <param name="name">The name of the field.</param>
/// <param name="value_Renamed">The string value of the field.</param>
/// <param name="storeTermVector">true if the term vector should be stored.</param>
/// <returns>The newly created field.</returns>
/// <remarks>Deprecated: use the
/// <c>Field(String, String, Field.Store, Field.Index, Field.TermVector)</c>
/// constructor with <c>Field.Store.YES</c> and
/// <c>Field.Index.TOKENIZED</c> instead.
/// </remarks>
public static Field Text(System.String name, System.String value_Renamed, bool storeTermVector)
{
    const bool store = true;
    const bool index = true;
    const bool token = true;
    return new Field(name, value_Renamed, store, index, token, storeTermVector);
}
-
/// <summary>Constructs a String-valued Field that is tokenized and indexed,
/// but that is not stored in the index. Term vector will not be stored for
/// this field.
/// </summary>
/// <param name="name">The name of the field.</param>
/// <param name="value_Renamed">The string value of the field.</param>
/// <returns>The newly created field.</returns>
/// <remarks>Deprecated: use the
/// <c>Field(String, String, Field.Store, Field.Index)</c> constructor with
/// <c>Field.Store.NO</c> and <c>Field.Index.TOKENIZED</c> instead.
/// </remarks>
public static Field UnStored(System.String name, System.String value_Renamed)
{
    // Delegates to the three-argument overload with term vectors switched off.
    const bool storeTermVector = false;
    return UnStored(name, value_Renamed, storeTermVector);
}
-
/// <summary>Constructs a String-valued Field that is tokenized and indexed,
/// but that is not stored in the index.
/// </summary>
/// <param name="name">The name of the field.</param>
/// <param name="value_Renamed">The string value of the field.</param>
/// <param name="storeTermVector">true if the term vector should be stored.</param>
/// <returns>The newly created field.</returns>
/// <remarks>Deprecated: use the
/// <c>Field(String, String, Field.Store, Field.Index, Field.TermVector)</c>
/// constructor with <c>Field.Store.NO</c> and
/// <c>Field.Index.TOKENIZED</c> instead.
/// </remarks>
public static Field UnStored(System.String name, System.String value_Renamed, bool storeTermVector)
{
    const bool store = false;
    const bool index = true;
    const bool token = true;
    return new Field(name, value_Renamed, store, index, token, storeTermVector);
}
-
/// <summary>Constructs a Reader-valued Field that is tokenized and indexed,
/// but is not stored in the index verbatim. Useful for longer text fields,
/// like "body". Term vector will not be stored for this field.
/// </summary>
/// <param name="name">The name of the field.</param>
/// <param name="value_Renamed">The reader supplying the field content.</param>
/// <returns>The newly created field.</returns>
/// <remarks>Deprecated: use the <c>Field(String, TextReader)</c>
/// constructor instead.</remarks>
public static Field Text(System.String name, System.IO.TextReader value_Renamed)
{
    // Delegates to the three-argument overload with term vectors switched off.
    const bool storeTermVector = false;
    return Text(name, value_Renamed, storeTermVector);
}
-
/// <summary>Constructs a Reader-valued Field that is tokenized and indexed,
/// but is not stored in the index verbatim. Useful for longer text fields,
/// like "body".
/// </summary>
/// <param name="name">The name of the field.</param>
/// <param name="value_Renamed">The reader supplying the field content.</param>
/// <param name="storeTermVector">true if the term vector should be stored.</param>
/// <returns>The newly created field.</returns>
/// <remarks>Deprecated: use the
/// <c>Field(String, TextReader, Field.TermVector)</c> constructor instead.
/// </remarks>
public static Field Text(System.String name, System.IO.TextReader value_Renamed, bool storeTermVector)
{
    Field readerField = new Field(name, value_Renamed);

    // Only the plain term vector flag is overridden here; the position and
    // offset flags keep the values the constructor gave them.
    readerField.storeTermVector = storeTermVector;
    return readerField;
}
-
/// <summary>Returns the name of the field as an interned string, for
/// example "date", "title" or "body".</summary>
/// <returns>The field name.</returns>
public System.String Name()
{
    return this.name;
}
-
/// <summary>The value of the field as a String, or null. If null, the Reader
/// value or binary value is used. Exactly one of StringValue(),
/// ReaderValue() and BinaryValue() must be set.
/// </summary>
/// <returns>The held value when it is a string; otherwise null.</returns>
public System.String StringValue()
{
    // "as" yields null when the held value is of a different type.
    return fieldsData as System.String;
}
-
/// <summary>The value of the field as a Reader, or null. If null, the String
/// value or binary value is used. Exactly one of StringValue(),
/// ReaderValue() and BinaryValue() must be set.
/// </summary>
/// <returns>The held value when it is a TextReader; otherwise null.</returns>
public System.IO.TextReader ReaderValue()
{
    // "as" yields null when the held value is of a different type.
    return fieldsData as System.IO.TextReader;
}
-
/// <summary>The value of the field in binary, or null. If null, the Reader
/// or String value is used. Exactly one of StringValue(), ReaderValue() and
/// BinaryValue() must be set.
/// </summary>
/// <returns>The held value when it is a byte array; otherwise null.</returns>
public byte[] BinaryValue()
{
    // "as" yields null when the held value is of a different type.
    return fieldsData as byte[];
}
-
/// <summary>Creates a field by specifying its name, value and how it will be
/// saved in the index. Term vectors will not be stored in the index.
/// </summary>
/// <param name="name">The name of the field.</param>
/// <param name="value_Renamed">The string to process.</param>
/// <param name="store">Whether the value should be stored in the index.</param>
/// <param name="index">Whether the field should be indexed, and if so,
/// whether it should be tokenized before indexing.</param>
/// <exception cref="System.NullReferenceException">if name or value is
/// null.</exception>
/// <exception cref="System.ArgumentException">if the field is neither stored
/// nor indexed.</exception>
public Field(System.String name, System.String value_Renamed, Store store, Index index)
    : this(name, value_Renamed, store, index, TermVector.NO)
{
    // All work happens in the five-argument constructor.
}
-
/// <summary>Creates a field by specifying its name, value and how it will be
/// saved in the index.
/// </summary>
/// <param name="name">The name of the field.</param>
/// <param name="value_Renamed">The string to process.</param>
/// <param name="store">Whether the value should be stored in the index.</param>
/// <param name="index">Whether the field should be indexed, and if so,
/// whether it should be tokenized before indexing.</param>
/// <param name="termVector">Whether term vector should be stored.</param>
/// <exception cref="System.NullReferenceException">if name or value is
/// null.</exception>
/// <exception cref="System.ArgumentException">in any of the following
/// situations:
/// <list type="bullet">
/// <item><description>the field is neither stored nor indexed</description></item>
/// <item><description>the field is not indexed but termVector is not
/// <c>TermVector.NO</c></description></item>
/// <item><description>store, index or termVector is not one of the
/// predefined constants</description></item>
/// </list>
/// </exception>
public Field(System.String name, System.String value_Renamed, Store store, Index index, TermVector termVector)
{
    // NOTE(review): NullReferenceException is kept because callers may
    // already catch it; ArgumentNullException would be the usual choice.
    if (name == null)
    {
        throw new System.NullReferenceException("name cannot be null");
    }
    if (value_Renamed == null)
    {
        throw new System.NullReferenceException("value cannot be null");
    }

    bool notIndexed = index == Index.NO;
    if (notIndexed && store == Store.NO)
    {
        throw new System.ArgumentException("it doesn't make sense to have a field that " + "is neither indexed nor stored");
    }
    if (notIndexed && termVector != TermVector.NO)
    {
        throw new System.ArgumentException("cannot store term vector information " + "for a field that is not indexed");
    }

    // Classify the store option; anything but the three constants is rejected.
    bool compressed = store == Store.COMPRESS;
    bool stored = compressed || store == Store.YES;
    if (!stored && !(store == Store.NO))
    {
        throw new System.ArgumentException("unknown store parameter " + store);
    }

    // Classify the index option; anything but the four constants is rejected.
    bool tokenized = index == Index.TOKENIZED;
    bool withoutNorms = index == Index.NO_NORMS;
    bool indexed = tokenized || withoutNorms || index == Index.UN_TOKENIZED;
    if (!indexed && !notIndexed)
    {
        throw new System.ArgumentException("unknown index parameter " + index);
    }

    this.name = String.Intern(name); // field names are interned
    this.fieldsData = value_Renamed;

    this.isStored = stored;
    this.isCompressed = compressed;

    this.isIndexed = indexed;
    this.isTokenized = tokenized;
    if (withoutNorms)
    {
        // Only NO_NORMS touches omitNorms; every other option leaves it alone.
        this.omitNorms = true;
    }

    this.isBinary = false;

    SetStoreTermVector(termVector);
}
-
/// <summary>Creates a tokenized and indexed field that is not stored. Term
/// vectors will not be stored.
/// </summary>
/// <param name="name">The name of the field.</param>
/// <param name="reader">The reader with the content.</param>
/// <exception cref="System.NullReferenceException">if name or reader is
/// null.</exception>
public Field(System.String name, System.IO.TextReader reader)
    : this(name, reader, TermVector.NO)
{
    // All work happens in the three-argument constructor.
}
-
/// <summary>Creates a tokenized and indexed field that is not stored,
/// optionally with storing term vectors.
/// </summary>
/// <param name="name">The name of the field.</param>
/// <param name="reader">The reader with the content.</param>
/// <param name="termVector">Whether term vector should be stored.</param>
/// <exception cref="System.NullReferenceException">if name or reader is
/// null.</exception>
/// <exception cref="System.ArgumentException">if termVector is not one of
/// the predefined <c>TermVector</c> constants.</exception>
public Field(System.String name, System.IO.TextReader reader, TermVector termVector)
{
    // NOTE(review): NullReferenceException is kept because callers may
    // already catch it; ArgumentNullException would be the usual choice.
    if (name == null)
    {
        throw new System.NullReferenceException("name cannot be null");
    }
    if (reader == null)
    {
        throw new System.NullReferenceException("reader cannot be null");
    }

    this.name = String.Intern(name); // field names are interned
    this.fieldsData = reader;

    // Reader-valued fields are never stored, and hence never compressed ...
    this.isStored = false;
    this.isCompressed = false;

    // ... but they are always indexed and tokenized.
    this.isIndexed = true;
    this.isTokenized = true;

    this.isBinary = false;

    SetStoreTermVector(termVector);
}
-
/// <summary>Creates a field by specifying all parameters except for
/// <c>storeTermVector</c>, which is set to <c>false</c>.
/// </summary>
/// <param name="name">The name of the field.</param>
/// <param name="string_Renamed">The string to process.</param>
/// <param name="store">true if the field should store the string.</param>
/// <param name="index">true if the field should be indexed.</param>
/// <param name="token">true if the field should be tokenized.</param>
/// <remarks>Deprecated: use the
/// <c>Field(String, String, Field.Store, Field.Index)</c> constructor
/// instead.</remarks>
public Field(System.String name, System.String string_Renamed, bool store, bool index, bool token)
    : this(name, string_Renamed, store, index, token, false)
{
    // All work happens in the six-argument constructor.
}
-
-
/// <summary>Creates a stored field with binary value. Optionally the value
/// may be compressed. The field is neither indexed nor tokenized, and no
/// term vector is stored.
/// </summary>
/// <param name="name">The name of the field.</param>
/// <param name="value_Renamed">The binary value.</param>
/// <param name="store">How the value should be stored (compressed or
/// not).</param>
/// <exception cref="System.ArgumentNullException">if name or value is null.
/// This type derives from <c>ArgumentException</c>, so handlers written for
/// the latter continue to work.</exception>
/// <exception cref="System.ArgumentException">if store is
/// <c>Store.NO</c> or is not one of the predefined <c>Store</c>
/// constants.</exception>
public Field(System.String name, byte[] value_Renamed, Store store)
{
    if (name == null)
    {
        throw new System.ArgumentNullException("name", "name cannot be null");
    }
    if (value_Renamed == null)
    {
        throw new System.ArgumentNullException("value_Renamed", "value cannot be null");
    }

    this.name = String.Intern(name); // field names are interned
    this.fieldsData = value_Renamed;

    if (store == Store.YES)
    {
        this.isStored = true;
        this.isCompressed = false;
    }
    else if (store == Store.COMPRESS)
    {
        this.isStored = true;
        this.isCompressed = true;
    }
    else if (store == Store.NO)
    {
        // A binary value is never indexed, so leaving it unstored would
        // produce a field that is neither indexed nor stored.
        throw new System.ArgumentException("binary values can't be unstored");
    }
    else
    {
        throw new System.ArgumentException("unknown store parameter " + store);
    }

    this.isIndexed = false;
    this.isTokenized = false;

    this.isBinary = true;

    SetStoreTermVector(TermVector.NO);
}
-
/// <summary>Creates a String-valued field from explicit boolean flags.
/// Unlike the constructor taking <c>Store</c> and <c>Index</c> options,
/// this one does not reject a field that is neither stored nor indexed.
/// </summary>
/// <param name="name">The name of the field.</param>
/// <param name="string_Renamed">The string to process.</param>
/// <param name="store">true if the field should store the string.</param>
/// <param name="index">true if the field should be indexed.</param>
/// <param name="token">true if the field should be tokenized.</param>
/// <param name="storeTermVector">true if we should store the term vector
/// info.</param>
/// <exception cref="System.NullReferenceException">if name or value is
/// null.</exception>
/// <exception cref="System.ArgumentException">if a term vector is requested
/// for a field that is not indexed.</exception>
/// <remarks>Deprecated: use the
/// <c>Field(String, String, Field.Store, Field.Index, Field.TermVector)</c>
/// constructor instead.</remarks>
public Field(System.String name, System.String string_Renamed, bool store, bool index, bool token, bool storeTermVector)
{
    // NOTE(review): NullReferenceException is kept because callers may
    // already catch it; ArgumentNullException would be the usual choice.
    if (name == null)
    {
        throw new System.NullReferenceException("name cannot be null");
    }
    if (string_Renamed == null)
    {
        throw new System.NullReferenceException("value cannot be null");
    }
    if (storeTermVector && !index)
    {
        throw new System.ArgumentException("cannot store a term vector for fields that are not indexed");
    }

    this.name = String.Intern(name); // field names are interned
    this.fieldsData = string_Renamed;

    // The remaining flags (compression, binary, norms, term vector
    // positions/offsets) are not assigned here and keep their defaults.
    this.isStored = store;
    this.isIndexed = index;
    this.isTokenized = token;
    this.storeTermVector = storeTermVector;
}
-
/// <summary>Translates a <c>TermVector</c> option into the three term vector
/// flags of this field.</summary>
/// <param name="termVector">One of the predefined <c>TermVector</c>
/// constants.</param>
/// <exception cref="System.ArgumentException">if termVector is none of the
/// predefined constants; no flag is modified in that case.</exception>
private void SetStoreTermVector(TermVector termVector)
{
    bool both = termVector == TermVector.WITH_POSITIONS_OFFSETS;
    bool positions = both || termVector == TermVector.WITH_POSITIONS;
    bool offsets = both || termVector == TermVector.WITH_OFFSETS;
    bool enabled = positions || offsets || termVector == TermVector.YES;

    if (!enabled && !(termVector == TermVector.NO))
    {
        throw new System.ArgumentException("unknown termVector parameter " + termVector);
    }

    this.storeTermVector = enabled;
    this.storePositionWithTermVector = positions;
    this.storeOffsetWithTermVector = offsets;
}
-
/// <summary>True iff the value of the field is to be stored in the index for
/// return with search hits. It is an error for this to be true if a field
/// is Reader-valued.</summary>
/// <returns>The current value of the stored flag.</returns>
public bool IsStored()
{
    return this.isStored;
}
-
/// <summary>True iff the value of the field is to be indexed, so that it may
/// be searched on.</summary>
/// <returns>The current value of the indexed flag.</returns>
public bool IsIndexed()
{
    return this.isIndexed;
}
-
/// <summary>True iff the value of the field should be tokenized as text
/// prior to indexing. Un-tokenized fields are indexed as a single word and
/// may not be Reader-valued.</summary>
/// <returns>The current value of the tokenized flag.</returns>
public bool IsTokenized()
{
    return this.isTokenized;
}
-
/// <summary>True if the value of the field is stored and compressed within
/// the index.</summary>
/// <returns>The current value of the compressed flag.</returns>
public bool IsCompressed()
{
    return this.isCompressed;
}
-
/// <summary>True iff the term or terms used to index this field are stored
/// as a term vector, available from
/// <c>IndexReader.GetTermFreqVector(int, String)</c>. These methods do not
/// provide access to the original content of the field, only to terms used
/// to index it. If the original content must be preserved, use the
/// <c>stored</c> attribute instead.
/// </summary>
/// <returns>The current value of the term vector flag.</returns>
/// <seealso cref="IndexReader.GetTermFreqVector(int, String)"/>
public bool IsTermVectorStored()
{
    return this.storeTermVector;
}
-
/// <summary>True iff terms are stored as term vector together with their
/// offsets (start and end position in source text).</summary>
/// <returns>The current value of the term vector offsets flag.</returns>
public bool IsStoreOffsetWithTermVector()
{
    return this.storeOffsetWithTermVector;
}
-
/// <summary>True iff terms are stored as term vector together with their
/// token positions.</summary>
/// <returns>The current value of the term vector positions flag.</returns>
public bool IsStorePositionWithTermVector()
{
    return this.storePositionWithTermVector;
}
-
/// <summary>True iff the value of the field is stored as binary.</summary>
/// <returns>The current value of the binary flag.</returns>
public bool IsBinary()
{
    return this.isBinary;
}
-
/// <summary>True if norms are omitted for this indexed field.</summary>
/// <returns>The current value of the omit-norms flag.</returns>
public bool GetOmitNorms()
{
    return this.omitNorms;
}
-
/// <summary>Expert: if set, omit normalization factors associated with this
/// indexed field. This effectively disables indexing boosts and length
/// normalization for this field.</summary>
/// <param name="omitNorms">true to omit norms for this field.</param>
public void SetOmitNorms(bool omitNorms)
{
    // The parameter shadows the field, hence the explicit "this.".
    this.omitNorms = omitNorms;
}
-
/// <summary>Prints a Field for human consumption. The result is a
/// comma-separated list of the flags that are set, followed by
/// <c>&lt;name:value&gt;</c>; the value part is left empty when the field
/// holds no data.</summary>
/// <returns>The textual description of this field.</returns>
public override System.String ToString()
{
    System.Text.StringBuilder result = new System.Text.StringBuilder();

    if (isStored)
    {
        // Always the first entry, so no separator is needed.
        result.Append("stored");
        result.Append(isCompressed ? "/compressed" : "/uncompressed");
    }
    if (isIndexed)
    {
        AppendFlag(result, "indexed");
    }
    if (isTokenized)
    {
        AppendFlag(result, "tokenized");
    }
    if (storeTermVector)
    {
        AppendFlag(result, "termVector");
    }
    if (storeOffsetWithTermVector)
    {
        AppendFlag(result, "termVectorOffsets");
    }
    if (storePositionWithTermVector)
    {
        AppendFlag(result, "termVectorPosition");
    }
    if (isBinary)
    {
        AppendFlag(result, "binary");
    }
    if (omitNorms)
    {
        // Goes through the same separator logic as every other flag, so no
        // stray leading comma is produced when this is the only flag set.
        AppendFlag(result, "omitNorms");
    }

    result.Append('<');
    result.Append(name);
    result.Append(':');

    if (fieldsData != null)
    {
        result.Append(fieldsData);
    }

    result.Append('>');
    return result.ToString();
}

/// <summary>Appends a flag name to the buffer, preceded by a comma when the
/// buffer already has content.</summary>
private static void AppendFlag(System.Text.StringBuilder buffer, System.String flag)
{
    if (buffer.Length > 0)
    {
        buffer.Append(',');
    }
    buffer.Append(flag);
}
- }
- }