IndexModifier.cs
上传用户:zhangkuixh
上传日期:2013-09-30
资源大小:5473k
文件大小:23k
- /*
- * Copyright 2005 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- using System;
- using Analyzer = Lucene.Net.Analysis.Analyzer;
- using Document = Lucene.Net.Documents.Document;
- using Directory = Lucene.Net.Store.Directory;
- using FSDirectory = Lucene.Net.Store.FSDirectory;
- namespace Lucene.Net.Index
- {
-
/// <summary>
/// Modifies an index, i.e. deletes and adds documents, through a single object.
/// Internally the class holds either an <see cref="IndexWriter"/> (used for adding
/// documents and optimizing) or an <see cref="IndexReader"/> (used for deleting
/// documents) and switches between the two on demand, so callers do not need to
/// care about that implementation detail.
/// </summary>
/// <remarks>
/// <para>Note that you cannot create more than one <c>IndexModifier</c> object
/// on the same directory at the same time.</para>
/// <para>Example usage:</para>
/// <code>
/// Analyzer analyzer = new StandardAnalyzer();
/// // create an index in /tmp/index, overwriting an existing one:
/// IndexModifier indexModifier = new IndexModifier("/tmp/index", analyzer, true);
/// Document doc = new Document();
/// doc.Add(new Field("id", "1", Field.Store.YES, Field.Index.UN_TOKENIZED));
/// doc.Add(new Field("body", "a simple test", Field.Store.YES, Field.Index.TOKENIZED));
/// indexModifier.AddDocument(doc);
/// int deleted = indexModifier.DeleteDocuments(new Term("id", "1"));
/// System.Console.Out.WriteLine("Deleted " + deleted + " document");
/// indexModifier.Flush();
/// System.Console.Out.WriteLine(indexModifier.DocCount() + " docs in index");
/// indexModifier.Close();
/// </code>
/// <para>Not all methods of IndexReader and IndexWriter are offered by this
/// class. If you need access to additional methods, either use those classes
/// directly or implement your own class that extends <c>IndexModifier</c>.</para>
/// <para>Although an instance of this class can be used from more than one
/// thread (every public operation locks on the index directory object), you will
/// not get the best performance. You might want to use IndexReader and
/// IndexWriter directly for that (but you will need to care about
/// synchronization yourself then).</para>
/// <para>While you can freely mix calls to AddDocument() and DeleteDocuments()
/// using this class, you should batch your calls for best performance, because
/// every switch between adding and deleting closes the writer and opens a reader
/// or vice versa. For example, if you want to update 20 documents, you should
/// first delete all those documents, then add all the new documents.</para>
/// </remarks>
/// <author> Daniel Naber
/// </author>
public class IndexModifier
{
    /// <summary>The writer currently in use, or <c>null</c> while the reader is active.</summary>
    protected internal IndexWriter indexWriter = null;

    /// <summary>The reader currently in use, or <c>null</c> while the writer is active.</summary>
    protected internal IndexReader indexReader = null;

    /// <summary>The index directory; also the object every public operation locks on.</summary>
    protected internal Directory directory = null;

    /// <summary>The analyzer handed to every IndexWriter this instance creates.</summary>
    protected internal Analyzer analyzer = null;

    /// <summary><c>true</c> between a successful Init() and Close().</summary>
    protected internal bool open = false;

    // Writer settings. They are remembered here so that CreateIndexWriter() can
    // re-apply them each time a new IndexWriter replaces the reader.
    protected internal System.IO.StreamWriter infoStream = null;
    protected internal bool useCompoundFile = true;
    protected internal int maxBufferedDocs = IndexWriter.DEFAULT_MAX_BUFFERED_DOCS;
    protected internal int maxFieldLength = IndexWriter.DEFAULT_MAX_FIELD_LENGTH;
    protected internal int mergeFactor = IndexWriter.DEFAULT_MERGE_FACTOR;

    /// <summary>Open an index with write access.</summary>
    /// <param name="directory">the index directory</param>
    /// <param name="analyzer">the analyzer to use for adding new documents</param>
    /// <param name="create"><c>true</c> to create the index or overwrite the existing one;
    /// <c>false</c> to append to the existing index</param>
    public IndexModifier(Directory directory, Analyzer analyzer, bool create)
    {
        Init(directory, analyzer, create);
    }

    /// <summary>Open an index with write access.</summary>
    /// <param name="dirName">the index directory</param>
    /// <param name="analyzer">the analyzer to use for adding new documents</param>
    /// <param name="create"><c>true</c> to create the index or overwrite the existing one;
    /// <c>false</c> to append to the existing index</param>
    public IndexModifier(System.String dirName, Analyzer analyzer, bool create)
    {
        Directory dir = FSDirectory.GetDirectory(dirName, create);
        Init(dir, analyzer, create);
    }

    /// <summary>Open an index with write access.</summary>
    /// <param name="file">the index directory</param>
    /// <param name="analyzer">the analyzer to use for adding new documents</param>
    /// <param name="create"><c>true</c> to create the index or overwrite the existing one;
    /// <c>false</c> to append to the existing index</param>
    public IndexModifier(System.IO.FileInfo file, Analyzer analyzer, bool create)
    {
        Directory dir = FSDirectory.GetDirectory(file, create);
        Init(dir, analyzer, create);
    }

    /// <summary>
    /// Stores the directory and analyzer, opens the initial IndexWriter and marks
    /// this instance as open.
    /// </summary>
    /// <param name="directory">the index directory</param>
    /// <param name="analyzer">the analyzer to use for adding new documents</param>
    /// <param name="create">passed through to the IndexWriter constructor</param>
    /// <exception cref="System.IO.IOException">may be raised while opening the writer</exception>
    protected internal virtual void Init(Directory directory, Analyzer analyzer, bool create)
    {
        this.directory = directory;
        lock (this.directory)
        {
            this.analyzer = analyzer;
            indexWriter = new IndexWriter(directory, analyzer, create);
            open = true;
        }
    }

    /// <summary>Throws if the index is closed.</summary>
    /// <exception cref="System.InvalidOperationException">if the index is closed</exception>
    protected internal virtual void AssureOpen()
    {
        if (!open)
        {
            // InvalidOperationException derives from SystemException, so callers
            // that catch the previously thrown SystemException keep working.
            throw new System.InvalidOperationException("Index is closed");
        }
    }

    /// <summary>
    /// Closes the IndexReader (if any) and opens an IndexWriter configured with the
    /// remembered settings. Does nothing when a writer is already open.
    /// </summary>
    /// <exception cref="System.IO.IOException">may be raised while closing the reader or opening the writer</exception>
    protected internal virtual void CreateIndexWriter()
    {
        if (indexWriter != null)
        {
            return;
        }

        if (indexReader != null)
        {
            indexReader.Close();
            indexReader = null;
        }

        indexWriter = new IndexWriter(directory, analyzer, false);
        indexWriter.SetInfoStream(infoStream);
        indexWriter.SetUseCompoundFile(useCompoundFile);
        indexWriter.SetMaxBufferedDocs(maxBufferedDocs);
        indexWriter.SetMaxFieldLength(maxFieldLength);
        indexWriter.SetMergeFactor(mergeFactor);
    }

    /// <summary>
    /// Closes the IndexWriter (if any) and opens an IndexReader. Does nothing when
    /// a reader is already open.
    /// </summary>
    /// <exception cref="System.IO.IOException">may be raised while closing the writer or opening the reader</exception>
    protected internal virtual void CreateIndexReader()
    {
        if (indexReader != null)
        {
            return;
        }

        if (indexWriter != null)
        {
            indexWriter.Close();
            indexWriter = null;
        }

        indexReader = IndexReader.Open(directory);
    }

    /// <summary>
    /// Make sure all changes are written to disk, by closing and re-opening
    /// whichever of the writer or reader is currently active.
    /// </summary>
    /// <exception cref="System.InvalidOperationException">if the index is closed</exception>
    /// <exception cref="System.IO.IOException">may be raised by the underlying writer or reader</exception>
    public virtual void Flush()
    {
        lock (directory)
        {
            AssureOpen();
            if (indexWriter != null)
            {
                indexWriter.Close();
                indexWriter = null;
                CreateIndexWriter();
            }
            else if (indexReader != null)
            {
                indexReader.Close();
                indexReader = null;
                CreateIndexReader();
            }
            // Neither is set only when an earlier writer/reader switch failed after
            // closing one side and before opening the other; nothing is pending then.
        }
    }

    /// <summary>
    /// Adds a document to this index, using the provided analyzer instead of the
    /// one specified in the constructor. If the document contains more than
    /// <see cref="SetMaxFieldLength(int)"/> terms for a given field, the remainder are
    /// discarded.
    /// </summary>
    /// <param name="doc">the document to add</param>
    /// <param name="docAnalyzer">the analyzer for this document, or <c>null</c> to use
    /// the analyzer given to the constructor</param>
    /// <seealso cref="IndexWriter.AddDocument(Document, Analyzer)"/>
    /// <exception cref="System.InvalidOperationException">if the index is closed</exception>
    public virtual void AddDocument(Document doc, Analyzer docAnalyzer)
    {
        lock (directory)
        {
            AssureOpen();
            CreateIndexWriter();
            if (docAnalyzer != null)
            {
                indexWriter.AddDocument(doc, docAnalyzer);
            }
            else
            {
                indexWriter.AddDocument(doc);
            }
        }
    }

    /// <summary>
    /// Adds a document to this index. If the document contains more than
    /// <see cref="SetMaxFieldLength(int)"/> terms for a given field, the remainder are
    /// discarded.
    /// </summary>
    /// <param name="doc">the document to add</param>
    /// <seealso cref="IndexWriter.AddDocument(Document)"/>
    /// <exception cref="System.InvalidOperationException">if the index is closed</exception>
    public virtual void AddDocument(Document doc)
    {
        AddDocument(doc, null);
    }

    /// <summary>
    /// Deletes all documents containing <c>term</c>.
    /// This is useful if one uses a document field to hold a unique ID string for
    /// the document. Then to delete such a document, one merely constructs a
    /// term with the appropriate field and the unique ID string as its text and
    /// passes it to this method.
    /// </summary>
    /// <param name="term">the term identifying the documents to delete</param>
    /// <returns>the number of documents deleted</returns>
    /// <seealso cref="IndexReader.DeleteDocuments(Term)"/>
    /// <exception cref="System.InvalidOperationException">if the index is closed</exception>
    public virtual int DeleteDocuments(Term term)
    {
        lock (directory)
        {
            AssureOpen();
            CreateIndexReader();
            return indexReader.DeleteDocuments(term);
        }
    }

    /// <summary>
    /// Deletes all documents containing <c>term</c>.
    /// Deprecated: use <see cref="DeleteDocuments(Term)"/> instead; this method
    /// simply forwards to it.
    /// </summary>
    /// <param name="term">the term identifying the documents to delete</param>
    /// <returns>the number of documents deleted</returns>
    /// <seealso cref="IndexReader.DeleteDocuments(Term)"/>
    /// <exception cref="System.InvalidOperationException">if the index is closed</exception>
    public virtual int Delete(Term term)
    {
        return DeleteDocuments(term);
    }

    /// <summary>Deletes the document numbered <c>docNum</c>.</summary>
    /// <param name="docNum">the number of the document to delete</param>
    /// <seealso cref="IndexReader.DeleteDocument(int)"/>
    /// <exception cref="System.InvalidOperationException">if the index is closed</exception>
    public virtual void DeleteDocument(int docNum)
    {
        lock (directory)
        {
            AssureOpen();
            CreateIndexReader();
            indexReader.DeleteDocument(docNum);
        }
    }

    /// <summary>
    /// Deletes the document numbered <c>docNum</c>.
    /// Deprecated: use <see cref="DeleteDocument(int)"/> instead; this method
    /// simply forwards to it.
    /// </summary>
    /// <param name="docNum">the number of the document to delete</param>
    /// <seealso cref="IndexReader.DeleteDocument(int)"/>
    /// <exception cref="System.InvalidOperationException">if the index is closed</exception>
    public virtual void Delete(int docNum)
    {
        DeleteDocument(docNum);
    }

    /// <summary>
    /// Returns the number of documents currently in this index, as reported by the
    /// active writer (DocCount) or, when no writer is open, by the reader (NumDocs).
    /// </summary>
    /// <returns>the document count</returns>
    /// <seealso cref="IndexWriter.DocCount()"/>
    /// <seealso cref="IndexReader.NumDocs()"/>
    /// <exception cref="System.InvalidOperationException">if the index is closed</exception>
    public virtual int DocCount()
    {
        lock (directory)
        {
            AssureOpen();
            if (indexWriter != null)
            {
                return indexWriter.DocCount();
            }
            return indexReader.NumDocs();
        }
    }

    /// <summary>
    /// Merges all segments together into a single segment, optimizing an index
    /// for search.
    /// </summary>
    /// <seealso cref="IndexWriter.Optimize()"/>
    /// <exception cref="System.InvalidOperationException">if the index is closed</exception>
    public virtual void Optimize()
    {
        lock (directory)
        {
            AssureOpen();
            CreateIndexWriter();
            indexWriter.Optimize();
        }
    }

    /// <summary>
    /// If non-null, information about merges and a message when
    /// <see cref="GetMaxFieldLength()"/> is reached will be printed to this.
    /// The value is applied to the current writer, if any, and remembered for
    /// writers created later.
    /// </summary>
    /// <param name="infoStream">the stream to print to, or <c>null</c></param>
    /// <seealso cref="IndexWriter.SetInfoStream"/>
    /// <exception cref="System.InvalidOperationException">if the index is closed</exception>
    public virtual void SetInfoStream(System.IO.StreamWriter infoStream)
    {
        lock (directory)
        {
            AssureOpen();
            if (indexWriter != null)
            {
                indexWriter.SetInfoStream(infoStream);
            }
            this.infoStream = infoStream;
        }
    }

    /// <summary>
    /// Returns the info stream of the IndexWriter. Note that this switches to the
    /// writer first, i.e. an open reader is closed.
    /// </summary>
    /// <returns>the writer's info stream</returns>
    /// <seealso cref="SetInfoStream(System.IO.StreamWriter)"/>
    /// <exception cref="System.InvalidOperationException">if the index is closed</exception>
    /// <exception cref="System.IO.IOException">may be raised while switching to the writer</exception>
    public virtual System.IO.TextWriter GetInfoStream()
    {
        lock (directory)
        {
            AssureOpen();
            CreateIndexWriter();
            return indexWriter.GetInfoStream();
        }
    }

    /// <summary>
    /// Setting to turn on usage of a compound file. When on, multiple files
    /// for each segment are merged into a single file once the segment creation
    /// is finished. This is done regardless of what directory is in use.
    /// The value is applied to the current writer, if any, and remembered for
    /// writers created later.
    /// </summary>
    /// <param name="useCompoundFile"><c>true</c> to use compound files</param>
    /// <seealso cref="IndexWriter.SetUseCompoundFile(bool)"/>
    /// <exception cref="System.InvalidOperationException">if the index is closed</exception>
    public virtual void SetUseCompoundFile(bool useCompoundFile)
    {
        lock (directory)
        {
            AssureOpen();
            if (indexWriter != null)
            {
                indexWriter.SetUseCompoundFile(useCompoundFile);
            }
            this.useCompoundFile = useCompoundFile;
        }
    }

    /// <summary>
    /// Returns the compound-file setting of the IndexWriter. Note that this
    /// switches to the writer first, i.e. an open reader is closed.
    /// </summary>
    /// <returns>the writer's compound-file setting</returns>
    /// <seealso cref="SetUseCompoundFile(bool)"/>
    /// <exception cref="System.InvalidOperationException">if the index is closed</exception>
    /// <exception cref="System.IO.IOException">may be raised while switching to the writer</exception>
    public virtual bool GetUseCompoundFile()
    {
        lock (directory)
        {
            AssureOpen();
            CreateIndexWriter();
            return indexWriter.GetUseCompoundFile();
        }
    }

    /// <summary>
    /// The maximum number of terms that will be indexed for a single field in a
    /// document. This limits the amount of memory required for indexing, so that
    /// collections with very large files will not crash the indexing process by
    /// running out of memory.
    /// <para>Note that this effectively truncates large documents, excluding from the
    /// index terms that occur further in the document. If you know your source
    /// documents are large, be sure to set this value high enough to accommodate
    /// the expected size. If you set it to int.MaxValue, then the only limit
    /// is your memory, but you should anticipate running out of memory.</para>
    /// <para>By default, no more than 10,000 terms will be indexed for a field.</para>
    /// The value is applied to the current writer, if any, and remembered for
    /// writers created later.
    /// </summary>
    /// <param name="maxFieldLength">the maximum number of terms per field</param>
    /// <seealso cref="IndexWriter.SetMaxFieldLength(int)"/>
    /// <exception cref="System.InvalidOperationException">if the index is closed</exception>
    public virtual void SetMaxFieldLength(int maxFieldLength)
    {
        lock (directory)
        {
            AssureOpen();
            if (indexWriter != null)
            {
                indexWriter.SetMaxFieldLength(maxFieldLength);
            }
            this.maxFieldLength = maxFieldLength;
        }
    }

    /// <summary>
    /// Returns the maximum field length of the IndexWriter. Note that this
    /// switches to the writer first, i.e. an open reader is closed.
    /// </summary>
    /// <returns>the writer's maximum field length</returns>
    /// <seealso cref="SetMaxFieldLength(int)"/>
    /// <exception cref="System.InvalidOperationException">if the index is closed</exception>
    /// <exception cref="System.IO.IOException">may be raised while switching to the writer</exception>
    public virtual int GetMaxFieldLength()
    {
        lock (directory)
        {
            AssureOpen();
            CreateIndexWriter();
            return indexWriter.GetMaxFieldLength();
        }
    }

    /// <summary>
    /// Sets the max-buffered-docs value that is forwarded to
    /// IndexWriter.SetMaxBufferedDocs(int); see that method for the exact
    /// semantics of the setting. The initial value is
    /// IndexWriter.DEFAULT_MAX_BUFFERED_DOCS. The value is applied to the current
    /// writer, if any, and remembered for writers created later.
    /// </summary>
    /// <param name="maxBufferedDocs">the value to pass to the writer</param>
    /// <seealso cref="IndexWriter.SetMaxBufferedDocs(int)"/>
    /// <exception cref="System.InvalidOperationException">if the index is closed</exception>
    public virtual void SetMaxBufferedDocs(int maxBufferedDocs)
    {
        lock (directory)
        {
            AssureOpen();
            if (indexWriter != null)
            {
                indexWriter.SetMaxBufferedDocs(maxBufferedDocs);
            }
            this.maxBufferedDocs = maxBufferedDocs;
        }
    }

    /// <summary>
    /// Returns the max-buffered-docs value of the IndexWriter. Note that this
    /// switches to the writer first, i.e. an open reader is closed.
    /// </summary>
    /// <returns>the writer's max-buffered-docs value</returns>
    /// <seealso cref="SetMaxBufferedDocs(int)"/>
    /// <exception cref="System.InvalidOperationException">if the index is closed</exception>
    /// <exception cref="System.IO.IOException">may be raised while switching to the writer</exception>
    public virtual int GetMaxBufferedDocs()
    {
        lock (directory)
        {
            AssureOpen();
            CreateIndexWriter();
            return indexWriter.GetMaxBufferedDocs();
        }
    }

    /// <summary>
    /// Determines how often segment indices are merged by AddDocument(). With
    /// smaller values, less RAM is used while indexing, and searches on
    /// unoptimized indices are faster, but indexing speed is slower. With larger
    /// values, more RAM is used during indexing, and while searches on unoptimized
    /// indices are slower, indexing is faster. Thus larger values (greater than 10)
    /// are best for batch index creation, and smaller values (less than 10) for
    /// indices that are interactively maintained.
    /// <para>This must never be less than 2. The default value is 10.</para>
    /// The value is applied to the current writer, if any, and remembered for
    /// writers created later.
    /// </summary>
    /// <param name="mergeFactor">the merge factor</param>
    /// <seealso cref="IndexWriter.SetMergeFactor(int)"/>
    /// <exception cref="System.InvalidOperationException">if the index is closed</exception>
    public virtual void SetMergeFactor(int mergeFactor)
    {
        lock (directory)
        {
            AssureOpen();
            if (indexWriter != null)
            {
                indexWriter.SetMergeFactor(mergeFactor);
            }
            this.mergeFactor = mergeFactor;
        }
    }

    /// <summary>
    /// Returns the merge factor of the IndexWriter. Note that this switches to
    /// the writer first, i.e. an open reader is closed.
    /// </summary>
    /// <returns>the writer's merge factor</returns>
    /// <seealso cref="SetMergeFactor(int)"/>
    /// <exception cref="System.InvalidOperationException">if the index is closed</exception>
    /// <exception cref="System.IO.IOException">may be raised while switching to the writer</exception>
    public virtual int GetMergeFactor()
    {
        lock (directory)
        {
            AssureOpen();
            CreateIndexWriter();
            return indexWriter.GetMergeFactor();
        }
    }

    /// <summary>Close this index, writing all pending changes to disk.</summary>
    /// <exception cref="System.InvalidOperationException">if the index has been closed before already</exception>
    public virtual void Close()
    {
        lock (directory)
        {
            if (!open)
            {
                // InvalidOperationException derives from SystemException, so callers
                // that catch the previously thrown SystemException keep working.
                throw new System.InvalidOperationException("Index is closed already");
            }

            if (indexWriter != null)
            {
                indexWriter.Close();
                indexWriter = null;
            }
            else if (indexReader != null)
            {
                // The null check matters: both fields are null if an earlier
                // writer/reader switch threw after closing one side and before
                // opening the other. Close() must still succeed in that state.
                indexReader.Close();
                indexReader = null;
            }

            open = false;
        }
    }

    /// <summary>Returns "Index@" followed by the string form of the directory.</summary>
    public override System.String ToString()
    {
        return "Index@" + directory;
    }
}
- }