LuceneIndexUnsearch.java
上传用户:cctqzzy
上传日期:2022-03-14
资源大小:12198k
文件大小:5k
源码类别:

搜索引擎

开发平台:

Java

  1. package chapter5;
  2. import java.util.Date;
  3. import java.io.*;
  4. import org.apache.lucene.search.Hits;
  5. import org.apache.lucene.search.IndexSearcher;
  6. import org.apache.lucene.search.Query;
  7. import org.apache.lucene.search.TermQuery;
  8. import org.apache.lucene.store.Directory; 
  9. import org.apache.lucene.store.FSDirectory;
  10. import org.apache.lucene.document.Field;
  11. import org.apache.lucene.document.Document;
  12. import org.apache.lucene.index.IndexWriter;
  13. import org.apache.lucene.index.IndexReader;
  14. import org.apache.lucene.index.Term;
  15. import org.apache.lucene.analysis.Analyzer;
  16. import org.apache.lucene.analysis.standard.StandardAnalyzer;
  17. import org.apache.lucene.analysis.SimpleAnalyzer;
  18. public class LuceneIndexUnsearch {
  19. private static String Dest_Index_Path = "D:\workshop\TextwithIndex";
  20. private static String Dest_NoIndex_Path = "D:\workshop\TextwithoutIndex";
  21. private static String Text_File_Path = "D:\workshop\ch2\wholeaximofu.txt";
  22. public static void main(String[] args) {
  23.       
  24. try {
  25. withdataindex();
  26. withoutindex();
  27. System.out.println(" index sucess.");
  28. } catch (IOException e) {
  29. e.printStackTrace();
  30. }
  31. }
  32. public static void withdataindex() throws IOException
  33. {
  34. File file = new File(Text_File_Path);  // 原始文件
  35.         Directory dir = FSDirectory.getDirectory(Dest_Index_Path,false); // 索引目录
  36.         Analyzer TextAnalyzer = new SimpleAnalyzer();                    // 文档分析器
  37.         IndexWriter TextIndex = new IndexWriter(dir,TextAnalyzer,true);  // 生成索引器对象
  38.         TextIndex.setUseCompoundFile(true);
  39. Document document = new Document();                              // 新建空文档
  40. Field field_name = new Field("path", file.getName(), 
  41. Field.Store.YES,Field.Index.UN_TOKENIZED);
  42. document.add(field_name);                                        // 添加文件名域
  43. FileInputStream inputfile=new FileInputStream(file);             // 文件输入流
  44. int len=inputfile.available();
  45. byte[] buffer = new byte[len]; 
  46. inputfile.read(buffer);                                          // 读取文件内容
  47. inputfile.close();
  48. String contentext = new String(buffer);
  49. Field field_content = new Field( "content", contentext,          // 文本域保存内容,不建立检索
  50.                          Field.Store.YES,Field.Index.TOKENIZED );
  51. document.add(field_content);                                    // 添加文件内容域
  52. TextIndex.addDocument(document);                                // 添加索引文档
  53. TextIndex.optimize();
  54. TextIndex.close();
  55. System.out.println("########## 不创建内容 Index ##########");
  56. display(Dest_Index_Path , file.getName());
  57. }
  58. public static void withoutindex() throws IOException
  59. {
  60. File file = new File(Text_File_Path);  // 原始文件
  61.         Directory dir = FSDirectory.getDirectory(Dest_NoIndex_Path,false); // 索引目录
  62.         Analyzer TextAnalyzer = new SimpleAnalyzer();                    // 文档分析器
  63.         IndexWriter TextIndex = new IndexWriter(dir,TextAnalyzer,true);  // 生成索引器对象
  64.         TextIndex.setUseCompoundFile(true);
  65. Document document = new Document();                              // 新建空文档
  66. Field field_name = new Field("path", file.getName(), 
  67. Field.Store.YES,Field.Index.UN_TOKENIZED);
  68. document.add(field_name);                                        // 添加文件名域
  69. FileInputStream inputfile=new FileInputStream(file);             // 文件输入流
  70. int len=inputfile.available();
  71. byte[] buffer = new byte[len]; 
  72. inputfile.read(buffer);                                          // 读取文件内容
  73. inputfile.close();
  74. String contentext = new String(buffer);
  75. Field field_content = new Field( "content", contentext,          // 文本域保存内容,分词索引
  76.                          Field.Store.YES,Field.Index.NO );
  77. document.add(field_content);                                    // 添加文件内容域
  78. TextIndex.addDocument(document);                                // 添加索引文档
  79. TextIndex.optimize();
  80. TextIndex.close();
  81. System.out.println("########## 建立内容 Index  ##########");
  82. display(Dest_Index_Path , file.getName());
  83. }
  84. public static void display(String indexpath, String words) throws IOException
  85. { // 显示结果
  86. try {
  87. IndexSearcher searcher = new IndexSearcher( indexpath ); // 检索器
  88. Term term = new Term("path", words );                          // 单词项
  89. Query query = new TermQuery(term);                             // 检索单元 
  90. System.out.println("Query  words:");
  91. System.out.println("  " + query.toString());
  92. Hits hits = searcher.search(query);                            // 提交检索
  93. System.out.println("Search result:");
  94. for(int i=0; i < hits.length(); i++)                           // 输出结果
  95. {
  96. System.out.println("  Path: " + hits.doc(i).getField("path").stringValue());
  97. if( hits.doc(i).getField("content")!= null)
  98.   System.out.println("  Content: " + hits.doc(i).getField("content").stringValue());
  99. }
  100. } catch (IOException e)
  101. {
  102. e.printStackTrace();
  103. }
  104. }
  105. }