LuceneIndexTypes.java
上传用户:cctqzzy
上传日期:2022-03-14
资源大小:12198k
文件大小:7k
源码类别:

搜索引擎

开发平台:

Java

  1. package chapter5;
  2. import java.util.Date;
  3. import java.io.*;
  4. import org.apache.lucene.search.Hits;
  5. import org.apache.lucene.search.IndexSearcher;
  6. import org.apache.lucene.search.Query;
  7. import org.apache.lucene.search.TermQuery;
  8. import org.apache.lucene.store.Directory; 
  9. import org.apache.lucene.store.FSDirectory;
  10. import org.apache.lucene.document.Field;
  11. import org.apache.lucene.document.Document;
  12. import org.apache.lucene.index.IndexWriter;
  13. import org.apache.lucene.index.IndexReader;
  14. import org.apache.lucene.index.Term;
  15. import org.apache.lucene.analysis.Analyzer;
  16. import org.apache.lucene.analysis.standard.StandardAnalyzer;
  17. import org.apache.lucene.analysis.SimpleAnalyzer;
  18. public class LuceneIndexTypes {
  19. private static String Dest_Index_Path = "D:\workshop\TextIndexstore";
  20. private static String Dest_Index_Path_un = "D:\workshop\TextIndexunstore";
  21. private static String Dest_Index_Path_Zip = "D:\workshop\TextIndexZip";
  22. private static String Text_File_Path = "D:\workshop\ch2\wholeaximofu.txt";
  23. public static void main(String[] args) {
  24.       
  25. try {
  26. indexstore();
  27. indexunstore();
  28. indexcompress();
  29. System.out.println(" index sucess.");
  30. } catch (IOException e) {
  31. e.printStackTrace();
  32. }
  33. }
  34. public static void indexstore() throws IOException
  35. {
  36. File file = new File(Text_File_Path);  // 原始文件
  37.         Directory dir = FSDirectory.getDirectory(Dest_Index_Path,false); // 索引目录
  38.         Analyzer TextAnalyzer = new SimpleAnalyzer();                    // 文档分析器
  39.         IndexWriter TextIndex = new IndexWriter(dir,TextAnalyzer,true);  // 生成索引器对象
  40.         TextIndex.setUseCompoundFile(true);
  41. Document document = new Document();                              // 新建空文档
  42. Field field_name = new Field("path", file.getName(), 
  43. Field.Store.YES,Field.Index.UN_TOKENIZED);
  44. document.add(field_name);                                        // 添加文件名域
  45. FileInputStream inputfile=new FileInputStream(file);             // 文件输入流
  46. int len=inputfile.available();
  47. byte[] buffer = new byte[len]; 
  48. inputfile.read(buffer);                                          // 读取文件内容
  49. inputfile.close();
  50. String contentext = new String(buffer);
  51. Field field_content = new Field( "content", contentext,          // 文本域保存内容
  52.                          Field.Store.YES,Field.Index.TOKENIZED );
  53. document.add(field_content);                                    // 添加文件内容域
  54. TextIndex.addDocument(document);                                // 添加索引文档
  55. TextIndex.optimize();
  56. TextIndex.close();
  57. System.out.println("########## Index Stored ##########");
  58. display(Dest_Index_Path , file.getName());
  59. }
  60. public static void indexunstore( ) throws IOException
  61. {
  62. File file = new File(Text_File_Path);  // 原始文件
  63.         Directory dir = FSDirectory.getDirectory(Dest_Index_Path_un,false); // 索引目录
  64.         Analyzer TextAnalyzer = new SimpleAnalyzer();                    // 文档分析器
  65.         IndexWriter TextIndex = new IndexWriter(dir,TextAnalyzer,true);  // 生成索引器对象
  66.         TextIndex.setUseCompoundFile(true);
  67. Document document = new Document();                              // 新建空文档
  68. Field field_name = new Field("path", file.getName(), 
  69. Field.Store.YES,Field.Index.UN_TOKENIZED);
  70. document.add(field_name);                                        // 添加文件名域
  71. FileInputStream inputfile=new FileInputStream(file);             // 文件输入流
  72. int len=inputfile.available();
  73. byte[] buffer = new byte[len]; 
  74. inputfile.read(buffer);                                          // 读取文件内容
  75. inputfile.close();
  76. String contentext = new String(buffer);
  77. Field field_content = new Field( "content", contentext,          // 文本域不保存内容
  78.                          Field.Store.NO,Field.Index.TOKENIZED );
  79. document.add(field_content);                                    // 添加文件内容域
  80. TextIndex.addDocument(document);                                // 添加索引文档
  81. TextIndex.optimize();
  82. TextIndex.close();
  83. System.out.println("########## Index UnStored ##########");
  84. display(Dest_Index_Path_un , file.getName());
  85. }
  86. public static void indexcompress() throws IOException
  87. {
  88. File file = new File(Text_File_Path);  // 原始文件
  89.         Directory dir = FSDirectory.getDirectory(Dest_Index_Path_Zip,false); // 索引目录
  90.         Analyzer TextAnalyzer = new SimpleAnalyzer();                    // 文档分析器
  91.         IndexWriter TextIndex = new IndexWriter(dir,TextAnalyzer,true);  // 生成索引器对象
  92.         TextIndex.setUseCompoundFile(true);
  93. Document document = new Document();                              // 新建空文档
  94. Field field_name = new Field("path", file.getName(), 
  95. Field.Store.YES,Field.Index.UN_TOKENIZED);
  96. document.add(field_name);                                        // 添加文件名域
  97. FileInputStream inputfile=new FileInputStream(file);             // 文件输入流
  98. int len=inputfile.available();
  99. byte[] buffer = new byte[len]; 
  100. inputfile.read(buffer);                                          // 读取文件内容
  101. inputfile.close();
  102. String contentext = new String(buffer);
  103. Field field_content = new Field( "content", contentext,          // 文本域保存内容
  104.                          Field.Store.COMPRESS,Field.Index.TOKENIZED );
  105. document.add(field_content);                                    // 添加文件内容域
  106. TextIndex.addDocument(document);                                // 添加索引文档
  107. TextIndex.optimize();
  108. TextIndex.close();
  109. System.out.println("########## Index Compress ##########");
  110. display(Dest_Index_Path_Zip , file.getName());
  111. }
  112. public static void display(String indexpath, String words) throws IOException
  113. { // 显示结果
  114. try {
  115. IndexSearcher searcher = new IndexSearcher( indexpath ); // 检索器
  116. Term term = new Term("path", words );                          // 单词项
  117. Query query = new TermQuery(term);                             // 检索单元 
  118. System.out.println("Query  words:");
  119. System.out.println("  " + query.toString());
  120. Hits hits = searcher.search(query);                            // 提交检索
  121. System.out.println("Search result:");
  122. for(int i=0; i < hits.length(); i++)                           // 输出结果
  123. {
  124. System.out.println("  Path: " + hits.doc(i).getField("path").stringValue());
  125. if( hits.doc(i).getField("content")!= null)
  126.   System.out.println("  Content: " + hits.doc(i).getField("content").stringValue());
  127. }
  128. } catch (IOException e)
  129. {
  130. e.printStackTrace();
  131. }
  132. }
  133. }