LuceneStopAnalyzerText.java
上传用户:cctqzzy
上传日期:2022-03-14
资源大小:12198k
文件大小:4k
源码类别:

搜索引擎

开发平台:

Java

  1. package chapter8;
  2. import java.io.IOException;
  3. import org.apache.lucene.document.Field;
  4. import org.apache.lucene.document.Document;
  5. import org.apache.lucene.index.IndexWriter;
  6. import org.apache.lucene.queryParser.ParseException;
  7. import org.apache.lucene.queryParser.QueryParser;
  8. import org.apache.lucene.search.Hits;
  9. import org.apache.lucene.search.IndexSearcher;
  10. import org.apache.lucene.search.Query;
  11. import org.apache.lucene.analysis.Analyzer;
  12. import org.apache.lucene.analysis.StopAnalyzer;
  13. import org.apache.lucene.analysis.TokenStream;
  14. import org.apache.lucene.analysis.Token;
  15. import org.apache.lucene.analysis.WhitespaceAnalyzer;
  16. import org.apache.lucene.analysis.standard.StandardAnalyzer;
  17. import java.util.*;
  18. import java.io.*;
  19. public class LuceneStopAnalyzerText {
  20. private static String Dest_Index_Path = "D:\workshop\TextIndex";
  21. static protected String textdetail = "The Lucene works very well,it is very useful." ;
  22.  public static final String[] self_stop_words = {
  23.     "a", "an", "and", "are", "as", "at", "be", "but", "by",
  24.     "for", "if", "in", "into", "is", "it",
  25.     "no", "not", "of", "on", "or", "such",
  26.     "that", "the", "their", "then", "there", "these",
  27.     "they", "this", "to", "was", "will", "with",
  28.     "very"
  29.   };
  30.  
  31. public static void IndexBuilder() {
  32. try {
  33. // Analyzer TextAnalyzer = new StopAnalyzer();
  34. Analyzer TextAnalyzer = new StopAnalyzer(self_stop_words);   // 构造自定义停用词表的分析器
  35. IndexWriter TextIndex = new IndexWriter(Dest_Index_Path,TextAnalyzer,true);
  36. Document document = new Document();                     // 生成空文档
  37. Field field_content = new Field("content", textdetail,  // 创建域对象 
  38. Field.Store.YES,Field.Index.TOKENIZED);
  39. document.add(field_content);
  40. TextIndex.addDocument(document);                // 添加索引文档
  41. ArrayList ItemList = new ArrayList();           // 结果集合   
  42. TokenStream stream = TextAnalyzer.tokenStream("content", new StringReader(textdetail));
  43. while(true)
  44. {
  45. Token item = stream.next();
  46. if(null == item ) break;
  47. System.out.print("{"+item.termText()+"} ");
  48. }
  49. TextIndex.optimize();
  50. TextIndex.close();
  51. } catch (IOException e) {
  52. e.printStackTrace();
  53. }
  54. System.out.println("");
  55. System.out.println("Index success");
  56. }
  57. /*================================================================
  58.  * 名 称:QueryStopAnalyzerTest
  59.  * 功 能:构造检索查询器,使用指定的分析器对检索词进行分析,找到相应结果输出。
  60.  ===============================================================*/
  61. public static void QueryStopAnalyzerTest(){
  62. try {
  63. Analyzer analyzer = new StopAnalyzer();
  64. IndexSearcher searcher = new IndexSearcher(Dest_Index_Path);
  65.     String searchWords = "The Lucene works";
  66.     QueryParser parser = new QueryParser("content",analyzer); 
  67.     
  68.     try{
  69.     Query query = parser.parse(searchWords);
  70. System.out.println(query.toString());
  71. System.out.println(query.getClass());
  72. Hits hits = searcher.search(query);
  73. System.out.println("Search result:");
  74. for(int i=0; i < hits.length(); i++)
  75. {
  76. System.out.println(hits.doc(i).getField("content"));
  77. }
  78.     } catch(ParseException e1){
  79. e1.printStackTrace();
  80.     }
  81. }catch (IOException e) {
  82. e.printStackTrace();
  83. }
  84. System.out.println("Search success");
  85. }
  86. public static void main(String[] args) {
  87. IndexBuilder();
  88. QueryStopAnalyzerTest();
  89. }
  90. }