LuceneStopAnalyzerText.java
上传用户:cctqzzy
上传日期:2022-03-14
资源大小:12198k
文件大小:4k
- package chapter8;
- import java.io.IOException;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.queryParser.ParseException;
- import org.apache.lucene.queryParser.QueryParser;
- import org.apache.lucene.search.Hits;
- import org.apache.lucene.search.IndexSearcher;
- import org.apache.lucene.search.Query;
- import org.apache.lucene.analysis.Analyzer;
- import org.apache.lucene.analysis.StopAnalyzer;
- import org.apache.lucene.analysis.TokenStream;
- import org.apache.lucene.analysis.Token;
- import org.apache.lucene.analysis.WhitespaceAnalyzer;
- import org.apache.lucene.analysis.standard.StandardAnalyzer;
- import java.util.*;
- import java.io.*;
- public class LuceneStopAnalyzerText {
-
- private static String Dest_Index_Path = "D:\workshop\TextIndex";
- static protected String textdetail = "The Lucene works very well,it is very useful." ;
- public static final String[] self_stop_words = {
- "a", "an", "and", "are", "as", "at", "be", "but", "by",
- "for", "if", "in", "into", "is", "it",
- "no", "not", "of", "on", "or", "such",
- "that", "the", "their", "then", "there", "these",
- "they", "this", "to", "was", "will", "with",
- "very"
- };
-
- public static void IndexBuilder() {
- try {
- // Analyzer TextAnalyzer = new StopAnalyzer();
- Analyzer TextAnalyzer = new StopAnalyzer(self_stop_words); // 构造自定义停用词表的分析器
-
- IndexWriter TextIndex = new IndexWriter(Dest_Index_Path,TextAnalyzer,true);
- Document document = new Document(); // 生成空文档
- Field field_content = new Field("content", textdetail, // 创建域对象
- Field.Store.YES,Field.Index.TOKENIZED);
- document.add(field_content);
- TextIndex.addDocument(document); // 添加索引文档
- ArrayList ItemList = new ArrayList(); // 结果集合
- TokenStream stream = TextAnalyzer.tokenStream("content", new StringReader(textdetail));
- while(true)
- {
- Token item = stream.next();
- if(null == item ) break;
- System.out.print("{"+item.termText()+"} ");
- }
- TextIndex.optimize();
- TextIndex.close();
- } catch (IOException e) {
- e.printStackTrace();
- }
- System.out.println("");
- System.out.println("Index success");
-
- }
-
- /*================================================================
- * 名 称:QueryStopAnalyzerTest
- * 功 能:构造检索查询器,使用指定的分析器对检索词进行分析,找到相应结果输出。
- ===============================================================*/
- public static void QueryStopAnalyzerTest(){
-
- try {
- Analyzer analyzer = new StopAnalyzer();
-
- IndexSearcher searcher = new IndexSearcher(Dest_Index_Path);
- String searchWords = "The Lucene works";
- QueryParser parser = new QueryParser("content",analyzer);
-
- try{
- Query query = parser.parse(searchWords);
- System.out.println(query.toString());
- System.out.println(query.getClass());
-
- Hits hits = searcher.search(query);
-
- System.out.println("Search result:");
-
- for(int i=0; i < hits.length(); i++)
- {
- System.out.println(hits.doc(i).getField("content"));
- }
- } catch(ParseException e1){
- e1.printStackTrace();
- }
- }catch (IOException e) {
- e.printStackTrace();
- }
- System.out.println("Search success");
- }
-
- public static void main(String[] args) {
- IndexBuilder();
- QueryStopAnalyzerTest();
- }
- }