LuceneStandardAnalyzerText.java
上传用户:cctqzzy
上传日期:2022-03-14
资源大小:12198k
文件大小:3k
源码类别:
搜索引擎
开发平台:
Java
- package chapter8;
- import java.io.IOException;
- import java.io.StringReader;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.queryParser.ParseException;
- import org.apache.lucene.queryParser.QueryParser;
- import org.apache.lucene.search.Hits;
- import org.apache.lucene.search.IndexSearcher;
- import org.apache.lucene.search.Query;
- import org.apache.lucene.analysis.Analyzer;
- import org.apache.lucene.analysis.standard.*;
- import org.apache.lucene.analysis.StopAnalyzer;
- import org.apache.lucene.analysis.TokenStream;
- import org.apache.lucene.analysis.Token;
- import java.util.*;
- import java.io.*;
- public class LuceneStandardAnalyzerText {
- private static String Dest_Index_Path = "D:\workshop\TextIndex";
- static protected String textdetail = "The Lucene works very well,it is very useful.我们认为很好用。" ;
- static protected String chinesedetail = "中文文档中最基础的结构是句子、短语、词汇、单个的汉字。中文环境的句子通常可以利用标点符号来分隔。" ;
- public static final String[] self_stop_words = {
- "a", "an", "and", "are", "as", "at", "be", "but", "by",
- "for", "if", "in", "into", "is", "it",
- "no", "not", "of", "on", "or", "such",
- "that", "the", "their", "then", "there", "these",
- "they", "this", "to", "was", "will", "with",
- "very"
- };
- public static void IndexBuilder() {
- try {
- Analyzer TextAnalyzer = new StandardAnalyzer();
- IndexWriter TextIndex = new IndexWriter(Dest_Index_Path,TextAnalyzer,true);
- Document document = new Document();
- Field field_content = new Field("content", chinesedetail,
- Field.Store.YES,Field.Index.TOKENIZED);
- document.add(field_content);
- TextIndex.addDocument(document);
- ArrayList ItemList = new ArrayList();
- TokenStream stream = TextAnalyzer.tokenStream("content", new StringReader(chinesedetail));
- while(true)
- {
- Token item = stream.next();
- if(null == item ) break;
- System.out.print("{"+item.termText()+"} ");
- }
- TextIndex.optimize();
- TextIndex.close();
- } catch (IOException e) {
- e.printStackTrace();
- }
- System.out.println("");
- System.out.println("Index success");
- }
- /*================================================================
- * 名 称:QueryStandardAnalyzerTest
- * 功 能:构造检索查询器,使用指定的分析器对检索词进行分析,找到相应结果输出。
- ===============================================================*/
- public static void QueryStandardAnalyzerTest(){
- try {
- Analyzer analyzer = new StandardAnalyzer();
- IndexSearcher searcher = new IndexSearcher(Dest_Index_Path);
- String searchWords = "中文文档";
- QueryParser parser = new QueryParser("content",analyzer);
- try{
- Query query = parser.parse(searchWords);
- System.out.println(query.toString());
- //System.out.println(query.getClass());
- Hits hits = searcher.search(query);
- System.out.println("Search result:");
- for(int i=0; i < hits.length(); i++)
- {
- System.out.println(hits.doc(i).getField("content"));
- }
- } catch(ParseException e1){
- e1.printStackTrace();
- }
- }catch (IOException e) {
- e.printStackTrace();
- }
- System.out.println("Search success");
- }
- public static void main(String[] args) {
- IndexBuilder();
- QueryStandardAnalyzerTest();
- }
- }