LuceneIndexLocalDisk.java
上传用户:cctqzzy
上传日期:2022-03-14
资源大小:12198k
文件大小:4k
- package chapter5;
- import java.io.IOException;
- import java.io.File;
- import java.io.FileReader;
- import org.apache.lucene.store.Directory;
- import org.apache.lucene.store.RAMDirectory;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.analysis.standard.StandardAnalyzer;
- /*******************************************************************
- * 本代码完成本地指定目录的遍历和文件查找。对指定后缀的文件进行分析,利用Lucene建立
- * 索引,为后续检索使用做好准备。
- *******************************************************************/
- public class LuceneIndexLocalDisk {
- private static String Dest_Index_Path = "D:\workshop\TextIndex";
- private static String Text_File_Path = "D:\workshop\ch2\002\";
- /*========================================================
- * 主函数,指定索引目录和待分析的目录,生成Lucene索引
- *========================================================*/
- public static void main(String[] args) {
-
- File indexpath = new File(Dest_Index_Path);
- File localPath = new File(Text_File_Path);
-
- try {
- int nums = indexBuilder(indexpath,localPath);
- System.out.println("Index Finished " + nums + " docs");
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- /*========================================================
- * 索引创建函数,生成IndexWriter创建索引,调用子目录索引函数,并优化
- * 存储本地磁盘索引
- *========================================================*/
- public static int indexBuilder( File indexPath , File localPath )
- throws IOException{
- if(!localPath.exists() || !localPath.isDirectory() || !localPath.canRead()){
- throw new IOException(localPath + "不存在或者不允许访问" );
- }
- System.out.println("目标路径完好");
- IndexWriter FSWriter = new IndexWriter(indexPath,new StandardAnalyzer(),true);
- FSWriter.setUseCompoundFile(true);
- SubindexBuilder(FSWriter,localPath);
- int num = FSWriter.docCount();
- FSWriter.optimize();
- FSWriter.close();
- return num;
- }
- /*========================================================
- * 递归函数,递归分析目录,如果找到子目录,继续递归;如果找到文件分析索引
- *========================================================*/
- private static void SubindexBuilder(IndexWriter fswriter,File subPath)
- throws IOException{
-
- File[] filelist = subPath.listFiles();
-
- System.out.println(subPath.getAbsolutePath() + "路径个数 " + filelist.length);
- for(int i = 0; i< filelist.length;i++){
- File file = filelist[i];
- if(file.isDirectory()){
- SubindexBuilder(fswriter,file);
- } else if(IsValidType(file.getName())){
- fileindexBuilder(fswriter,file);
- }
- }
- }
-
- /*========================================================
- * 创建RAM内存索引,生成并添新文档。合并到本地磁盘索引当中
- *========================================================*/
- private static void fileindexBuilder(IndexWriter fswriter,File subfile)
- throws IOException{
-
- if( subfile.isHidden() || !subfile.exists() || !subfile.canRead()){
- return ;
- }
-
- Directory ramdirectory = new RAMDirectory();
- IndexWriter RAMWriter = new IndexWriter(ramdirectory,new StandardAnalyzer(),true);
-
- // File file = new File(subfile);
- FileReader fpReader = new FileReader(subfile);
-
- System.out.println("创建索引" + subfile.getCanonicalPath());
- Document document = new Document();
-
- Field field_name = new Field("name", subfile.getName(),
- Field.Store.YES,Field.Index.UN_TOKENIZED);
- document.add(field_name);
-
- Field field_path = new Field("path", subfile.getAbsolutePath(),
- Field.Store.YES,Field.Index.UN_TOKENIZED);
- document.add(field_path);
- Field field_content = new Field("content", fpReader);
- document.add(field_content);
- RAMWriter.addDocument(document);
- RAMWriter.close();
- fswriter.addIndexes(new Directory[]{ramdirectory});
- }
- /*========================================================
- * 判断当前文件名是否符合文件后缀要求
- *========================================================*/
- private static boolean IsValidType(String name){
- if(name.endsWith(".txt") || name.endsWith(".html")|| name.endsWith(".ini") ||name.endsWith(".conf")){
- return true;
- } else {
- return false;
- }
- }
- }