`
sungang_1120
  • 浏览: 313305 次
  • 性别: Icon_minigender_1
  • 来自: 成都
社区版块
存档分类

lucene 3.0.0一个子目录及其子目录的文件转换成Document对象 并添加多索引库中进行查询

 
阅读更多
lucene3.0.0 一个子目录及其子目录的文件转换成Document对象   并添加多索引库中进行查询


package com.txt.test2;

import java.io.File;
import java.io.FileReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
//一个子目录及其子目录的文件转换成Document对象   并添加多索引库中进行查询
public class LuceneTest2 {
private String path = "f:"+File.separator+"cd";
private File storeFile  = new File("f:"+File.separator+"indexDir7");
private Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
private IndexWriter writer;
@Test
public void create () throws Exception {
Directory directory = new SimpleFSDirectory(storeFile);
writer = new IndexWriter(directory, analyzer,true, MaxFieldLength.LIMITED);
File file = new File(path);
showFile(file);

writer.close();
}
//获取file目录下的文件及其子目录
public void showFile(File file) throws Exception{
if (file == null) {
return ;
}else if (file.isDirectory()) {
File[] files = file.listFiles();
for (int i = 0; i < files.length; i++) {
//递归
showFile(files[i]);
}
}else {
System.out.println(file.getAbsolutePath());
//建立索引  将文件转换成doc..对象
Document document = new Document();
document.add(new Field("fileName", file.getName(), Store.YES, Index.ANALYZED));
document.add(new Field("filePath", file.getAbsolutePath(), Store.YES, Index.ANALYZED));
//不做存储
document.add(new Field("content", new FileReader(file)));

writer.addDocument(document);
}
}

//查询
@Test
public void serach() throws Exception{
Directory directory = new SimpleFSDirectory(storeFile);
IndexSearcher searcher = new IndexSearcher(directory,true);
QueryParser parser = new QueryParser(Version.LUCENE_30, "filePath", analyzer);
String key = "笔记";
Query query = parser.parse(key);
TopDocs tdDocs = searcher.search(query, 100);
System.out.println("查询的内容是:"+key);
System.out.println("一共命中了多少次:"+tdDocs.totalHits);
System.out.println();
if (tdDocs.scoreDocs != null) {
for (int i = 0; i < tdDocs.scoreDocs.length; i++) {
ScoreDoc sDoc = tdDocs.scoreDocs[i];
System.out.println("文档编号的索引:"+sDoc.doc);
System.out.println("得分:"+sDoc.score);

Document document = searcher.doc(sDoc.doc);
System.out.println("fileName名称是:"+document.get("fileName"));
System.out.println("filePath路径是:"+document.get("filePath"));
System.out.println("content内容是:"+document.get("content"));
}
}else {
System.out.println("没有要查找的内容...");
}
searcher.close();
}
}
分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics