http://sourceforge.net/projects/regain/files/
http://sourceforge.jp/projects/sfnet_regain/releases/?file_id=2419546
http://code.google.com/p/paoding/downloads/list
lucene3.0入门实例(2009-12-07 21:33)
lucene3.0已于2009-11-25发布啦,但网上的入门实例都是针对lucene3.0以前的版本;相对于以前的版本,3.0貌似改动不小。
本人从零开始学习《lucene in action中文版》,并结合lucene3.0文档写了个入门实例,可供像我一样直接从lucene3.0开始学习的初学者参考!(变化大的地方用蓝字标出来了)
入门实例:
1.预处理:先把网上下载的一个《三国演义》电子书“三国演义.txt”(可用其他代替,呵呵)切割成多个小文件。
/**
 * Pre-processing step: cuts one large text file into several small files.
 *
 * @author ht
 */
public class FilePreprocess {
    /**
     * Splits the hard-coded source file into the hard-coded output directory,
     * creating that directory first when it does not exist.
     *
     * @param arg unused
     */
    public static void main(String[] arg){
        String outputpath = "D:\\test\\small\\";// directory that receives the small files
        String filename = "D:\\test\\三国演义.txt";// path of the original (large) file
        File outputDir = new File(outputpath);
        if(!outputDir.exists()){
            outputDir.mkdirs();
        }
        splitToSmallFiles(new File(filename), outputpath);
    }

    /**
     * Splits a large text file into small files named
     * {@code output0.txt}, {@code output1.txt}, ...
     *
     * <p>Lines are read one at a time and re-joined with {@code "\r\n"}. As soon as
     * the accumulated text reaches 10240 bytes (measured in the platform default
     * charset) it is flushed to the next numbered file. Whatever is left after the
     * last line is always written to one final file, even when it is empty.
     *
     * <p>I/O failures are reported on the console and are not propagated.
     *
     * @param file       the large file to split
     * @param outputpath prefix for the generated files; it is concatenated directly
     *                   with the file name, so it must end with a path separator
     */
    public static void splitToSmallFiles(File file ,String outputpath){
        int filePointer = 0;
        final int MAX_SIZE = 10240;// threshold (in bytes) at which a small file is flushed
        String filename = "output";// file-name prefix of the small files
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(file));
            StringBuilder buffer = new StringBuilder();
            String line = reader.readLine();
            while(line != null){
                buffer.append(line).append("\r\n");
                String chunk = buffer.toString();
                if(chunk.getBytes().length>=MAX_SIZE){
                    writeChunk(outputpath+filename+filePointer+".txt", chunk);
                    filePointer++;
                    buffer=new StringBuilder();
                }
                line = reader.readLine();
            }
            // Remainder after the last full chunk (possibly empty).
            writeChunk(outputpath+filename+filePointer+".txt", buffer.toString());
            System.out.println("The file hava splited to small files !");
        } catch (FileNotFoundException e) {
            System.out.println("file not found !");
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Release the input file handle on every path, including failures.
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /** Writes {@code content} to {@code path}, closing the writer even when the write fails. */
    private static void writeChunk(String path, String content) throws IOException {
        BufferedWriter writer = new BufferedWriter(new FileWriter(path));
        try {
            writer.write(content);
        } finally {
            writer.close();
        }
    }
}
2.用lucene3.0生成索引类:用lucene3.0对生成的多个小文件进行索引,中文分词用的是lucene3.0自带的StandardAnalyzer.
/**
 * Index generation: builds a Lucene index over the .txt files of a directory tree.
 *
 * @author ht
 */
public class Indexer {
    private static String INDEX_DIR = "D:\\test\\index";// directory where the index is stored
    private static String DATA_DIR = "D:\\test\\small\\";// directory holding the small files

    /**
     * Indexes {@code DATA_DIR} into {@code INDEX_DIR} and prints how many
     * documents were indexed and how long it took.
     */
    public static void main(String[] args) throws Exception {
        long start = new Date().getTime();
        int numIndexed = index(new File(INDEX_DIR), new File(DATA_DIR));
        long end = new Date().getTime();
        System.out.println("Indexing " + numIndexed + " files took " + (end - start) + " milliseconds");
    }

    /**
     * Indexes the .txt files under {@code dataDir} and stores the index in
     * {@code indexDir}.
     *
     * @param indexDir directory in which the index is created
     * @param dataDir  directory whose .txt files are indexed (searched recursively)
     * @return the writer's document count after indexing
     * @throws IOException if {@code dataDir} does not exist or is not a directory,
     *                     or if indexing fails
     */
    public static int index(File indexDir, File dataDir) throws IOException {
        if (!dataDir.exists() || !dataDir.isDirectory()) {
            throw new IOException(dataDir + " does not exist or is not a directory");
        }
        IndexWriter writer = new IndexWriter(FSDirectory.open(indexDir), new StandardAnalyzer(Version.LUCENE_CURRENT), true,
                IndexWriter.MaxFieldLength.LIMITED);
        try {
            indexDirectory(writer, dataDir);
            int numIndexed = writer.numDocs();
            writer.optimize();
            return numIndexed;
        } finally {
            // Always close the writer so a failed run does not leave it (and its resources) open.
            writer.close();
        }
    }

    /**
     * Walks {@code dir} recursively and indexes every file whose name ends with ".txt".
     *
     * @param writer index writer that receives the documents
     * @param dir    directory to traverse
     * @throws IOException if indexing a file fails
     */
    private static void indexDirectory(IndexWriter writer, File dir)
            throws IOException {
        File[] files = dir.listFiles();
        if (files == null) {
            // listFiles() returns null when the directory cannot be listed
            // (I/O error, or the path is no longer a directory); skip it instead of failing with an NPE.
            System.out.println("Skipping unreadable directory " + dir);
            return;
        }
        for (int i = 0; i < files.length; i++) {
            File f = files[i];
            if (f.isDirectory()) {
                indexDirectory(writer, f); // recurse
            } else if (f.getName().endsWith(".txt")) {
                indexFile(writer, f);
            }
        }
    }

    /**
     * Indexes a single file. Hidden, missing or unreadable files are skipped silently.
     *
     * <p>The document gets a "contents" field fed from a reader over the file and a
     * stored, analyzed "filename" field holding the file's canonical path.
     *
     * @param writer index writer that receives the document
     * @param f      file to index
     * @throws IOException if the file cannot be opened or added to the index
     */
    private static void indexFile(IndexWriter writer, File f)
            throws IOException {
        if (f.isHidden() || !f.exists() || !f.canRead()) {
            return;
        }
        System.out.println("Indexing " + f.getCanonicalPath());
        Document doc = new Document();
        doc.add(new Field("contents",new FileReader(f)));
        doc.add(new Field("filename",f.getCanonicalPath(),Field.Store.YES, Field.Index.ANALYZED));
        writer.addDocument(doc);
    }
}
3.查询类:查询“玄德”!
/**
 * Query example: searches the "contents" field of an existing index for a keyword.
 *
 * @author ht
 */
public class Searcher {
    private static final String INDEX_DIR = "D:\\test\\index\\";// path of the index
    private static final String KEYWORD = "玄德";// keyword to search for
    private static final int TOP_NUM = 100;// collect at most the top 100 hits

    /**
     * Verifies that the index directory exists and runs the search for {@code KEYWORD}.
     *
     * @throws Exception if the index directory is missing or the search fails
     */
    public static void main(String[] args) throws Exception {
        File indexDir = new File(INDEX_DIR);
        if (!indexDir.exists() || !indexDir.isDirectory()) {
            throw new Exception(indexDir +
                    " does not exist or is not a directory.");
        }
        search(indexDir, KEYWORD);
    }

    /**
     * Parses {@code q} against the "contents" field, collects up to {@code TOP_NUM}
     * hits and prints them followed by a summary line.
     *
     * @param indexDir directory containing the index
     * @param q        query string handed to the query parser
     * @throws Exception if the index cannot be opened, the query cannot be parsed,
     *                   or the search fails
     */
    public static void search(File indexDir, String q) throws Exception {
        IndexSearcher is = new IndexSearcher(FSDirectory.open(indexDir),true);//read-only
        try {
            String field = "contents";
            QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, field, new StandardAnalyzer(Version.LUCENE_CURRENT));
            Query query = parser.parse(q);
            TopScoreDocCollector collector = TopScoreDocCollector.create(TOP_NUM , false);
            long start = new Date().getTime();// start time
            is.search(query, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;
            System.out.println(hits.length);
            for (int i = 0; i < hits.length; i++) {
                Document doc = is.doc(hits[i].doc);
                // NOTE(review): this prints the Field object's string form, not only the stored
                // path; doc.get("filename") would print the bare value - confirm which is intended.
                System.out.println(doc.getField("filename")+" "+hits[i].toString()+" ");
            }
            // The end time is taken after the hits were loaded and printed,
            // so the reported duration covers that work as well as the search itself.
            long end = new Date().getTime();//end time
            System.out.println("Found " + collector.getTotalHits() +
                    " document(s) (in " + (end - start) +
                    " milliseconds) that matched query '" +
                    q + "':");
        } finally {
            // Close the searcher on every path, including parse or search failures.
            is.close();
        }
    }
}
分享到:
相关推荐
lucene入门小例子
Lucene入门与使用,非常简单,适合入门
lucenetest.rar,lucene,全文检索,lucene例子 lucenetest.rar,lucene,全文检索,lucene例子lucenetest.rar,lucene,全文检索,lucene例子
lucene quartz 例子lucene quartz 例子lucene quartz 例子lucene quartz 例子lucene quartz 例子lucene quartz 例子lucene quartz 例子lucene quartz 例子lucene quartz 例子lucene quartz 例子
lucene3.0 例子lucene3.0 例子 lucene3.0 例子 ,很好的学习,只有原代原,jar 包自己加上去就OK了
我自己写的一个lucene搜索引擎的简单入门例子源代码 对照lucene,相当的易懂。api我这也有,含JE分词器。
Lucene5.2.1 入门学习例子. 这是别人的例子源码。可以参考。内有使用说明。
这里包含lucene的 jar包 ,API帮助文档CHM格式 ,还有一小段入门代码
Lucene操作数据库例子,通过JDBC程序+Lucene
lucene文档例子
Lucene2.4版本在以前的基础上进行了不少的改动,性能上也提升了不少,这里边学习边写了一个简单的例子,给初学者行个方便!
一个基于LUCENE搜索引擎项目例子一个基于LUCENE搜索引擎项目例子一个基于LUCENE搜索引擎项目例子
lucene入门到项目开发.docx lucene入门到项目开发.docx
有关lucene入门学习的PPT以及算法和倒排原理
Lucene的功能强大,方法众多。主要介绍了Lucene的功能模块及其调用代码,实际使用中可以具体修改。最后还有一个常见的Lucene实例与解析。
Lucene入门demo,lucene简单的应用
lucene入门例子,有创建索引和检索。详情:http://blog.csdn.net/authorzhh/article/details/7869806
Lucene使用lucene入门[归类].pdf
全文检索lucene入门,结合全文检索原理分析lucene.帮助你更快掌握lucene