package com.lucene;
import java.io.*;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
public class TestLucene {
    // Location of the on-disk index files (Windows-style relative path).
    private static final File INDEX_PATH = new File(".\\index");
    // Whitespace analyzer: entries in the word list are already single tokens.
    private static final Analyzer ANALYZER = new WhitespaceAnalyzer(Version.LUCENE_35);

    /**
     * Builds the index from the word-list file {@code .\duomicibiao.txt}.
     *
     * @return {@code true} on success; {@code false} if the source file could
     *         not be read or indexing failed.
     */
    static boolean buildIndex() {
        File wordListFile = new File(".\\duomicibiao.txt");
        HashMap<String, String> words = readFile(wordListFile);
        if (words == null) {
            System.out.println("文件读取错误");
            return false;
        }
        IndexWriter writer = null;
        try {
            IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_35, ANALYZER);
            writer = new IndexWriter(FSDirectory.open(INDEX_PATH), writerConfig);
            for (String key : words.keySet()) {
                Document doc = new Document();
                // "index" is searched (analyzed); "contents" is stored only.
                doc.add(new Field("index", key, Field.Store.YES, Field.Index.ANALYZED));
                doc.add(new Field("contents", words.get(key), Field.Store.YES, Field.Index.NO));
                writer.addDocument(doc);
            }
            // Closing commits the index; without it, indexing fails.
            writer.close();
            writer = null;
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        } finally {
            // Safety net: release the writer (and its lock) on the error path.
            if (writer != null) {
                try {
                    writer.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * @return {@code true} when no index exists yet (directory missing or empty).
     */
    static boolean noIndex() {
        File[] indexFiles = INDEX_PATH.listFiles();
        // listFiles() returns null when the directory does not exist —
        // treat that the same as an empty directory (no index yet).
        return indexFiles == null || indexFiles.length == 0;
    }

    /**
     * Deletes all files in the index directory.
     *
     * @return {@code true} if every file was deleted (or there was nothing to
     *         delete); {@code false} otherwise.
     */
    static boolean deleteIndex() {
        File[] indexFiles = INDEX_PATH.listFiles();
        if (indexFiles == null) {
            // Directory does not exist: nothing to delete.
            return true;
        }
        boolean allDeleted = true;
        try {
            for (File file : indexFiles) {
                // Track the per-file result instead of silently ignoring it.
                allDeleted &= file.delete();
            }
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
        return allDeleted;
    }

    /**
     * Reads the word-list file (GBK-encoded, one entry per line) into a map
     * whose keys and values are both the trimmed line.
     *
     * @param file the word-list file to read
     * @return the populated map, or {@code null} if reading failed
     */
    static HashMap<String, String> readFile(File file) {
        BufferedReader reader = null;
        HashMap<String, String> wordsMap = new HashMap<String, String>();
        try {
            reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "gbk"));
            String line;
            while ((line = reader.readLine()) != null) {
                String word = line.trim();
                wordsMap.put(word, word);
            }
            return wordsMap;
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        } finally {
            if (reader != null) {
                try {
                    // Closing the outermost reader closes the wrapped
                    // InputStreamReader and FileInputStream as well.
                    reader.close();
                } catch (IOException e) {
                    // Log only — returning from finally would mask the
                    // successfully built map (a bug in the original code).
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Runs a fuzzy query against the "index" field and prints the top hits.
     *
     * @param queryStr    the search term
     * @param hitsPerPage maximum number of hits to collect
     */
    static void search(String queryStr, int hitsPerPage) {
        IndexReader reader = null;
        IndexSearcher searcher = null;
        try {
            reader = IndexReader.open(FSDirectory.open(INDEX_PATH));
            searcher = new IndexSearcher(reader);
            Query query = new FuzzyQuery(new Term("index", queryStr));
            TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
            searcher.search(query, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;
            if (hits.length > 0) {
                for (int i = 0; i < hits.length; i++) {
                    Document result = searcher.doc(hits[i].doc);
                    System.out.println("【" + i + "】" + hits[i].score + " index:" + result.get("index") + " contents:" + result.get("contents"));
                }
            } else {
                System.out.println("未找到结果");
            }
        } catch (Exception e) {
            // Was: System.out.println("Exception") — which hid the cause.
            e.printStackTrace();
        } finally {
            // Release searcher and reader (the original leaked both).
            try {
                if (searcher != null) {
                    searcher.close();
                }
                if (reader != null) {
                    reader.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    public static void main(String[] args) {
        //deleteIndex();
        if (noIndex()) {
            buildIndex();
        }
        int resultSize = 20;
        search("张杰", resultSize);
    }
}
分享到:
相关推荐
Lucene3.5全部源码,打包jar文件,可以直接打开查看源码,Lucene开发必备
lucene3.5 IKAnalyzer3.2.5 实例中文分词通过,目前在网上找的lucene 和IKAnalyzer 的最新版本测试通过。内含:示例代码,以及最新jar包。 lucene lucene3.5 IKAnalyzer IKAnalyzer3.2.5 jar 中文 分词
luke3.5 可查看lucene3.5索引
lucene3.5的创建和增删改查的工程
lucene 官网的3.5源代码,标准代码,未做任何处理
基于新发布的Lucene3.5 做的一个实例,仅供参考。
lucene3.5高亮
几个经常用到的包,有中文分词器,核心包,高亮包...等等
chm格式的Lucene帮助文档,Lucene3.5
lucene3.5全文检索案例lucene+demo
用爬虫爬下来的Lucene3.5 api 可以正常使用
lucene3.5的API,虽然是英文版,不过还是很有用的
这个为一个lucene3.5所写的程序例子,有助于刚入门的学习lucene的人,更快的的进入到lucene的学习中去。
Lucene 3.5&API,最新版
lucene3.5中文分词,详细的注释,适用于初学者。有兴趣的可以看看。
基于新发布的Lucene3.5的学习,对部分模块的研究及学习心得。
介绍lucene3.5的相关技术,包括基本用法、分析器、索引建立与查询,扩展的高亮、分页、以及solr3.5的相关用法
solr_lucene3.5_lukeall-3.5.0.jar.zip
Lucene3.5视频教程(内含分享链接) 一共50集, 包含各部分讲解及源码