package com.zhangzhanlei.lucene;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
public class TestIndexer
{
/** Field name supplied by the caller. NOTE(review): currently unused — documents are always indexed under the literal field "line"; confirm intent. */
private String fieldName;
/** File-name suffix filter, e.g. ".txt"; only matching files are indexed. */
private String endStr;
public TestIndexer(String fieldName,String endStr)
{
this.fieldName = fieldName;
this.endStr = endStr;
}
/**
 * Main entry point for Lucene index creation: rebuilds the index under
 * {@code indexDir} from every matching file below {@code dataDir}.
 * @param indexDir directory the index is written to
 * @param dataDir root directory containing the files to index
 * @return the number of documents in the index after indexing
 * @throws IOException if dataDir is missing/not a directory, or indexing fails
 */
public int index(File indexDir,File dataDir) throws IOException
{
if(!dataDir.exists()||!dataDir.isDirectory())
{
throw new IOException(dataDir+":does not exist or is not a directory");
}
Analyzer analyzer = new SmartChineseAnalyzer(Version.LUCENE_46,true);
IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_46,analyzer);
Directory directory = FSDirectory.open(indexDir);
if(IndexWriter.isLocked(directory))
{
// Clear a stale write lock left by a previous crashed run.
IndexWriter.unlock(directory);
}
IndexWriter writer = new IndexWriter(directory,indexWriterConfig);
try
{
// Rebuild from scratch: drop any previously indexed documents.
writer.deleteAll();
indexDirectory(writer,dataDir);
return writer.numDocs();
}
finally
{
// Always close the writer (releasing the index lock), even when
// indexing throws; the original leaked the writer on failure.
writer.close();
}
}
/**
 * Recursively walks {@code dir}, indexing every file whose name ends
 * with the configured suffix.
 * @param writer open writer receiving the documents
 * @param dir directory to walk
 * @throws IOException if reading a file fails
 */
public void indexDirectory(IndexWriter writer,File dir) throws IOException
{
File [] files = dir.listFiles();
if(files==null)
{
// listFiles() returns null on I/O error (or non-directory); skip
// rather than throwing a NullPointerException.
return;
}
for(File file : files)
{
if(file.isDirectory())
{
indexDirectory(writer,file);
}
else if (file.getName().endsWith(this.endStr))
{
indexFile(writer,file);
}
}
}
/**
 * Indexes a single file, skipping hidden/unreadable/missing files.
 * @param writer open writer receiving the documents
 * @param f file to index
 * @throws IOException if reading the file fails
 */
public void indexFile(IndexWriter writer,File f) throws IOException
{
if(f.isHidden()||!f.exists()||!f.canRead())
{
return;
}
System.out.println("Indexing: "+f.getCanonicalPath());
// NOTE(review): charset is hard-coded to GBK (typical for Chinese
// Windows log files) — confirm this matches the actual input encoding.
getTXT (writer,f,"GBK");
}
/***
 * Reads the file line by line and adds one document per line under
 * the field "line".
 * @param writer open writer receiving the documents
 * @param file file to read
 * @param charset character encoding used to decode the file
 * @throws IOException if reading fails
 */
public void getTXT (IndexWriter writer,File file,String charset) throws IOException
{
BufferedReader reader = new BufferedReader(
new InputStreamReader(new FileInputStream(file),charset));
try
{
String line;
while((line=reader.readLine())!=null)
{
Document doc = new Document();
doc.add(new Field("line",line,Field.Store.YES,Field.Index.ANALYZED));
writer.addDocument(doc);
}
}
finally
{
// Close the reader even if addDocument/readLine throws; the
// original leaked the stream on failure.
reader.close();
}
}
/**
 * Demo driver: indexes d:\lucenetest\file into d:\lucenetest\index.
 * @param args unused
 */
public static void main(String[] args)
{
TestIndexer indexer = new TestIndexer("filepath",".txt");
try
{
File indexDir = new File ("d:\\lucenetest\\index");
File dataDir = new File ("d:\\lucenetest\\file");
int result = indexer.index(indexDir, dataDir);
System.out.println("indexing : " +result + " files.");
}
catch (Exception e)
{
e.printStackTrace();
}
}
}
Indexing: D:\lucenetest\file\bwpf814.txt
Indexing: D:\lucenetest\file\bwpf815.txt
Indexing: D:\lucenetest\file\bwpf816.txt
Indexing: D:\lucenetest\file\bwpf817.txt
Indexing: D:\lucenetest\file\bwpf818.txt
indexing : 45550 files.
package com.zhangzhanlei.lucene;
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Searches the index built by TestIndexer and prints the stored value of
 * the configured field for each hit.
 */
public class TestSearcher
{
/** Directory containing the Lucene index to search. */
private File indexDir;
/** Name of the stored field to query and print (e.g. "line"). */
private String fieldName;
public TestSearcher (File indexDir,String fieldName)
{
this.indexDir = indexDir;
this.fieldName = fieldName;
}
/**
 * Parses {@code keywords} with the SmartChineseAnalyzer, runs the query
 * (top 1000 hits), and prints each hit's stored field value.
 * @param keywords query string in QueryParser syntax
 * @throws IOException if the index cannot be read
 * @throws ParseException if the query string is malformed
 */
public void searcher(String keywords) throws IOException, ParseException
{
Directory fsDir = FSDirectory.open(indexDir);
IndexReader reader = IndexReader.open(fsDir);
try
{
IndexSearcher is = new IndexSearcher(reader);
// Must use the same analyzer the index was built with so query
// terms tokenize identically.
Analyzer analyzer = new SmartChineseAnalyzer(Version.LUCENE_46,true);
QueryParser queryParser = new QueryParser(Version.LUCENE_46,fieldName,analyzer);
Query query = queryParser.parse(keywords);
TopDocs docs = is.search(query, 1000);
ScoreDoc [] scoreDoc = docs.scoreDocs;
System.out.println("Found "+docs.totalHits+" documents that matched query '"+keywords +"'");
for(int i = 0 ;i<scoreDoc.length;i++)
{
Document miDoc = reader.document(scoreDoc[i].doc);
System.out.println(miDoc.get(fieldName));
}
}
finally
{
// Close the reader even if parse/search/document throws; the
// original leaked it on failure.
reader.close();
}
}
/**
 * Demo driver: searches d:\lucenetest\index for a fixed keyword.
 * @param args unused
 */
public static void main(String[] args)
{
TestSearcher searcher = new TestSearcher(new File("d:\\lucenetest\\index"),"line");
try
{
searcher.searcher("查询超时");
}
catch(Exception e)
{
e.printStackTrace();
}
}
}
Found 750 documents that matched query '查询超时'
Caused by: com.microsoft.sqlserver.jdbc.SQLServerException: 查询超时。
Caused by: com.microsoft.sqlserver.jdbc.SQLServerException: 查询超时。
相关推荐
全局搜索lucene4.6版本开发需要的jar包
创建索引,查询,过滤,同义词,近及时搜索,里面有4.6与5.0的jar文件,请自行引入
18.lucene4.6索引的相关操作 19.lucene4.6的各种Query(1) 20.lucene4.6的各种Query(2) 21.lucene4.6的各种Query(3) 22.solr4.6的快速搭建 23.solr4.6索引的相关操作 24.solr4.6搜索的相关参数功能(1) 25.solr4.6搜索...
lucene4.6所有jar包 lucene 搜索引擎 可以网上找教程,但是貌似网上的都太旧了, 4.6改动挺大的,构造函数都有很大差异~~无语啊!
lucene4.6实例,其中用到的jar包到官网下载.
lucene
Lucene是一套用于全文检索和搜寻的开源程式库,由Apache软件基金会支持和提供。Lucene提供了一个简单却强大的应用程式接口,能够做全文索引和搜寻。 从之前发布其他chm文件下载用户的反映看,有不少朋友反映下载后...
对于抓取的数据,进行去重,去标签,然后利用lucene 和 solr 进行索引和搜索。 课程的最大特点是内容新颖全面而又通俗易懂。对于实际搜索引擎所涉及的各种核心技术都有全面细致的介绍,除了作为搜索系统核心的网络...
NULL 博文链接:https://love-66521.iteye.com/blog/2039912
NULL 博文链接:https://sharp-fcc.iteye.com/blog/2038339
lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习...
Lucene4.6版本,适用于Lucene的所有研究,以及中文分词功能
Lucene的的学习资料及案例,包括一个lucene的学习资料总结。供大家学习使用,也有本人写的一个小案例。
Lucene.net学习.docx
本人的Lucene2.9学习笔记 本人的Lucene2.9学习笔记 本人的Lucene2.9学习笔记 本人的Lucene2.9学习笔记本人的Lucene2.9学习笔记本人的Lucene2.9学习笔记 本人的Lucene2.9学习笔记
这里面是lucene的相关学习资料,特别适合新手学习。
lucene相关学习资料,包括lucene学习笔记,lucene添加中文分词等
主要包含Lucene.net 学习笔记和 Lucene.net 系列的代码,一直一些简单的程序
这是我通过对Lucene3.3.0源码进行了简单解读,依据应用详细的做了很多Demo,大家可以一道学习。
lucene4.8学习资料和案例,自己学习整理。整理了各种查询,包括分页、排序等!对100W,500W,1000W三个级别的数据量分别进行了测试!