最近在做Lucene的应用,趁着一个节点的间歇,总结了Lucene中自定义搜索结果排序的相关代码,一来和大家共同探讨,二来也便于备忘。
众所周知,Lucene默认的搜索结果按Score从高到低排序;当Score相等时,则按建立索引时生成的docID由小到大排序。通过自定义搜索结果的排序,可以完全按照真实业务的需要来决定结果的顺序。
下面以一个按距离对餐馆排序的例子配合代码进行讲解(该例在很多地方都能找到,但我参考时发现其中不少示例无法直接运行)。下面给出可以直接运行的示例代码(基于Lucene 3.3):
DistanceComparatorSource.java
package com.xxx.demo;
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.FieldComparatorSource;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.FieldCache.IntParser;
/**
 * A {@link FieldComparatorSource} that orders hits by their Euclidean distance
 * from a fixed origin point.
 *
 * <p>The sort field is expected to hold a single un-analyzed term of the form
 * {@code "x,y"} (two integers separated by a comma). Documents closer to the
 * origin sort first.
 */
public class DistanceComparatorSource extends FieldComparatorSource{
    // Shared, stateless parsers. FieldCache takes the parser as part of the lookup,
    // so handing it a brand-new parser object for every segment would (as far as the
    // Lucene 3.x implementation goes) never match an existing cache entry and would
    // keep adding entries. Reusing one instance per coordinate keeps the cache effective.
    private static final IntParser X_PARSER = new DistanceXIntParser();
    private static final IntParser Y_PARSER = new DistanceYIntParser();

    private final int x;
    private final int y;

    /**
     * @param x x coordinate of the origin that distances are measured from
     * @param y y coordinate of the origin that distances are measured from
     */
    public DistanceComparatorSource(int x,int y){
        this.x = x;
        this.y = y;
    }

    /**
     * Creates the comparator for one search.
     *
     * @param fieldname name of the field holding the {@code "x,y"} term
     * @param numHits   number of slots the comparator must be able to hold
     * @param sortPos   unused
     * @param reversed  unused
     */
    @Override
    public FieldComparator newComparator(String fieldname,int numHits,
            int sortPos,boolean reversed) throws IOException{
        return new DistanceScoreDocLookupComparator(fieldname,numHits,x,y);
    }

    /** Extracts the x coordinate (text before the first comma) of an {@code "x,y"} term. */
    private static final class DistanceXIntParser implements IntParser{
        @Override
        public int parseInt(String string){
            return Integer.parseInt(string.split(",")[0]);
        }
    }

    /** Extracts the y coordinate (text after the first comma) of an {@code "x,y"} term. */
    private static final class DistanceYIntParser implements IntParser{
        @Override
        public int parseInt(String string){
            return Integer.parseInt(string.split(",")[1]);
        }
    }

    /**
     * Comparator that stores the distance of every competitive hit in a slot array
     * and compares hits by those distances.
     */
    private static final class DistanceScoreDocLookupComparator extends FieldComparator{
        private final int originX;
        private final int originY;
        private final String fieldName;
        // Distance of the hit currently stored in each slot.
        private final float[] values;
        // Per-segment coordinate arrays, refreshed in setNextReader.
        private int[] xDoc;
        private int[] yDoc;
        // Distance stored in the slot last passed to setBottom.
        private float bottom;

        DistanceScoreDocLookupComparator(String fieldName,int numHits,int originX,int originY){
            this.values = new float[numHits];
            this.fieldName = fieldName;
            this.originX = originX;
            this.originY = originY;
        }

        @Override
        public int compare(int slot1,int slot2){
            return Float.compare(values[slot1],values[slot2]);
        }

        @Override
        public int compareBottom(int doc) throws IOException{
            return Float.compare(bottom,getDistance(doc));
        }

        @Override
        public void copy(int slot,int doc) throws IOException{
            values[slot] = getDistance(doc);
        }

        @Override
        public void setBottom(int slot){
            bottom = values[slot];
        }

        @Override
        public void setNextReader(IndexReader reader,int docBase)
                throws IOException{
            xDoc = FieldCache.DEFAULT.getInts(reader,fieldName,X_PARSER);
            yDoc = FieldCache.DEFAULT.getInts(reader,fieldName,Y_PARSER);
        }

        @Override
        public Float value(int slot){
            return Float.valueOf(values[slot]);
        }

        /**
         * Returns the Euclidean distance between the origin and the given document
         * of the current segment.
         */
        private float getDistance(int doc){
            // Work in double: squaring int deltas can overflow and turn the sum
            // negative, which would make Math.sqrt return NaN.
            double deltaX = (double)xDoc[doc] - originX;
            double deltaY = (double)yDoc[doc] - originY;
            return (float)Math.sqrt(deltaX*deltaX + deltaY*deltaY);
        }

        // NOTE(review): not marked @Override in the original; looks like a leftover
        // from an older comparator API. Kept unchanged so nothing that may call it breaks.
        public int sortType(){
            return SortField.CUSTOM;
        }
    }

    @Override
    public String toString(){
        return "Distance from ("+x+","+y+")";
    }
}
DistanceSortingTest.java
package com.xxx.demo;
import java.io.IOException;
import junit.framework.TestCase;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
public class DistanceSortingTest extends TestCase{
private RAMDirectory directory;
private IndexSearcher searcher ;
private Query query;
protected void setUp() throws Exception{
directory = new RAMDirectory();
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_33,new StandardAnalyzer(Version.LUCENE_33));
config.setOpenMode(OpenMode.CREATE);
IndexWriter writer = new IndexWriter(directory,config);
addPoint(writer,"El Charro","restaurant restaurant restaurant",1,2);//5
addPoint(writer,"Cafe Poca Cosa","restaurant",5,9);//25+81=106
addPoint(writer,"Los Betos","restaurant",9,6);//81+36=117
addPoint(writer,"Nico's Taco Shop","restaurant restaurant",3,8);//9+64=73
writer.close();
searcher = new IndexSearcher(directory);
QueryParser parser = new QueryParser(Version.LUCENE_33, "type", new StandardAnalyzer(Version.LUCENE_33));
query = parser.parse("type:restaurant");
}
private void addPoint(IndexWriter writer,String name,String type,int x,int y)
throws CorruptIndexException, IOException{
Document doc = new Document();
doc.add(new Field("name",name,Field.Store.YES,Field.Index.ANALYZED));
doc.add(new Field("type",type,Field.Store.YES,Field.Index.ANALYZED));
doc.add(new Field("location",x+","+y,Field.Store.YES,Field.Index.NOT_ANALYZED));
writer.addDocument(doc);
}
public void testNormRestaurant() throws IOException{
TopDocs hits = searcher.search(query,10);
System.out.println("--------testNormRestaurant---------- ");
for(ScoreDoc doc : hits.scoreDocs){
System.out.println("docId:"+doc.doc+"score:"+doc.score+", name:"+searcher.doc(doc.doc).get("name"));
}
assertEquals("first","Cafe Poca Cosa",searcher.doc(hits.scoreDocs[0].doc).get("name"));
assertEquals("second","Los Betos",searcher.doc(hits.scoreDocs[1].doc).get("name"));
assertEquals("third","Nico's Taco Shop",searcher.doc(hits.scoreDocs[2].doc).get("name"));
assertEquals("forth","El Charro",searcher.doc(hits.scoreDocs[3].doc).get("name"));
}
public void testNearestRestaurantToHome() throws IOException{
Sort sort = new Sort(new SortField("location",new DistanceComparatorSource(0,0)));
TopDocs hits = searcher.search(query,null,10,sort);
System.out.println("--------testNearestRestaurantToHome---------- ");
for(ScoreDoc doc : hits.scoreDocs){
System.out.println("docId:"+doc.doc+"name:"+searcher.doc(doc.doc).get("name"));
}
assertEquals("cloest","El Charro",searcher.doc(hits.scoreDocs[0].doc).get("name"));
assertEquals("second","Nico's Taco Shop",searcher.doc(hits.scoreDocs[1].doc).get("name"));
assertEquals("third","Cafe Poca Cosa",searcher.doc(hits.scoreDocs[2].doc).get("name"));
assertEquals("furthest","Los Betos",searcher.doc(hits.scoreDocs[3].doc).get("name"));
}
public void testNearestRestaurantToWork() throws IOException{
Sort sort = new Sort(new SortField("location",new DistanceComparatorSource(10,10)));
TopFieldDocs docs = searcher.search(query,null,3,sort);
assertEquals(4,docs.totalHits);
assertEquals(3,docs.scoreDocs.length);
FieldDoc fieldDoc = (FieldDoc)docs.scoreDocs[0];
assertEquals("(10,10) -> (9,6) = sqrt(17)",new Float(Math.sqrt(17)),fieldDoc.fields[0]);
Document document = searcher.doc(fieldDoc.doc);
assertEquals("Los Betos", document.get("name"));
}
}
分享到:
相关推荐
springboot整合lucence完整代码
最近要使用lucence,所以写了一个查了一下文档,根据博客上的代码写了一个小demo,希望能对需要的人有所帮助,带jar文件 直接放到eclipse 就可运行了
lucence搜索技术demo入门,关键字搜索,网页爬虫
Lucence的描述以及重要的几个方法详解说明,Lucence的应用以及实例
PHP运用java的lucence搜索引擎
lucence例子
基于Lucence的个性化搜索引擎研究;相关说明文档
一头扎进 lucence 视频教程 java1234出品 一头扎进 lucence 视频教程 java1234出品
lucence文档,非常有参考价值,用于lucence开发使用,搜索引擎
本资源用于快速了解lucence的工作原理,快速搭建一个体验环境.打开lucence的学习入门.
lucence教程,通过这份文档可以学习lucence的相关知识
学习Lucence和Hadoop的资料,Lucence in Action 和 Hadoop权威指南。
lucence学习笔记,中文的,适用于初学者学习,知识点案例较多。
lucence 索引合并 资料汇总 值得参考
lucence全文检索引擎 开发文档,视频教程,开发jar包。
lucence结果高亮显示的利器,方便开发
Lucence的高亮显示是通过给关键字加html标签实现高亮显示的,这种加了html标签的文本同样可以在Android的WebView中实现高亮显示。
示例中使用了最新版lucence,庖丁解牛分词器,庖丁解牛字典, lucence分页,为数据库表创建索引,为文件创建索引,搜索示例等整套的lucence使用方法,入门必须。
lucence资源技术,4.10以后的版本,建立索引,删除索引,读取OFFICE功能等。
学习lucence的一个比较好的手册 令附 lucence.jar