`
紫_色
  • 浏览: 142781 次
  • 性别: Icon_minigender_1
  • 来自: 深圳
社区版块
存档分类
最新评论

Lucene 4.6(二) Lucene内置查询对象

    博客分类:
  • J2EE
阅读更多

 Lucene 自身内置了许多查询对象,常用的有TermQuery、TermRangeQuery、NumericRangeQuery、PrefixQuery、WildcardQuery、FuzzyQuery、BooleanQuery、PhraseQuery.它们分别提供了不同形式的查询方式.分别看一下它们是如何使用的:

 

创建一个测试索引

/**
 * Builds the sample index used by the query examples.
 *
 * <p>Six documents are written to the file-system index at {@code D:/Lucene}.
 * Each document carries four fields:
 * <ul>
 *   <li>{@code id} - an {@code IntField}, stored (numeric fields can be used
 *       for sorting and range queries);</li>
 *   <li>{@code email} - a {@code StringField}, stored (indexed as a single
 *       term, not run through the analyzer);</li>
 *   <li>{@code content} - a {@code TextField}, stored (a block of text that is
 *       analyzed into terms);</li>
 *   <li>{@code name} - a generic {@code Field} with a custom {@code FieldType}
 *       that is indexed and stored.</li>
 * </ul>
 *
 * <p>I/O failures are printed to standard error and otherwise ignored.
 *
 * @param hasIndex when {@code true}, every document already in the index is
 *                 deleted before the sample documents are added
 */
public static void index(boolean hasIndex) {
    int[] ids = {0,1,2,3,4,5};
    String[] emails = {"lfd@foxmail.com","lfd@qq.com","lfx@qq.com","lfx@foxmail.com","hll@gcp.edu","zzp@gcp.edu"};
    String[] contents = {
            "incididunt ut labore et dolore magna aliqua. Ut enim ad lorem. ",
            "Lorem ipsum dolor sit amet lorem consectetur adipisicing",
            "dolor in reprehenderit in voluptate velit esse cillum nostrud exercitation ullamco laboris. ",
            "dolor in reprehenderit in voluptate velit esse cillum nostrud exercitation ullamco laboris. ",
            "Lorem ipsum dolor sit amet, consectetur adipisicing elit",
            "Consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna "
    };
    String[] names = {"zzp","lfd","lfx","tom","huanglili","tzp"};

    // The field type for "name" is the same for every document, so build it once.
    FieldType nameType = new FieldType();
    nameType.setIndexed(true);
    nameType.setStored(true);

    IndexWriter writer = null;
    Directory directory = null;
    try {
        directory = FSDirectory.open(new File("D:/Lucene"));
        // directory = new RAMDirectory(); // alternative: keep the index in memory
        writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_46,
                new StandardAnalyzer(Version.LUCENE_46)));

        // Rebuild from scratch when requested.
        if (hasIndex) {
            writer.deleteAll();
        }

        for (int i = 0; i < names.length; i++) {
            Document doc = new Document();
            doc.add(new IntField("id", ids[i], Store.YES));
            doc.add(new StringField("email", emails[i], Store.YES));
            doc.add(new TextField("content", contents[i], Store.YES));
            doc.add(new Field("name", names[i], nameType));
            writer.addDocument(doc);
        }
        writer.commit();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Close the writer first, then the directory it was writing to.
        // Each close is guarded separately so one failure does not skip the other.
        if (writer != null) {
            try {
                writer.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (directory != null) {
            try {
                directory.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

 

 为了方便,这里提取了一个方法获取IndexSearcher

/**
 * Returns an {@code IndexSearcher} backed by the shared {@code reader} field.
 *
 * <p>On the first call the reader is opened on {@code directory}. On later
 * calls {@code DirectoryReader.openIfChanged} is consulted; when it returns a
 * new reader, that reader replaces the shared one and the old one is closed.
 *
 * <p>If refreshing an existing reader fails, the error is printed and the
 * existing reader is used as-is.
 *
 * @param directory the index directory; only used when no reader is open yet
 * @return a searcher over the current shared reader
 * @throws IllegalStateException if no reader is open and opening one fails
 */
private static IndexSearcher getSearcher(Directory directory) {
    try {
        if (reader == null) {
            reader = DirectoryReader.open(directory);
        } else {
            DirectoryReader refreshed = DirectoryReader.openIfChanged(reader);
            if (refreshed != null) {
                // Swap before closing so a failing close() cannot leave the
                // field pointing at the old reader while the new one leaks.
                DirectoryReader stale = reader;
                reader = refreshed;
                stale.close();
            }
        }
    } catch (IOException e) {
        if (reader == null) {
            // Without a reader there is nothing to search; surface the real
            // cause instead of a later NullPointerException.
            throw new IllegalStateException("Unable to open an index reader", e);
        }
        e.printStackTrace();
    }
    return new IndexSearcher(reader);
}

 这里的openIfChanged方法:判断索引是否发生改变,如果发生变化则重新生成DirectoryReader.

 

TermQuery对象介绍:搜索指定Field中配置的内容.

/**
 * TermQuery example: looks up the single term "visited" in the "content"
 * field and prints the stored fields of up to 10 hits.
 *
 * NOTE(review): none of the sample contents written by index() contains
 * "visited", so this presumably prints nothing against that index - confirm.
 */
public static void searcher01() {
    IndexSearcher indexSearcher = getSearcher(directory);
    Term term = new Term("content", "visited");
    Query termQuery = new TermQuery(term);
    try {
        for (ScoreDoc hit : indexSearcher.search(termQuery, 10).scoreDocs) {
            Document found = indexSearcher.doc(hit.doc);
            System.out.println("id:" + found.get("id") + "  email:" + found.get("email") + "  content:" + found.get("content") + " name:" + found.get("name"));
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}

 new TermQuery(new Term("content", "visited")) ;  传入一个Term,搜索Field为"content"中含有"visited"的内容.(注意:上面创建的测试索引中没有任何文档包含"visited",因此该查询不会返回结果;可以换成"lorem"等索引中存在的单词来观察输出.)

 

 

TermRangeQuery对象介绍:搜索字符边界

/**
 * TermRangeQuery example: matches terms of the "name" field that fall between
 * "a" and "i" (both bounds included) and prints the stored fields of up to
 * 100 hits.
 */
public static void searcher02() {
    BytesRef lower = new BytesRef("a");
    BytesRef upper = new BytesRef("i");
    Query rangeQuery = new TermRangeQuery("name", lower, upper, true, true);
    IndexSearcher indexSearcher = getSearcher(directory);
    try {
        ScoreDoc[] hits = indexSearcher.search(rangeQuery, 100).scoreDocs;
        for (int i = 0; i < hits.length; i++) {
            Document found = indexSearcher.doc(hits[i].doc);
            System.out.println("id:" + found.get("id") + "  email:" + found.get("email") + "  content:" + found.get("content") + " name:" + found.get("name"));
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}

 new TermRangeQuery("name", new BytesRef("a") , new BytesRef("i"), true, true) ;搜索Field为"name",字符开头从a到i的内容.后边两个true分别表示是否包含开始字符,是否包含结束字符.

 

NumericRangeQuery对象介绍:搜索数字的开始和结束内容.

/**
 * NumericRangeQuery example: matches documents whose int field "id" lies
 * between 1 and 4 (both bounds included) and prints the stored fields of up
 * to 100 hits.
 */
public static void searcher03() {
    Query idRange = NumericRangeQuery.newIntRange("id", 1, 4, true, true);
    IndexSearcher indexSearcher = getSearcher(directory);
    try {
        TopDocs top = indexSearcher.search(idRange, 100);
        for (ScoreDoc hit : top.scoreDocs) {
            Document found = indexSearcher.doc(hit.doc);
            String id = found.get("id");
            String email = found.get("email");
            String content = found.get("content");
            String name = found.get("name");
            System.out.println("id:" + id + "  email:" + email + "  content:" + content + " name:" + name);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}

 NumericRangeQuery.newIntRange("id", 1, 4, true, true) ;搜索Field为"id"标识从1到4的内容,后面两个true分别表示是否包含1,是否包含4

 

 

PrefixQuery查询对象:匹配单词的开始

/**
 * PrefixQuery example: matches terms of the "content" field that start with
 * "in" and prints the stored fields of up to 100 hits.
 */
public static void searcher04() {
    Term prefix = new Term("content", "in");
    Query prefixQuery = new PrefixQuery(prefix);
    IndexSearcher indexSearcher = getSearcher(directory);
    try {
        ScoreDoc[] hits = indexSearcher.search(prefixQuery, 100).scoreDocs;
        int total = hits.length;
        for (int i = 0; i < total; i++) {
            Document found = indexSearcher.doc(hits[i].doc);
            System.out.println("id:" + found.get("id") + "  email:" + found.get("email") + "  content:" + found.get("content") + " name:" + found.get("name"));
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}

 new PrefixQuery(new Term("content", "in")) ;  搜索Field为"content"中单词开始为"in"的内容.

 

 

WildcardQuery查询对象:单词通配符搜索

/**
 * WildcardQuery example: matches terms of the "content" field against the
 * pattern "*tate" and prints the stored fields of up to 100 hits.
 */
public static void searcher05() {
    Term pattern = new Term("content", "*tate");
    Query wildcardQuery = new WildcardQuery(pattern);
    IndexSearcher indexSearcher = getSearcher(directory);
    try {
        TopDocs top = indexSearcher.search(wildcardQuery, 100);
        for (ScoreDoc hit : top.scoreDocs) {
            Document found = indexSearcher.doc(hit.doc);
            System.out.println("id:" + found.get("id") + "  email:" + found.get("email") + "  content:" + found.get("content") + " name:" + found.get("name"));
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}

 new WildcardQuery(new Term("content", "*tate")) ;  其中"*"表示任意内容,即搜索Field为"content"中单词以"tate"结束的内容.

 

FuzzyQuery查询对象:模糊匹配
/**
 * FuzzyQuery example: matches terms of the "name" field that are equal or
 * similar to "lfx" and prints the stored fields of up to 100 hits.
 */
public static void searcher06() {
    Query fuzzyQuery = new FuzzyQuery(new Term("name", "lfx"));
    IndexSearcher indexSearcher = getSearcher(directory);
    try {
        ScoreDoc[] hits = indexSearcher.search(fuzzyQuery, 100).scoreDocs;
        for (int i = 0; i < hits.length; i++) {
            int docId = hits[i].doc;
            Document found = indexSearcher.doc(docId);
            System.out.println("id:" + found.get("id") + "  email:" + found.get("email") + "  content:" + found.get("content") + " name:" + found.get("name"));
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
 new FuzzyQuery(new Term("name", "lfx")) ; 模糊匹配Field为"name"且内容与"lfx"相同或相近的数据.例如测试数据中名为"lfd"的文档也会被匹配出来.
BooleanQuery查询对象:多条件查询
/**
 * BooleanQuery example: combines two TermQuery clauses on the "content"
 * field ("sed" and "do"), both required, and prints the stored fields of up
 * to 100 hits.
 *
 * A BooleanQuery joins several sub-queries; each clause is added with one of:
 * Occur.MUST (must match), Occur.SHOULD (may match) or
 * Occur.MUST_NOT (must not match).
 */
public static void searcher07() {
    BooleanQuery combined = new BooleanQuery();
    Query hasSed = new TermQuery(new Term("content", "sed"));
    Query hasDo = new TermQuery(new Term("content", "do"));
    combined.add(hasSed, Occur.MUST);
    combined.add(hasDo, Occur.MUST);

    IndexSearcher indexSearcher = getSearcher(directory);
    try {
        ScoreDoc[] hits = indexSearcher.search(combined, 100).scoreDocs;
        for (int i = 0; i < hits.length; i++) {
            Document found = indexSearcher.doc(hits[i].doc);
            System.out.println("id:" + found.get("id") + "  email:" + found.get("email") + "  content:" + found.get("content") + " name:" + found.get("name"));
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Occur.MUST、Occur.SHOULD、Occur.MUST_NOT 可以理解为and、or、not.在这个示例中查询Field为"content"且同时包含"sed"和"do"的内容.
PhraseQuery查询对象:根据单词间的跳数进行查询
/**
 * PhraseQuery example: searches the "content" field for the terms "dolor"
 * and "amet" with a slop of 1 and prints the stored fields of up to 100 hits.
 */
public static void searcher08() {
    PhraseQuery phrase = new PhraseQuery();
    // A slop of 1 lets one other word sit between "dolor" and "amet".
    phrase.setSlop(1);
    phrase.add(new Term("content", "dolor"));
    phrase.add(new Term("content", "amet"));

    IndexSearcher indexSearcher = getSearcher(directory);
    try {
        TopDocs top = indexSearcher.search(phrase, 100);
        ScoreDoc[] hits = top.scoreDocs;
        for (int i = 0; i < hits.length; i++) {
            Document found = indexSearcher.doc(hits[i].doc);
            System.out.println("id:" + found.get("id") + "  email:" + found.get("email") + "  content:" + found.get("content") + " name:" + found.get("name"));
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
query.setSlop(1) ;为设置跳数.在这个示例中,查询Field为"content"的内容中包含"dolor"和"amet"且它们之间最多间隔1个单词的数据.
下面是例子的源代码(Maven项目):
分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics