`

使用lucene PrefixQuery 根据拼音前缀查询

 
阅读更多

PrefixQuery类似于 数据库中的 like 'a%'查询
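以下是测试代码。建索引时把拼音的每一个后缀(例如 shanghai、hanghai、anghai、nghai……)都作为独立的 term 写入 pinyin 字段,因此对这些 term 做前缀查询,效果上相当于对原始拼音做任意位置的子串匹配(类似 like '%a%'),所以下面用 "ghai" 也能查到 "shanghai"。这种方式的缺陷是人为地分出了更多的 term,目前还在找有没有更优的方法。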
测试代码:
// Filesystem directory of the Lucene index: createPrefixIndex() writes it, main() reads it.
public static String index_path3="D:\\ix3"; 
        public static void main(String[]args) throws Exception{ 
             
     
            //createPrefixIndex(); 
                    //前缀查询 
            Query trquery  = new PrefixQuery(new Term("pinyin","ghai" ));    
     
            IndexReader  indexReader = IndexReader.open(FSDirectory.open(new File(index_path3))); 
             
            Searcher  searcher = new IndexSearcher(indexReader); 
             
            TopDocs topDocs = searcher.search(trquery,100); 
             
            for (ScoreDoc hits:topDocs.scoreDocs){ 
                Document doc = searcher.doc(hits.doc); 
                System.out.println("doc = "+doc.get("hotelName")); 
            } 
             
        } 
        /**
         * 创建pinyin索引
         */ 
        public static void createPrefixIndex(){ 
            Analyzer  analyzer = new IKAnalyzer(); 
            PerFieldAnalyzerWrapper  perFieldAnalyzerWrapper = new PerFieldAnalyzerWrapper(analyzer); 
            try { 
                 
                //perFieldAnalyzerWrapper.addAnalyzer("price", new WhitespaceAnalyzer()); 
                IndexWriter writer = new IndexWriter(FSDirectory.open(new File(index_path3)), perFieldAnalyzerWrapper, true,IndexWriter.MaxFieldLength.LIMITED); 
                Document doc  = new Document(); 
                String pinyin="shanghai"; 
                 
                             int len = pinyin.length();    
                //将拼音截取保存 
                            for(int i=0;i<len;i++){    
                    String value = pinyin.substring(i,len);    
                    doc.add(new Field("pinyin", value, Field.Store.YES, Field.Index.NOT_ANALYZED)); 
                } 
                doc.add(new Field("hotelName", "test2", Field.Store.YES, Field.Index.ANALYZED)); 
     
                writer.addDocument(doc); 
                 
                doc  = new Document(); 
                doc.add(new Field("hotelName", "test3", Field.Store.YES, Field.Index.ANALYZED)); 
                pinyin="beijing"; 
                len = pinyin.length();    
                for(int i=0;i<len;i++){    
                    String value = pinyin.substring(i,len);    
                    doc.add(new Field("pinyin", value, Field.Store.YES, Field.Index.NOT_ANALYZED)); 
                } 
                writer.addDocument(doc); 
                 
                doc  = new Document(); 
                doc.add(new Field("hotelName", "test6", Field.Store.YES, Field.Index.ANALYZED)); 
                pinyin="zhongguo_benxi"; 
                len = pinyin.length();    
                for(int i=0;i<len;i++){    
                    String value = pinyin.substring(i,len);    
                    doc.add(new Field("pinyin", value, Field.Store.YES, Field.Index.NOT_ANALYZED)); 
                } 
                writer.addDocument(doc); 
                 
                doc  = new Document(); 
                doc.add(new Field("hotelName", "test1", Field.Store.YES, Field.Index.ANALYZED)); 
                pinyin="tianjin"; 
                len = pinyin.length();    
                for(int i=0;i<len;i++){    
                    String value = pinyin.substring(i,len);    
                    doc.add(new Field("pinyin", value, Field.Store.YES, Field.Index.NOT_ANALYZED)); 
                } 
                writer.addDocument(doc); 
                 
             
                 
                writer.close(); 
                 
            } catch (Exception e) { 
                // TODO Auto-generated catch block 
                e.printStackTrace(); 
            } 
             
        } 
分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics