`

Lucene 3 搜索引擎:索引建立、搜索、排序、分页、高亮显示,使用 IKAnalyzer 分词

 
阅读更多
package com.zjr.service.impl; 
     
    import java.io.File; 
    import java.io.IOException; 
    import java.io.StringReader; 
    import java.lang.reflect.InvocationTargetException; 
    import java.util.ArrayList; 
    import java.util.List; 
     
    import org.apache.commons.beanutils.BeanUtils; 
    import org.apache.commons.logging.Log; 
    import org.apache.commons.logging.LogFactory; 
    import org.apache.lucene.analysis.Analyzer; 
    import org.apache.lucene.analysis.TokenStream; 
    import org.apache.lucene.document.Document; 
    import org.apache.lucene.document.Field; 
    import org.apache.lucene.document.Field.Index; 
    import org.apache.lucene.document.Field.Store; 
    import org.apache.lucene.index.CorruptIndexException; 
    import org.apache.lucene.index.IndexReader; 
    import org.apache.lucene.index.IndexWriter; 
    import org.apache.lucene.index.Term; 
    import org.apache.lucene.search.BooleanClause; 
    import org.apache.lucene.search.IndexSearcher; 
    import org.apache.lucene.search.Query; 
    import org.apache.lucene.search.ScoreDoc; 
    import org.apache.lucene.search.Sort; 
    import org.apache.lucene.search.SortField; 
    import org.apache.lucene.search.TopDocs; 
    import org.apache.lucene.search.TopScoreDocCollector; 
    import org.apache.lucene.search.highlight.Highlighter; 
    import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; 
    import org.apache.lucene.search.highlight.QueryScorer; 
    import org.apache.lucene.search.highlight.SimpleHTMLFormatter; 
    import org.apache.lucene.store.Directory; 
    import org.apache.lucene.store.FSDirectory; 
    import org.wltea.analyzer.lucene.IKAnalyzer; 
    import org.wltea.analyzer.lucene.IKQueryParser; 
    import org.wltea.analyzer.lucene.IKSimilarity; 
     
    import com.zjr.model.User; 
     
    public class UserIndexService { 
     
        private final Log logger = LogFactory.getLog(UserIndexService.class); 
        private final String dirPath = "d:/temp/user"; 
     
        Analyzer analyzer = new IKAnalyzer(); 
        Directory directory = null; 
        IndexWriter writer = null; 
        IndexSearcher indexSearcher = null; 
     
        private void confirmDirs() { 
            File indexFile = new File(dirPath); 
            if (!indexFile.exists()) { 
                indexFile.mkdirs(); 
            } 
            if (!indexFile.exists() || !indexFile.canWrite()) { 
                if (logger.isDebugEnabled()) 
                    logger.error("索引文件目录创建失败或不可写入!"); 
            } 
        } 
     
        public void init() { 
            confirmDirs(); 
            try { 
                File f = new File(dirPath); 
                directory = FSDirectory.open(f); 
                 
            } catch (Exception e) { 
                if (logger.isDebugEnabled()) { 
                    logger.error("解除索引文件锁定失败!" + e.getCause()); 
                } 
            } 
        } 
     
        public void createIndex(List<User> userList) { 
            init(); 
            try { 
                 
    //           第一个参数是存放索引目录有FSDirectory(存储到磁盘上)和RAMDirectory(存储到内存中), 
    //          第二个参数是使用的分词器, 第三个:true,建立全新的索引,false,建立增量索引,第四个是建立的索引的最大长度 
                writer = new IndexWriter(directory, analyzer, true,IndexWriter.MaxFieldLength.LIMITED); 
                writer.setMergeFactor(500); 
                writer.setMaxBufferedDocs(155); 
                writer.setMaxFieldLength(Integer.MAX_VALUE); 
                writeIndex(writer, userList); 
                writer.optimize(); 
                writer.close(); 
            } catch (IOException e) { 
                // TODO Auto-generated catch block 
                e.printStackTrace(); 
            } 
        } 
     
        public List<User> search(String keyword) { 
     
            File indexFile = new File(dirPath); 
            if (!indexFile.exists()) { 
                return null; 
            } 
            Directory dir; 
            try { 
                dir = FSDirectory.open(indexFile); 
                indexSearcher = new IndexSearcher(dir); 
                indexSearcher.setSimilarity(new IKSimilarity()); 
                // 单字段查询,单条件查询 
                // Query query = IKQueryParser.parse("userInfo", keyword); 
     
                // 多字段,单条件查询 
                String[] fields = new String[] { "userInfo", "parameter1" }; 
                Query query = IKQueryParser.parseMultiField(fields, keyword); 
     
                // 多字体,单条件,多BooleanClause.Occur[] flags , 查询条件的组合方式(Or/And) 
                // BooleanClause.Occur[]数组,它表示多个条件之间的关系, 
                // BooleanClause.Occur.MUST表示 and, 
                // BooleanClause.Occur.MUST_NOT表示not, 
                // BooleanClause.Occur.SHOULD表示or. 
                // String[] fields =new String[]{"userInfo","parameter1"}; 
                // BooleanClause.Occur[] flags=new 
                // BooleanClause.Occur[]{BooleanClause.Occur.MUST,BooleanClause.Occur.SHOULD}; 
                // Query query = IKQueryParser.parseMultiField(fields, 
                // keyword,flags); 
     
                // //多Field,多条件查询分析 
                // String[] fields =new String[]{"userInfo","parameter1"}; 
                // String[] queries = new String[]{keyword,keyword}; 
                // Query query = IKQueryParser.parseMultiField(fields,queries); 
     
                // 多Field,多条件,多Occur 查询 
                // String[] fields =new String[]{"userInfo","parameter1"}; 
                // String[] queries = new String[]{keyword,keyword}; 
                // BooleanClause.Occur[] flags=new 
                // BooleanClause.Occur[]{BooleanClause.Occur.MUST,BooleanClause.Occur.SHOULD}; 
                // Query query = 
                // IKQueryParser.parseMultiField(fields,queries,flags); 
     
                // 搜索相似度最高的20条记录 
                TopDocs topDocs = indexSearcher.search(query, 20); 
                ScoreDoc[] hits = topDocs.scoreDocs; 
                return hitsToQuery(hits, query); 
     
            } catch (IOException e) { 
                // TODO Auto-generated catch block 
                e.printStackTrace(); 
            } 
     
            return null; 
        } 
     
        private List<User> hitsToQuery(ScoreDoc[] hits, Query query) { 
            List<User> list = new ArrayList<User>(); 
            try { 
                for (int i = 0; i < hits.length; i++) { 
                    User u = new User(); 
                    Document doc = indexSearcher.doc(hits[i].doc); 
                    u.setUserId(Integer.parseInt(doc.get("userId"))); 
                    u.setUserName(doc.get("userName")); 
                    u.setUserAge(Integer.parseInt(doc.get("userAge"))); 
                    // 高亮设置 
                    SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter( 
                            "<font color=\"red\">", "</font>"); 
                    Highlighter highlighter = new Highlighter(simpleHtmlFormatter, 
                            new QueryScorer(query)); 
                    TokenStream tokenStream = analyzer.tokenStream("text", 
                            new StringReader(doc.get("userInfo"))); 
                    String userInfo = highlighter.getBestFragment(tokenStream, doc 
                            .get("userInfo")); 
                    if (userInfo != null) { 
                        u.setUserInfo(userInfo); 
                    } else { 
                        u.setUserInfo(doc.get("userInfo")); 
                    } 
     
                    SimpleHTMLFormatter simpleHtmlFormatter1 = new SimpleHTMLFormatter( 
                            "<font color=\"red\">", "</font>"); 
                    Highlighter highlighter1 = new Highlighter( 
                            simpleHtmlFormatter1, new QueryScorer(query)); 
                    TokenStream tokenStream1 = analyzer.tokenStream("text1", 
                            new StringReader(doc.get("parameter1"))); 
                    String p1 = highlighter1.getBestFragment(tokenStream1, doc 
                            .get("parameter1")); 
                    if (p1 != null) { 
                        u.setParameter1(p1); 
                    } else { 
                        u.setParameter1(doc.get("parameter1")); 
                    } 
     
                    u.setParameter2(doc.get("parameter2")); 
                    u.setParameter3(doc.get("parameter3")); 
                    u.setParameter4(doc.get("parameter4")); 
                    list.add(u); 
                } 
     
                indexSearcher.close(); 
                return list; 
            } catch (CorruptIndexException e) { 
                // TODO Auto-generated catch block 
                e.printStackTrace(); 
            } catch (IOException e) { 
                // TODO Auto-generated catch block 
                e.printStackTrace(); 
            } catch (InvalidTokenOffsetsException e) { 
                // TODO Auto-generated catch block 
                e.printStackTrace(); 
            } 
            return null; 
        } 
     
        public void writeIndex(IndexWriter writer, List<User> userList) { 
     
            try { 
                for (User u : userList) { 
                    Document doc = getDoc(u); 
                    writer.addDocument(doc); 
                } 
            } catch (IOException e) { 
                // TODO Auto-generated catch block 
                e.printStackTrace(); 
            } 
     
        } 
     
        private Document getDoc(User user) { 
            System.out.println("用户ID 为" + user.getUserId() + " 索引被创建"); 
            Document doc = new Document(); 
            addField2Doc(doc, user, "userId", Store.YES, Index.NOT_ANALYZED); 
            addField2Doc(doc, user, "userName", Store.YES, Index.NOT_ANALYZED);// Index.NOT_ANALYZED 
                                                                                // 不分词,但建立索引 
            addField2Doc(doc, user, "userAge", Store.YES, Index.NOT_ANALYZED);// Index.ANALYZED 
                                                                                // 分词并且建立索引 
            addField2Doc(doc, user, "userInfo", Store.YES, Index.ANALYZED); 
            addField2Doc(doc, user, "parameter1", Store.YES, Index.ANALYZED); 
            addField2Doc(doc, user, "parameter2", Store.YES, Index.ANALYZED); 
            addField2Doc(doc, user, "parameter3", Store.YES, Index.ANALYZED); 
            addField2Doc(doc, user, "parameter4", Store.YES, Index.ANALYZED); 
            return doc; 
        } 
     
        private void addField2Doc(Document doc, Object bean, String name, Store s, 
                Index i) { 
            String value; 
            try { 
                value = BeanUtils.getProperty(bean, name); 
                if (value != null) { 
                    doc.add(new Field(name, value, s, i, 
                            Field.TermVector.WITH_POSITIONS_OFFSETS)); 
                } 
            } catch (IllegalAccessException e) { 
                logger.error("get bean property error", e); 
            } catch (InvocationTargetException e) { 
                logger.error("get bean property error", e); 
            } catch (NoSuchMethodException e) { 
                logger.error("get bean property error", e); 
            } 
        } 
     
        /**
         * 没有排序,有高亮,有分页
         * 
         * @param pageNo
         * @param pageSize
         * @param keyword
         * @return
         */ 
        public PageBean getPageQuery(int pageNo, int pageSize, String keyword) { 
            List result = new ArrayList(); 
            File indexFile = new File(dirPath); 
            if (!indexFile.exists()) { 
                return null; 
            } 
            Directory dir; 
            try { 
                dir = FSDirectory.open(indexFile); 
                indexSearcher = new IndexSearcher(dir); 
                indexSearcher.setSimilarity(new IKSimilarity()); 
     
                String[] fields = new String[] { "userInfo", "parameter1" }; 
                BooleanClause.Occur[] flags = new BooleanClause.Occur[] { 
                        BooleanClause.Occur.MUST, BooleanClause.Occur.SHOULD }; 
                Query query = IKQueryParser.parseMultiField(fields, keyword, flags); 
     
                TopScoreDocCollector topCollector = TopScoreDocCollector.create( 
                        indexSearcher.maxDoc(), true); 
                indexSearcher.search(query, topCollector); 
                // 查询当页的记录 
                ScoreDoc[] docs = topCollector.topDocs((pageNo - 1) * pageSize, 
                        pageSize).scoreDocs; 
     
                // String[] highlightCol = {"userInfo", "parameter1"}; 
                // 高亮设置 
                SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter( 
                        "<font color=\"red\">", "</font>"); 
                Highlighter highlighter = new Highlighter(simpleHtmlFormatter, 
                        new QueryScorer(query)); 
     
                for (ScoreDoc scdoc : docs) { 
                    User u = new User(); 
                    Document doc = indexSearcher.doc(scdoc.doc); 
                    //               
                    // for (Fieldable fa : doc.getFields()) { 
                    // System.out.println(fa.name()); 
                    // String value = doc.get(fa.name()); 
                    // for (String col : highlightCol) { 
                    // if(fa.name().equals(col)) { 
                    // //设置高显内容 
                    // TokenStream tokenStream = analyzer.tokenStream("text",new 
                    // StringReader(value)); 
                    // value = highlighter.getBestFragment(tokenStream, value); 
                    // } 
                    // } 
                    //                   
                    // } 
     
                    u.setUserId(Integer.parseInt(doc.get("userId"))); 
                    u.setUserName(doc.get("userName")); 
                    u.setUserAge(Integer.parseInt(doc.get("userAge"))); 
     
                    TokenStream tokenStream = analyzer.tokenStream("text", 
                            new StringReader(doc.get("userInfo"))); 
                    String userInfo = highlighter.getBestFragment(tokenStream, doc 
                            .get("userInfo")); 
                    if (userInfo != null) { 
                        u.setUserInfo(userInfo); 
                    } else { 
                        u.setUserInfo(doc.get("userInfo")); 
                    } 
     
                    TokenStream tokenStream1 = analyzer.tokenStream("text1", 
                            new StringReader(doc.get("parameter1"))); 
                    String p1 = highlighter.getBestFragment(tokenStream1, doc 
                            .get("parameter1")); 
                    if (p1 != null) { 
                        u.setParameter1(p1); 
                    } else { 
                        u.setParameter1(doc.get("parameter1")); 
                    } 
     
                    u.setParameter2(doc.get("parameter2")); 
                    u.setParameter3(doc.get("parameter3")); 
                    u.setParameter4(doc.get("parameter4")); 
                    result.add(u); 
     
                } 
                PageBean pb = new PageBean(); 
                pb.setCurrentPage(pageNo);// 当前页 
                pb.setPageSize(pageSize); 
                pb.setAllRow(topCollector.getTotalHits());// hit中的记录数目 
                pb.setList(result); 
                return pb; 
     
            } catch (IOException e) { 
                // TODO Auto-generated catch block 
                e.printStackTrace(); 
            } catch (InvalidTokenOffsetsException e) { 
                // TODO Auto-generated catch block 
                e.printStackTrace(); 
            } 
     
            return null; 
        } 
     
        /**
         * 排序,有高亮,有分页
         * 
         * @param pageNo
         * @param pageSize
         * @param keyword
         * @return
         */ 
        public PageBean getPageQuery2(int pageNo, int pageSize, String keyword) { 
            List result = new ArrayList(); 
            File indexFile = new File(dirPath); 
            if (!indexFile.exists()) { 
                return null; 
            } 
            Directory dir; 
            try { 
                dir = FSDirectory.open(indexFile); 
                indexSearcher = new IndexSearcher(dir); 
                indexSearcher.setSimilarity(new IKSimilarity()); 
     
                String[] fields = new String[] { "userInfo", "parameter1" }; 
                BooleanClause.Occur[] flags = new BooleanClause.Occur[] { 
                        BooleanClause.Occur.MUST, BooleanClause.Occur.SHOULD }; 
                Query query = IKQueryParser.parseMultiField(fields, keyword, flags); 
     
                // 多字段排序,设置在前面的会优先排序 
                SortField[] sortFields = new SortField[2]; 
                SortField sortField = new SortField("userId", SortField.INT, false);//false升序,true降序 
                SortField FIELD_SEX = new SortField("userAge", SortField.INT, true); 
                sortFields[0] = sortField; 
                sortFields[1] = FIELD_SEX; 
                Sort sort = new Sort(sortFields); 
     
                TopDocs topDocs = indexSearcher.search(query, null, 50, sort); 
     
                if (topDocs.totalHits != 0) { 
                    // for(ScoreDoc sd : topDocs.scoreDocs) { 
                    //                   
                    // } 
                    // 高亮设置 
                    SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<font color=\"red\">", "</font>"); 
                    Highlighter highlighter = new Highlighter(simpleHtmlFormatter,new QueryScorer(query)); 
     
                    for (int i = (pageNo - 1) * pageSize; i < pageSize * pageNo; i++) { 
                        ScoreDoc scdoc = topDocs.scoreDocs[i]; 
                        User u = new User(); 
                        Document doc = indexSearcher.doc(scdoc.doc); 
                        u.setUserId(Integer.parseInt(doc.get("userId"))); 
                        u.setUserName(doc.get("userName")); 
                        u.setUserAge(Integer.parseInt(doc.get("userAge"))); 
                        TokenStream tokenStream = analyzer.tokenStream("text",new StringReader(doc.get("userInfo"))); 
                        String userInfo = highlighter.getBestFragment(tokenStream,doc.get("userInfo")); 
                        if (userInfo != null) { 
                            u.setUserInfo(userInfo); 
                        } else { 
                            u.setUserInfo(doc.get("userInfo")); 
                        } 
     
                        TokenStream tokenStream1 = analyzer.tokenStream("text1",new StringReader(doc.get("parameter1"))); 
                        String p1 = highlighter.getBestFragment(tokenStream1, doc.get("parameter1")); 
                        if (p1 != null) { 
                            u.setParameter1(p1); 
                        } else { 
                            u.setParameter1(doc.get("parameter1")); 
                        } 
     
                        u.setParameter2(doc.get("parameter2")); 
                        u.setParameter3(doc.get("parameter3")); 
                        u.setParameter4(doc.get("parameter4")); 
                        result.add(u); 
     
                    } 
                    PageBean pb = new PageBean(); 
                    pb.setCurrentPage(pageNo);// 当前页 
                    pb.setPageSize(pageSize); 
                    pb.setAllRow(topDocs.totalHits);// hit中的记录数目 
                    pb.setList(result); 
                    return pb; 
     
                } 
            } catch (IOException e) { 
                // TODO Auto-generated catch block 
                e.printStackTrace(); 
            } catch (InvalidTokenOffsetsException e) { 
                // TODO Auto-generated catch block 
                e.printStackTrace(); 
            } 
     
            return null; 
        } 
         
        /**
         * 删除索引
         * @param userId
         */ 
        public void deleIndex(String userId){ 
             
            try { 
                File f = new File(dirPath); 
                directory = FSDirectory.open(f); 
                IndexReader reader = IndexReader.open(directory,false);  
                Term term = new Term("userId", userId);  
                reader.deleteDocuments(term); 
                reader.close();  
            } catch (IOException e) { 
                // TODO Auto-generated catch block 
                e.printStackTrace(); 
            } 
             
             
        } 
     
    } 
分享到:
评论

相关推荐

    Lucene 3.6.1完整案例

    Lucene 3.6.1: 中文分词、创建索引库、排序、多字段分页查询以及高亮显示源 希望对大家有帮助, 我自己建立的mysql数据库 使用了IKAnalyzer分词器源代码,大家可以自己设置停词,也可以自己改写算法

    java开源包3

    GWT Advanced Table 是一个基于 GWT 框架的网页表格组件,可实现分页数据显示、数据排序和过滤等功能! Google Tag Library 该标记库和 Google 有关。使用该标记库,利用 Google 为你的网站提供网站查询,并且可以...

    java开源包1

    GWT Advanced Table 是一个基于 GWT 框架的网页表格组件,可实现分页数据显示、数据排序和过滤等功能! Google Tag Library 该标记库和 Google 有关。使用该标记库,利用 Google 为你的网站提供网站查询,并且可以...

    java开源包11

    GWT Advanced Table 是一个基于 GWT 框架的网页表格组件,可实现分页数据显示、数据排序和过滤等功能! Google Tag Library 该标记库和 Google 有关。使用该标记库,利用 Google 为你的网站提供网站查询,并且可以...

    java开源包2

    GWT Advanced Table 是一个基于 GWT 框架的网页表格组件,可实现分页数据显示、数据排序和过滤等功能! Google Tag Library 该标记库和 Google 有关。使用该标记库,利用 Google 为你的网站提供网站查询,并且可以...

    java开源包6

    GWT Advanced Table 是一个基于 GWT 框架的网页表格组件,可实现分页数据显示、数据排序和过滤等功能! Google Tag Library 该标记库和 Google 有关。使用该标记库,利用 Google 为你的网站提供网站查询,并且可以...

    java开源包5

    GWT Advanced Table 是一个基于 GWT 框架的网页表格组件,可实现分页数据显示、数据排序和过滤等功能! Google Tag Library 该标记库和 Google 有关。使用该标记库,利用 Google 为你的网站提供网站查询,并且可以...

    java开源包10

    GWT Advanced Table 是一个基于 GWT 框架的网页表格组件,可实现分页数据显示、数据排序和过滤等功能! Google Tag Library 该标记库和 Google 有关。使用该标记库,利用 Google 为你的网站提供网站查询,并且可以...

    java开源包4

    GWT Advanced Table 是一个基于 GWT 框架的网页表格组件,可实现分页数据显示、数据排序和过滤等功能! Google Tag Library 该标记库和 Google 有关。使用该标记库,利用 Google 为你的网站提供网站查询,并且可以...

    java开源包8

    GWT Advanced Table 是一个基于 GWT 框架的网页表格组件,可实现分页数据显示、数据排序和过滤等功能! Google Tag Library 该标记库和 Google 有关。使用该标记库,利用 Google 为你的网站提供网站查询,并且可以...

    java开源包7

    GWT Advanced Table 是一个基于 GWT 框架的网页表格组件,可实现分页数据显示、数据排序和过滤等功能! Google Tag Library 该标记库和 Google 有关。使用该标记库,利用 Google 为你的网站提供网站查询,并且可以...

    java开源包9

    GWT Advanced Table 是一个基于 GWT 框架的网页表格组件,可实现分页数据显示、数据排序和过滤等功能! Google Tag Library 该标记库和 Google 有关。使用该标记库,利用 Google 为你的网站提供网站查询,并且可以...

    java开源包101

    GWT Advanced Table 是一个基于 GWT 框架的网页表格组件,可实现分页数据显示、数据排序和过滤等功能! Google Tag Library 该标记库和 Google 有关。使用该标记库,利用 Google 为你的网站提供网站查询,并且可以...

    Java资源包01

    GWT Advanced Table 是一个基于 GWT 框架的网页表格组件,可实现分页数据显示、数据排序和过滤等功能! Google Tag Library 该标记库和 Google 有关。使用该标记库,利用 Google 为你的网站提供网站查询,并且可以...

    JAVA上百实例源码以及开源项目源代码

    内容索引:Java源码,窗体界面,3DMenu  Java 3DMenu 界面源码,有人说用到游戏中不错,其实平时我信编写Java应用程序时候也能用到吧,不一定非要局限于游戏吧,RES、SRC资源都有,都在压缩包内。 Java zip压缩包查看...

    JAVA上百实例源码以及开源项目

    内容索引:Java源码,窗体界面,3DMenu  Java 3DMenu 界面源码,有人说用到游戏中不错,其实平时我信编写Java应用程序时候也能用到吧,不一定非要局限于游戏吧,RES、SRC资源都有,都在压缩包内。 Java zip压缩包查看...

Global site tag (gtag.js) - Google Analytics