`
chuan9966
  • 浏览: 47053 次
文章分类
社区版块
存档分类
最新评论

2013-04-24期-索引内容查询

 
阅读更多

配图参考mainTest.java里的main方法

/**
 * Small demo driver: fetches a couple of news pages, adds them to the
 * "search" index and runs an exact-term query against the "title" field,
 * printing the hits to standard output.
 */
public class mainTest {

    /** Upper bound on the number of hits the collector keeps. */
    private static final int MAX_HITS = 10000;

    /** Maximum number of characters of the "content" field printed per hit. */
    private static final int PREVIEW_LENGTH = 200;

    /**
     * Fetches web page content and converts each page into a document.
     *
     * <p>Each URL is fetched through {@code WebContent.getContentFromSite} and
     * converted with {@code PackContentObject.convertContentToDoc}.
     *
     * @author JLC
     * @return the converted documents, one per fetched URL, in fetch order
     */
    public static List<Document> getWebContentDocuments() {
        List<Document> docs = new ArrayList<Document>();
        WebContent wc = new WebContent();
        Document doc1 = PackContentObject.convertContentToDoc(wc.getContentFromSite("http://news.163.com"));
        docs.add(doc1);
        Document doc2 = PackContentObject.convertContentToDoc(wc.getContentFromSite("http://news.sohu.com/"));
        docs.add(doc2);
        return docs;
    }

    /**
     * Builds the index: adds every fetched document to the "search" context.
     *
     * <p>A failure on one document is reported and does not prevent the
     * remaining documents from being added.
     */
    public static void createSearchEngineData() {
        SearchEngineCore se = SearchObject.getInstance().getLuceneContext("search");
        List<Document> docList = getWebContentDocuments();
        for (Document doc : docList) {
            try {
                se.getTw().addDocument(doc);
                // NOTE(review): commit/refresh runs once per document; presumably it
                // could be done once after the loop - confirm against SearchEngineCore.
                se.commitIndex();
                se.refreshData();
                se.getNRTManager().maybeRefresh();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    public static void main(String[] args) {
        // Build the index data (enable on the first run).
        //createSearchEngineData();
        searchContent("网易新闻");
    }

    /**
     * Runs an exact-term search on the "title" field and prints every
     * collected hit (title plus a short content preview) followed by the
     * total number of matches.
     *
     * @param keyWord the term to look up; a {@code TermQuery} is used, so the
     *                value must match an indexed term exactly
     */
    public static void searchContent(String keyWord) {
        try {
            /*
             * Alternative: analyzed multi-field query over several readers.
             *
            Version v = Version.LUCENE_42;
            // Obtain the readers to search
            IndexReader[] readers =  SearchObject.getInstance().getSearcherReads();
            // Search across multiple readers
            MultiReader mReaders = new MultiReader(readers);
            IndexSearcher indexSearch = new  IndexSearcher(mReaders);
            // Built-in standard analyzer
            Analyzer analyzer =new StandardAnalyzer(v);
            // Build a boolean query
            BooleanQuery query = new BooleanQuery();
            String[] field = {"title", "content"};
            BooleanClause.Occur[] flags = new BooleanClause.Occur[2];
            flags[0] = BooleanClause.Occur.SHOULD;
            flags[1] = BooleanClause.Occur.SHOULD;
            Query query1 = MultiFieldQueryParser.parse(v, QueryParser.escape(keyWord), field, flags, analyzer);
            // This clause must be satisfied
            query.add(query1, Occur.MUST);
            */

            // Exact query: the TermQuery text must match an indexed term completely.
            IndexSearcher indexSearch = SearchObject.getInstance().getSearcher("search");
            TermQuery query = new TermQuery(new Term("title", keyWord));

            // Keep at most MAX_HITS results.
            TopScoreDocCollector topCollector = TopScoreDocCollector.create(MAX_HITS, true);
            indexSearch.search(query, topCollector);

            // Fetch the collected results.
            TopDocs topDocs = topCollector.topDocs();
            int resultCount = topDocs.totalHits;

            // Iterate over the hits actually collected: totalHits counts every match
            // and can exceed scoreDocs.length once more than MAX_HITS documents match.
            for (ScoreDoc hit : topDocs.scoreDocs) {
                Document doc = indexSearch.doc(hit.doc);
                System.out.println("标题:" + doc.get("title"));

                // Clamp the preview so short or missing content does not throw.
                String content = doc.get("content");
                if (content == null) {
                    content = "";
                }
                int previewEnd = Math.min(content.length(), PREVIEW_LENGTH);
                System.out.println("内容:" + content.substring(0, previewEnd));
            }
            System.out.println("查询结果条数:" + resultCount);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

}
分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics