`
NEO_ONE
  • 浏览: 46430 次
  • 性别: Icon_minigender_1
  • 来自: 北京
社区版块
存档分类
最新评论

lucene3.5例子

    博客分类:
  • Java
阅读更多
package com.lucene;

import java.io.*;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Small Lucene 3.5 demo: builds an on-disk index from a word list and runs a
 * fuzzy query against it.
 *
 * <p>Each line of the word list becomes one document with two fields:
 * {@code index} (analyzed and stored, used for searching) and {@code contents}
 * (stored only). Both fields hold the trimmed line.
 *
 * <p>The helpers report failure through their return value and the console
 * instead of throwing, which is why they catch {@link Exception} broadly.
 */
public class TestLucene {

	/** Directory that holds the index files, relative to the working directory. */
	private static final File INDEX_PATH = new File("index");

	/** Word list to index: one entry per line, GBK-encoded. */
	private static final File WORD_LIST = new File("duomicibiao.txt");

	/** Analyzer used when indexing; splits text on whitespace only. */
	private static final Analyzer ANALYZER = new WhitespaceAnalyzer(Version.LUCENE_35);

	/**
	 * Reads the word list and writes one document per entry into the index.
	 *
	 * @return {@code true} if every document was added and the writer closed
	 *         cleanly; {@code false} if the word list could not be read or
	 *         indexing failed
	 */
	static boolean buildIndex() {
		HashMap<String, String> words = readFile(WORD_LIST);
		if (words == null) {
			System.out.println("文件读取错误");
			return false;
		}

		FSDirectory directory = null;
		IndexWriter writer = null;
		try {
			directory = FSDirectory.open(INDEX_PATH);
			IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_35, ANALYZER);
			writer = new IndexWriter(directory, writerConfig);

			for (String key : words.keySet()) {
				Document doc = new Document();
				doc.add(new Field("index", key, Field.Store.YES, Field.Index.ANALYZED));
				doc.add(new Field("contents", words.get(key), Field.Store.YES, Field.Index.NO));
				writer.addDocument(doc);
			}

			// Closing commits the added documents; without it the index is not built.
			writer.close();
			writer = null;
			return true;
		} catch (Exception e) {
			e.printStackTrace();
			return false;
		} finally {
			// Only reached with a live writer when something failed above:
			// discard the uncommitted documents and release the write lock.
			if (writer != null) {
				try {
					writer.rollback();
				} catch (Exception e) {
					e.printStackTrace();
				}
			}
			closeQuietly(directory);
		}
	}

	/**
	 * Tells whether the index directory holds no files yet.
	 *
	 * @return {@code true} if the directory is missing, is not a directory, or
	 *         is empty; {@code false} otherwise
	 */
	static boolean noIndex() {
		// listFiles() returns null when the path does not exist or is not a directory.
		File[] indexFiles = INDEX_PATH.listFiles();
		return indexFiles == null || indexFiles.length == 0;
	}

	/**
	 * Deletes every entry directly inside the index directory.
	 *
	 * @return {@code true} if nothing is left to delete or every entry was
	 *         removed; {@code false} if at least one entry could not be deleted
	 */
	static boolean deleteIndex() {
		File[] indexFiles = INDEX_PATH.listFiles();
		if (indexFiles == null) {
			// Missing directory: there is nothing to delete.
			return true;
		}

		boolean allDeleted = true;
		try {
			for (File file : indexFiles) {
				// delete() signals failure through its return value, not an exception.
				if (!file.delete()) {
					allDeleted = false;
				}
			}
		} catch (Exception e) {
			e.printStackTrace();
			return false;
		}
		return allDeleted;
	}

	/**
	 * Reads a GBK-encoded text file and maps every trimmed line to itself.
	 *
	 * @param file the file to read
	 * @return the lines keyed by their own trimmed text, or {@code null} if the
	 *         file could not be read
	 */
	static HashMap<String, String> readFile(File file) {
		HashMap<String, String> wordsMap = new HashMap<String, String>();
		InputStream in = null;
		BufferedReader br = null;
		try {
			in = new FileInputStream(file);
			br = new BufferedReader(new InputStreamReader(in, "gbk"));
			String line;
			while ((line = br.readLine()) != null) {
				String word = line.trim();
				wordsMap.put(word, word);
			}
			return wordsMap;
		} catch (Exception e) {
			e.printStackTrace();
			return null;
		} finally {
			// Closing the outermost reader closes the wrapped streams too. The raw
			// stream is closed directly only if the reader was never created.
			// A close failure must not discard a map that was read successfully.
			if (br != null) {
				closeQuietly(br);
			} else {
				closeQuietly(in);
			}
		}
	}

	/**
	 * Runs a fuzzy query on the {@code index} field and prints the top hits.
	 *
	 * @param queryStr    the term to match approximately
	 * @param hitsPerPage the maximum number of hits to collect and print
	 */
	static void search(String queryStr, int hitsPerPage) {
		FSDirectory directory = null;
		IndexReader reader = null;
		IndexSearcher searcher = null;
		try {
			directory = FSDirectory.open(INDEX_PATH);
			reader = IndexReader.open(directory);
			searcher = new IndexSearcher(reader);

			Query query = new FuzzyQuery(new Term("index", queryStr));

			TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
			searcher.search(query, collector);
			ScoreDoc[] hits = collector.topDocs().scoreDocs;

			if (hits.length == 0) {
				System.out.println("未找到结果");
				return;
			}
			for (int i = 0; i < hits.length; i++) {
				Document result = searcher.doc(hits[i].doc);
				System.out.println("【"+ i +"】" + hits[i].score + "    index:" + result.get("index") + "            contents:" + result.get("contents"));
			}
		} catch (Exception e) {
			System.out.println("Exception");
			// Keep the cause visible instead of reducing every failure to one word.
			e.printStackTrace();
		} finally {
			// A searcher built from a reader does not close that reader, and
			// neither of them closes the directory, so release all three.
			closeQuietly(searcher);
			closeQuietly(reader);
			closeQuietly(directory);
		}
	}

	/**
	 * Closes a resource, printing rather than propagating any I/O failure.
	 *
	 * @param resource the resource to close; may be {@code null}
	 */
	private static void closeQuietly(Closeable resource) {
		if (resource == null) {
			return;
		}
		try {
			resource.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Builds the index on first use, then runs a sample fuzzy search.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		// Searching is pointless when the index could not be built.
		if (noIndex() && !buildIndex()) {
			return;
		}

		int resultSize = 20;

		search("张杰", resultSize);
	}
}
分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics