`
longzhun
  • 浏览: 362356 次
  • 性别: Icon_minigender_1
  • 来自: 北京
社区版块
存档分类
最新评论

Lucene2.4第一个简单实例

 
阅读更多



 首先来认识下全文检索的工作流程:

 

 

 

Java Project 目录结构如图:


 

 


 

 package com.lebuqi.lucene; 	

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.junit.Test;

import com.lebuqi.lucene.utils.File2Document;

/**
 * Minimal Lucene 2.4 example: builds an index from a single text file and
 * then runs a keyword query against that index.
 *
 * <p>Both steps are written as JUnit tests so they can be run independently;
 * {@link #createIndex()} must have been run before {@link #seach()} so that
 * the index directory exists.
 *
 * @author longzhun
 * @date 2011-8-6 19:13:56
 * @version V1.0
 */
public class HelloWord {

	/** Path of the text file that gets indexed. */
	String filePath = "G:\\work5\\luceneDemo\\luceneDatasource\\IndexWriter addDocument's a javadoc .txt";
	/** Directory in which the Lucene index is stored. */
	String indexPath = "G:\\work5\\luceneDemo\\luceneIndex";
	/** Analyzer shared by indexing and query parsing. */
	Analyzer analyzer = new StandardAnalyzer();

	/**
	 * Creates the index.
	 *
	 * <p>{@code IndexWriter} is the class used to modify (add to, delete from,
	 * update) the index. The writer is opened with {@code create == true}.
	 *
	 * @throws Exception if the source file cannot be read or the index cannot
	 *         be written
	 */
	@Test
	public void createIndex() throws Exception{
		Document doc = File2Document.file2Document(filePath);

		IndexWriter indexWriter = new IndexWriter(indexPath,analyzer,true,MaxFieldLength.LIMITED);
		try {
			indexWriter.addDocument(doc);
		} finally {
			// Always close the writer, even when addDocument fails, so the
			// writer's resources are not left dangling.
			indexWriter.close();
		}
	}

	/**
	 * Searches the index for the term "document" in the "name" and "content"
	 * fields and prints the stored fields of every hit.
	 *
	 * @throws Exception if the query cannot be parsed or the index cannot be
	 *         opened or read
	 */
	@Test
	public void seach() throws Exception{
		String queryString = "document";
		String[] fields = {"name","content"};
		QueryParser parser = new MultiFieldQueryParser(fields, analyzer);
		Query query = parser.parse(queryString);

		IndexSearcher indexSearcher = new IndexSearcher(indexPath);
		try {
			// No filter is applied; the typed null selects the
			// search(Query, Filter, int) overload.
			Filter filter = null;
			TopDocs topDocs = indexSearcher.search(query, filter, 10000);

			System.out.println("总共有"+topDocs.totalHits+"条匹配结果");

			for(ScoreDoc scoreDoc:topDocs.scoreDocs){
				int docSn = scoreDoc.doc; // internal document number
				Document doc = indexSearcher.doc(docSn); // load the document by its number

				System.out.println("name    ="+doc.get("name"));
				System.out.println("content    ="+doc.get("content"));
				System.out.println("size    ="+doc.get("size"));
				System.out.println("path    ="+doc.get("path"));
			}
		} finally {
			// The searcher was opened from a path, so it must be closed here
			// to release what it holds on the index.
			indexSearcher.close();
		}
	}
}

	

 

File2Document.java

 

 package com.lebuqi.lucene.utils; 	

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStreamReader;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;

/**
 * Utility that converts a file on disk into a Lucene {@code Document}.
 *
 * @author longzhun
 * @date 2011-8-6 19:46:51
 * @version V1.0
 */
public class File2Document {

	/**
	 * Builds a Lucene document describing the file at {@code path}.
	 *
	 * <p>The document carries four stored fields:
	 * <ul>
	 *   <li>{@code name} - the file name, analyzed</li>
	 *   <li>{@code content} - the full text of the file, analyzed</li>
	 *   <li>{@code size} - the file length in bytes, indexed but not analyzed</li>
	 *   <li>{@code path} - the absolute path, stored only (not indexed)</li>
	 * </ul>
	 *
	 * @param path path of the file to convert
	 * @return the populated document
	 * @throws RuntimeException if the file content cannot be read
	 */
	public static Document file2Document(String path){
		File file = new File(path);

		// Fields: name, content, size, path
		Document doc = new Document();
		doc.add(new Field("name", file.getName(),Store.YES,Index.ANALYZED ));
		doc.add(new Field("content", readFileContent(file),Store.YES,Index.ANALYZED ));
		doc.add(new Field("size", String.valueOf(file.length()),Store.YES,Index.NOT_ANALYZED));
		doc.add(new Field("path", file.getAbsolutePath(),Store.YES,Index.NO ));
		return doc;
	}

	/**
	 * Reads the whole file as text, terminating every line with {@code "\n"}.
	 *
	 * @param file the file to read
	 * @return the file content; an empty string for an empty file
	 * @throws RuntimeException wrapping any failure to open, read or close the file
	 */
	private static String readFileContent(File file) {
		try {
			// NOTE(review): decodes with the platform default charset, as the
			// original did; confirm whether an explicit charset is wanted.
			BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
			try {
				StringBuilder content = new StringBuilder();
				for(String line = null;(line = br.readLine())!= null;){
					content.append(line).append("\n");
				}
				return content.toString();
			} finally {
				// Closing the reader also closes the underlying file stream;
				// previously the handle was never released.
				br.close();
			}
		} catch (Exception e) {
			throw new RuntimeException(e);
		}
	}
}

	

 

  • 大小: 6.1 KB
  • 大小: 55 KB
分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics