
Lucene 3.0 First Steps, Part 1 (Creating an Index)

 
This is my first contact with Lucene, so I am taking it one step at a time. To implement full-text search, the first step is to create an index. Here is the code:
package cn.com.alei.lucene;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;

/**
 * @author alei
 * @version Created: 2012-4-14 03:19:09 PM
 * Builds a Lucene 3.0 index from the .txt files in a directory.
 */
public class Indexer {
  
	 public static void main(String[] args) throws CorruptIndexException, LockObtainFailedException, IOException {
		 // location of the folder whose files will be indexed
		 File path = new File("D:\\framework\\luceneDir");
		 
		 
		 /* location where the index files will be stored */
		 File fileindex = new File("D:\\framework\\luceneIndex");
		 
		// create the Directory object pointing at the index location on disk
		 Directory directory = new SimpleFSDirectory(fileindex);
		 
		 // The Analyzer is responsible for processing the input text (tokenizing, filtering, etc.);
		 // StandardAnalyzer is one of the Analyzer implementations bundled with Lucene
		 Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
		 
		 /* Create the IndexWriter. The first argument is the Directory, the second the Analyzer,
		    the third indicates whether to create a new index (true) or open an existing one for
		    modification (false), and the fourth caps how many terms are indexed per field:
		    new MaxFieldLength(2) would index only the first two terms of each field, so
		    IndexWriter.MaxFieldLength.UNLIMITED is normally used. */
		 IndexWriter indexwriter = new IndexWriter(directory,analyzer,true,IndexWriter.MaxFieldLength.UNLIMITED);
		 
		 File[] files = path.listFiles();
		 
		 long startTime = new Date().getTime();
		 
		 // add one Document per .txt file to the index
		 for(int i = 0 ; i<files.length ; i++){
			 if(files[i].isFile()&&files[i].getName().endsWith(".txt")){
				 System.out.println("File: " + files[i].getCanonicalPath() + " 正在被索引...");
				 String fileContent = readFile(files[i],"GB2312");
				 System.out.println(fileContent);
				 
				 Document document = new Document();
				 // analyze the content for searching, but do not store the raw text in the index
				 Field fieldContent = new Field("content", fileContent, Store.NO, Index.ANALYZED);
				 // store the path unanalyzed so search results can report which file matched
				 Field fieldPath = new Field("path", files[i].getCanonicalPath(), Field.Store.YES, Field.Index.NOT_ANALYZED);
				 document.add(fieldContent);
				 document.add(fieldPath);
				 indexwriter.addDocument(document);
			 }
		 }
		 // merge segments and optimize the index
		 indexwriter.optimize();
		 
		 indexwriter.close();
		 
		 long endTime = new Date().getTime();
		 System.out.println("花费了 :" + (endTime - startTime) + "毫秒把文档加到索引中去!!!");
		 
	}
	
	 /** Reads the entire file into a String using the given charset. */
	 public static String readFile(File file, String charset) throws IOException{
		 BufferedReader bufferedReader = new BufferedReader(
				                         new InputStreamReader(
				                         new FileInputStream(file),charset));
		 StringBuilder sb = new StringBuilder();
		 String str;
		 while((str = bufferedReader.readLine()) != null){
			 // append a line break so the last word of one line does not merge with the next
			 sb.append(str).append('\n');
		 }
		 bufferedReader.close();
		 return sb.toString();
	 }

}
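Once indexwriter.close() returns, the index on disk can be opened read-only to confirm that documents were actually written. The following is a minimal sketch (the class name IndexChecker is just for illustration, and it reuses the D:\framework\luceneIndex path from above): it opens the index with an IndexReader and prints the number of documents it contains.

package cn.com.alei.lucene;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;

public class IndexChecker {

	 public static void main(String[] args) throws IOException {
		 // same index location that Indexer wrote to; adjust if yours differs
		 Directory directory = new SimpleFSDirectory(new File("D:\\framework\\luceneIndex"));

		 // open the index read-only and report how many documents it holds
		 IndexReader reader = IndexReader.open(directory);
		 System.out.println("Documents in index: " + reader.numDocs());

		 reader.close();
		 directory.close();
	 }
}

If the printed count matches the number of .txt files in D:\framework\luceneDir, the index was built as expected.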

