`
leiwuluan
  • 浏览: 694571 次
  • 性别: Icon_minigender_1
  • 来自: 北京
社区版块
存档分类

Lucene CRUD

阅读更多
package com.lucene.LuceneTest.memery;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class AppIndex {
	
	private Directory directory;
	
	public AppIndex() throws Exception {
		directory=FSDirectory.open(new File("E:/index/appIndex"));
	}
	
	public IndexWriter getWriter() throws Exception{
		return new IndexWriter(directory,new StandardAnalyzer(Version.LUCENE_30),false,IndexWriter.MaxFieldLength.UNLIMITED);
	}
	
	//初始化索引
	public void initIndex() throws Exception{
		IndexWriter writer=new IndexWriter(directory,new StandardAnalyzer(Version.LUCENE_30),true,IndexWriter.MaxFieldLength.UNLIMITED);
		
		File[] files=new File("D:/test_data/result1").listFiles();
		for(int i=0;i<files.length;i++){
			File f=files[i];
			System.out.println(f.getName());
			BufferedReader br=new BufferedReader(new InputStreamReader(new FileInputStream(f)));
			String line="";
			br.readLine();
			while((line=br.readLine())!=null){
				String[] arrStr=line.split("\",\"");
				if(arrStr.length>3){
					Document doc=new Document();
					doc.add(new Field("appid",arrStr[0].replaceAll("\"", "").trim(),Field.Store.YES,Field.Index.NOT_ANALYZED));
					doc.add(new Field("onlineDay",arrStr[1].trim(),Field.Store.YES,Field.Index.NOT_ANALYZED));
					doc.add(new Field("sellerUrl",arrStr[2].trim(),Field.Store.YES,Field.Index.NO));
					doc.add(new Field("sellerName",arrStr[7].trim(),Field.Store.YES,Field.Index.ANALYZED));
					
					//发布时间
					String dateStr=arrStr[6].replaceAll("[TZ]", " ").trim();
					doc.add(new Field("releaseDate",dateStr,Field.Store.YES,Field.Index.NOT_ANALYZED));
					writer.addDocument(doc);
				}
			}
		}
		
		writer.close();
		System.out.println("总共索引"+writer.numDocs()+"条!");
	}
	
	//添加文档
	public int addDocument(Document doc) throws Exception{
		IndexWriter writer=getWriter();
		writer.addDocument(doc);
		writer.close();
		return writer.numDocs();
	}
	
	//删除文档
	public int removeDocument(String appid) throws Exception{
		IndexWriter writer=getWriter();
		
		System.out.println(writer.numDocs());
		Term term=new Term("appid",appid);
		writer.deleteDocuments(term);
		writer.commit();
		
		int delNum=writer.maxDoc()-writer.numDocs();
		writer.optimize();//优化合并
		writer.close();
		return delNum;
	}
	
	public void updateDoc(String updateStr) throws Exception{
		IndexWriter writer=getWriter();
		
		System.out.println("numDocs"+writer.numDocs());
		
		String[] cons=updateStr.split("\\|");
		
		String s=updateStr.substring(updateStr.indexOf("appid=")+6);
		String appid=s.substring(0,s.indexOf("|")).replaceAll("\"","");
		
		
		System.out.println("appid-->"+appid);
		for(Document doc:this.searchDoc("appid="+appid)){
			for(int i=0;i<cons.length;i++){
				String[] unitStr=cons[i].split("=");
				Store store=Field.Store.YES;
				Index index=Field.Index.ANALYZED;
				
				if("sellerName".equals(unitStr[0])){
					store=Field.Store.YES;
					index=Field.Index.ANALYZED;
//					doc.removeField("sellerName");
				}else if("sellerUrl".equals(unitStr[0])){
					store=Field.Store.YES;
					index=Field.Index.NOT_ANALYZED;
//					doc.removeField("sellerUrl");
				}else if("releaseDate".equals(unitStr[0])){
					store=Field.Store.YES;
					index=Field.Index.NOT_ANALYZED;
//					doc.removeField("releaseDate");
				}else if("onlineDay".equals(unitStr[0])){
					store=Field.Store.YES;
					index=Field.Index.NOT_ANALYZED;
//					doc.removeField("onlineDay");
				}
				
				if(!"appid".equals(unitStr[0])){
					Field field=new Field(unitStr[0],unitStr[1].replaceAll("\"", "").trim(),store,index);
					doc.add(field);
				}
			}
			writer.updateDocument(new Term("appid",appid), doc);
			writer.commit();
		}
		
		System.out.println("numDocs"+writer.numDocs());
		writer.close();
	}
	
	//搜索文档
	public List<Document> searchDoc(String condition) throws Exception{
		//搜索
		
		IndexSearcher searcher=new IndexSearcher(this.directory);
		
//		QueryParser parser=new QueryParser(Version.LUCENE_30,"sellerName",new StandardAnalyzer(Version.LUCENE_30));
//		Query query=parser.parse(condition);
		
		String[] cons=condition.split("\\|");
//		
		String[] fields=new String[cons.length];
		String[] queries=new String[cons.length];
		Occur[] clauses=new Occur[cons.length];
		for(int i=0;i<cons.length;i++){
			String[] unitQ=cons[i].split("=");
			fields[i]=unitQ[0];
			queries[i]=unitQ[1].replaceAll("\"", "").trim();
			clauses[i]=BooleanClause.Occur.SHOULD;
			
			System.out.println(unitQ[0]+"  |  "+unitQ[1].replaceAll("\"", "").trim());
		}
		Query query = MultiFieldQueryParser.parse(Version.LUCENE_30, queries, fields, clauses, new StandardAnalyzer(Version.LUCENE_30));
		
		
		TopDocs tds= searcher.search(query,10);
		List<Document> list=new ArrayList<Document>();
		
		ScoreDoc[] scoreDocs=tds.scoreDocs;
		for(int i=0;i<scoreDocs.length;i++){
			list.add(searcher.doc(scoreDocs[i].doc));
		}
		System.out.println(tds.totalHits);
		searcher.close();
		return list;
	}
	public static void main(String[] args) throws Exception {
		AppIndex appIndex=new AppIndex();
//		appIndex.initIndex();
         		
		Scanner input=new Scanner(System.in);
		while(true){
			System.out.println("输入1添加索引          输入2搜索         输入3删除指定appid文档         输入4更新\n");
			int flag=input.nextInt();
			switch(flag){
			case 1:
				
				System.out.println("输入以下字段用空格格开[appid,onlineDay,sellerUrl,sellerName,releaseDate]\n\n");
				String appid=input.next();
				String onlineDay=input.next();
				String sellerUrl=input.next();
				String sellerName=input.next();	
				String releaseDate=input.next();
				
				Document doc=new Document();
				doc.add(new Field("appid",appid,Field.Store.YES,Field.Index.NOT_ANALYZED));
				doc.add(new Field("onlineDay",onlineDay,Field.Store.YES,Field.Index.NOT_ANALYZED));
				doc.add(new Field("sellerUrl",sellerUrl,Field.Store.YES,Field.Index.NO));
				doc.add(new Field("sellerName",sellerName,Field.Store.YES,Field.Index.ANALYZED));
				
				//发布时间
				doc.add(new Field("releaseDate",releaseDate,Field.Store.YES,Field.Index.NOT_ANALYZED));
				int indexNum=appIndex.addDocument(doc);
				System.out.println("添加成功!共有索引["+indexNum+"]个");
				
				break;
			case 2:
				
				System.out.println("请输入查询条件:appid=\"\"|onlineDay=\"\"|sellerUrl=\"\"|sellerName=\"\"|releaseDate=\"\"");
				String condition=input.next();
				
				 List<Document> list=appIndex.searchDoc(condition);
				 System.out.println(list.size());
				 for(Document d:list){
					 for(Field f:d.getFields("onlineDay")){
						 System.out.println(f.stringValue());
					 }
					 System.out.println("[appid="+d.get("appid")+",onlineDay="+d.get("onlineDay")+",sellerUrl="+d.get("sellerUrl")+",sellerName="+d.get("sellerName")+",releaseDate="+d.get("releaseDate")+"]");
				 }
				break;
				
			case 3:
				System.out.println("请输入appid\n");
				appid=input.next();
				int delNum=appIndex.removeDocument(appid);
				System.out.println("删除"+delNum+"条!");
				
				
				
				break;
				
			case 4:
				System.out.println("请输入更新字段和值:appid=\"\"|onlineDay=\"\"|sellerUrl=\"\"|sellerName=\"\"|releaseDate=\"\"");
				
				String updateStr=input.next();
				appIndex.updateDoc(updateStr);
				
				break;
				default:
					
					System.out.println("命令不对重新输入");
			
			}
		}
	}
}
 
分享到:
评论

相关推荐

    lucene实例lucene实例

    lucene实例lucene实例lucene实例lucene实例lucene实例lucene实例lucene实例lucene实例lucene实例

    lucene,lucene教程,lucene讲解

    lucene,lucene教程,lucene讲解。 为了对文档进行索引,Lucene 提供了五个基础的类 public class IndexWriter org.apache.lucene.index.IndexWriter public abstract class Directory org.apache.lucene.store....

    lucene的curd

    创建索引,利用索引来查询。采用的框架是hibernate+struts2,代码里面有注释!

    lucene3.0 lucene3.0

    lucene3.0 lucene3.0 lucene3.0 lucene3.0 lucene3.0

    lucene学习lucene学习

    lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习...

    Lucene3.0特性,Lucene3.0特性

    Lucene3.0特性Lucene3.0特性

    lucene-core-7.7.0-API文档-中文版.zip

    赠送jar包:lucene-core-7.7.0.jar; 赠送原API文档:lucene-core-7.7.0-javadoc.jar; 赠送源代码:lucene-core-7.7.0-sources.jar; 赠送Maven依赖信息文件:lucene-core-7.7.0.pom; 包含翻译后的API文档:lucene...

    lucene讲义 叫你用lucene算法

    lucene学习教程lucene讲义 叫你用lucene算法

    lucene.NET 中文分词

    lucene.NET 中文分词 高亮 lucene.NET 中文分词 高亮 lucene.NET 中文分词 高亮 lucene.NET 中文分词 高亮

    lucene-core-7.2.1-API文档-中文版.zip

    赠送jar包:lucene-core-7.2.1.jar; 赠送原API文档:lucene-core-7.2.1-javadoc.jar; 赠送源代码:lucene-core-7.2.1-sources.jar; 赠送Maven依赖信息文件:lucene-core-7.2.1.pom; 包含翻译后的API文档:lucene...

    lucene3源码分析

    lucene3源码分析

    Lucene4.X第九讲-Lucene搜索深入实战

    本课程由浅入深地介绍了Lucene4的发展历史,开发环境搭建,分析lucene4的中文分词原理,深入讲了lucene4的系统架构,分析lucene4索引实现原理及性能优化,了解关于lucene4的搜索算法优化及利用java结合lucene4实现...

    lucene4.2 jar包

    lucene-analyzers-common-4.2.0.jar; lucene-analyzers-kuromoji-4.2.0.jar; lucene-analyzers-phonetic-4.2.0.jar; lucene-codecs-4.2.0.jar; lucene-core-4.2.0.jar; lucene-grouping-4.2.0.jar; lucene-...

    Annotated Lucene 中文版 Lucene源码剖析

    Annotated Lucene 中文版 Lucene源码剖析

    Lucene实战

    《Lucene实战(第2版)》基于Apache的Lucene 3.0,从Lucene核心、Lucene应用、案例分析3个方面详细系统地介绍了Lucene,包括认识Lucene、建立索引、为应用程序添加搜索功能、高级搜索技术、扩展搜索、使用Tika提取文本...

    lucene例子(一个完整的,lucene例子)(lucenetest.rar,lucene,全文检索,lucene例子)

    lucenetest.rar,lucene,全文检索,lucene例子 lucenetest.rar,lucene,全文检索,lucene例子lucenetest.rar,lucene,全文检索,lucene例子

    Lucene时间区间搜索

    c#下实现Lucene时间区间查询匹配。主要还是对Lucene查循对像Query的实现

    Lucene3.5源码jar包

    Lucene3.5全部源码,打包jar文件,可以直接打开查看源码,Lucene开发必备

    lucene-core-2.9.4,lucene-core-3.0.2,lucene-core-3.0.3,lucene-core-3.4.0

    lucene-core-2.9.4,lucene-core-3.0.2,lucene-core-3.0.3,lucene-core-3.4.0

    lucene in action_中文版(lucene实战)

    本书深入浅出地介绍了Lucene——一个开源的使用Java语言编写的全文搜索引擎开发包。它通过浅显的语言、大量的图注、丰富的代码示例,以及清晰的结构为读者呈现出作为优秀开源项目的Lucene 所体现的强大功能。全书共...

Global site tag (gtag.js) - Google Analytics