lucene 3.0 中的demo项目部署

liuxinglanyue

浏览: 547754 次
性别:
来自: 杭州

最近访客更多访客>>

hui963966800

lhc98

guoshun0321

kidding87

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

2011-02 ( 10)
2011-01 ( 22)
2010-12 ( 165)
更多存档...

博客分类：

lucene

lucene Tomcat Apache SQL Oracle

转自：bjqincy

1 在 MyEclipse 中建立一个 web 工程，将 lucene-3.0.2\src 中的代码粘贴到 src 中。将 lucene-3.0.2\src\jsp 下面的文件粘贴到 WebRoot 下面，将 lucene-core-3.0.2.jar、lucene-demos-3.0.2.jar 拷贝到 lib 下，并添加这两个 jar。

在项目中建立一个 index 文件夹。

2 运行 org.apache.lucene.demo 下的 IndexHTML。

以 Java 程序方式运行：Run As -> 打开 Run 对话框（run dialog），

在 args（程序参数）中写

-create -index D:\Tomcat\tomcat5\webapps\lucene\index D:\Tomcat\tomcat5\webapps\lucene\docs　

提交。

参数格式：-create -index <生成的索引文件位置> <被索引的文件所在目录>

该路径是根据

运行

adding D:/Tomcat/tomcat5/webapps/lucene/docs/demo3.html
adding D:/Tomcat/tomcat5/webapps/lucene/docs/demo4.html
adding D:/Tomcat/tomcat5/webapps/lucene/docs/fileformats.html
adding D:/Tomcat/tomcat5/webapps/lucene/docs/gettingstarted.html
adding D:/Tomcat/tomcat5/webapps/lucene/docs/index.html
adding D:/Tomcat/tomcat5/webapps/lucene/docs/linkmap.html
adding D:/Tomcat/tomcat5/webapps/lucene/docs/lucene-contrib/index.html
adding D:/Tomcat/tomcat5/webapps/lucene/docs/queryparsersyntax.html
adding D:/Tomcat/tomcat5/webapps/lucene/docs/scoring.html
adding D:/Tomcat/tomcat5/webapps/lucene/docs/skin/images/README.txt
adding D:/Tomcat/tomcat5/webapps/lucene/docs/skin/note.txt
adding D:/Tomcat/tomcat5/webapps/lucene/docs/systemrequirements.html
Optimizing index...
93110 total milliseconds

生成索引文件

4 修改 configuration.jsp 文件，将 indexLocation 改成生成的索引文件位置

// Location of the Lucene index; set it to the index directory generated by
// IndexHTML in the step above (backslashes are escaped for the Java string).
String indexLocation = "D:\\Tomcat\\tomcat5\\webapps\\lucene\\index";
// Presumably the footer text displayed by the demo web app - confirm in the JSPs.
String appfooter = "Apache Lucene Template WebApp 1.0";

5 运行

-----------------------------

6 另外写了两个例子：lucene 搜索 db，lucene 搜索 txt 文件

lucene 搜索 db

JdbcUtil

package com.cee.util.db;

import java.sql.*;

/**
 * Small static JDBC helper used by the Lucene database-indexing example.
 *
 * <p>The class keeps one shared connection plus the most recently created
 * statement and result set in static fields; {@link #close()} releases
 * exactly those three objects. It is not designed for concurrent use.
 */
public class JdbcUtil {

	/** Class name of the JDBC driver loaded when this class is initialized. */
	private static final String DRIVER = "oracle.jdbc.driver.OracleDriver";
	/** JDBC URL of the database to connect to. */
	private static final String URL = "jdbc:oracle:thin:@192.168.2.221:1521:ora92";
	/** Database user name. */
	private static final String USERNAME = "bzzyeip";
	/** Database password. */
	private static final String PASSWORD = "bzzyeip";

	private static Connection con;
	private static Statement stm;
	private static ResultSet rs;

	static {
		try {
			Class.forName(DRIVER);
			getConnection();
		} catch (Exception e) {
			// A missing driver must not prevent the class from loading;
			// getResult() retries the connection later.
			e.printStackTrace();
		}
	}

	/**
	 * Opens a new connection and remembers it as the shared connection used
	 * by {@link #getResult(String)}.
	 *
	 * @return the new connection, or {@code null} if it could not be opened
	 *         (the failure is printed to standard error)
	 */
	public static Connection getConnection() {
		con = null;
		try {
			con = DriverManager.getConnection(URL, USERNAME, PASSWORD);
		} catch (SQLException e) {
			e.printStackTrace();
		}
		return con;
	}

	/**
	 * Executes a query on the shared connection, (re)opening the connection
	 * first when it is missing or has been closed.
	 *
	 * @param sql the SQL query to execute
	 * @return the result set, or {@code null} if no connection is available
	 *         or the query failed (the failure is printed to standard error)
	 */
	public static ResultSet getResult(String sql) {
		try {
			// The initial connect may have failed, or close() may have been
			// called since the last query.
			if (con == null || con.isClosed()) {
				getConnection();
			}
			if (con == null) {
				return null;
			}
			stm = con.createStatement();
			rs = stm.executeQuery(sql);
			return rs;
		} catch (SQLException e) {
			e.printStackTrace();
		}
		return null;
	}

	/**
	 * Prints every remaining row of the result set to standard output as
	 * {@code column=value} pairs, one row per line.
	 *
	 * @param rs the result set to print; {@code null} is ignored
	 */
	public static void printRs(ResultSet rs) {
		if (rs == null) {
			return;
		}
		try {
			StringBuilder sb = new StringBuilder();
			ResultSetMetaData md = rs.getMetaData();
			int cols = md.getColumnCount();
			while (rs.next()) {
				for (int i = 1; i <= cols; i++) {
					sb.append(md.getColumnName(i)).append("=");
					sb.append(rs.getString(i)).append(" ");
				}
				sb.append("\n");
			}
			System.out.println(sb.toString());
		} catch (SQLException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Closes the tracked result set, statement and connection. Each one is
	 * closed independently so a failure on one does not skip the others, and
	 * the references are cleared so the method can be called repeatedly and
	 * {@link #getResult(String)} can reconnect afterwards.
	 */
	public static void close() {
		try {
			if (rs != null) {
				rs.close();
			}
		} catch (Exception e1) {
			e1.printStackTrace();
		} finally {
			rs = null;
		}
		try {
			if (stm != null) {
				stm.close();
			}
		} catch (Exception e1) {
			e1.printStackTrace();
		} finally {
			stm = null;
		}
		try {
			if (con != null) {
				con.close();
			}
		} catch (Exception e1) {
			e1.printStackTrace();
		} finally {
			con = null;
		}
	}

	/**
	 * Demo entry point: dumps the {@code bzpt_book} table to standard output.
	 *
	 * @param args unused
	 * @throws SQLException declared for compatibility with existing callers
	 */
	public static void main(String[] args) throws SQLException {
		String sql = "select * from bzpt_book t";
		try {
			printRs(getResult(sql));
		} finally {
			close();
		}
	}

}

TestQureryDB

先要创建表

表的结构是（原文此处的表结构未能保留；从下面的代码可知，表 bzpt_book 至少包含 id、cbs、bz 三列）

package com.cee.util.db;

import java.io.File;
import java.io.IOException;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;

/**
 * Example that indexes the rows of the {@code bzpt_book} table with Lucene
 * and then searches the resulting index.
 */
public class TestQureryDB {

	/** Directory in which the index built from the database is stored. */
	public final static String indexDir = "d:\\TestLucene\\indexDB";

	/**
	 * Rebuilds the index in {@link #indexDir} from the {@code bzpt_book}
	 * table. The {@code id} column is stored but not indexed; {@code cbs}
	 * and {@code bz} are stored and analyzed.
	 *
	 * @throws CorruptIndexException if the index is corrupt
	 * @throws LockObtainFailedException if the index is locked by another writer
	 * @throws IOException on any other index I/O failure
	 * @throws SQLException if the query fails or a row cannot be read
	 */
	public static void createIndex() throws CorruptIndexException,
			LockObtainFailedException, IOException, SQLException {
		Date start = new Date();

		// Run the query before opening the writer: the writer is opened with
		// create=true, which discards any existing index.
		String sql = "select * from bzpt_book t";
		ResultSet rs = JdbcUtil.getResult(sql);
		if (rs == null) {
			throw new SQLException("Query returned no result set: " + sql);
		}
		try {
			File index = new File(indexDir);
			// mkdirs() also creates missing parent directories.
			index.mkdirs();
			IndexWriter writer = new IndexWriter(FSDirectory.open(index),
					new StandardAnalyzer(Version.LUCENE_CURRENT), true,
					IndexWriter.MaxFieldLength.LIMITED);
			try {
				writer.setUseCompoundFile(false);
				while (rs.next()) {
					Document doc = new Document();
					doc.add(new Field("id", String.valueOf(rs.getString("id")),
							Field.Store.YES, Field.Index.NO));
					doc.add(new Field("cbs", String.valueOf(rs.getString("cbs")),
							Field.Store.YES, Field.Index.ANALYZED));
					doc.add(new Field("bz", String.valueOf(rs.getString("bz")),
							Field.Store.YES, Field.Index.ANALYZED));
					writer.addDocument(doc);
				}
				int numIndexed = writer.maxDoc();
				Date end = new Date();
				// Labelled and separated so the two numbers can be told apart.
				System.out.println("共索引 " + numIndexed + " 条记录，耗时 "
						+ (end.getTime() - start.getTime()) + " 毫秒");
				writer.optimize();
			} finally {
				// Always release the index write lock, even on failure.
				writer.close();
			}
		} finally {
			closeQuietly(rs);
		}
	}

	/**
	 * Closes a result set and the statement that produced it, leaving the
	 * shared connection open. Failures are printed and otherwise ignored.
	 */
	private static void closeQuietly(ResultSet rs) {
		java.sql.Statement statement = null;
		try {
			statement = rs.getStatement();
		} catch (SQLException e) {
			e.printStackTrace();
		}
		try {
			rs.close();
		} catch (SQLException e) {
			e.printStackTrace();
		}
		try {
			if (statement != null) {
				statement.close();
			}
		} catch (SQLException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Searches the {@code cbs} field and returns the stored {@code id}
	 * values of up to 100 hits.
	 *
	 * @param searcher the searcher to query
	 * @param q the query strings, one per searched field
	 * @return the ids of the hits joined by commas, or an empty string when
	 *         nothing matched
	 * @throws IOException if the index cannot be read
	 * @throws ParseException if a query string cannot be parsed
	 */
	public static String search(Searcher searcher, String[] q)
			throws IOException, ParseException {
		Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
		String[] fields = { "cbs" };
		Query query = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, q,
				fields, analyzer);
		TopDocs topDocs = searcher.search(query, 100); // at most 100 hits
		ScoreDoc[] hits = topDocs.scoreDocs;
		System.out.println("共有" + searcher.maxDoc() + "条索引，命中" + hits.length
				+ "条");

		StringBuilder sb = new StringBuilder();
		for (int i = 0; i < hits.length; i++) {
			// Separator goes before every id but the first, so an empty hit
			// list yields "" instead of a failing substring call.
			if (i > 0) {
				sb.append(",");
			}
			Document document = searcher.doc(hits[i].doc);
			sb.append(document.get("id"));
		}
		return sb.toString();
	}

	/**
	 * Rebuilds the index, runs one sample query and prints the matching ids
	 * and the time the search took.
	 *
	 * @param args unused
	 * @throws CorruptIndexException if the index is corrupt
	 * @throws LockObtainFailedException if the index is locked by another writer
	 * @throws IOException on any other index I/O failure
	 * @throws SQLException if reading the database fails
	 * @throws ParseException if the sample query cannot be parsed
	 */
	public static void main(String[] args) throws CorruptIndexException,
			LockObtainFailedException, IOException, SQLException,
			ParseException {
		try {
			createIndex();
		} finally {
			// The database is not needed once the index has been built.
			JdbcUtil.close();
		}
		Searcher searcher = new IndexSearcher(FSDirectory.open(new File(
				indexDir)), true);
		try {
			String[] q = { "中国" };
			long start = new Date().getTime();
			System.out.println(search(searcher, q));
			long end = new Date().getTime();
			System.out.println("花费时间：" + (double) (end - start) / 1000 + "秒");
		} finally {
			searcher.close();
		}
	}

}

查询txt

package com.cee.util.db;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;

/**
 * Example that indexes the {@code .txt} files of one directory with Lucene
 * and then runs a sample query against the resulting index.
 */
public class TestQueryFile {

	/** Directory containing the text files to index. */
	public final static String indexDirSource = "d:\\TestLucene\\fileSource";
	/** Directory in which the generated index is stored. */
	public final static String indexDirObj = "d:\\TestLucene\\fileIndex";

	/**
	 * Builds the index for the {@code .txt} files found directly in
	 * {@link #indexDirSource}. Exits the JVM with status 1 if the source
	 * directory does not exist; I/O failures are printed to standard error.
	 */
	private static void createTextIndex() {

		File fileDir = new File(indexDirSource);
		File indexDir = new File(indexDirObj);
		if (!fileDir.exists()) {
			System.out.println("不存在" + indexDirSource + "目录");
			System.exit(1);
		}
		if (!indexDir.exists()) {
			// mkdirs() also creates missing parent directories.
			indexDir.mkdirs();
		}
		Analyzer luceneAnalyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);

		IndexWriter indexWriter = null;
		long startTime = new Date().getTime();
		boolean indexed = false;
		try {
			indexWriter = new IndexWriter(FSDirectory.open(indexDir),
					luceneAnalyzer, true, IndexWriter.MaxFieldLength.LIMITED);

			// listFiles() returns null when the path is not a readable directory.
			File[] textFiles = fileDir.listFiles();
			if (textFiles == null) {
				throw new IOException("无法读取目录: " + fileDir.getPath());
			}

			// Add one document per text file.
			for (int i = 0; i < textFiles.length; i++) {
				File textFile = textFiles[i];
				if (textFile.isFile() && textFile.getName().endsWith(".txt")) {
					System.out.println("File "
							+ textFile.getCanonicalPath() + "正在被索引....");
					// The source files are read as GBK-encoded text.
					String temp = FileReaderAll(
							textFile.getCanonicalPath(), "GBK");
					System.out.println(temp);
					Document document = new Document();
					Field FieldPath = new Field("path", textFile.getPath(),
							Field.Store.YES, Field.Index.NO);
					Field FieldBody = new Field("body", temp, Field.Store.YES,
							Field.Index.ANALYZED,
							Field.TermVector.WITH_POSITIONS_OFFSETS);
					document.add(FieldPath);
					document.add(FieldBody);
					indexWriter.addDocument(document);
				}
			}
			indexWriter.optimize();
			indexed = true;
		} catch (IOException e) {
			// Also covers CorruptIndexException and LockObtainFailedException,
			// which are IOException subclasses.
			e.printStackTrace();
		} finally {
			// The writer is null when its constructor failed.
			if (indexWriter != null) {
				try {
					indexWriter.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
		if (indexed) {
			long endTime = new Date().getTime();
			System.out.println("这花费了" + (endTime - startTime)
					+ " 毫秒来把文档增加到索引里面去!" + fileDir.getPath());
		}
	}

	/**
	 * Reads a whole text file into a string.
	 *
	 * @param FileName path of the file to read
	 * @param charset name of the character set used to decode the file
	 * @return the file content, each line terminated by {@code '\n'} so that
	 *         words on adjacent lines are not fused together
	 * @throws IOException if the file cannot be opened or read
	 */
	private static String FileReaderAll(String FileName, String charset)
			throws IOException {
		BufferedReader reader = new BufferedReader(new InputStreamReader(
				new FileInputStream(FileName), charset));
		try {
			StringBuilder content = new StringBuilder();
			String line;
			while ((line = reader.readLine()) != null) {
				content.append(line).append('\n');
			}
			return content.toString();
		} finally {
			// Close the file even when reading fails.
			reader.close();
		}
	}

	/**
	 * Rebuilds the index and prints how many documents match a sample query
	 * on the {@code body} field.
	 *
	 * @param args unused
	 * @throws Exception if the index cannot be read or the query cannot be
	 *         parsed
	 */
	public static void main(String[] args) throws Exception {
		createTextIndex();
		String queryString = "多伦多";
		IndexSearcher searcher = new IndexSearcher(FSDirectory.open(new File(
				indexDirObj)), true); // read-only
		try {
			String fields = "body";
			QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, fields,
					new StandardAnalyzer(Version.LUCENE_CURRENT));
			// A ParseException propagates instead of leaving the query null.
			Query query = qp.parse(queryString);
			TopDocs topDocs = searcher.search(query, 100); // at most 100 hits
			ScoreDoc[] hits = topDocs.scoreDocs;
			System.out.println("共有" + searcher.maxDoc() + "条索引，命中"
					+ hits.length + "条");
		} finally {
			searcher.close();
		}
	}
}

分享到：

JDK里的设计模式 | 真正属于主题爬虫的圈子

2010-12-15 22:02
浏览 940
评论(0)
分类:编程语言
查看更多

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论