`
wbj0110
  • 浏览: 1551769 次
  • 性别: Icon_minigender_1
  • 来自: 上海
文章分类
社区版块
存档分类
最新评论

开源搜索引擎Solr的快速搭建及集成到企业门户最佳实施方案

阅读更多
笔者查阅了solr官方相关资料,经过两周的研究,实现了毫秒级百万数据的搜索引擎的搭建,并将其引入到企业门户。现将实施心得和步骤分享一下。

1.      jdk1.6

安装jdk1.6到系统默认目录下X:\qc\Java目录下(注意要点:配置好环境变量)。

2.      tomcat安装

a)      安装tomcatX:\qc\tomcat6目录下。

b)     配置server.xml

<Connector port="80" protocol="HTTP/1.1" connectionTimeout="20000" redirectPort="8443" URIEncoding="UTF-8" />

3.      下载并部署solr1.4.1

a)      拷贝本地solr(注意该solr文件夹并非war包而是solr连接数据库的配置包)目录到目标服务器X:\qc目录下。

l  data-config.xml

<dataConfig>

    <dataSource name="itwhhsol" type="JdbcDataSource" driver="com.mysql.jdbc.Driver" url="jdbc:mysql://127.0.0.1:3306/itwhhsol" user="root" password="123ttt"/>

<!--在这里配置要连接的数据库-->

 

    <document name="article">

            <entity name="article" dataSource="itwhhsol"

                  query="select * from article" pk="article_id"

            deltaQuery="select * from article where update_time > '${dataimporter.last_index_time}'">

                  <field column="article_id" name="id" />

            </entity>

    </document>

<!--配置要索引的表-->

</dataConfig>

l  solrconfig.xml

<dataDir>${solr.data.dir:f:/qc/solr/data}</dataDir>

<!--配置搜索引擎索引后的数据-->

<!--add for dih-->

    <requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler">

           <lst name="defaults">

                  <str name="config">f:/qc/solr/conf/data-config.xml</str>

           </lst>

    </requestHandler>

l  schema.xml

<field name="id" type="string" indexed="true" stored="true" required="true" />

<field name="user_id" type="string" indexed="true" stored="true"/>

<field name="article_id" type="string" indexed="true" stored="true"/>

<field name="update_time" type="date" indexed="true" stored="true"/>

<field name="url" type="string" indexed="true" stored="true"/>

<field name="title" type="textMaxWord" indexed="true" stored="true"/>

<field name="content" type="textMaxWord" indexed="true" stored="true"/>

<field name="in_out_flag" type="string" indexed="true" stored="true"/>

<field name="article_state" type="string" indexed="true" stored="true"/>

<field name="article_click" type="int" indexed="true" stored="true" />

<field name="clerk_id" type="string" indexed="true" stored="true"/>

<field name="sort_id" type="string" indexed="true" stored="true"/>

<field name="keyword" type="textMaxWord" indexed="true" stored="true"/>

<field name="clerk_suggest" type="textMaxWord" indexed="true" stored="true"/>

<copyField source="title" dest="text"/>

<copyField source="content" dest="text"/>

<copyField source="keyword" dest="text"/>

<copyField source="clerk_suggest" dest="text"/>

b)     部署war包solr到tomcat下

X:\qc\tomcat6\webapps下放置apache-solr-1.4.0\example\webapps\solr.war

c)      配置solr.xml

l  创建目录X:\qc\Tomcat6\conf\Catalina\localhost

l  增加solr.xml内容如下:

<?xml version="1.0" encoding="UTF-8"?>

<Context docBase="X:/qc/Tomcat6/webapps/solr.war" debug="0" crossContext="true" >

<Environment name="solr/home" type="java.lang.String" value="X:/qc/solr/" override="true" />

</Context>

4.      启动f:\qc\tomcat6\bin\tomcat6.exe(dos控制台)(注意这里tomcat安装时一定要改成tomcat6或者tomcat不能有横线或者空格否则可能导致solr无法正常索引哦)

5.      索引全部数据:http://127.0.0.1/solr/dataimport?command=full-import(可能需要重新刷一次窗口才能真正的索引全部数据哦)

6.测试url:http://127.0.0.1/solr/admin/如果可以看到solr管理界面说明搭建成功了

7.把如下代码copy到本地另存成jsp放到你的工程里就可以实现毫秒级搜索百万数据量啦:

<%@ page language="java" pageEncoding="UTF-8"%>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html:html locale="true">
<STYLE>BODY {
 FONT-FAMILY: arial
}
TD {
 FONT-FAMILY: arial
}
.p1 {
 FONT-FAMILY: arial
}
.p2 {
 FONT-FAMILY: arial
}
.i {
 FONT-FAMILY: arial
}
BODY {
 PADDING-RIGHT: 0px; PADDING-LEFT: 0px; PADDING-BOTTOM: 0px; MARGIN: 0px; COLOR: #000; PADDING-TOP: 6px; POSITION: relative; BACKGROUND-COLOR: #fff
}
/* Form inputs: no vertical padding, border-box sizing.
   Vendor-prefixed properties need the leading "-"; without it they are
   unknown properties and are ignored. */
INPUT {
 PADDING-BOTTOM: 0px; PADDING-TOP: 0px; -moz-box-sizing: border-box; -webkit-box-sizing: border-box; box-sizing: border-box
}
TABLE {
 BORDER-TOP-WIDTH: 0px; BORDER-LEFT-WIDTH: 0px; BORDER-BOTTOM-WIDTH: 0px; BORDER-RIGHT-WIDTH: 0px
}
TD {
 FONT-SIZE: 9pt; LINE-HEIGHT: 18px
}
EM {
 COLOR: #cc0000; FONT-STYLE: normal
}
A EM {
 TEXT-DECORATION: underline
}
.f14 {
 FONT-SIZE: 14px
}
.f10 {
 FONT-SIZE: 10.5pt
}
.f16 {
 FONT-SIZE: 16px; FONT-FAMILY: Arial
}
.c {
 COLOR: #7777cc
}
.p1 {
 MARGIN-LEFT: -12pt; LINE-HEIGHT: 120%
}
.p2 {
 MARGIN-LEFT: -12pt; WIDTH: 100%; LINE-HEIGHT: 120%
}
.i {
 FONT-SIZE: 16px; LINE-HEIGHT: 1.4em; HEIGHT: 24px
}
.t {
 COLOR: #0000cc; TEXT-DECORATION: none
}
A.t:hover {
 TEXT-DECORATION: underline
}
.p {
 PADDING-LEFT: 18px; FONT-SIZE: 14px; MARGIN: 0px 0px 20px; WORD-SPACING: 4px
}
.f {
 PADDING-LEFT: 15px; FONT-SIZE: 100%; WIDTH: 33.7em; WORD-BREAK: break-all; LINE-HEIGHT: 120%; WORD-WRAP: break-word
}
.h {
 MARGIN-LEFT: 8px; WIDTH: 100%
}
.s {
 PADDING-LEFT: 10px; WIDTH: 8%; HEIGHT: 25px
}
.m {
 FONT-SIZE: 100%; COLOR: #666
}
A.m:link {
 FONT-SIZE: 100%; COLOR: #666
}
A.m:visited {
 COLOR: #660066
}
.g {
 FONT-SIZE: 12px; COLOR: #008000
}
/* Fixed-width clickable cell. "pointer" is the standard cursor keyword
   (the legacy "hand" value is IE-only) and matches #tb_mr and #sx. */
.r {
 WIDTH: 238px; CURSOR: pointer; WORD-BREAK: break-all
}
.bi {
 MARGIN-BOTTOM: 12px; HEIGHT: 20px; BACKGROUND-COLOR: #d9e1f7
}
.pl {
 PADDING-RIGHT: 2px; PADDING-LEFT: 3px; FONT-SIZE: 14px; HEIGHT: 8px
}
.Tit {
 FONT-SIZE: 14px; Z-INDEX: 200; POSITION: relative; HEIGHT: 21px
}
.Tit A {
 COLOR: #0000cc
}
.fB {
 FONT-WEIGHT: bold
}
.mo {
 FONT-SIZE: 100%; COLOR: #666666; LINE-HEIGHT: 10px
}
A.mo:link {
 FONT-SIZE: 100%; COLOR: #666666; LINE-HEIGHT: 10px
}
A.mo:visited {
 FONT-SIZE: 100%; COLOR: #666666; LINE-HEIGHT: 10px
}
.htb {
 MARGIN-BOTTOM: 5px
}
/* Footer bar. The font declarations used to be repeated in two further
   identical #ft rules; they are merged here with no change in computed style. */
#ft {
 CLEAR: both; BACKGROUND: #e6e6e6; LINE-HEIGHT: 20px; TEXT-ALIGN: center; FONT-SIZE: 12px; COLOR: #77c; FONT-FAMILY: Arial
}
#ft SPAN {
 COLOR: #666
}
FORM {
 Z-INDEX: 9; MARGIN: 0px; POSITION: relative
}
.jc A {
 COLOR: #cc0000
}
.btn {
 FONT-SIZE: 14px; MARGIN-LEFT: 3px; WIDTH: 5.6em; PADDING-TOP: 2px; HEIGHT: 2em
}
.i {
 VERTICAL-ALIGN: baseline
}
.btn {
 VERTICAL-ALIGN: baseline
}
UNKNOWN {
 TEXT-DECORATION: underline
}
#tb_mr {
 Z-INDEX: 200; CURSOR: pointer; COLOR: #0000cc; POSITION: relative
}
#tb_mr B {
 FONT-WEIGHT: normal; TEXT-DECORATION: underline
}
#tb_mr SMALL {
 FONT-SIZE: 11px
}
#more {
 BORDER-RIGHT: #9a99ff 1px solid; BORDER-TOP: #9a99ff 1px solid; DISPLAY: none; FONT-SIZE: 14px; Z-INDEX: 200; BACKGROUND: #fff; LEFT: 314px; OVERFLOW: hidden; BORDER-LEFT: #9a99ff 1px solid; WIDTH: 58px; BORDER-BOTTOM: #9a99ff 1px solid; POSITION: absolute; TOP: 22px; HEIGHT: 100px; outline: none
}
#more A {
 PADDING-RIGHT: 0px; DISPLAY: block; PADDING-LEFT: 7px; PADDING-BOTTOM: 0px; WIDTH: 4em; COLOR: #0001cf; LINE-HEIGHT: 24px; PADDING-TOP: 0px; HEIGHT: 25%; TEXT-DECORATION: none
}
#more A SPAN {
 FONT-FAMILY: "宋体"
}
#more A:hover {
 BACKGROUND: #d9e1f6
}
#more DIV {
 BACKGROUND: #ccccff; MARGIN: 0px 3px; OVERFLOW: hidden; HEIGHT: 1px
}
#out {
 MARGIN-LEFT: 880px; ZOOM: 1
}
#in {
 FLOAT: left; MARGIN-LEFT: -880px; POSITION: relative
}
#wrapper {
 ZOOM: 1; min-width: 880px
}
#sx {
 CURSOR: pointer; COLOR: #00c; TEXT-DECORATION: underline
}
#u {
 FONT-SIZE: 12px; Z-INDEX: 210; RIGHT: 10px; MARGIN: 0px; WHITE-SPACE: nowrap; POSITION: absolute; TOP: 0px; TEXT-ALIGN: right
}
.result {
 TABLE-LAYOUT: fixed; WIDTH: 34em
}
</STYLE>
<head><html:base />
<title>搜索引擎</title>
 <!-- Cache-control directives are HTTP-header equivalents, so they use http-equiv. -->
 <meta http-equiv="pragma" content="no-cache">
 <meta http-equiv="cache-control" content="no-cache">
 <meta http-equiv="expires" content="0">
 <!-- keywords/description are document metadata, not HTTP headers: they must use name. -->
 <meta name="keywords" content="keyword1,keyword2,keyword3">
 <meta name="description" content="This is my page">
 <!-- jQuery and the pagination plugin used by the inline script ($.pagination). -->
 <script type="text/javascript" src="/OnLHS/jsp/lib/jquery.js"></script>
 <script type="text/javascript" src="/OnLHS/jsp/lib/pagination.js"></script>
 <link href="/OnLHS/jsp/lib/pagination.css" rel="stylesheet" type="text/css">
  </head>
  <body onload="xmlhttpPost('/solr/select')">
  <div class=wa_mode id=s_nav  align="center"  style="width:100%;overflow:hidden;white-space:nowrap;text-overflow:ellipsis;" >
  <span class="STYLE1">&nbsp;<img src="/OnLHS/jsp/img/mobile.jpg" height="50"/><img src="/OnLHS/jsp/img/Logom1.png"/></span>
</div>
 <hr noshade="noshade"  color="#6699FF">
  <form action="show.jsp" name="f1" method="get" accept-charset="UTF-8" onSubmit="xmlhttpPost('/solr/select'); return false;">&nbsp;<h5>搜索内容:<input type="text" name="q" size="50" value="<%=request.getAttribute("key") %>">
      <input name="start" type="hidden" value="0">
      <input name="rows" type="hidden" value="10">
      <input name="indent" type="hidden" value="on">
    <input name="wt" type="hidden" value="">
      <input type="button" value=" 搜 索 " onClick="xmlhttpPost('/solr/select');">
      <input type="hidden" value=" get json " onClick="document.forms['f1'].wt.value='json';document.forms['f1'].submit();">
      <input type="hidden" value=" get xml " onClick="document.forms['f1'].wt.value='';document.forms['f1'].submit();">
      </h5>
  </form>
  <p>
  
    <div id="header" style="background-color: #D9E1F6; height: 15px;" align="left"></div>
    <div id="response">
     
    </div>
 <table>
 <tr>
 <td width="10"></td>
 <td>
  
     <table id="docs" class="tab" cellspacing="1">
            <tr>
            <td></td>
            <td></td>
            <td></td>
            <td></td>
            </tr>
        </table>
    
  </td> 
  </tr>
  <tr>
  <td colspan="2" align="center"><div align="center" id="pages"></div></td>
  </tr>
 </table>
   
   <script type="text/javascript">
  
   function xmlhttpPost(strURL) {
   if(document.forms['f1'].q.value.length==0||document.forms['f1'].q.value.replace(/(^\s*)|(\s*$)/g,"")=="")
    {
     alert("请输入您要检索的内容...");
        return false;
    }
    var xmlHttpReq = false;
    var self = this;
    if (window.XMLHttpRequest) { // Mozilla/Safari
        self.xmlHttpReq = new XMLHttpRequest();
    }
    else if (window.ActiveXObject) { // IE
        self.xmlHttpReq = new ActiveXObject("Microsoft.XMLHTTP");
    }
   
    var params = getstandardargs().concat(getquerystring());
    var strData = params.join('&');
   
    var header = document.getElementById("response");
    //header.innerHTML = strURL '?' strData;

    self.xmlHttpReq.open('get', strURL '?' strData '&time=' new Date().getTime(), true);
    self.xmlHttpReq.setRequestHeader('Content-Type', 'application/x-www-form-urlencoded');
    self.xmlHttpReq.onreadystatechange = function() {
        if (self.xmlHttpReq.readyState == 4) {
            updatepage(self.xmlHttpReq.responseText);
        }
    }
    self.xmlHttpReq.send(null);
}

/**
 * Returns the fixed query-string parameters sent with every search request.
 *
 * @returns {string[]} A new array of "name=value" strings on each call.
 */
function getstandardargs() {
    return [
        'wt=json',
        'indent=on',
        'hl=true',
        'hl.fl=',
        'fl=*,score',
        'start=0',
        'rows=100'
    ];
}
/**
 * Builds the "q=..." parameter from the text in the search box of form "f1".
 *
 * The value is encoded with encodeURIComponent so that characters such as
 * "&", "=", "#" and "+" in the user's query cannot break or extend the
 * query string (encodeURI leaves those characters untouched).
 *
 * @returns {string} The encoded "q" parameter.
 */
function getquerystring() {
  var form = document.forms['f1'];
  var query = form.q.value;
  return 'q=' + encodeURIComponent(query);
}

/**
 * Converts the raw response text into an object and hands it to parse().
 *
 * @param {string} str  Response body of the search request (JSON text).
 */
function updatepage(str){
  // Wrapping the text in parentheses makes eval treat it as an expression
  // rather than a block statement.
  // NOTE(review): eval() executes whatever the server returned. Prefer
  // JSON.parse(str) once it is confirmed that every response is strict JSON.
  var rsp = eval("(" + str + ")");
  parse(rsp);
}

/**
 * Renders the summary line (query, hit count, elapsed time) into the
 * "header" element and starts pagination over the returned documents.
 *
 * @param {Object} j  Parsed response; reads j.responseHeader.params.q,
 *                    j.responseHeader.QTime, j.response.numFound and
 *                    j.response.docs.
 */
function parse(j) {
    // The echoed query comes from user input and is written through
    // innerHTML, so it has to be escaped first.
    function escapeHtml(text) {
        return String(text)
            .replace(/&/g, "&amp;")
            .replace(/</g, "&lt;")
            .replace(/>/g, "&gt;")
            .replace(/"/g, "&quot;");
    }

    var header = document.getElementById("header");
    var rh = j.responseHeader;
    var header_str = " 搜索关键字: \"" + escapeHtml(rh.params.q) + "\", 共为您找到: " + j.response.numFound + "条相关的数据, 耗时: " + rh.QTime + "ms";
    header.innerHTML = "<font color=#000000>" + "&nbsp;&nbsp;" + header_str + "</font>";

    var docs = j.response.docs;
    // Start at page 1 with 12 entries per page; test() is the paging callback.
    $.pagination('pages', 1, 12, docs.length, test, docs);
}
/**
 * Paging callback: re-invokes $.pagination for the 'pages' container with
 * the current page, page size, total record count and documents carried
 * in the event's data object.
 *
 * @param {Object} e  Event whose data holds current, pagination and docs.
 */
function test(e) {
  var state = e.data;
  var currentPage = state.current;
  var pager = state.pagination;
  $.pagination('pages', currentPage, pager.pageSize, pager.totalRecord, test, state.docs);
}
   </script>
  </body>
 
</html:html>

 引用:http://blog.chinaunix.net/uid-25723371-id-3221379.html
分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics