- 浏览: 2493028 次
- 性别:
- 来自: 成都
文章分类
最新评论
-
nation:
你好,在部署Mesos+Spark的运行环境时,出现一个现象, ...
Spark(4)Deal with Mesos -
sillycat:
AMAZON Relatedhttps://www.godad ...
AMAZON API Gateway(2)Client Side SSL with NGINX -
sillycat:
sudo usermod -aG docker ec2-use ...
Docker and VirtualBox(1)Set up Shared Disk for Virtual Box -
sillycat:
Every Half an Hour30 * * * * /u ...
Build Home NAS(3)Data Redundancy -
sillycat:
3 List the Cron Job I Have>c ...
Build Home NAS(3)Data Redundancy
SOLR Performance and SolrJ(2)Compress and Post File
Then the idea is to compress the post data first and then send that to the SOLR indexer, or we can set up SOLR Cloud with sharding to improve the bandwidth of the indexer machines.
Finally, we decide to generate the XML file, compress it, and SCP the compressed file to the indexer machine. On the indexer machine, we can have a monitor that unzips the file and posts it to SOLR on localhost. That way the post goes over localhost instead of consuming network bandwidth.
I am using XMLWriter to generate the SOLR XML, similar code here
// Wire the writer up from the IoC container: keep the container handle and
// create the XMLWriter instance used to stream the SOLR update document.
// NOTE(review): $logger and $config are fetched but unused in this snippet —
// presumably consumed by methods not shown here; confirm against the full class.
public function __construct($ioc)
{
    $this->ioc = $ioc;
    $logger = $ioc->getService("logger");
    $config = $ioc->getService("config");
    $this->xmlWriter = new \XMLWriter();
}
// Open the target file for streaming output and emit the root <update>
// element of a SOLR update document. openURI() must run before any write
// call, so the statement order here is fixed.
// NOTE(review): no endElement()/flush here — presumably a matching addEnd()
// elsewhere closes <update>; confirm against the full class.
public function addStart($file){
$this->xmlWriter->openURI($file);
$this->xmlWriter->setIndent(true);
$this->xmlWriter->startElement('update');
}
// Wire the writer up from the IoC container: keep the container handle and
// create the XMLWriter instance used to stream the SOLR update document.
// NOTE(review): $logger and $config are fetched but unused in this snippet —
// presumably consumed by methods not shown here; confirm against the full class.
public function __construct($ioc)
{
    $this->ioc = $ioc;
    $logger = $ioc->getService("logger");
    $config = $ioc->getService("config");
    $this->xmlWriter = new \XMLWriter();
}
// Open the target file for streaming output and emit the root <update>
// element of a SOLR update document. openURI() must run before any write
// call, so the statement order here is fixed.
// NOTE(review): no endElement()/flush here — presumably a matching addEnd()
// elsewhere closes <update>; confirm against the full class.
public function addStart($file){
$this->xmlWriter->openURI($file);
$this->xmlWriter->setIndent(true);
$this->xmlWriter->startElement('update');
}
Zip the file and SCP to the target
// Compress the generated XML (gzip -f replaces $file_path with $file_path.gz),
// push it to the indexer host over SCP, then drop the local archive.
// escapeshellarg() keeps shell metacharacters in the path/host from breaking
// (or injecting into) the command line.
system("gzip -f " . escapeshellarg($file_path));
$scp_command = "scp -i /share/ec2.id -o StrictHostKeyChecking=no "
    . escapeshellarg("{$file_path}.gz")
    . " " . escapeshellarg("ec2-user@{$ip}:" . $this->XML_FOLDER);
system($scp_command, $scp_status);
// Only remove the local archive once the transfer succeeded; deleting it
// after a failed scp would lose the only copy of this delta file.
if (0 === $scp_status) {
    unlink($file_path . ".gz");
}
On the target machine, we will watch the directory and exec the post curl request to the SOLR server. PHP is really easy in this situation.
// Collect pending delta archives, oldest first (ls -t sorts newest-first,
// -r reverses; --time=ctime sorts on change time). stderr is discarded so
// an empty directory yields an empty array rather than an error line.
$delta_files = array();
exec('ls -tr --time=ctime /mnt/ad_feed/*.gz 2>/dev/null', $delta_files);
$delta_count = count($delta_files);
if(DEBUG) echo "delta count: ".$delta_count."\n";
// NOTE(review): `continue` implies this snippet runs inside a polling loop
// not shown here — nothing to do this round if no files have arrived.
if($delta_count == 0) continue;
Check how many processes are working
// Count curl uploads currently in flight by grepping the process table for
// the exact command line the poster script builds; `grep -v grep` drops the
// grep process itself from the count.
// NOTE(review): this match is coupled to the literal curl command string in
// delta_curl.php — changing that command's quoting or URL breaks this count.
$curl_processes = array();
exec('ps -ef | grep "curl --fail http://localhost:8983/job/update -d @/mnt/ad_feed/" | grep -v grep', $curl_processes);
$curl_count = count($curl_processes);
if(DEBUG) echo "curl count: ".$curl_count."\n";
// Throttle: skip spawning more posters while MAX_PROCS are still running.
if($curl_count >= MAX_PROCS) continue;
Execute the command in the background; then we can use exec to run multiple processes
// Launch the per-file poster detached (trailing &, output discarded) so
// exec() returns immediately and several uploads can run in parallel.
// escapeshellarg() keeps file names with spaces/metacharacters intact and
// blocks shell injection via the file name; the spawned script still
// receives the raw path as $argv[1].
$curl_command = "php delta_curl.php " . escapeshellarg($cur_file) . " > /dev/null 2>&1 &"; //parallel processes
exec($curl_command);
Post XML file
// Post the XML file ($argv[1]) to the local SOLR update handler. --fail
// makes curl exit non-zero on HTTP errors, which surfaces in $status.
// NOTE(review): $argv[1] is interpolated unquoted — quoting it would be
// safer, but the command string must stay in sync with the ps|grep pattern
// the watcher uses to count running curl processes; change both together.
exec("curl --fail http://localhost:8983/job/update -d @{$argv[1]} -H Content-type:application/xml", $output, $status);
if(0 != $status)
{
send_delta_alert($argv[1]);
}
// The file is removed even after a failed post — presumably the alert is
// the retry path; confirm this is intentional.
unlink($argv[1]);
The sample format of the XML will be as follow:
<update>
<delete>
<id>2136083108</id>
<id>2136083113</id>
<id>2136083114</id>
</delete>
<add>
<doc>
<field name="id">2136083xx</field>
<field name="customer_id">2xx</field>
<field name="pool_id">20xx</field>
<field name="source_id">23xx</field>
<field name="campaign_id">3xxx</field>
<field name="segment_id">0</field>
<field name="job_reference">468-1239-xxxx4</field>
<field name="title"><![CDATA[CDL-A xxxxx ]]></field>
<field name="url"><![CDATA[http://www.xxxxxx]]></field>
<field name="company_id">11xxx7</field>
<field name="company">Hub xxxxx</field>
<field name="title_com">CDL-xxxx</field>
<field name="campaign_com">3396xxx</field>
<field name="zipcode">3xxxx</field>
<field name="cities">Atlanta,GA</field>
<field name="jlocation">33.8444,-84.4741</field>
<field name="state_id">11</field>
<field name="cpc">125</field>
<field name="reg_cpc">130</field>
<field name="qq_multiplier">0</field>
<field name="j2c_apply">0</field>
<field name="created">2016-09-02T06:02:42Z</field>
<field name="posted">2016-09-02T06:02:42Z</field>
<field name="experience">2</field>
<field name="salary">150</field>
<field name="education">2</field>
<field name="jobtype">1</field>
<field name="quality_score">60</field>
<field name="boost_factor">20.81</field>
<field name="industry">20</field>
<field name="industries">20</field>
<field name="paused">false</field>
<field name="email"></field>
<field name="srcseg_id">23xx</field>
<field name="srccamp_id">23xxx</field>
<field name="top_spot_type">7</field>
<field name="top_spot_industries">20</field>
<field name="is_ad">2</field>
<field name="daily_capped">0</field>
<field name="mobile_friendly">1</field>
<field name="excluded_company">false</field>
</doc>
</add>
</update>
References:
Then the idea is to compress the post data first and then send that to the SOLR indexer, or we can set up SOLR Cloud with sharding to improve the bandwidth of the indexer machines.
Finally, we decide to generate the XML file, compress it, and SCP the compressed file to the indexer machine. On the indexer machine, we can have a monitor that unzips the file and posts it to SOLR on localhost. That way the post goes over localhost instead of consuming network bandwidth.
I am using XMLWriter to generate the SOLR XML, similar code here
// Wire the writer up from the IoC container: keep the container handle and
// create the XMLWriter instance used to stream the SOLR update document.
// NOTE(review): $logger and $config are fetched but unused in this snippet —
// presumably consumed by methods not shown here; confirm against the full class.
public function __construct($ioc)
{
    $this->ioc = $ioc;
    $logger = $ioc->getService("logger");
    $config = $ioc->getService("config");
    $this->xmlWriter = new \XMLWriter();
}
// Open the target file for streaming output and emit the root <update>
// element of a SOLR update document. openURI() must run before any write
// call, so the statement order here is fixed.
// NOTE(review): no endElement()/flush here — presumably a matching addEnd()
// elsewhere closes <update>; confirm against the full class.
public function addStart($file){
$this->xmlWriter->openURI($file);
$this->xmlWriter->setIndent(true);
$this->xmlWriter->startElement('update');
}
// Wire the writer up from the IoC container: keep the container handle and
// create the XMLWriter instance used to stream the SOLR update document.
// NOTE(review): $logger and $config are fetched but unused in this snippet —
// presumably consumed by methods not shown here; confirm against the full class.
public function __construct($ioc)
{
    $this->ioc = $ioc;
    $logger = $ioc->getService("logger");
    $config = $ioc->getService("config");
    $this->xmlWriter = new \XMLWriter();
}
// Open the target file for streaming output and emit the root <update>
// element of a SOLR update document. openURI() must run before any write
// call, so the statement order here is fixed.
// NOTE(review): no endElement()/flush here — presumably a matching addEnd()
// elsewhere closes <update>; confirm against the full class.
public function addStart($file){
$this->xmlWriter->openURI($file);
$this->xmlWriter->setIndent(true);
$this->xmlWriter->startElement('update');
}
Zip the file and SCP to the target
// Compress the generated XML (gzip -f replaces $file_path with $file_path.gz),
// push it to the indexer host over SCP, then drop the local archive.
// escapeshellarg() keeps shell metacharacters in the path/host from breaking
// (or injecting into) the command line.
system("gzip -f " . escapeshellarg($file_path));
$scp_command = "scp -i /share/ec2.id -o StrictHostKeyChecking=no "
    . escapeshellarg("{$file_path}.gz")
    . " " . escapeshellarg("ec2-user@{$ip}:" . $this->XML_FOLDER);
system($scp_command, $scp_status);
// Only remove the local archive once the transfer succeeded; deleting it
// after a failed scp would lose the only copy of this delta file.
if (0 === $scp_status) {
    unlink($file_path . ".gz");
}
On the target machine, we will watch the directory and exec the post curl request to the SOLR server. PHP is really easy in this situation.
// Collect pending delta archives, oldest first (ls -t sorts newest-first,
// -r reverses; --time=ctime sorts on change time). stderr is discarded so
// an empty directory yields an empty array rather than an error line.
$delta_files = array();
exec('ls -tr --time=ctime /mnt/ad_feed/*.gz 2>/dev/null', $delta_files);
$delta_count = count($delta_files);
if(DEBUG) echo "delta count: ".$delta_count."\n";
// NOTE(review): `continue` implies this snippet runs inside a polling loop
// not shown here — nothing to do this round if no files have arrived.
if($delta_count == 0) continue;
Check how many processes are working
// Count curl uploads currently in flight by grepping the process table for
// the exact command line the poster script builds; `grep -v grep` drops the
// grep process itself from the count.
// NOTE(review): this match is coupled to the literal curl command string in
// delta_curl.php — changing that command's quoting or URL breaks this count.
$curl_processes = array();
exec('ps -ef | grep "curl --fail http://localhost:8983/job/update -d @/mnt/ad_feed/" | grep -v grep', $curl_processes);
$curl_count = count($curl_processes);
if(DEBUG) echo "curl count: ".$curl_count."\n";
// Throttle: skip spawning more posters while MAX_PROCS are still running.
if($curl_count >= MAX_PROCS) continue;
Execute the command in the background; then we can use exec to run multiple processes
// Launch the per-file poster detached (trailing &, output discarded) so
// exec() returns immediately and several uploads can run in parallel.
// escapeshellarg() keeps file names with spaces/metacharacters intact and
// blocks shell injection via the file name; the spawned script still
// receives the raw path as $argv[1].
$curl_command = "php delta_curl.php " . escapeshellarg($cur_file) . " > /dev/null 2>&1 &"; //parallel processes
exec($curl_command);
Post XML file
// Post the XML file ($argv[1]) to the local SOLR update handler. --fail
// makes curl exit non-zero on HTTP errors, which surfaces in $status.
// NOTE(review): $argv[1] is interpolated unquoted — quoting it would be
// safer, but the command string must stay in sync with the ps|grep pattern
// the watcher uses to count running curl processes; change both together.
exec("curl --fail http://localhost:8983/job/update -d @{$argv[1]} -H Content-type:application/xml", $output, $status);
if(0 != $status)
{
send_delta_alert($argv[1]);
}
// The file is removed even after a failed post — presumably the alert is
// the retry path; confirm this is intentional.
unlink($argv[1]);
The sample format of the XML will be as follow:
<update>
<delete>
<id>2136083108</id>
<id>2136083113</id>
<id>2136083114</id>
</delete>
<add>
<doc>
<field name="id">2136083xx</field>
<field name="customer_id">2xx</field>
<field name="pool_id">20xx</field>
<field name="source_id">23xx</field>
<field name="campaign_id">3xxx</field>
<field name="segment_id">0</field>
<field name="job_reference">468-1239-xxxx4</field>
<field name="title"><![CDATA[CDL-A xxxxx ]]></field>
<field name="url"><![CDATA[http://www.xxxxxx]]></field>
<field name="company_id">11xxx7</field>
<field name="company">Hub xxxxx</field>
<field name="title_com">CDL-xxxx</field>
<field name="campaign_com">3396xxx</field>
<field name="zipcode">3xxxx</field>
<field name="cities">Atlanta,GA</field>
<field name="jlocation">33.8444,-84.4741</field>
<field name="state_id">11</field>
<field name="cpc">125</field>
<field name="reg_cpc">130</field>
<field name="qq_multiplier">0</field>
<field name="j2c_apply">0</field>
<field name="created">2016-09-02T06:02:42Z</field>
<field name="posted">2016-09-02T06:02:42Z</field>
<field name="experience">2</field>
<field name="salary">150</field>
<field name="education">2</field>
<field name="jobtype">1</field>
<field name="quality_score">60</field>
<field name="boost_factor">20.81</field>
<field name="industry">20</field>
<field name="industries">20</field>
<field name="paused">false</field>
<field name="email"></field>
<field name="srcseg_id">23xx</field>
<field name="srccamp_id">23xxx</field>
<field name="top_spot_type">7</field>
<field name="top_spot_industries">20</field>
<field name="is_ad">2</field>
<field name="daily_capped">0</field>
<field name="mobile_friendly">1</field>
<field name="excluded_company">false</field>
</doc>
</add>
</update>
References:
发表评论
-
Stop Update Here
2020-04-28 09:00 270I will stop update here, and mo ... -
NodeJS12 and Zlib
2020-04-01 07:44 436NodeJS12 and Zlib It works as ... -
Docker Swarm 2020(2)Docker Swarm and Portainer
2020-03-31 23:18 319Docker Swarm 2020(2)Docker Swar ... -
Docker Swarm 2020(1)Simply Install and Use Swarm
2020-03-31 07:58 331Docker Swarm 2020(1)Simply Inst ... -
Traefik 2020(1)Introduction and Installation
2020-03-29 13:52 297Traefik 2020(1)Introduction and ... -
Portainer 2020(4)Deploy Nginx and Others
2020-03-20 12:06 383Portainer 2020(4)Deploy Nginx a ... -
Private Registry 2020(1)No auth in registry Nginx AUTH for UI
2020-03-18 00:56 382Private Registry 2020(1)No auth ... -
Docker Compose 2020(1)Installation and Basic
2020-03-15 08:10 337Docker Compose 2020(1)Installat ... -
VPN Server 2020(2)Docker on CentOS in Ubuntu
2020-03-02 08:04 405VPN Server 2020(2)Docker on Cen ... -
Buffer in NodeJS 12 and NodeJS 8
2020-02-25 06:43 342Buffer in NodeJS 12 and NodeJS ... -
NodeJS ENV Similar to JENV and PyENV
2020-02-25 05:14 424NodeJS ENV Similar to JENV and ... -
Prometheus HA 2020(3)AlertManager Cluster
2020-02-24 01:47 370Prometheus HA 2020(3)AlertManag ... -
Serverless with NodeJS and TencentCloud 2020(5)CRON and Settings
2020-02-24 01:46 302Serverless with NodeJS and Tenc ... -
GraphQL 2019(3)Connect to MySQL
2020-02-24 01:48 214GraphQL 2019(3)Connect to MySQL ... -
GraphQL 2019(2)GraphQL and Deploy to Tencent Cloud
2020-02-24 01:48 400GraphQL 2019(2)GraphQL and Depl ... -
GraphQL 2019(1)Apollo Basic
2020-02-19 01:36 284GraphQL 2019(1)Apollo Basic Cl ... -
Serverless with NodeJS and TencentCloud 2020(4)Multiple Handlers and Running wit
2020-02-19 01:19 274Serverless with NodeJS and Tenc ... -
Serverless with NodeJS and TencentCloud 2020(3)Build Tree and Traverse Tree
2020-02-19 01:19 269Serverless with NodeJS and Tenc ... -
Serverless with NodeJS and TencentCloud 2020(2)Trigger SCF in SCF
2020-02-19 01:18 258Serverless with NodeJS and Tenc ... -
Serverless with NodeJS and TencentCloud 2020(1)Running with Component
2020-02-19 01:17 244Serverless with NodeJS and Tenc ...
相关推荐
Solr是一个独立的企业级搜索应用服务器,它对外提供类似于Web-service的API接口。...Solrj 是访问 Solr 的 Java 客户端,它提供添加、更新和查询Solr 索引的接口。http://wiki.chenlb.com/solr/doku.php?id=solrj
solr-solrj-4.9.0.jar
solr-solrj-4.4.0.jar
压缩文件里面有solr-solrj-4.10.3.jar和solr-solrj-5.0.0.jar两个jar
solr-solrj-6.6.0.jar
该文档主要是对solr1.4的配置,包含服务器的复制,分发,和分片
apache-solr-solrj-3.5.0.jar
solrJ是Java连接solr进行查询检索和索引更新维护的jar包。
solr-solrj 5.0.0 和自己搭建的solr服务交互
本篇文章主要介绍了详解java整合solr5.0之solrj的使用 ,具有一定的参考价值,有兴趣的可以了解下
2.solr启动&停止 solr-8.0.0\bin目录下执行cmd solr start 启动 solr stop -all 3.创建solrcore \solr-8.0.0\server\solr\目录下新建文件夹(如:new_db) 将example\example-DIH\solr\db下的文件 copy到/new_core下 ...
solr详细配置教程与solrj的使用
solrj工具类封装,包括条件批量查询,批量增删改,分段修改。
solr-solrj-4.10.3.jar。
solr的核心jar,大家可以一起好好学习一下,还是很优秀的
jar包,亲测可用
jar包,亲测可用
下载后会获得名为:solr_core.4.6.0 的zip包,解压后会获得solr-core-4.6.0.jar和 solr-solrj-4.6.0.jar两个文件,搭建solr全文检索环境必须要添加的包
Solr是一个独立的企业级搜索应用服务器,它对外提供类似于Web-service的API接口。用户可以通过http请求,向搜索引擎服务器提交一定格式的XML文
The following is a sample use of highlighting on a search for Corgan in the artist MusicBrainz data set. Recall that the mb_artists request ...match against the artist name, alias, and members fields