/**
 * Crawls one fixed blog post by handing its URL to blog_crawler().
 *
 * The signature looks like a Drupal form callback (presumably a submit
 * handler); neither argument is read.
 *
 * @param array $form
 *   Unused.
 * @param array $form_state
 *   Unused.
 */
function one_crawl($form, $form_state) {
  blog_crawler('http://blog.eau-thermale-avene.cn/post/536.html');
}
/**
 * Crawls one blog post URL and stores it as a 'blog' node plus its comments.
 *
 * The page is downloaded through BlogFetch and parsed with str_get_html().
 * Publish date, title, source link, tags, category and body are copied onto
 * the node, then every non-empty '.msg-box-content' element is saved as an
 * anonymous, published comment on that node.
 *
 * Note that BlogFetch::set() saves the node on every call, so the node
 * already has an nid by the time comments are attached.
 *
 * @param string $link
 *   URL of the blog post. The function returns immediately when it is empty.
 * @param array $context
 *   Passed by reference; 'message' is set to a progress string on success
 *   (presumably a Batch API context - confirm against the caller).
 * @param string|null $summary
 *   Optional summary stored alongside the body value.
 *
 * @throws Exception
 *   Propagated from BlogFetch when the page cannot be downloaded.
 */
function blog_crawler($link, &$context = array(), $summary = NULL) {
  if (empty($link)) {
    return;
  }
  watchdog('crawler link', $link);

  $fetch = new BlogFetch('blog', $link, array('uid' => 1));
  $blog = str_get_html($fetch->html);
  if (!$blog) {
    // Guard against a falsy parse result so find() is never called on a
    // non-object. Nothing has been saved yet at this point.
    watchdog('crawler link', 'Could not parse HTML from @link', array('@link' => $link), WATCHDOG_WARNING);
    return;
  }

  // Publish date: read it, then blank the element so it does not leak into
  // the title markup extracted below.
  $date_el = $blog->find('#divMain h2.post-title span.post-date', 0);
  if ($date_el) {
    $created = strtotime($date_el->plaintext);
    $date_el->innertext = '';
    // Only store a date that actually parsed.
    if ($created !== FALSE) {
      $fetch->set('created', $created);
    }
  }
  // The 'changed' property is not set: the original author noted that the
  // entity property does not support writing.

  // Title: remove the first <span> inside the heading, keep the rest.
  $title_el = $blog->find('#divMain h2.post-title', 0);
  if ($title_el) {
    $first_span = $blog->find('#divMain h2.post-title span', 0);
    if ($first_span) {
      $first_span->outertext = '';
    }
    // innertext is raw HTML, so the ampersand arrives as an entity; the
    // original replacement searched for the already-decoded text and was a
    // no-op.
    $title = str_replace('Q&amp;A', 'Q&A', $title_el->innertext);
    $fetch->set('title', $title);
  }

  // Original URL.
  $fetch->set('field_link', array('url' => $link));

  // Tags: one term per non-empty link inside .post-info.
  $terms = array();
  foreach ($blog->find('#divMain div.post-body .post-info a') as $tag) {
    if (!empty($tag->plaintext)) {
      $terms[] = avene_taxonomy($tag->plaintext, 'blog_tags');
    }
  }
  $fetch->set('field_blog_tags', $terms);

  // Category: the first three characters of the trimmed text are dropped
  // (presumably a fixed label prefix - confirm against the source pages).
  $cate_el = $blog->find('#divMain div.post-body .post-footer-category', 0);
  if ($cate_el) {
    $cate = drupal_substr(trim($cate_el->plaintext), 3);
    $fetch->set('field_blog_type', avene_taxonomy($cate, 'blog_category'));
  }

  // Body: strip the tag list and footer before taking the remaining markup.
  foreach (array('.post-info', '.post-footer') as $strip) {
    $strip_el = $blog->find('#divMain div.post-body ' . $strip, 0);
    if ($strip_el) {
      $strip_el->outertext = '';
    }
  }
  $body_el = $blog->find('#divMain div.post-body', 0);
  $body = $body_el ? $body_el->innertext : '';
  $fetch->set('body', array('format' => 'full_html', 'summary' => $summary, 'value' => $body));

  // Comments.
  foreach ($blog->find('.msg-boxes .msg-box-content') as $c) {
    // Skip an empty box instead of aborting the whole import.
    if (empty($c->plaintext)) {
      continue;
    }
    $text_el = $c->find('.msgarticle', 0);
    if (!$text_el) {
      continue;
    }

    // A fresh object per comment, so nothing set by comment_save() on the
    // previous comment carries over.
    $comment = new stdClass();
    // Node the comment is attached to.
    $comment->nid = $fetch->entity->nid;
    // 0 = new comment.
    $comment->cid = 0;
    // 0 = no parent comment.
    $comment->pid = 0;
    // 0 = anonymous author.
    $comment->uid = 0;

    // Remove the link inside the timestamp element before reading its text.
    $time_link = $c->find('.msgtime a', 0);
    if ($time_link) {
      $time_link->outertext = '';
    }
    $time_el = $c->find('.msgtime', 0);
    $posted = $time_el ? trim($time_el->plaintext) : '';
    $created = strtotime($posted);
    if ($created !== FALSE) {
      $comment->created = $created;
      // comment_submit() derives 'created' from $comment->date (falling back
      // to "now"), so the scraped date has to be supplied there as well.
      $comment->date = $posted;
    }

    $comment->is_anonymous = 1;
    // Imported comments are published straight away.
    $comment->status = COMMENT_PUBLISHED;
    $comment->language = LANGUAGE_NONE;
    // Left empty on purpose.
    $comment->subject = '';

    // The original also ran this text through strtr() with a map whose only
    // key was the empty string; that can never replace anything, so the call
    // is dropped.
    $val = filter_var($text_el->plaintext, FILTER_SANITIZE_SPECIAL_CHARS);
    $comment->comment_body[$comment->language][0]['value'] = $val;
    $comment->comment_body[$comment->language][0]['format'] = 'full_html';

    comment_submit($comment);
    comment_save($comment);
  }

  $context['message'] = 'fetching ' . $fetch->entity->title;
}
/**
 * Downloads a page and holds a new node entity to be filled from it.
 *
 * Constructing an instance performs the HTTP request immediately and then
 * creates (but does not save) a node of the given type. Properties are
 * written through set(), which saves the node on every call.
 */
class BlogFetch {

  /**
   * Node type passed to entity_create() as 'type'.
   */
  protected $type;

  /**
   * URL that was fetched.
   */
  protected $url;

  /**
   * Extra initial values merged into the new node.
   */
  protected $args;

  /**
   * Response body of the fetched page.
   */
  public $html;

  /**
   * The node entity being populated.
   */
  public $entity;

  /**
   * Fetches the page, then creates the node entity.
   *
   * @param string $type
   *   Node type for the new entity.
   * @param string $url
   *   Page to download.
   * @param array $args
   *   Additional initial node values; 'type' cannot be overridden here.
   *
   * @throws Exception
   *   When the HTTP response code is not 200.
   */
  public function __construct($type, $url, $args = array()) {
    $this->type = $type;
    $this->args = $args;
    $this->url = $url;
    $this->fetchData();
    $this->buildEntity();
  }

  /**
   * Creates the unsaved node entity from the type and extra values.
   */
  public function buildEntity() {
    // Array union keeps the left-hand 'type', so $args cannot replace it.
    $values = array('type' => $this->type) + $this->args;
    $this->entity = entity_create('node', $values);
  }

  /**
   * Downloads the URL and stores the response body in $html.
   *
   * @throws Exception
   *   When the HTTP response code is not 200.
   */
  public function fetchData() {
    $request = drupal_http_request($this->url);
    // Loose comparison on purpose: the code is matched whether it arrives as
    // an integer or a numeric string.
    if ($request->code != 200) {
      throw new Exception('Failure on fetch:' . $this->url . '. http code:' . $request->code);
    }
    $this->html = $request->data;
  }

  /**
   * Sets one property on the node and saves the node.
   *
   * @param string $property
   *   Property or field name understood by the entity metadata wrapper.
   * @param mixed $value
   *   Value to assign.
   * @param mixed $type
   *   Accepted for compatibility; not used.
   */
  public function set($property, $value, $type = NULL) {
    $wrapper = entity_metadata_wrapper('node', $this->entity);
    $wrapper->{$property}->set($value);
    // Saved on every call; callers rely on the node having an nid afterwards.
    $wrapper->save();
  }

  /**
   * Drops the references to the downloaded markup and the entity.
   */
  public function __destruct() {
    $this->html = NULL;
    $this->entity = NULL;
  }

}
相关推荐
freebase的实体id到真实实体的映射 数据集
HW1 entity Resolution数据分析训练题,入门数据科学的经典题目
Entity Framework完整版教程,从初级到中级,再到高级,循序渐进。
关于使用EntityFramework的注意事项在Performance Considerations for Entity Framework 这篇文章中有详细介绍,其中生成视图操作耗时比较多,在Entity Framework执行查询或者对数据库进行写操作的时候,必须生成...
本教程介绍如何创建使用 Entity Framework (EF) Core 进行数据访问的 ASP.NET Core Razor Pages Web 应用。 Razor 页面是 ASP.NET Core MVC 的一个新特性,它可以使基于页面的编码方式更简单高效。 Entity Framework...
Entity Framework技术系列之1:数据访问技术概述
Entity Framework 6 Recipes Entity Framework 6 Recipes
Entity Framework Repository(含依赖注入)
ADO.NET Entity Framework 以 Entity Data Model (EDM) 为主,将数据逻辑层切分为三块,分别为 Conceptual Schema, Mapping Schema 与 Storage Schema 三层,其上还有 Entity Client,Object Context 以及 LINQ 可以...
本书是关于Entity framework code first 的详细介绍,在本书中,你可以学到从无到有的创建基于Entity framework code first的项目
TransE数据集+代码entity2id.txt,relation2id.txt,train.txt
根据mysql数据库生成entity、mapper、xml
ADO.NET Entity框架结构设计-网页教材
Entity Developer Entity Developer Entity Developer
它支持创建各种一映射,如表分割,映射实体到多个表,复杂类型,继承分层,从Sel ect语句创建实体,从SQL代码创建方法等。由于使用了类似T4的模板,所以代码生成非常灵活,另外你还能创建自己的模板用于其他的编程...
Entity Framework的核心 – EDM(Entity Data Model) 7 EDM概述 7 EDM之CSDL 7 EDM之SSDL 11 EDM之MSL 12 EDM中存储过程的设计 15 EDM中ComplexType的设计 16 实体数据模型映射方案 17 Entity Framework的...
自留demo。springMVC-HttpEntity(ResponseEntity)与jQuery交互的小结demo。
Entity Framework Core Cookbook - Second Edition by Ricardo Peres English | 9 Nov. 2016 | ISBN: 1785883305 | 340 Pages | MOBI/EPUB/PDF+Code Files | 6.2 MB Entity Framework is a highly recommended ...
电子书 Entity Framework 4 In Action
Data access is an integral part of any ... You'll learn how to retrieve data by querying the Entity Data Model and understand how to use LINQ to Entities and Entity SQL to query the Entity Data Model.