//
// Copyright (c) 2011 Shun Takebayashi
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
#import "STWebArchiver.h"
#import <libxml/HTMLparser.h>
#import <libxml/xpath.h>
// Private helper methods used by -archiveHTMLData:textEncoding:baseURL:completionBlock:.
@interface STWebArchiver ()
// Evaluates xpathExpression against document and collects, as NSString objects,
// the value of the attribute named attributeName from the nodes it matches.
- (NSArray *)valueForAttributeName:(NSString *)attributeName
withEvaluatingXPath:(NSString *)xpathExpression
inDocument:(xmlDocPtr)document;
// Resolves the strings in paths against base using
// +[NSURL URLWithString:relativeToURL:] and returns the resulting NSURL objects.
- (NSArray *)absoluteURLsForPaths:(NSArray *)paths baseURL:(NSURL *)base;
@end
@implementation STWebArchiver

/**
 * Builds a binary property list in the web-archive layout ("WebMainResource"
 * plus "WebSubresources") from an HTML document and the scripts, images and
 * stylesheets it references.
 *
 * The HTML is parsed synchronously on the calling thread to collect resource
 * references. The resources are then downloaded with blocking requests on a
 * background GCD queue.
 *
 * @param aData      Raw bytes of the HTML document. If nil, the completion
 *                   block receives nil.
 * @param anEncoding Text encoding name of the document (e.g. "UTF-8"). May be
 *                   nil, in which case the parser detects the encoding and the
 *                   encoding key is omitted from the archive.
 * @param anURL      URL of the document; relative references are resolved
 *                   against it. May be nil.
 * @param completion Called exactly once, on a background queue (NOT the main
 *                   thread), with the serialized archive or nil on failure.
 *                   If nil, the method does nothing.
 */
- (void)archiveHTMLData:(NSData *)aData
           textEncoding:(NSString *)anEncoding
                baseURL:(NSURL *)anURL
        completionBlock:(void (^)(NSData *))completion {
    // The archive is only observable through the completion block.
    if (completion == nil) {
        return;
    }

    // The global queue is used instead of creating (and never releasing) a
    // private queue on every call.
    dispatch_queue_t workQueue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);

    if (aData == nil) {
        // Keep the "always asynchronous" delivery contract even on failure.
        dispatch_async(workQueue, ^{
            completion(nil);
        });
        return;
    }

    NSArray *resourcesPaths = [NSArray array];
    NSUInteger length = [aData length];
    // htmlReadMemory takes an int size; oversized or empty input is archived
    // without sub-resources rather than parsed with a truncated length.
    if (length > 0 && length <= (NSUInteger)INT_MAX) {
        // NSData is not NUL-terminated, so the length-bounded reader must be
        // used; htmlParseDoc would scan past the end of the buffer.
        htmlDocPtr doc = htmlReadMemory((const char *)[aData bytes],
                                        (int)length,
                                        NULL,
                                        [anEncoding UTF8String],
                                        0);
        if (doc != NULL) {
            NSArray *pathsForImagesAndScripts = [self valueForAttributeName:@"src"
                                                        withEvaluatingXPath:@"//script[@src]|//img[@src]"
                                                                 inDocument:doc];
            NSArray *pathsForStylesheets = [self valueForAttributeName:@"href"
                                                   withEvaluatingXPath:@"//link[@rel='stylesheet'][@href]"
                                                            inDocument:doc];
            // Both helpers return non-nil arrays, so stylesheets are kept even
            // when the page has no scripts or images.
            resourcesPaths = [pathsForImagesAndScripts arrayByAddingObjectsFromArray:pathsForStylesheets];
            // Only NSString copies escape this scope; the tree can go now.
            xmlFreeDoc(doc);
        }
    }
    NSArray *resourceUrls = [self absoluteURLsForPaths:resourcesPaths baseURL:anURL];

    dispatch_async(workQueue, ^{
        // Maps absolute URL string -> resource dictionary. NSNull marks a URL
        // that has been claimed by an iteration but has no usable result yet.
        NSMutableDictionary *resources = [NSMutableDictionary dictionary];

        dispatch_apply([resourceUrls count], workQueue, ^(size_t i) {
            NSURL *url = [resourceUrls objectAtIndex:i];
            NSString *urlString = [url absoluteString];

            // Claim the URL so that duplicates are fetched only once.
            BOOL unfetched = NO;
            @synchronized (resources) {
                unfetched = ([resources objectForKey:urlString] == nil);
                if (unfetched) {
                    [resources setObject:[NSNull null] forKey:urlString];
                }
            }
            if (!unfetched) {
                return;
            }

            NSURLResponse *response = nil;
            NSURLRequest *request = [NSURLRequest requestWithURL:url];
            NSData *data = [NSURLConnection sendSynchronousRequest:request
                                                 returningResponse:&response
                                                             error:NULL];
            if (data == nil) {
                // Download failed: leave the NSNull placeholder in place so the
                // URL is not retried; it is filtered out below.
                return;
            }

            // Built key by key: dictionaryWithObjectsAndKeys: stops at the
            // first nil, which used to drop the data when the MIME type was nil.
            NSMutableDictionary *resourceArchive = [NSMutableDictionary dictionary];
            [resourceArchive setObject:urlString forKey:@"WebResourceURL"];
            [resourceArchive setObject:data forKey:@"WebResourceData"];
            NSString *mimeType = [response MIMEType];
            if (mimeType != nil) {
                [resourceArchive setObject:mimeType forKey:@"WebResourceMIMEType"];
            }
            NSString *encodingName = [response textEncodingName];
            if (encodingName != nil) {
                [resourceArchive setObject:encodingName forKey:@"WebResourceTextEncodingName"];
            }
            @synchronized (resources) {
                [resources setObject:resourceArchive forKey:urlString];
            }
        });

        // NSNull is not a property-list type; drop placeholders of failed
        // downloads so serialization cannot fail because of them.
        NSMutableArray *subresources = [NSMutableArray arrayWithCapacity:[resources count]];
        for (id entry in [resources allValues]) {
            if (entry != [NSNull null]) {
                [subresources addObject:entry];
            }
        }

        NSMutableDictionary *mainResource = [NSMutableDictionary dictionary];
        [mainResource setObject:aData forKey:@"WebResourceData"];
        [mainResource setObject:@"" forKey:@"WebResourceFrameName"];
        [mainResource setObject:@"text/html" forKey:@"WebResourceMIMEType"];
        // setObject:forKey: raises on nil, so optional values are guarded.
        if (anEncoding != nil) {
            [mainResource setObject:anEncoding forKey:@"WebResourceTextEncodingName"];
        }
        NSString *mainURLString = [anURL absoluteString];
        if (mainURLString != nil) {
            [mainResource setObject:mainURLString forKey:@"WebResourceURL"];
        }

        NSMutableDictionary *archiveSource = [NSMutableDictionary dictionary];
        [archiveSource setObject:subresources forKey:@"WebSubresources"];
        [archiveSource setObject:mainResource forKey:@"WebMainResource"];

        NSData *webArchive = [NSPropertyListSerialization dataFromPropertyList:archiveSource
                                                                        format:NSPropertyListBinaryFormat_v1_0
                                                              errorDescription:NULL];
        completion(webArchive);
    });
}

/**
 * Evaluates an XPath expression against a parsed document and collects the
 * value of one attribute from every matched node.
 *
 * @param attributeName   Name of the attribute to read (e.g. "src").
 * @param xpathExpression XPath expression selecting the nodes of interest.
 * @param document        Parsed libxml2 document; may be NULL.
 * @return Attribute values as NSString objects in node-set order. Never nil;
 *         empty when nothing matches, the expression cannot be evaluated, or
 *         an argument is missing. Nodes lacking the attribute, or whose value
 *         is not valid UTF-8, are skipped.
 */
- (NSArray *)valueForAttributeName:(NSString *)attributeName
               withEvaluatingXPath:(NSString *)xpathExpression
                        inDocument:(xmlDocPtr)document {
    NSMutableArray *results = [NSMutableArray array];
    if (document == NULL || attributeName == nil || xpathExpression == nil) {
        return results;
    }

    xmlXPathContextPtr context = xmlXPathNewContext(document);
    if (context == NULL) {
        return results;
    }

    // xmlXPathEvalExpression returns NULL on error (e.g. a malformed
    // expression), so the result must be checked before dereferencing.
    xmlXPathObjectPtr xpathObject = xmlXPathEvalExpression((const xmlChar *)[xpathExpression UTF8String], context);
    if (xpathObject != NULL) {
        xmlNodeSetPtr nodes = xpathObject->nodesetval;
        if (!xmlXPathNodeSetIsEmpty(nodes)) {
            const xmlChar *name = (const xmlChar *)[attributeName UTF8String];
            for (int i = 0; i < nodes->nodeNr; i++) {
                // xmlGetProp returns a copy that the caller must xmlFree, or
                // NULL when the attribute is absent.
                xmlChar *value = xmlGetProp(nodes->nodeTab[i], name);
                if (value == NULL) {
                    continue;
                }
                NSString *attributeString = [NSString stringWithUTF8String:(const char *)value];
                xmlFree(value);
                // stringWithUTF8String: yields nil for invalid UTF-8.
                if (attributeString != nil) {
                    [results addObject:attributeString];
                }
            }
        }
        xmlXPathFreeObject(xpathObject);
    }
    xmlXPathFreeContext(context);
    return results;
}

/**
 * Resolves URL reference strings against a base URL.
 *
 * @param paths NSString URL references (absolute or relative).
 * @param base  URL that relative references are resolved against; may be nil.
 * @return NSURL objects in input order. References that are empty after
 *         trimming surrounding whitespace, or that NSURL cannot parse, are
 *         skipped, so the result may be shorter than paths.
 */
- (NSArray *)absoluteURLsForPaths:(NSArray *)paths baseURL:(NSURL *)base {
    NSMutableArray *results = [NSMutableArray arrayWithCapacity:[paths count]];
    NSCharacterSet *whitespace = [NSCharacterSet whitespaceAndNewlineCharacterSet];
    for (NSString *path in paths) {
        // Attribute values often carry stray whitespace, which makes
        // URLWithString: fail.
        NSString *trimmed = [path stringByTrimmingCharactersInSet:whitespace];
        if ([trimmed length] == 0) {
            continue;
        }
        NSURL *url = [NSURL URLWithString:trimmed relativeToURL:base];
        // URLWithString:relativeToURL: returns nil for malformed input and
        // addObject: raises on nil.
        if (url != nil) {
            [results addObject:url];
        }
    }
    return results;
}

@end
分享到:
相关推荐
解决QQ中转站下载次数 解决QQ中转站下载次数
测试文件中转站
中转站下载次数限制破解器
QQ文件中转站 下载有次数限制,这个小软件可以破解次数限制。只要没被删除就可以下载。
QQFM中转站限次破解工具-iShare 无限次的下载qq中转站工具
QQ邮箱中转站下载次数限制破解工具,免费发送
数学建模Lingo代码加数学模型解决含中转站车辆运输问题,数据可调整,程序可以直接用,模型能直接套用。
快递中转站-商业计划书
开发区垃圾中转站运行管理考核评分.pdf
垃圾中转站施工组织方案.doc
垃圾中转站可研报告分享.pdf
生活垃圾压缩中转站环评报告书.pdf
这是我编写的一个程序,功能是建立一个中转站。 建立本地UDP服务端与TCP客户端,将利用UDP协议接收到的数据用TCP协议发送出去。
QQFM中转站限次破解工具-iShare.rar
垃圾中转站设备安全检查记录表.pdf
垃圾中转站施工组织设计(1).doc
垃圾中转站设备的保养维护说明书模板.doc
Java SSM 快递中转站管理系统【优质毕业设计分享】包括:程序源代码、数据库、配置环境说明,可完美运行。