`
shuaigg.babysky
  • 浏览: 552631 次
  • 性别: Icon_minigender_1
  • 来自: 济南
社区版块
存档分类
最新评论

nodejs 扒页面 api-docs

 
阅读更多
var continuation = require('../continuation');
var http = require("http");
var fs = require('fs');
var url = require('url');
var ps = require("path");
//var request = require("request");

/**
 * Mirrors the DWR javadoc site into a local directory.
 *
 * IMPORTANT: this function is not executed as written. The bootstrap at the
 * bottom of the file feeds its source text to continuation.js, which rewrites
 * the `cont(...)` / `obtain(...)` placeholders into nested callbacks, and then
 * evals the result. The body therefore has to stay within syntax that compiler
 * accepts (plain ES5: `var`, function expressions, no arrow functions).
 *
 * Crawl shape:
 *   1. Fetch $baseurl, save it as index.html, then fetch and save every page
 *      it links to.
 *   2. Fetch $lefttopurl, then fetch and save every page it links to, and for
 *      each of those pages fetch and save the pages *they* link to. Nothing
 *      deeper is followed.
 *
 * If index.html already exists in the output directory the whole run is
 * skipped. Files that already exist are never re-downloaded.
 *
 * NOTE(review): the forEach callbacks contain `obtain(...)`, so after the
 * continuation.js rewrite they are presumably asynchronous and forEach does
 * not wait for them; downloads within one list likely run concurrently.
 * NOTE(review): request errors and non-200 responses are not handled.
 */
function main_entry() {
  var $baseurl = "http://directwebremoting.org/dwr/javadoc/";
  var $basepath = "E:\\dwrdoc\\";
  var $lefttopurl = "http://directwebremoting.org/dwr/javadoc/overview-frame.html";
  // rwxrwxrwx. Spelled via parseInt because the legacy `0777` octal literal
  // is a syntax error in strict mode / ES modules.
  var DIR_MODE = parseInt("777", 8);

  /**
   * Creates every missing directory along a backslash-separated path.
   * Hand-rolled (instead of a recursive mkdir option) to stay compatible with
   * the old Node versions this script targets.
   *
   * @param {string} str - Directory path using "\\" separators.
   */
  var mkdir = function(str) {
    var parts = str.split(/\\/g);
    var step = "";
    for (var i = 0; i < parts.length; i++) {
      step += parts[i] + "\\";
      if (!fs.existsSync(step)) {
        fs.mkdirSync(step, DIR_MODE);
      }
    }
  };

  /**
   * Maps a remote URL to the local file path it should be saved under and
   * makes sure the containing directory exists.
   *
   * URLs outside $baseurl (still starting with "http:" after the base prefix
   * is stripped) map to $basepath itself; since that directory exists, the
   * callers' "already exists" check skips them.
   *
   * @param {string} str - Absolute URL of the page.
   * @returns {string} Local path for the page, or $basepath for foreign URLs.
   */
  var getLocal = function(str) {
    str = str.replace($baseurl, "");
    str = str.replace(/^\//, "");
    if (/^http\:/.test(str)) {
      return $basepath;
    }
    var local = ps.resolve($basepath, str);
    var dirname = ps.dirname(local);
    console.log(local + "," + dirname);
    mkdir(dirname);
    return local;
  };

  /**
   * Extracts the targets of <a href="..."> and <frame src="..."> tags from an
   * HTML string, resolved against a base URL with any query string removed.
   *
   * Anchor links that carry a "#fragment" are dropped. Only "http://" URLs
   * are returned: other schemes (https:, mailto:, javascript:, ...) cannot be
   * fetched with the `http` module and would produce invalid local paths.
   *
   * @param {string} str - HTML source to scan.
   * @param {string} [$base] - Base URL for relative links; defaults to $baseurl.
   * @returns {string[]} Absolute URLs, anchors first, then frames.
   */
  var getLinks = function(str, $base) {
    $base = $base || $baseurl;
    var list = [];

    // Runs one global regex over the page and appends each usable capture.
    var collect = function(reg, skipFragments) {
      var result;
      while ((result = reg.exec(str)) !== null) {
        var location = url.resolve($base, result[1]).replace(/\?.*$/, "");
        if (!/^http:\/\//i.test(location)) {
          continue;
        }
        if (skipFragments && /#.*$/.test(location)) {
          continue;
        }
        list.push(location);
      }
    };

    collect(/<a.*?href="(.+?)".*?>.*?<\/a>/gi, true);
    collect(/<frame.*?src="(.+?)".*?>/gi, false);
    return list;
  };

  /**
   * Downloads a page over HTTP and hands its body to `next(null, body)`.
   *
   * For "mailto:" addresses it returns without ever calling `next`, so the
   * caller's continuation for that item is simply abandoned.
   *
   * @param {string} address - URL to fetch.
   * @param {function(?Error, string)} next - Node-style completion callback.
   */
  var $httpget = function(address, next) {
    if (address.indexOf("mailto:") === 0) {
      return "";
    }
    var result = [];
    http.get(address, cont(res));
    // Let the stream decode UTF-8 itself; converting each chunk separately
    // corrupts multi-byte characters that straddle a chunk boundary.
    res.setEncoding("utf8");
    res.on("data", function(data) {
      result.push(data);
    });
    // "end" passes no arguments; `callback` is only a placeholder for cont.
    res.on("end", cont(callback));
    next(null, result.join(''));
  };

  var $indexpath = $basepath + "index.html";

  // A previous run already produced the mirror: nothing to do.
  if (fs.existsSync($indexpath)) {
    return;
  }

  // The output directory must exist before index.html can be written.
  mkdir(ps.dirname($indexpath));

  $httpget($baseurl, obtain(result));
  fs.writeFileSync($indexpath, result, "utf-8");

  // Pass 1: every page linked from the site's entry page.
  var listO = getLinks(result);
  listO.forEach(function(item) {
    var localPathO = getLocal(item);
    if (fs.existsSync(localPathO)) {
      return;
    }
    $httpget(item, obtain(resultO));
    fs.writeFileSync(localPathO, resultO, "utf-8");
  });

  // Pass 2: pages linked from the overview frame, plus one level below them.
  $httpget($lefttopurl, obtain(result1));
  var list1 = getLinks(result1);

  list1.forEach(function(item) {
    var localPath = getLocal(item);
    if (fs.existsSync(localPath)) {
      return;
    }

    $httpget(item, obtain(pageHtml));
    fs.writeFileSync(localPath, pageHtml, "utf-8");

    // Links inside this page are relative to the page's own directory.
    var $basedir = url.resolve(item, "./");
    var list2 = getLinks(pageHtml, $basedir);

    list2.forEach(function(item1) {
      var localPath1 = getLocal(item1);
      if (fs.existsSync(localPath1)) {
        return;
      }
      $httpget(item1, obtain(result2));
      fs.writeFileSync(localPath1, result2, 'utf-8');
      // Links found in these pages are deliberately not followed further.
    });
  });
};

// Bootstrap: main_entry is written in the continuation.js dialect, so its
// source text is run through the continuation.js compiler first.
var entrySource = main_entry.toString();
var transformedSource = continuation.compile(entrySource);
// Evaluate the compiled source in this scope; presumably this redefines
// main_entry with the callback-based version before it is invoked below.
eval(transformedSource);
main_entry();
 
分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics