HttpClient 模拟登录并解析网页数据

wangxing0311

浏览: 51662 次
性别:
来自: 杭州

最近访客更多访客>>

wuhongyu

bookong

294460620

坚苦卓绝

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

HttpClient应用

数据结构 Web Ajax Windows Firefox

//Post Method 例子（模拟登录）

/**
 * Example of a simulated form login with Commons HttpClient.
 *
 * <p>The flow in {@link #testLogin()} is: fetch the login page, POST the
 * credentials, compare the {@code location} response header with the expected
 * index URL, then download that page and hand its body to
 * {@code Util.writerFile}.
 */
public class PostExample {

    private static final String WEB_SITE = "http://www.xxx.cn";

    private static final String LOGIN = "http://www.xxx.cn/login.html";

    private static final String INDEX = "http://www.xxx.cn/index.html";

    private static final int WEB_PORT = 80;

    private static final String USER_NAME = "username";

    private static final String PASSWORD = "password";

    /** Shared client; socket and connection timeouts are both set to 15 seconds. */
    private static final HttpClient client;

    /** Cookies read from the client's state right after the client is created. */
    private static final Cookie[] cookies;

    static {
        client = new HttpClient();
        client.getHttpConnectionManager().getParams().setSoTimeout(15000);
        client.getHttpConnectionManager().getParams().setConnectionTimeout(15000);
        cookies = client.getState().getCookies();
    }

    /**
     * Runs the login flow and saves the landing page to a file.
     *
     * <p>Login is considered successful only when the POST response carries a
     * {@code location} header equal to {@link #INDEX}; a missing header is
     * reported as a failed login instead of raising a NullPointerException.
     */
    private static void testLogin() {
        // NOTE(review): setHost receives a full URL here; HttpClient 3.x appears to
        // expect a bare host name for this argument - confirm. Every request below
        // uses an absolute URL.
        client.getHostConfiguration().setHost(WEB_SITE, WEB_PORT);

        // Request the login page first; its body is not needed.
        GetMethod get = new GetMethod(LOGIN);
        GetExample.processGet(client, get, cookies, false, false);

        PostMethod post = new PostMethod(LOGIN);
        NameValuePair[] params = new NameValuePair[] {
            new NameValuePair("email", USER_NAME),
            new NameValuePair("pass", PASSWORD),
            new NameValuePair("remember", "1"),
            new NameValuePair("goto", "/index.html")
        };
        processPost(client, post, params, cookies, false, false);

        // The header is absent when the server did not redirect or the request failed.
        Header header = post.getResponseHeader("location");
        String url = (header == null) ? null : header.getValue();
        if (!INDEX.equals(url)) {
            System.out.println("登录失败,请检查请求参数以及url是否正确...");
            return;
        }
        System.out.println("登录成功!");

        get = new GetMethod(url);
        String result = GetExample.processGet(client, get, cookies, false, true);
        if (result == null) {
            // GetExample is not visible here; guard in case it yields no body.
            System.out.println("获取页面内容失败");
            return;
        }

        // Write the page body to a file. The returned path can be passed to
        // ParserExample.parserHtml to parse the saved HTML.
        Util.writerFile(result);
    }

    /**
     * Executes a POST request and optionally returns the response body.
     *
     * <p>After every call (successful or not) the client's parameters are
     * cleared, the connection is released and the calling thread sleeps for
     * three seconds.
     *
     * @param client            the client used to execute the request
     * @param post              the POST method to configure and execute
     * @param params            form parameters; ignored when {@code null} or empty
     * @param cookies           cookies sent in a {@code cookie} request header;
     *                          ignored when {@code null} or empty
     * @param needAppendCookies when {@code true}, the cookies held in the client's
     *                          state after the request are added to that state again
     * @param needResponse      when {@code true}, the response body is returned
     * @return the response body when {@code needResponse} is {@code true} and the
     *         request completed without an exception; otherwise {@code null}
     */
    public static String processPost(HttpClient client, PostMethod post, NameValuePair[] params, Cookie[] cookies,
            boolean needAppendCookies, boolean needResponse) {
        try {
            post.setRequestHeader("User-Agent",
                    "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.2.10) Gecko/20100914 Firefox/3.6.10");

            if (params != null && params.length > 0) {
                post.setRequestBody(params);
            }

            // Array.toString() would send "[L...Cookie;@hash"; build "name=value; ..." instead.
            if (cookies != null && cookies.length > 0) {
                post.setRequestHeader("cookie", toCookieHeader(cookies));
            }

            // Limit the socket timeout of this POST to 10 seconds.
            post.getParams().setParameter(HttpMethodParams.SO_TIMEOUT, 10000);

            client.executeMethod(post);

            if (needAppendCookies) {
                // NOTE(review): this re-adds the state's own cookies and only rebinds
                // the local parameter; kept as in the original - confirm the intent.
                cookies = client.getState().getCookies();
                client.getState().addCookies(cookies);
            }

            if (needResponse) {
                return post.getResponseBodyAsString();
            }
        } catch (HttpException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            client.getParams().clear();
            post.releaseConnection();
            try {
                Thread.sleep(3000);
            } catch (InterruptedException e) {
                e.printStackTrace();
                // Preserve the interrupt for callers further up the stack.
                Thread.currentThread().interrupt();
            }
        }
        return null;
    }

    /** Joins cookies into a single request-header value of the form {@code a=1; b=2}. */
    private static String toCookieHeader(Cookie[] cookies) {
        StringBuilder header = new StringBuilder();
        for (Cookie cookie : cookies) {
            if (cookie == null) {
                continue;
            }
            if (header.length() > 0) {
                header.append("; ");
            }
            String value = cookie.getValue();
            header.append(cookie.getName()).append('=').append(value == null ? "" : value);
        }
        return header.toString();
    }

    public static void main(String[] args) {
        testLogin();
    }

}


//将get,post响应的result写到文件里

/**
 * File helper used by the examples to persist a GET/POST response body.
 */
public class Util {

    /**
     * Writes {@code result} to a new file named {@code d:\<random UUID>.html}.
     *
     * <p>The text is encoded with the platform default charset, as before.
     * NOTE(review): ParserExample reads the file back as "gbk" - confirm the
     * platform default matches before changing the encoding here.
     *
     * @param result the text to write; {@code null} is treated as a failure
     * @return the absolute path of the written file, or {@code null} when
     *         {@code result} is {@code null} or the file could not be written
     */
    public static String writerFile(String result) {
        if (result == null) {
            return null;
        }

        File file = new File("d:\\" + UUID.randomUUID() + ".html");
        byte[] bytes = result.getBytes();

        FileOutputStream fos = null;
        try {
            fos = new FileOutputStream(file);
            fos.write(bytes, 0, bytes.length);
            fos.flush();
            fos.close();
            // Already closed successfully; nothing left for the finally block.
            fos = null;
            return file.getAbsolutePath();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Release the stream when write/flush/close failed part-way.
            if (fos != null) {
                try {
                    fos.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return null;
    }
}

//以htmlparser和正则两种方式解析得到网页上的内容

/**
 * Extracts data from the site in two ways: with HTMLParser node filters
 * (member name) and with a regular expression (member count).
 */
public class ParserExample {

    /** Captures the value assigned in a {@code var value=...;} statement. */
    private static final Pattern MEMBER_COUNT_PATTERN = Pattern.compile("var value=(.*?);");

    /**
     * Prints the member name found in the saved page, then the member count
     * fetched from the site.
     *
     * @param filePath location of the saved HTML page passed to the parser
     */
    public static void parserHtml(String filePath) {
        parserName(filePath);
        parserMemberCount();
    }

    /**
     * Parses the member name: the text of the first child of the first
     * {@code <a>} tag whose {@code href} is the member index URL.
     *
     * <p>Prints the error message when no such link, child or text is found,
     * rather than failing with a NullPointerException on an empty match list.
     */
    private static void parserName(String filePath) {
        try {
            Parser parser = new Parser(filePath);
            parser.setEncoding("gbk");

            NodeFilter filter = new AndFilter(new TagNameFilter("a"),
                    new HasAttributeFilter("href", "http://www.xxx.cn/member/index.html"));
            NodeList nodeList = parser.extractAllNodesThatMatch(filter);

            String name = null;
            if (nodeList != null && nodeList.size() > 0) {
                LinkTag node = (LinkTag) nodeList.elementAt(0);
                NodeList children = node.getChildren();
                if (children != null && children.size() > 0) {
                    name = children.elementAt(0).toPlainTextString();
                }
            }

            if (name == null) {
                System.out.println("解析姓名出错，请检查网页结构是否发生变化");
            } else {
                System.out.println("姓名:" + name);
            }
        } catch (ParserException e) {
            e.printStackTrace();
        }
    }

    /**
     * Parses the site's member count from the response of the member-count URL
     * using {@link #MEMBER_COUNT_PATTERN}; prints it when the pattern matches.
     */
    private static void parserMemberCount() {
        HttpClient client = new HttpClient();
        Cookie[] cookies = client.getState().getCookies();

        client.getHostConfiguration().setHost("http://www.xxx.cn", 80);

        GetMethod get = new GetMethod("http://www.xxx.cn/ajax/memberCount.html");
        String result = GetExample.processGet(client, get, cookies, false, true);
        if (result == null) {
            // GetExample is not visible here; guard in case it yields no body.
            System.out.println("获取会员数失败");
            return;
        }

        Matcher matcher = MEMBER_COUNT_PATTERN.matcher(result);
        if (matcher.find()) {
            String memberCount = matcher.group(1);
            System.out.println("会员:" + memberCount);
        }
    }
}

2
顶

0
踩

分享到：

HtmlParser简单入门例子 | 高效的MySQL分页

2011-01-26 17:31
浏览 3458
评论(1)
分类:互联网
查看更多

1 楼 louiswun 2014-06-02

GetExample这个类没有提供哦

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

HttpClient 模拟登录并解析网页数据

评论

发表评论

相关推荐

最近访客 更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

HttpClient 模拟登录并解析网页数据

评论

发表评论

相关推荐

使用HttpClient过程中见的一些问题

最近访客更多访客>>