`

CSDN自动回复灌水乐园帖子-httpClient篇

阅读更多

package com.ws;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
// 1. First download Apache Commons HttpClient 3.x (the org.apache.commons.httpclient package imported below).
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.cookie.CookiePolicy;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;

/**
 * Posts an automatic reply to threads of the CSDN "FreeZone" forum using Apache Commons
 * HttpClient 3.x.
 *
 * <p>No login request is made: every request carries the session cookie stored in
 * {@link #COOKIE}, which has to be copied from a logged-in browser session.
 *
 * @author zk
 */
public class Csdn {
    /** Raw {@code Cookie} header value of a logged-in session; replace the placeholder before running. */
    public static String COOKIE = "你的登陆后cookie";

    /** Charset announced in the {@code Content-Type} header of the reply form POST. */
    private static final String CONTENT_CHARSET = "UTF-8";

    /**
     * Collects the reply URLs of the threads currently listed in the forum and posts the canned
     * reply to each of them.
     *
     * @param args ignored
     * @throws Exception if fetching the thread list or posting a reply fails
     */
    @SuppressWarnings("unchecked") // TestUrl may expose raw List return types
    public static void main(String[] args) throws Exception {
        List<String> urlList = TestUrl.getCardPostUrl(TestUrl.getCsdn(null));
        getCard(urlList);
    }

    /**
     * Fetches {@code http://hi.csdn.net/my.html} with the session cookie and prints the page to
     * standard output; a manual check that the cookie is accepted.
     *
     * @throws Exception if the request fails or the body cannot be decoded
     */
    public static void getMethodTest() throws Exception {
        String pageUrl = "http://hi.csdn.net/my.html";
        HttpClient hc = getHc();

        List<Header> headers = new ArrayList<Header>();
        headers.add(new Header("Proxy-Connection", "keep-alive"));
        headers.add(new Header("Cookie", COOKIE));
        hc.getHostConfiguration().getParams().setParameter("http.default-headers", headers);

        GetMethod getMethod = new GetMethod(pageUrl);
        try {
            int statusCode = hc.executeMethod(getMethod);
            if (statusCode != HttpStatus.SC_OK) {
                System.err.println("Method failed: " + getMethod.getStatusLine());
            }
            byte[] responseBody = getMethod.getResponseBody();
            if (responseBody != null) {
                // Decode with the charset the server declared rather than the platform default.
                System.out.println(new String(responseBody, getMethod.getResponseCharSet()));
            }
        } finally {
            // HttpClient 3.x never frees the connection on its own.
            getMethod.releaseConnection();
        }
    }

    /**
     * Creates a new client that handles cookies with the browser-compatibility policy.
     *
     * @return a freshly configured {@link HttpClient}
     */
    public static HttpClient getHc() {
        HttpClient httpClient = new HttpClient();
        httpClient.getParams().setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
        // setHost expects a bare host name, not a URL. Requests issued with an absolute URL
        // replace this default host anyway.
        httpClient.getHostConfiguration().setHost("www.csdn.net", 80, "http");
        return httpClient;
    }

    /**
     * Posts the canned reply form to every URL in {@code urlList}.
     *
     * <p>A non-200 status is reported on standard error and the loop continues; an exception
     * thrown by a request aborts the remaining URLs.
     *
     * @param urlList reply-form URLs to post to; {@code null} or empty does nothing
     * @throws Exception if a request fails
     */
    public static void getCard(List<String> urlList) throws Exception {
        if (urlList == null || urlList.isEmpty()) {
            return;
        }

        // Headers and form fields are identical for every thread, so build them once.
        List<Header> headers = new ArrayList<Header>();
        headers.add(new Header(
                "User-Agent",
                "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.1.7) Gecko/20091221 Firefox/3.5.7 GTB6 (.NET CLR 1.1.4322)"));
        headers.add(new Header("Proxy-Connection", "keep-alive"));
        headers.add(new Header("Cookie", COOKIE));
        headers.add(new Header("Content-Type",
                "application/x-www-form-urlencoded;charset=" + CONTENT_CHARSET));

        // NOTE(review): __VIEWSTATE / __EVENTVALIDATION look like values captured from one
        // rendering of the reply form; confirm the server still accepts them.
        NameValuePair viewState = new NameValuePair(
                "__VIEWSTATE",
                "/wEPDwUKMTA2MTA3Njg5NA9kFgICCQ9kFgJmD2QWAgIFD2QWAmYPZBYCZg8PFgIeBE1vZGULKiVTeXN0ZW0uV2ViLlVJLldlYkNvbnRyb2xzLlRleHRCb3hNb2RlARYCHgVzdHlsZQUYaGVpZ2h0OjE4MHB4O3dpZHRoOjEwMCU7ZGRpl2NuIb2XmIUODhEniCtEXExdOA==");
        NameValuePair eventValidation = new NameValuePair(
                "__EVENTVALIDATION",
                "/wEWAwLtl7ScBQK6873ZCgK3mOXeAjqcUaoqnb3Nj0uKUrGKImKcexCG");
        NameValuePair replyBody = new NameValuePair(
                "tb_ReplyBody$_$Editor",
                "[img=http://forum.csdn.net/PointForum/ui/scripts/csdn/Plugin/003/monkey/1.gif][/img]");
        NameValuePair submitButton = new NameValuePair("bt_submit", "提交回复");
        NameValuePair[] params = new NameValuePair[] {
                viewState, eventValidation, replyBody, submitButton };

        for (String url : urlList) {
            // A fresh client per thread, as before, so cookies set by one response are not
            // replayed on the next request.
            HttpClient httpClient = getHc();
            httpClient.getHostConfiguration().getParams().setParameter(
                    "http.default-headers", headers);

            PostMethod post = new PostMethod(url);
            try {
                post.setRequestBody(params);
                int statusCode = httpClient.executeMethod(post);
                if (statusCode != HttpStatus.SC_OK) {
                    System.err.println("Method failed: " + post.getStatusLine());
                }
                System.out.println("Hello,World");
            } finally {
                // Release the connection even when the request throws.
                post.releaseConnection();
            }
        }
    }
}



package com.ws;

import java.awt.Image;
import java.awt.image.BufferedImage;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.commons.lang.StringUtils;


public class TestUrl {

public static final String COOKIE = "登陆后cookie"                  
* 连接超时
*/
private static int connectTimeOut = 5000;

/**
* 读取数据超时
*/
private static int readTimeOut = 10000;

/**
* 请求编码
*/
private static String requestEncoding = "GBK";









/**
* 得到大分类的帖子 如:java
* @param java
* @return
* @throws Exception
*/
public static List getCsdn(String java) throws Exception {
List<String> urlList = new ArrayList<String>();
String url = "http://forum.csdn.net/SList/FreeZone";
String patternStrs = "<td class=\"caption\" style=\"word-break: break-all\">(.*?)</td>";
String p = "<a target=\"_blank\" title=\"(.*?)</a>";
String href = "<a target=\"_blank\" title=\"(.*?)\" href=\"(.*?)\" >(.*?)</a>";
String s = "";
String h = "";
URL u = new URL(url);
StringBuffer sTotalString = new StringBuffer("");
HttpURLConnection conn = (HttpURLConnection) u.openConnection();
conn.addRequestProperty("Cookie", COOKIE);

String sCurrentLine = "";
BufferedReader l_reader = new java.io.BufferedReader(
new java.io.InputStreamReader(u.openStream()));
while ((sCurrentLine = l_reader.readLine()) != null) {
sTotalString = sTotalString.append(new StringBuffer(sCurrentLine
+ "\n"));
s = RegexpCommon.getMatchString(sCurrentLine, p, 0);
if (StringUtils.isNotBlank(s)) {
h = RegexpCommon.getMatchString(s, href, 2);
System.out.println("得到的URL为:" + h);
urlList.add(h);
}

}

// byte[] b = (sTotalString.toString()).getBytes();
// BufferedOutputStream out = new BufferedOutputStream(
// new FileOutputStream("c:/test.html"));
// out.write(b);
return urlList;
}

/**
* 得到帖子的回复地址
* @param urlList
* @return
* @throws Exception
*/
public static List getCardPostUrl(List<String> urlList) throws Exception {
List<String> postList = new ArrayList<String>();
URL u = null;
HttpURLConnection conn = null;
BufferedReader l_reader = null;
String s = "";
for (String URL : urlList) {
u = new URL(URL);
StringBuffer sTotalString = new StringBuffer("");
conn = (HttpURLConnection) u.openConnection();
conn.addRequestProperty("Cookie", COOKIE);

String sCurrentLine = "";

String patternStrs = "iframe class=\"replyframe\" id=\"replyframe\" frameborder=\"0\" scrolling=\"no\" height=\"415px\" width=\"100%\" src=\"(.*?)\" csdnid=\"rframe\">";
l_reader = new java.io.BufferedReader(
new java.io.InputStreamReader(u.openStream()));
while ((sCurrentLine = l_reader.readLine()) != null) {
sTotalString = sTotalString.append(new StringBuffer(sCurrentLine
+ "\n"));
}
conn.disconnect();
s = RegexpCommon.getMatchString(sTotalString.toString(),
patternStrs, 0);
s = s.split("src=\"")[1].split("\" csdnid")[0];
System.out.println(s);
postList.add(s);
}
return postList;



}

public static void main(String[] args) throws Exception {
String s = "http://forum.csdn.net/PointForum/Forum/ReplyT.aspx?forumID=a3049f56-b572-48f5-89be-4797b70d71cd&topicID=b9fbc233-fadf-441b-aad8-2d6a77641f16&postDate=2010-02-01+08%3a40%3a49&v=13";
String d = "tb_ReplyBody___Editor=回复测试!!!";
// GetResponseDataByID(s, d);
//GetResponseDataByID(s, d);
//t();
List<String> urlList = getCsdn(null);
}

}

1.登录验证码一直没能攻克,所以改为直接使用登录后的 cookie 来通过身份验证。

2.注释少了点。

 

 

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics