`

解析文本内容中的url

 
阅读更多
package jp.biziq.sxf;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * URL utilities: finds URL-like substrings in text and wraps them in HTML anchors.
 *
 * @author 邵祥飞
 */
public class URLUtil {

    /**
     * Regular expression for URL-like substrings.
     *
     * <p>Structure: an optional {@code http://}, {@code https://} or {@code ftp://} scheme,
     * one or more dot-separated host labels, a final label of 2-6 letters/underscores, and an
     * optional path or query introduced by {@code /} or {@code ?}.
     *
     * <p>The path character class lists its punctuation explicitly and ends with a literal
     * {@code -}. The earlier form {@code \+-_} was parsed as the range {@code '+'..'_'}, which
     * also admitted {@code < > [ \ ] ^} and therefore swallowed HTML tags that directly followed
     * a URL path. The URL-legal punctuation that range used to admit ({@code , : ; @}) is kept so
     * that URLs which matched in full before still match in full.
     */
    private static final String URL_REGEX =
            "((https?|ftp)://)?([a-zA-Z0-9_\\-]+\\.)*[a-zA-Z0-9_\\-]+\\.[a-zA-Z_]{2,6}"
            + "((/|\\?)[a-zA-Z0-9_.?=/#%&+,:;@-]+|/|)";

    /** Compiled once and shared; the expression is matched case-insensitively. */
    private static final Pattern URL_PATTERN =
            Pattern.compile(URL_REGEX, Pattern.CASE_INSENSITIVE);

    /** Opening tag placed before every matched URL. */
    private static final String LINK_PREFIX = "<a href='#'>";

    /** Closing tag placed after every matched URL. */
    private static final String LINK_SUFFIX = "</a>";

    /**
     * Wraps every URL-like substring of {@code str} in an HTML anchor element.
     *
     * <p>Text between matches is copied through unchanged; it is not HTML-escaped.
     *
     * <p>NOTE(review): the generated anchor always uses the fixed placeholder
     * {@code href='#'} rather than the matched URL. This is kept as-is because callers may
     * depend on the exact markup; confirm whether the real URL should be used instead.
     *
     * @param str the text to scan; may be {@code null}
     * @return the text with each match wrapped as {@code <a href='#'>match</a>}, or an empty
     *         string when {@code str} is {@code null} or empty
     */
    public static String autoLinkURLs(String str) {
        if (str == null || str.isEmpty()) {
            return "";
        }

        Matcher matcher = URL_PATTERN.matcher(str);
        StringBuilder out = new StringBuilder(str.length() + 32);
        int copiedUpTo = 0; // index just past the last character already written to out

        while (matcher.find()) {
            // Unmatched text between the previous match (or the start) and this match.
            out.append(str, copiedUpTo, matcher.start());
            // The match itself, wrapped in the anchor.
            out.append(LINK_PREFIX).append(matcher.group()).append(LINK_SUFFIX);
            copiedUpTo = matcher.end();
        }
        // Trailing unmatched text after the last match.
        out.append(str, copiedUpTo, str.length());
        return out.toString();
    }

    /**
     * Manual smoke test: prints the auto-linked form of a sample string that mixes Chinese
     * text, bare host names, a URL with a path and query, and an HTML tag.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String str = "www.baidu.com是的范德萨msg.biz-iq.jp/directMail/show?id=20是的范德萨www.baidu.com<br/>师德师风sddfsf.sdfsd.sdfsd是打发打发的发生地 ";
        System.out.println(URLUtil.autoLinkURLs(str)); //test
    }
}
分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics