2008-01-06
html过滤
关键字: 过滤以下例子转自一个网友并非本人所写,因为觉得好所以贴出来一起分享
import java.util.regex.Pattern;
public class Test
{
public static void main(String[] args)
{
String ww="<html>sss<body>ss</body>ssss</html>";
String ff=html2Text(ww);
System.out.println(ff);
}
public static String html2Text(String inputString) {
String htmlStr = inputString; // 含html标签的字符串
String textStr = "";
java.util.regex.Pattern p_script;
java.util.regex.Matcher m_script;
java.util.regex.Pattern p_style;
java.util.regex.Matcher m_style;
java.util.regex.Pattern p_html;
java.util.regex.Matcher m_html;
try {
String regEx_script = "<[\\s]*?script[^>]*?>[\\s\\S]*?<[\\s]*?\\/[\\s]*?script[\\s]*?>"; // 定义script的正则表达式{或<script>]*?>[\s\S]*?<\/script>
// }
String regEx_style = "<[\\s]*?style[^>]*?>[\\s\\S]*?<[\\s]*?\\/[\\s]*?style[\\s]*?>"; // 定义style的正则表达式{或<style>]*?>[\s\S]*?<\/style>
// }
String regEx_html = "<[^>]+>"; // 定义HTML标签的正则表达式
p_script = Pattern.compile(regEx_script, Pattern.CASE_INSENSITIVE);
m_script = p_script.matcher(htmlStr);
htmlStr = m_script.replaceAll(""); // 过滤script标签
p_style = Pattern.compile(regEx_style, Pattern.CASE_INSENSITIVE);
m_style = p_style.matcher(htmlStr);
htmlStr = m_style.replaceAll(""); // 过滤style标签
p_html = Pattern.compile(regEx_html, Pattern.CASE_INSENSITIVE);
m_html = p_html.matcher(htmlStr);
htmlStr = m_html.replaceAll(""); // 过滤html标签
textStr = htmlStr;
} catch (Exception e) {
System.err.println("Html2Text: " + e.getMessage());
}
return textStr;
}
} 这是struts中的html过滤
public static String filter(String value)
{
if(value == null || value.length() == 0)
return value;
StringBuffer result = null;
String filtered = null;
for(int i = 0; i < value.length(); i++)
{
filtered = null;
switch(value.charAt(i))
{
case 60: // '<'
filtered = "<";
break;
case 62: // '>'
filtered = ">";
break;
case 38: // '&'
filtered = "&";
break;
case 34: // '"'
filtered = """;
break;
case 39: // '\''
filtered = "'";
break;
}
if(result == null)
{
if(filtered != null)
{
result = new StringBuffer(value.length() + 50);
if(i > 0)
result.append(value.substring(0, i));
result.append(filtered);
}
} else
if(filtered == null)
result.append(value.charAt(i));
else
result.append(filtered);
}
return result != null ? result.toString() : value;
}







评论排行榜