C#使用正则表达式移除所有的Html标记,返回纯文本
C#使用正则表达式移除所有的Html标记,返回纯文本
C# Code:
C# 全选
/// <summary>
/// Strips all HTML markup from a string using regular expressions and returns the
/// remaining text, HTML-encoded and trimmed.
/// </summary>
/// <param name="HtmlString">The HTML fragment to clean. May be null or empty.</param>
/// <returns>
/// The text content of <paramref name="HtmlString"/> with script blocks, comments and
/// tags removed and a small set of character entities decoded. The result is passed
/// through HTML encoding before being returned, so it is safe to embed in a page.
/// Returns an empty string when the input is null or empty.
/// </returns>
private static string RemoveHTML(string HtmlString)
{
    // Regex.Replace throws on a null input; treat "nothing in" as "nothing out".
    if (string.IsNullOrEmpty(HtmlString))
    {
        return string.Empty;
    }

    string text = HtmlString;

    // Remove script blocks. Singleline lets '.' cross line breaks so that
    // multi-line scripts are removed together with their content.
    text = Regex.Replace(text, @"<script[^>]*?>.*?</script>", "",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Remove complete comments before generic tag stripping; otherwise a comment
    // containing '>' is cut at the first '>' and the rest leaks into the output.
    text = Regex.Replace(text, @"<!--.*?-->", "", RegexOptions.Singleline);

    // Remove remaining tags.
    text = Regex.Replace(text, @"<(.[^>]*)>", "");

    // Collapse a line break together with the whitespace that follows it.
    text = Regex.Replace(text, @"([\r\n])[\s]+", "");

    // Clean up unbalanced comment markers that survived the passes above.
    text = Regex.Replace(text, @"-->", "");
    text = Regex.Replace(text, @"<!--.*", "");

    // Drop stray angle brackets and CRLF pairs left over from malformed markup.
    // string.Replace returns a new string, so the result must be assigned.
    // This runs before entity decoding so that text which was legitimately
    // escaped as &lt; / &gt; is preserved rather than deleted.
    text = text.Replace("<", "").Replace(">", "").Replace("\r\n", "");

    // Decode the supported entities in a single pass. Decoding them one after
    // another would unescape twice (for example "&amp;lt;" -> "&lt;" -> "<").
    text = Regex.Replace(text, @"&(quot|amp|lt|gt|nbsp|iexcl|cent|pound|copy|#\d+);",
        new MatchEvaluator(DecodeEntity), RegexOptions.IgnoreCase);

    // WebUtility.HtmlEncode does not need an active HttpContext, so this also
    // works outside of a web request (background threads, console tools, tests).
    return System.Net.WebUtility.HtmlEncode(text).Trim();
}

/// <summary>
/// Maps one matched character entity to its replacement text.
/// </summary>
/// <param name="entity">
/// A match whose first group holds the entity name, or '#' followed by decimal digits.
/// </param>
/// <returns>
/// The decoded character for the supported entities; an empty string for any other
/// numeric entity, which removes it from the text.
/// </returns>
private static string DecodeEntity(Match entity)
{
    switch (entity.Groups[1].Value.ToLowerInvariant())
    {
        case "quot":
        case "#34":
            return "\"";
        case "amp":
        case "#38":
            return "&";
        case "lt":
        case "#60":
            return "<";
        case "gt":
        case "#62":
            return ">";
        case "nbsp":
        case "#160":
            return " ";
        case "iexcl":
        case "#161":
            return "\u00A1";
        case "cent":
        case "#162":
            return "\u00A2";
        case "pound":
        case "#163":
            return "\u00A3";
        case "copy":
        case "#169":
            return "\u00A9";
        default:
            // Only numeric entities without a mapping reach this branch.
            return "";
    }
}
//来源:C/S框架网(www.csframework.com) QQ:23404761
版权声明:本文为开发框架文库发布内容,转载请附上原文出处链接
NewDoc C/S框架网