再介紹一種用 dreamweaver 清除HTML樣式的方法。
有時候製作網頁,直接從網上複製的網頁內容,或從WORD、PS等軟件生成的HTML都有許多不必要的CSS代碼,影響加載速度,想徹底清除,用dreamweaver再加工,但找不到類似的功能,可以用以下方法:
使用dreamweaver的搜索替換功能,利用正則表達式,可以輕松清除許多垃圾代碼;
如清除內聯樣式:只需要搜索: style=".*"
,替換全部即可。
清除其它的代碼也一樣,如 alt="……"
之類
搜索: alt=".*"
,替換全部即可。
這是一個刪除HTML樣式、腳本的方法,你可以看下:
/// <summary>
/// Reduces an HTML fragment to plain text and returns it HTML-encoded.
/// </summary>
/// <remarks>
/// Processing order: script blocks are removed together with their content,
/// all remaining tags are stripped, leftover comment markers are removed, a
/// small set of character entities is decoded, unknown numeric character
/// references are dropped, stray angle brackets and CRLF pairs are removed,
/// and finally the text is HTML-encoded and trimmed.
/// </remarks>
/// <param name="Htmlstring">The HTML to clean. May be null or empty.</param>
/// <returns>
/// The encoded plain text, or an empty string when the input is null or empty.
/// </returns>
public static string NoHTML(string Htmlstring)
{
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }

    // Remove script blocks including their content. Singleline lets '.' cross
    // line breaks so multi-line scripts do not leak their source into the text.
    Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Remove every remaining tag.
    Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "",
        RegexOptions.IgnoreCase);

    // Collapse a line break followed by further whitespace.
    Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "",
        RegexOptions.IgnoreCase);

    // Remove comment markers that the tag pattern above did not consume
    // (e.g. a comment that contains '>' or is never closed).
    Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

    // Decode the handful of entities this helper knows about.
    Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"",
        RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&",
        RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<",
        RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">",
        RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ",
        RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\u00a1",
        RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\u00a2",
        RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\u00a3",
        RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\u00a9",
        RegexOptions.IgnoreCase);

    // Drop any other numeric character reference. The "&#" prefix is required;
    // without it ordinary text such as "10;" would be deleted.
    Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "",
        RegexOptions.IgnoreCase);

    // Strings are immutable: the result of Replace must be assigned back.
    Htmlstring = Htmlstring.Replace("<", "");
    Htmlstring = Htmlstring.Replace(">", "");
    Htmlstring = Htmlstring.Replace("\r\n", "");

    // WebUtility does not depend on an ambient HttpContext, so the helper also
    // works outside of a web request (background jobs, unit tests).
    return System.Net.WebUtility.HtmlEncode(Htmlstring).Trim();
}
下面是快速去除樣式、獲得純凈的text文本的代碼。
這種去除樣式的代碼超級簡單,直接拿過來用就可以,方便快捷。
代碼如下(此代碼可以隨意放到該頁面的任意位置):
<%
' Returns the plain text of an HTML fragment.
'
' Script, style and object elements are removed together with their content;
' every other tag is removed but its inner text is kept.
'
' sOut: the HTML to clean. Null or Empty yields an empty string.
Public Function TextOnly(ByVal sOut)
    ' Null would make RegExp.Replace raise a runtime error, so return early.
    If IsNull(sOut) Or IsEmpty(sOut) Then
        TextOnly = ""
        Exit Function
    End If

    Dim re:Set re=New RegExp
    re.IgnoreCase=True
    re.Global=True

    ' [\s\S]*? is used instead of .* for two reasons: '.' does not match line
    ' breaks, and a greedy match would swallow the text between two blocks.
    re.Pattern="<script\b[^>]*>[\s\S]*?</script>"
    sOut=re.Replace(sOut,"")
    re.Pattern="<style\b[^>]*>[\s\S]*?</style>"
    sOut=re.Replace(sOut,"")
    re.Pattern="<object\b[^>]*>[\s\S]*?</object>"
    sOut=re.Replace(sOut,"")

    ' Strip every remaining tag, keeping the text between tags.
    re.Pattern="(<(.[^>]*)>)"
    sOut=re.Replace(sOut,"")

    Set re=Nothing
    TextOnly=sOut
End Function
%>
用法:TextOnly(從數據庫提出來的信息內容)
// Remove script blocks together with their content ([\s\S] also spans line breaks).
Htmlstring = Regex.Replace(Htmlstring, @"<script(\s[^>]*?)?>[\s\S]*?</script>", "", RegexOptions.IgnoreCase);
// Remove style blocks, whether or not the opening tag carries attributes.
Htmlstring = Regex.Replace(Htmlstring, @"<style(\s[^>]*?)?>[\s\S]*?</style>", "", RegexOptions.IgnoreCase);
// Remove every remaining HTML tag, keeping the text between tags.
Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
/// <summary>
/// Strips active content and all markup from an HTML fragment and returns the
/// remaining text with every space character removed.
/// </summary>
/// <remarks>
/// Script, iframe and frameset elements are removed together with their
/// content. Script-scheme href values and inline event-handler attributes are
/// neutralised first, so that a malformed tag which survives the final tag
/// strip does not keep them. All patterns are lazy or confined to a single
/// tag, so text between two matching elements is preserved.
/// </remarks>
/// <param name="html">The HTML to clean. May be null or empty.</param>
/// <returns>
/// The cleaned text, or an empty string when the input is null or empty.
/// </returns>
public string checkStr(string html)
{
    if (string.IsNullOrEmpty(html))
    {
        return string.Empty;
    }

    const System.Text.RegularExpressions.RegexOptions options =
        System.Text.RegularExpressions.RegexOptions.IgnoreCase;

    // Script blocks, content included. Lazy, so that text between two separate
    // script blocks is kept.
    html = System.Text.RegularExpressions.Regex.Replace(
        html, @"<script[\s\S]+?</script *>", "", options);

    // href values that use a script scheme (javascript:, vbscript:, ...).
    // [^>] keeps the match inside the tag that contains the href.
    html = System.Text.RegularExpressions.Regex.Replace(
        html, @" href *= *[^>]*?script *:", "", options);

    // Inline event handlers (onclick=, onload=, ...) are renamed to an inert
    // attribute name.
    html = System.Text.RegularExpressions.Regex.Replace(
        html, @" on\w+\s*=", " _disibledevent=", options);

    // Embedded frames, content included.
    html = System.Text.RegularExpressions.Regex.Replace(
        html, @"<iframe[\s\S]+?</iframe *>", "", options);
    html = System.Text.RegularExpressions.Regex.Replace(
        html, @"<frameset[\s\S]+?</frameset *>", "", options);

    // Every remaining tag (this covers <p>, <img>, <strong>, ...).
    html = System.Text.RegularExpressions.Regex.Replace(
        html, @"<[^>]*>", "", options);

    // NOTE(review): this removes every space, including spaces between words;
    // confirm that is intended for non-CJK content.
    html = html.Replace(" ", "");

    return html;
}