国产探花免费观看_亚洲丰满少妇自慰呻吟_97日韩有码在线_资源在线日韩欧美_一区二区精品毛片,辰东完美世界有声小说,欢乐颂第一季,yy玄幻小说排行榜完本

首頁 > 學院 > 開發設計 > 正文

隨手正則寫的CSDN【只看樓主】功能

2019-11-14 16:44:33
字體:
來源:轉載
供稿:網友

寫這個的時候居然沒有看到原來CSDN已經有這個功能了,寫完代碼了突然發現原來早就已經有了。

現把代碼貼出來吧,雖然有很多解析HTML的開源類庫(如:http://htmlagilitypack.codeplex.com/),但我一直習慣於用正則匹配。

截圖:

呵呵,起碼還能看吧@——#

  /// <summary>
  /// Click handler: downloads the CSDN thread whose URL is in <c>txtCsdnUrl</c>,
  /// extracts the posts written by the thread starter from every page and shows
  /// the combined text in <c>richTextBox1</c>.
  /// </summary>
  /// <param name="sender">The control that raised the event (unused).</param>
  /// <param name="e">Event data (unused).</param>
  private void button1_Click(object sender, EventArgs e)
  {
      string url = txtCsdnUrl.Text.Trim();
      if (string.IsNullOrEmpty(url))
      {
          MessageBox.Show("請輸入地址");
          return;
      }

      try
      {
          // The first download doubles as page 1, so it is not fetched again below.
          string htmlSource = GetHtmlSource(url);
          int pageCount = GetPageCount(htmlSource);

          StringBuilder context = new StringBuilder(GetLZArticle(htmlSource));

          // Remaining pages are addressed as "<url>?page=N".
          for (int page = 2; page <= pageCount; page++)
          {
              context.Append(GetLZArticle(GetHtmlSource(url + "?page=" + page)));
          }

          richTextBox1.Text = context.ToString();
      }
      catch (WebException ex)
      {
          // A failed download should not take the whole form down.
          MessageBox.Show(ex.Message);
      }
  }

  /// <summary>
  /// Downloads the page at the given address and returns its body decoded as UTF-8.
  /// </summary>
  /// <param name="Url">Address of the page to download.</param>
  /// <returns>The response body as a string.</returns>
  /// <exception cref="WebException">The request failed.</exception>
  public string GetHtmlSource(string Url)
  {
      using (WebClient client = new WebClient())
      {
          client.Headers.Add("user-agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705;)");

          using (Stream data = client.OpenRead(Url))
          using (StreamReader reader = new StreamReader(data, Encoding.UTF8))
          {
              return reader.ReadToEnd();
          }
      }
  }

  /// <summary>
  /// Gets the total number of pages of a thread by reading the last option of
  /// the "jumpMenu" select element.
  /// URL format: http://bbs.csdn.net/topics/390730011?page=2
  /// </summary>
  /// <param name="HtmlSource">HTML of any page of the thread.</param>
  /// <returns>
  /// The highest page number, or 0 when the page has no jump menu or the last
  /// option is not a number.
  /// </returns>
  public int GetPageCount(string HtmlSource)
  {
      Regex menuRegex = new Regex("<select class=\"jumpMenu\" name=\"jumpMenu\">(?<val>.*?)</select>", RegexOptions.Singleline | RegexOptions.IgnoreCase);
      Regex optionRegex = new Regex("<option.*?>(?<val>.*?)</option>", RegexOptions.Singleline | RegexOptions.IgnoreCase);

      string menuHtml = menuRegex.Match(HtmlSource).Groups["val"].Value;
      MatchCollection options = optionRegex.Matches(menuHtml);

      // No jump menu was matched: there is no last option to read.
      if (options.Count == 0)
      {
          return 0;
      }

      int pageCount;
      int.TryParse(options[options.Count - 1].Groups["val"].Value, out pageCount);
      return pageCount;
  }

  /// <summary>
  /// Gets the thread title from the "title text_overflow" span.
  /// </summary>
  /// <param name="HtmlSource">HTML of the page.</param>
  /// <returns>The title markup, or an empty string when the span is not found.</returns>
  public string GetArticleTitle(string HtmlSource)
  {
      Regex reg = new Regex("<span class=\"title text_overflow\">(?<title>.*?)</span>", RegexOptions.Singleline | RegexOptions.IgnoreCase);

      return reg.Match(HtmlSource).Groups["title"].Value;
  }

  /// <summary>
  /// Gets the name shown in the first "p-author" link of the page.
  /// </summary>
  /// <param name="HtmlSource">HTML of the page.</param>
  /// <returns>The author name, or an empty string when the link is not found.</returns>
  public string GetAuthorName(string HtmlSource)
  {
      Regex regex = new Regex("<a class=\"p-author\" href=\"#\">(?<value>.*?)</a>");

      return regex.Match(HtmlSource).Groups["value"].Value;
  }

  /// <summary>
  /// Collects the bodies of all posts on the page whose data-username equals the
  /// name returned by <see cref="GetAuthorName"/>, each followed by a separator line.
  /// </summary>
  /// <param name="HtmlSource">HTML of the page.</param>
  /// <returns>
  /// The concatenated post bodies with "&lt;br /&gt;" replaced by line breaks;
  /// an empty string when no post matches.
  /// </returns>
  public string GetLZArticle(string HtmlSource)
  {
      // The name is inserted into a pattern, so regex metacharacters in it must be escaped.
      string authorName = Regex.Escape(GetAuthorName(HtmlSource));

      Regex regex = new Regex("<td valign=\"top\" class=\"post_info .*?\" data-username=\"" + authorName + "\".*?>.*?<div class=\"post_body\">(?<value>.*?)</div>.*?</td>", RegexOptions.Singleline | RegexOptions.IgnoreCase);

      StringBuilder result = new StringBuilder();

      // Run the match once and walk the collection instead of re-matching per iteration.
      foreach (Match post in regex.Matches(HtmlSource))
      {
          result.Append(post.Groups["value"].Value);
          result.Append("--------------------分隔線--------------------");
      }

      return result.ToString().Trim().Replace("<br />", "\r\n");
  }

代碼都在這裡了。


發表評論 共有條評論
用戶名: 密碼:
驗證碼: 匿名發表
主站蜘蛛池模板: 太谷县| 金山区| 闽侯县| 唐河县| 团风县| 安平县| 东兰县| 洛川县| 平陆县| 门头沟区| 启东市| 东阿县| 丹凤县| 大姚县| 营口市| 胶南市| 兴和县| 如皋市| 淮安市| 杭锦旗| 礼泉县| 佛冈县| 闵行区| 建水县| 义乌市| 湖北省| 楚雄市| 大足县| 班戈县| 图木舒克市| 江安县| 若羌县| 息烽县| 兰坪| 尼木县| 罗源县| 武定县| 武功县| 札达县| 怀远县| 聊城市|