国产探花免费观看_亚洲丰满少妇自慰呻吟_97日韩有码在线_资源在线日韩欧美_一区二区精品毛片,辰东完美世界有声小说,欢乐颂第一季,yy玄幻小说排行榜完本

首頁 > 編程 > C# > 正文

基于C#實現網頁爬蟲

2019-10-29 21:30:43
字體:
來源:轉載
供稿:網友
這篇文章主要為大家詳細介紹了基于C#實現網頁爬蟲的相關資料,具有一定的參考價值,感興趣的小伙伴們可以參考一下
 

本文實例為大家分享了基于C#實現網頁爬蟲的詳細代碼,供大家參考,具體內容如下

HTTP請求工具類:

功能:

1、獲取網頁html

2、下載網絡圖片

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;

namespace Utils
{
    /// <summary>
    /// HTTP request helpers: fetch a page's HTML and download a remote file
    /// into a "download" folder next to the application executable.
    /// </summary>
    public class HttpRequestUtil
    {
        /// <summary>User-Agent header sent with every request.</summary>
        private const string UserAgent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)";

        /// <summary>Name of the folder (under the application start-up path) that receives downloads.</summary>
        private const string DownloadFolderName = "download";

        /// <summary>
        /// Fetches <paramref name="url"/> with an HTTP GET and returns the response body decoded as UTF-8.
        /// </summary>
        /// <param name="url">Absolute http/https URL of the page.</param>
        /// <returns>The response body as a string.</returns>
        /// <exception cref="ArgumentException"><paramref name="url"/> is not an http/https URL.</exception>
        /// <exception cref="WebException">The request failed.</exception>
        public static string GetPageHtml(string url)
        {
            HttpWebRequest request = CreateRequest(url);

            // Nothing is sent until GetResponse() is called; the using blocks make sure the
            // connection is released even when reading the body throws.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream responseStream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(responseStream, Encoding.UTF8))
            {
                return reader.ReadToEnd();
            }
        }

        /// <summary>
        /// Downloads <paramref name="url"/> into the "download" folder under
        /// <see cref="Application.StartupPath"/>, naming the file after the last path segment of the URL.
        /// Does nothing when a file with that name already exists.
        /// </summary>
        /// <param name="url">Absolute http/https URL of the file.</param>
        /// <exception cref="ArgumentException">
        /// <paramref name="url"/> is not an http/https URL or has no file name segment.
        /// </exception>
        /// <exception cref="WebException">The request failed.</exception>
        /// <exception cref="IOException">The file could not be written.</exception>
        public static void HttpDownloadFile(string url)
        {
            string fileName = GetFileNameFromUrl(url);
            string path = Path.Combine(Application.StartupPath, DownloadFolderName);

            // CreateDirectory is a no-op when the directory already exists.
            Directory.CreateDirectory(path);

            string filePathName = Path.Combine(path, fileName);
            if (File.Exists(filePathName))
            {
                return;
            }

            HttpWebRequest request = CreateRequest(url);
            request.Proxy = null;

            // Download into a uniquely named temporary file and rename it only once the body
            // has been written completely. Otherwise a failed transfer would leave a truncated
            // file behind that the File.Exists check above would treat as finished forever.
            string tempPathName = filePathName + "." + Guid.NewGuid().ToString("N") + ".part";
            try
            {
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                using (Stream responseStream = response.GetResponseStream())
                using (FileStream fileStream = new FileStream(tempPathName, FileMode.Create, FileAccess.Write))
                {
                    responseStream.CopyTo(fileStream);
                }

                // Another caller may have finished the same download in the meantime.
                if (!File.Exists(filePathName))
                {
                    File.Move(tempPathName, filePathName);
                }
            }
            finally
            {
                // Still present only if the download failed or the target already existed.
                if (File.Exists(tempPathName))
                {
                    File.Delete(tempPathName);
                }
            }
        }

        /// <summary>Creates an HTTP request for <paramref name="url"/> with the shared User-Agent set.</summary>
        private static HttpWebRequest CreateRequest(string url)
        {
            // WebRequest.Create returns a non-HTTP request type for schemes such as file:// or ftp://.
            HttpWebRequest request = WebRequest.Create(url) as HttpWebRequest;
            if (request == null)
            {
                throw new ArgumentException("Only http/https URLs are supported: " + url, "url");
            }

            request.UserAgent = UserAgent;
            return request;
        }

        /// <summary>Returns the last path segment of <paramref name="url"/>, ignoring any query string or fragment.</summary>
        private static string GetFileNameFromUrl(string url)
        {
            if (string.IsNullOrEmpty(url))
            {
                throw new ArgumentException("URL must not be empty.", "url");
            }

            // Drop "?query" / "#fragment" so they neither end up in the file name
            // nor confuse the search for the last '/'.
            string withoutQuery = url;
            int cut = withoutQuery.IndexOfAny(new char[] { '?', '#' });
            if (cut >= 0)
            {
                withoutQuery = withoutQuery.Substring(0, cut);
            }

            string fileName = withoutQuery.Substring(withoutQuery.LastIndexOf('/') + 1);
            if (fileName.Length == 0)
            {
                throw new ArgumentException("URL does not end in a file name: " + url, "url");
            }

            return fileName;
        }
    }
}

多線程爬取網頁代碼:

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;
using Utils;

namespace 爬蟲
{
    /// <summary>
    /// Crawler window. button1 starts a crawl of 100 list pages spread over 10 worker threads,
    /// button2 stops it, button3 pauses it and button4 resumes it. The buttons and labels used
    /// here are not declared in this file (presumably in the designer part of this partial class).
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>Number of worker threads started per crawl.</summary>
        private const int ThreadCount = 10;

        /// <summary>Number of list pages each worker thread processes.</summary>
        private const int PagesPerThread = 10;

        /// <summary>Site that list pages and relative profile links are resolved against.</summary>
        private const string SiteRoot = "http://tt.mop.com";

        /// <summary>Matches a user-picture anchor on a list page; group 2 is the href value.</summary>
        private static readonly Regex UserLinkRegex =
            new Regex("<a[\\s]+class=\"J-userPic([^<>]*?)[\\s]+href=\"([^\"]*?)\"");

        /// <summary>Matches an image paragraph on a profile page; group 1 is the src value.</summary>
        private static readonly Regex ImageRegex =
            new Regex("<p class=\"tc mb10\"><img[\\s]+src=\"([^\"]*?)\"");

        List<Thread> threadList = new List<Thread>();
        Thread thread = null;

        /// <summary>Signalled while workers may run; reset while the crawl is paused.</summary>
        private readonly ManualResetEventSlim pauseGate = new ManualResetEventSlim(true);

        /// <summary>State of the crawl started by the most recent button1 click, or null before the first one.</summary>
        private CrawlRun currentRun;

        /// <summary>Counters and cancellation shared by all worker threads of one crawl.</summary>
        private sealed class CrawlRun
        {
            public readonly CancellationTokenSource Cancellation = new CancellationTokenSource();
            public readonly DateTime StartedUtc = DateTime.UtcNow;

            // Updated with Interlocked from several threads at once.
            public int Pages;
            public int Persons;
            public int Images;
        }

        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>Starts a new crawl on <see cref="ThreadCount"/> background threads.</summary>
        private void button1_Click(object sender, EventArgs e)
        {
            button3.Enabled = true;
            button2.Enabled = true;
            button1.Enabled = false;
            lblPage.Text = "已完成頁數:0";

            // Make sure workers left over from an earlier run stop, and forget them.
            CancelCurrentRun();
            threadList.Clear();

            CrawlRun run = new CrawlRun();
            currentRun = run;
            pauseGate.Set();

            for (int i = 1; i <= ThreadCount; i++)
            {
                // Copy the loop variable so each thread gets its own group number.
                int group = i;
                thread = new Thread(() => CrawlPageGroup(group, run));
                // Background threads do not keep the process alive after the form closes.
                thread.IsBackground = true;
                thread.Start();
                threadList.Add(thread);
            }
        }

        /// <summary>Stops the current crawl.</summary>
        private void button2_Click(object sender, EventArgs e)
        {
            CancelCurrentRun();

            button1.Enabled = true;
            button2.Enabled = false;
            button3.Enabled = false;
            button4.Enabled = false;
        }

        /// <summary>Asks the workers to stop when the window is closing.</summary>
        private void Form1_FormClosing(object sender, FormClosingEventArgs e)
        {
            CancelCurrentRun();
        }

        /// <summary>Pauses the crawl; workers block at their next checkpoint.</summary>
        private void button3_Click(object sender, EventArgs e)
        {
            pauseGate.Reset();
            button3.Enabled = false;
            button4.Enabled = true;
        }

        /// <summary>Resumes a paused crawl.</summary>
        private void button4_Click(object sender, EventArgs e)
        {
            pauseGate.Set();
            button3.Enabled = true;
            button4.Enabled = false;
        }

        /// <summary>Cancels the current run (if any) and wakes paused workers so they can notice it.</summary>
        private void CancelCurrentRun()
        {
            CrawlRun run = currentRun;
            if (run != null)
            {
                // Cancel before opening the gate so a woken worker always sees the cancellation.
                run.Cancellation.Cancel();
            }

            pauseGate.Set();
        }

        /// <summary>Blocks while the crawl is paused; returns false once the run has been cancelled.</summary>
        private bool WaitUntilAllowed(CrawlRun run)
        {
            pauseGate.Wait();
            return !run.Cancellation.IsCancellationRequested;
        }

        /// <summary>Atomically reads a counter that other threads update with Interlocked.</summary>
        private static int ReadCounter(ref int counter)
        {
            return Interlocked.CompareExchange(ref counter, 0, 0);
        }

        /// <summary>
        /// Queues <paramref name="action"/> on the UI thread without blocking the worker.
        /// The action is dropped when the run was cancelled or the window is already gone.
        /// </summary>
        private void PostToUi(CrawlRun run, Action action)
        {
            if (run.Cancellation.IsCancellationRequested || !IsHandleCreated)
            {
                return;
            }

            try
            {
                BeginInvoke(new Action(() =>
                {
                    // Re-check on the UI thread: the run may have been stopped while queued.
                    if (!run.Cancellation.IsCancellationRequested)
                    {
                        action();
                    }
                }));
            }
            catch (InvalidOperationException)
            {
                // The window handle was destroyed between the check and the call (form closing).
                // ObjectDisposedException derives from InvalidOperationException and lands here too.
            }
        }

        /// <summary>
        /// Worker thread body: crawls the <see cref="PagesPerThread"/> list pages that belong to
        /// the 1-based <paramref name="group"/> and reports progress after each page.
        /// </summary>
        private void CrawlPageGroup(int group, CrawlRun run)
        {
            for (int j = 1; j <= PagesPerThread; j++)
            {
                if (!WaitUntilAllowed(run))
                {
                    return;
                }

                try
                {
                    // Local to this thread, so another worker cannot overwrite it mid-request.
                    int index = (group - 1) * PagesPerThread + j;
                    CrawlListPage(index, run);
                }
                catch (Exception ex)
                {
                    // Best effort: a failing page must not stop the remaining pages.
                    System.Diagnostics.Trace.WriteLine("Crawling a list page failed: " + ex);
                }

                if (run.Cancellation.IsCancellationRequested)
                {
                    return;
                }

                // A page counts as finished even when it failed.
                int finishedPages = Interlocked.Increment(ref run.Pages);
                PostToUi(run, () =>
                {
                    lblPage.Text = "已完成頁數:" + ReadCounter(ref run.Pages).ToString();
                });

                // Interlocked.Increment hands each value to exactly one thread, so this fires once.
                if (finishedPages == ThreadCount * PagesPerThread)
                {
                    PostToUi(run, () =>
                    {
                        button1.Enabled = true;
                        MessageBox.Show(this, "完成!");
                    });
                }
            }
        }

        /// <summary>
        /// Fetches list page number <paramref name="index"/>, follows every site-relative user
        /// link on it and downloads the matching images from each linked page.
        /// </summary>
        private void CrawlListPage(int index, CrawlRun run)
        {
            string pageHtml = HttpRequestUtil.GetPageHtml(SiteRoot + "/c44/0/1_" + index.ToString() + ".html");

            foreach (Match userMatch in UserLinkRegex.Matches(pageHtml))
            {
                if (!WaitUntilAllowed(run))
                {
                    return;
                }

                string url = userMatch.Groups[2].Value;
                if (!url.StartsWith("/", StringComparison.Ordinal))
                {
                    // Only site-relative links are followed.
                    continue;
                }

                string imgPageHtml = HttpRequestUtil.GetPageHtml(SiteRoot + url);
                Interlocked.Increment(ref run.Persons);
                PostToUi(run, () =>
                {
                    lblPerson.Text = "已完成條數:" + ReadCounter(ref run.Persons).ToString();
                });

                foreach (Match imageMatch in ImageRegex.Matches(imgPageHtml))
                {
                    if (!WaitUntilAllowed(run))
                    {
                        return;
                    }

                    string imgUrl = imageMatch.Groups[1].Value;
                    if (!imgUrl.StartsWith("http://i1", StringComparison.Ordinal))
                    {
                        continue;
                    }

                    try
                    {
                        HttpRequestUtil.HttpDownloadFile(imgUrl);
                        Interlocked.Increment(ref run.Images);
                        PostToUi(run, () => ShowDownloadStats(run));
                    }
                    catch (Exception ex)
                    {
                        // Best effort: one broken image must not stop the rest of the page.
                        System.Diagnostics.Trace.WriteLine("Downloading " + imgUrl + " failed: " + ex);
                    }

                    Thread.Sleep(1);
                }
            }
        }

        /// <summary>Updates the downloaded-image count and the average download speed. Runs on the UI thread.</summary>
        private void ShowDownloadStats(CrawlRun run)
        {
            int images = ReadCounter(ref run.Images);
            lblNum.Text = "已下載圖片數" + images.ToString();

            double seconds = DateTime.UtcNow.Subtract(run.StartedUtc).TotalSeconds;
            if (seconds > 0)
            {
                lblSpeed.Text = "速度:" + (images / seconds).ToString("0.0") + "張/秒";
            }
        }
    }
}

截圖:

C#,網頁爬蟲

以上就是本文的全部內容,希望對大家的學習有所幫助。



發表評論 共有條評論
用戶名: 密碼:
驗證碼: 匿名發表
主站蜘蛛池模板: 凌源市| 辛集市| 杭锦旗| 资阳市| 元阳县| 大英县| 广元市| 灵山县| 大名县| 桦川县| 白水县| 会东县| 东莞市| 海门市| 钟山县| 弋阳县| 凭祥市| 武夷山市| 聂拉木县| 资兴市| 福州市| 万宁市| 沧州市| 调兵山市| 扎赉特旗| 普洱| 永福县| 罗平县| 潮州市| 当阳市| 霍林郭勒市| 忻州市| 锦州市| 温宿县| 剑阁县| 虎林市| 古交市| 肥西县| 同德县| 内黄县| 德兴市|