国产探花免费观看_亚洲丰满少妇自慰呻吟_97日韩有码在线_资源在线日韩欧美_一区二区精品毛片,辰东完美世界有声小说,欢乐颂第一季,yy玄幻小说排行榜完本

首頁 > 編程 > .NET > 正文

ASP.NET 數據采集基類(遠程抓取、分解、保存、匹配)

2024-07-10 13:05:52
字體:
來源:轉載
供稿:網友

/############################################
版權聲明:
文章內容為本站編輯、創作。你可以任意轉載、發布、使用,但請務必以明文標注文章原始出處及本聲明。
http://www.opent.cn  作者:浪淘沙
############################################/

using System;
using System.Configuration;
using System.Data;
using System.Globalization;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.HtmlControls;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using MSXML2;
namespace ec
{
    /// <summary>
    /// Small web-scraping helper: downloads remote pages and files through MSXML2.XMLHTTP,
    /// and extracts values, links and image addresses from HTML with regular expressions.
    /// </summary>
    /// <remarks>
    /// The type, method and parameter names keep their existing lower-case spelling so that
    /// existing callers continue to compile. The instance holds no state.
    /// </remarks>
    public class getremoteobj : IDisposable
    {
        /// <summary>Value of XMLHTTP.readyState once the request has completed.</summary>
        private const int RequestCompleted = 4;

        /// <summary>Separator appended after every entry returned by the list-style extractors.</summary>
        private const string EntrySeparator = "||";

        // Shared generator for remotesave: a fresh Random per call can be seeded identically for
        // calls made close together, which would produce identical file names.
        private static readonly Random s_random = new Random();
        private static readonly object s_randomGate = new object();

        // Any markup tag. [^>] also matches line breaks, so tags spanning several lines are covered.
        private static readonly Regex s_tagPattern = new Regex(@"<[^>]+>", RegexOptions.Compiled);

        // An <img ...> tag in any letter case.
        private static readonly Regex s_imgTagPattern = new Regex(
            @"<img\b[^>]*>",
            RegexOptions.IgnoreCase | RegexOptions.Compiled);

        // href attribute value: double-quoted, single-quoted or unquoted. The look-behind keeps
        // attributes such as data-href from matching.
        private static readonly Regex s_hrefPattern = new Regex(
            @"(?<![\w-])href\s*=\s*(?:""(?<url>[^""]*)""|'(?<url>[^']*)'|(?<url>[^\s'""<>]+))",
            RegexOptions.IgnoreCase | RegexOptions.Compiled);

        // src attribute value, same three quoting forms as href.
        private static readonly Regex s_srcPattern = new Regex(
            @"(?<![\w-])src\s*=\s*(?:""(?<url>[^""]*)""|'(?<url>[^']*)'|(?<url>[^\s'""<>]+))",
            RegexOptions.IgnoreCase | RegexOptions.Compiled);

        /// <summary>
        /// Creates a new helper instance.
        /// </summary>
        public getremoteobj()
        {
        }

        /// <summary>
        /// Kept for callers that follow the create / use / dispose pattern. The class owns no
        /// resources, so there is nothing to free and no finalizer is declared.
        /// </summary>
        public void dispose()
        {
            GC.SuppressFinalize(this);
        }

        /// <summary>
        /// Lets the helper be used in a <c>using</c> statement; forwards to <see cref="dispose"/>.
        /// </summary>
        void IDisposable.Dispose()
        {
            dispose();
        }

        /// <summary>
        /// Builds a name from the current local time (yyyyMMddHHmmss) followed by a three-digit
        /// random number.
        /// </summary>
        /// <param name="ra">Random number generator that supplies the suffix.</param>
        /// <returns>A 17-character string of digits.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="ra"/> is null.</exception>
        public string daterndname(Random ra)
        {
            if (ra == null)
            {
                throw new ArgumentNullException("ra");
            }

            string stamp = DateTime.Now.ToString("yyyyMMddHHmmss", CultureInfo.InvariantCulture);

            // The upper bound of Next is exclusive, so the suffix is always three digits (100-998).
            string suffix = ra.Next(100, 999).ToString(CultureInfo.InvariantCulture);
            return stamp + suffix;
        }

        /// <summary>
        /// Returns the extension (without the dot) of a file name or URL.
        /// </summary>
        /// <param name="filename">File name, path or URL; a query string or fragment is ignored.</param>
        /// <returns>
        /// The text after the last dot of the final path segment, or null when the input is null
        /// or empty, starts with its only dot, or its final segment contains no dot.
        /// </returns>
        public string getfileextends(string filename)
        {
            if (string.IsNullOrEmpty(filename))
            {
                return null;
            }

            // "?v=1" or "#top" are not part of the file name.
            int cut = filename.IndexOfAny(new char[] { '?', '#' });
            string pathPart = cut >= 0 ? filename.Substring(0, cut) : filename;

            if (pathPart.IndexOf('.') <= 0)
            {
                return null;
            }

            int lastDot = pathPart.LastIndexOf('.');
            int lastSeparator = pathPart.LastIndexOfAny(new char[] { '/', '\\' });
            if (lastDot < lastSeparator)
            {
                // The dot belongs to a directory or host name, e.g. "http://a.com/path/file".
                return null;
            }

            return pathPart.Substring(lastDot + 1);
        }

        /// <summary>
        /// Downloads a remote resource with a synchronous GET request and returns it as text.
        /// </summary>
        /// <param name="url">Address of the remote resource.</param>
        /// <returns>
        /// The response body decoded with <see cref="Encoding.Default"/>, or an empty string when
        /// the request did not complete or returned no body.
        /// </returns>
        /// <remarks>
        /// The body is always decoded with the system default encoding, so pages served in a
        /// different character set may come back garbled.
        /// </remarks>
        public string getremotehtmlcode(string url)
        {
            MSXML2.XMLHTTP request = new MSXML2.XMLHTTPClass();
            try
            {
                request.open("GET", url, false, null, null);
                request.send("");

                if (request.readyState != RequestCompleted)
                {
                    return string.Empty;
                }

                byte[] body = request.responseBody as byte[];
                return body == null ? string.Empty : Encoding.Default.GetString(body);
            }
            finally
            {
                // Release the COM object now instead of waiting for garbage collection.
                System.Runtime.InteropServices.Marshal.ReleaseComObject(request);
            }
        }

        /// <summary>
        /// Downloads a remote file and stores it under a generated name in a local directory.
        /// </summary>
        /// <param name="url">Address of the remote file; its extension is reused for the local file.</param>
        /// <param name="path">Directory to save into, with or without a trailing separator.</param>
        /// <returns>The generated file name (without the directory).</returns>
        /// <exception cref="Exception">
        /// The request did not complete or the server answered with a non-success status; the
        /// message is the HTTP status text.
        /// </exception>
        public string remotesave(string url, string path)
        {
            string stamp;
            lock (s_randomGate)
            {
                stamp = daterndname(s_random);
            }

            string extension = getfileextends(url);
            string fileName = string.IsNullOrEmpty(extension) ? stamp : stamp + "." + extension;
            string filePath = string.IsNullOrEmpty(path) ? fileName : Path.Combine(path, fileName);

            byte[] body;
            MSXML2.XMLHTTP request = new MSXML2.XMLHTTPClass();
            try
            {
                request.open("GET", url, false, null, null);
                request.send("");

                // A synchronous request is always "completed" after send, so the HTTP status has
                // to be checked as well; otherwise an error page would be saved as the file.
                if (request.readyState != RequestCompleted || request.status < 200 || request.status >= 300)
                {
                    throw new Exception(request.statusText);
                }

                body = request.responseBody as byte[];
            }
            finally
            {
                System.Runtime.InteropServices.Marshal.ReleaseComObject(request);
            }

            // WriteAllBytes creates or overwrites the file and always closes the handle.
            File.WriteAllBytes(filePath, body == null ? new byte[0] : body);
            return fileName;
        }

        /// <summary>
        /// Removes double quotes and line breaks from HTML so it can be matched as a single line.
        /// </summary>
        /// <param name="htmlcode">HTML source.</param>
        /// <returns>The cleaned text; an empty string when the input is null or empty.</returns>
        public string replaceenter(string htmlcode)
        {
            if (string.IsNullOrEmpty(htmlcode))
            {
                return string.Empty;
            }

            // Removing \r and \n separately covers Windows, Unix and old Mac line endings.
            return htmlcode.Replace("\"", "").Replace("\r", "").Replace("\n", "");
        }

        /// <summary>
        /// Returns the first match of a regular expression in the given text.
        /// </summary>
        /// <param name="regexstring">Regular expression pattern.</param>
        /// <param name="remotestr">Text to search, typically HTML source.</param>
        /// <returns>The matched text, or an empty string when there is no match or no input.</returns>
        public string getregvalue(string regexstring, string remotestr)
        {
            if (string.IsNullOrEmpty(remotestr))
            {
                return string.Empty;
            }

            Match match = Regex.Match(remotestr, regexstring);
            return match.Success ? match.Value : string.Empty;
        }

        /// <summary>
        /// Strips all markup tags from HTML, leaving only the text between them.
        /// </summary>
        /// <param name="htmlcode">HTML source.</param>
        /// <returns>The text without tags; null or empty input is returned unchanged.</returns>
        public string removehtml(string htmlcode)
        {
            if (string.IsNullOrEmpty(htmlcode))
            {
                return htmlcode;
            }

            return s_tagPattern.Replace(htmlcode, string.Empty);
        }

        /// <summary>
        /// Collects the values of all href attributes in the HTML.
        /// </summary>
        /// <param name="htmlcode">HTML source.</param>
        /// <returns>
        /// The link targets in document order, each followed by "||"; an empty string when none
        /// are found. Values are returned without quotes and with their original letter case.
        /// </returns>
        public string gethref(string htmlcode)
        {
            if (string.IsNullOrEmpty(htmlcode))
            {
                return string.Empty;
            }

            StringBuilder links = new StringBuilder();
            foreach (Match match in s_hrefPattern.Matches(htmlcode))
            {
                string target = match.Groups["url"].Value.Trim();
                if (target.Length > 0)
                {
                    links.Append(target).Append(EntrySeparator);
                }
            }

            return links.ToString();
        }

        /// <summary>
        /// Collects the image addresses of all img tags in the HTML.
        /// </summary>
        /// <param name="htmlcode">HTML source.</param>
        /// <param name="imghttp">
        /// Prefix (for example "http://www.example.com") added in front of relative addresses;
        /// may be empty.
        /// </param>
        /// <returns>
        /// One entry per img tag in document order, each followed by "||"; an empty string when
        /// the HTML contains no img tag.
        /// </returns>
        public string getimgsrc(string htmlcode, string imghttp)
        {
            if (string.IsNullOrEmpty(htmlcode))
            {
                return string.Empty;
            }

            StringBuilder images = new StringBuilder();
            foreach (Match tag in s_imgTagPattern.Matches(htmlcode))
            {
                images.Append(getimg(tag.Value, imghttp)).Append(EntrySeparator);
            }

            return images.ToString();
        }

        /// <summary>
        /// Extracts the address from the src attribute of a single img tag.
        /// </summary>
        /// <param name="imgstring">The img tag, e.g. <c>&lt;img src="a.gif" /&gt;</c>.</param>
        /// <param name="imghttp">Prefix added in front of a relative address; may be empty.</param>
        /// <returns>
        /// The src value without quotes. Addresses starting with http://, https:// or // are
        /// returned as they are; anything else is prefixed with <paramref name="imghttp"/>.
        /// When the tag has no src attribute only the prefix is returned.
        /// </returns>
        public string getimg(string imgstring, string imghttp)
        {
            string source = string.Empty;
            if (!string.IsNullOrEmpty(imgstring))
            {
                Match match = s_srcPattern.Match(imgstring);
                if (match.Success)
                {
                    source = match.Groups["url"].Value.Trim();
                }
            }

            if (isabsoluteurl(source))
            {
                return source;
            }

            return imghttp + source;
        }

        /// <summary>
        /// Removes the leading and trailing marker strings from a value obtained by a regular
        /// expression, e.g. turns "&lt;title&gt;Home&lt;/title&gt;" into "Home".
        /// </summary>
        /// <param name="regvalue">Value to clean.</param>
        /// <param name="regstart">Leading marker to remove; ignored when null or empty.</param>
        /// <param name="regend">Trailing marker to remove; ignored when null or empty.</param>
        /// <returns>
        /// The value with every occurrence of both markers removed; null or empty input is
        /// returned unchanged.
        /// </returns>
        public string regreplace(string regvalue, string regstart, string regend)
        {
            if (string.IsNullOrEmpty(regvalue))
            {
                return regvalue;
            }

            string result = regvalue;
            if (!string.IsNullOrEmpty(regstart))
            {
                result = result.Replace(regstart, "");
            }

            if (!string.IsNullOrEmpty(regend))
            {
                result = result.Replace(regend, "");
            }

            return result;
        }

        /// <summary>Tells whether an address already carries its own scheme or host.</summary>
        private static bool isabsoluteurl(string address)
        {
            return address.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                || address.StartsWith("https://", StringComparison.OrdinalIgnoreCase)
                || address.StartsWith("//", StringComparison.Ordinal);
        }
    }
}

 
發表評論 共有條評論
用戶名: 密碼:
驗證碼: 匿名發表
主站蜘蛛池模板: 德阳市| 武宣县| 云林县| 齐齐哈尔市| 鄂托克前旗| 墨玉县| 宝应县| 万荣县| 阿拉善右旗| 临颍县| 绍兴县| 宁远县| 台安县| 原阳县| 武清区| 科尔| 上犹县| 长武县| 屏东市| 礼泉县| 富顺县| 白河县| 福海县| 平昌县| 苍梧县| 东平县| 昆山市| 石嘴山市| 富川| 诸城市| 平和县| 台南市| 灵川县| 阳高县| 临湘市| 水城县| 冕宁县| 乌鲁木齐县| 图片| 清涧县| 郧西县|