国产探花免费观看_亚洲丰满少妇自慰呻吟_97日韩有码在线_资源在线日韩欧美_一区二区精品毛片,辰东完美世界有声小说,欢乐颂第一季,yy玄幻小说排行榜完本

首頁 > 學院 > 開發設計 > 正文

基于Java的數據采集(三)

2019-11-14 20:51:07
字體:
供稿:網友
基于Java的數據采集(三)

《基于Java的數據采集(一)》:http://m.survivalescaperooms.com/lichenwei/p/3904715.html

《基于Java的數據采集(二)》:http://m.survivalescaperooms.com/lichenwei/p/3905370.html

《基于Java的數據采集(終結篇)》:http://m.survivalescaperooms.com/lichenwei/p/3910492.html

基于之前2篇Java數據采集入庫,做了下功能整合,實現本地的存讀取,上個效果圖:

直接上代碼吧,本程序只是作為"如何用JAVA抓取頁面簡單采集入庫"的入門,在實際做采集工具的時候,還需考慮許多東西,比如當采集一個頁面發生卡頓時,發生延遲時怎么辦?等一系列的問題,希望這篇文字能夠拋磚引玉。

先看下項目結構:

一共有五個類:

MySql.java  --數據庫操作類

RegEX.java  --正則匹配類

GetAllData.java --采集類

Action.java  --功能實現類

FootBallMain.java --主程序類

其他的,直接結合前面2篇文章外加看代碼注釋吧

Mysql.java

 1 package com.lcw.curl; 2  3  4 import java.sql.Connection; 5 import java.sql.DriverManager; 6 import java.sql.ResultSet; 7 import java.sql.SQLException; 8 import java.sql.Statement; 9 10 11 /**12  * 數(shù)據(jù)庫操作類,一更新,一查詢13  * @author Balla_兔子14  *15  */16 public class MySql {17   18     //定義MySql驅(qū)動,數(shù)據(jù)庫地址,數(shù)據(jù)庫用戶名 密碼, 執(zhí)行語句和數(shù)據(jù)庫連接  19     public String driver = "com.mysql.jdbc.Driver";20     public String url = "jdbc:mysql://127.0.0.1:3306/football";21     public String user = "root";22     public String passWord = "";23     public Statement stmt = null;24     public Connection conn = null;25     26     //創(chuàng)建一個插入數(shù)據(jù)的方法27     public void datatoMySql(String insertSQl) {28 29         try {30             try {31                 Class.forName(driver).newInstance();32             } catch (Exception e) {33                 System.out.Mysql.java

RegEX.java

 import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Regular-expression helper used by the scraper.
 *
 * <p>The article places this class in package {@code com.lcw.curl}; the
 * declaration is omitted here so the listing compiles on its own.
 */
public class RegEX {

    /**
     * Finds the first case-insensitive match of a regular expression.
     *
     * @param regex
     *            the regular expression to search for
     * @param content
     *            the text to search in
     * @return the first matched substring, or an empty string when nothing
     *         matches
     */
    public String getData(String regex, String content) {
        // Matching is case-insensitive, so e.g. "</TD>" also matches "</td>".
        Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
        Matcher matcher = pattern.matcher(content);
        if (matcher.find()) {
            return matcher.group(); // whole match, not a capture group
        } else {
            return "";
        }
    }

}
RegEX.java

GetAllData.java

 1 package com.lcw.curl; 2  3 import java.io.BufferedReader; 4 import java.io.InputStreamReader; 5 import java.net.URL; 6  7 public class GetAllData { 8  9     /**采集類10      * @param Balla_兔子11      */12     public void getAllData() {13 14         try {15             String address = "http://www.footballresults.org/league.php?league=EngDiv1";16             URL url = new URL(address);17             InputStreamReader inputStreamReader = new InputStreamReader(url18                     .openStream(), "utf-8");// 打開地址,以UTF-8編碼的形式返回字節(jié)并轉(zhuǎn)為字符19             BufferedReader bufferedReader = new BufferedReader(20                     inputStreamReader);// 從字符輸入流中讀取文本,緩沖各個字符,從而提供字符、數(shù)組和行的高效讀取。21 22             RegEX data = new RegEX();23             MySql mySql = new MySql();24             String content = "";// 用來接受每次讀取的行字符25             int flag = 0;// 標(biāo)志,隊(duì)伍信息剛好在日期信息后面,則正則相同,用于分離數(shù)據(jù)26             String dateRegex = "http://d{1,2}//.//d{1,2}//.//d{4}";// 日期匹配正則表達(dá)式27             String teamRegex = ">[^<>]*</a>";// 隊(duì)伍匹配正則表達(dá)式28             String scoreRegex = ">(//d{1,2}-//d{1,2})</TD>";// 比分正則表達(dá)式29             String tempDate = "";// 存儲臨時比賽時間30             String teama = "";// 存儲臨時主隊(duì)31             String teamb = "";// 存儲臨時客隊(duì)32             String score = "";// 存儲臨時比分33             int i = 0;// 記錄信息條數(shù)34             String sql = "";// 數(shù)據(jù)庫語句35 36             while ((content = bufferedReader.readLine()) != null) {// 每次讀取一行數(shù)據(jù)37                 // 獲取比賽日期信息38                 String dateInfo = data.getData(dateRegex, content);39                 if (!dateInfo.equals("")) {40                     // System.out.println("日期:" + dateInfo);41                     tempDate = dateInfo;42                     flag++;43                 }44                 // 獲取隊(duì)伍信息,需先讀到日期信息讓標(biāo)志符自增45                 String teamInfo = data.getData(teamRegex, content);46                 if (!teamInfo.equals("") && flag == 1) {47 
                    teama = teamInfo.substring(1, teamInfo.indexOf("</a>"));48                     // System.out.println("主隊(duì):" + teama);49                     flag++;50                 } else if (!teamInfo.equals("") && flag == 2) {51                     teamb = teamInfo.substring(1, teamInfo.indexOf("</a>"));52                     // System.out.println("客隊(duì):" + teamb);53                     flag = 0;54                 }55                 // 獲取比分信息56                 String scoreInfo = data.getData(scoreRegex, content);57                 if (!scoreInfo.equals("")) {58                     score = scoreInfo.substring(1, scoreInfo.indexOf("</TD>"));59                     // System.out.println("比分:" + score);60                     // System.out.println();61                     i++;62                     sql = "insert into football(`date`,`teama`,`teamb`,`score`) values('"63                             + tempDate64                             + "','"65                             + teama66                             + "','"67                             + teamb68                             + "','"69                             + score + "')";70                     mySql.datatoMySql(sql);71                     System.out.println("存儲數(shù)據(jù)成功:" + i + "條");72                 }73 74             }75             bufferedReader.close();76             // System.out.println("一共收集到了" + i + "條信息");77         } catch (Exception e) {78             e.printStackTrace();79         }80 81     }82 83 }
GetAllData.java

Action.java

  1 package com.lcw.curl;  2   3 import java.sql.ResultSet;  4 import java.sql.SQLException;  5 import java.util.ArrayList;  6 import java.util.List;  7 import java.util.Vector;  8   9 public class Action { 10  11     /** 12      * 操作一:初始化數(shù)據(jù)庫數(shù)據(jù) 13      */ 14     public void initData() { 15         String sql = "delete from football"; 16         MySql doMySql = new MySql(); 17         try { 18             doMySql.datatoMySql(sql); 19             System.out.println("數(shù)據(jù)初始化完畢!"); 20         } catch (Exception e) { 21             System.out.println("數(shù)據(jù)初始化失敗!"); 22         } 23  24     } 25  26     /** 27      * 獲取所有隊(duì)伍信息 28      *  29      * @return 30      */ 31     public Vector<String> getAllTeam() { 32         ResultSet rs = null; 33         Vector<String> vector = new Vector<String>(); 34         String sql = "select teama,teamb from football"; 35         MySql doMySql = new MySql(); 36         rs = doMySql.searchMySql(sql); 37  38         try { 39             while (rs.next()) { 40                 try { 41                     if (!vector.contains(rs.getString("teama"))) { 42                         vector.add(rs.getString("teama")); 43                     } 44                     if (!vector.contains(rs.getString("teamb"))) { 45                         vector.add(rs.getString("teamb")); 46                     } 47                 } catch (SQLException e) { 48                     e.printStackTrace(); 49                 } 50             } 51         } catch (SQLException e) { 52             e.printStackTrace(); 53         } 54  55         return vector; 56  57     } 58  59     /** 60      * 獲取具體某隊(duì)的比賽信息 61      *  62      * @param team 63      * @return 64      */ 65     public List<String> findTeam(String team) { 66         List<String> list = new ArrayList<String>(); 67         String sql = "select * from football where teama ='" + team 68                 + "' or teamb ='" + team + "'"; 69         MySql mysql = new 
MySql(); 70         ResultSet rs = null; 71         rs = mysql.searchMySql(sql); 72         try { 73             while (rs.next()) { 74                 list.add(rs.getString("date")); 75                 list.add(rs.getString("teama")); 76                 list.add(rs.getString("teamb")); 77                 list.add(rs.getString("score")); 78             } 79         } catch (SQLException e) { 80             e.printStackTrace(); 81         } 82         return list; 83  84     } 85  86     public List<String> findGame(String date) { 87         List<String> list = new ArrayList<String>(); 88         ResultSet rs = null; 89         String sql = "select * from football where date ='" + date + "'"; 90         MySql mysql = new MySql(); 91         rs = mysql.searchMySql(sql); 92         try { 93             while (rs.next()) { 94                 list.add(rs.getString("date")); 95                 list.add(rs.getString("teama")); 96                 list.add(rs.getString("teamb")); 97                 list.add(rs.getString("score")); 98             } 99         } catch (SQLException e) {100             // TODO Auto-generated catch block101             e.printStackTrace();102         }103         return list;104     }105 106 }
Action.java

FootBallMain.java

 1 package com.lcw.curl; 2  3 import java.util.List; 4 import java.util.Scanner; 5 import java.util.Vector; 6  7 public class FootBallMain { 8  9     /**主程序類10      * @param Balla_兔子11      */12     public static void main(String[] args) {13         GetAllData allData = new GetAllData();14         Action action = new Action();15 16         while (true) {17             System.out.println("①初始化數(shù)據(jù)庫-請按 (1)");18             System.out.println("②自動化采集數(shù)據(jù)-請按(2)");19             System.out.println("③查詢參賽隊(duì)伍-請按(3)");20             System.out.println("④查詢具體球隊(duì)比賽結(jié)果-請按(4)");21             System.out.println("⑤查詢具體某天的比賽詳情-請按(5)");22             Scanner scanner = new Scanner(System.in);23             String input = scanner.next();24             if (input.equals("1")) {25                 System.out.println();26                 action.initData();27                 System.out28                         .println("-----------------------------------------------------");29             } else if (input.equals("2")) {30                 System.out.println("正在采集數(shù)據(jù)...請稍后");31                 allData.getAllData();32                 System.out33                         .println("-----------------------------------------------------");34             } else if (input.equals("3")) {35                 Vector<String> allTeam = action.getAllTeam();36                 System.out.println("正在獲取數(shù)據(jù)...請稍后");37                 if (allTeam.size() != 0) {38                     System.out.println("參賽隊(duì)伍如下:");39                     for (int i = 0; i < allTeam.size(); i++) {40                         System.out.println(allTeam.get(i));41                     }42                 }43                 System.out44                         .println("-----------------------------------------------------");45             } else if (input.equals("4")) {46                 System.out.println("請輸入您要查詢的隊(duì)伍名:");47                 String team = scanner.next();48           
      List<String> list = action.findTeam(team);49                 System.out.println("比賽日期/t/t/t主隊(duì)/t/t客隊(duì)/t/t/t比賽結(jié)果");50                 if (list.size() != 0) {51                     for (int i = 0; i < list.size(); i++) {52                         System.out.print(list.get(i) + "/t/t");53                     }54                 } else {55                     System.out.println("暫時沒有您所提供隊(duì)伍的比賽信息,敬請關(guān)注...");56                 }57                 System.out.println();58                 System.out59                         .println("-----------------------------------------------------");60             } else if (input.equals("5")) {61                 System.out.println("請輸入您要查詢?nèi)掌冢ǜ袷饺缦拢簒x.xx.xxxx):");62                 String date = scanner.next();63                 List<String> info = action.findGame(date);64                 System.out.println("比賽日期/t/t/t主隊(duì)/t/t客隊(duì)/t/t/t比賽結(jié)果");65                 if (info.size() != 0) {66                     for (int i = 0; i < info.size(); i++) {67                         if (i % 4 == 0 && i != 0) {68                             System.out.println();69                         }70                         System.out.print(info.get(i) + "/t/t");71                     }72                 } else {73                     System.out.println("暫時沒有您所提供的比賽信息,敬請關(guān)注...");74                 }75                 System.out.println();76                 System.out77                         .println("------------------------------------------------------------------------");78             } else {79                 System.out.println("請輸入正確的對應(yīng)編號..");80                 System.out81                         .println("------------------------------------------------------------------------");82             }83         }84     }85 86 }
FootBallMain.java


發表評論 共有條評論
用戶名: 密碼:
驗證碼: 匿名發表
主站蜘蛛池模板: 邹城市| 新沂市| 富阳市| 逊克县| 延津县| 义乌市| 苍南县| 黄龙县| 清丰县| 夏邑县| 水富县| 武山县| 北票市| 兰溪市| 彩票| 台中县| 天镇县| 永和县| 贡山| 富平县| 温州市| 获嘉县| 万盛区| 榆社县| 昌乐县| 申扎县| 新河县| 宿迁市| 清丰县| 曲阜市| 淅川县| 南江县| 渭南市| 萨迦县| 尼玛县| 宁晋县| 腾冲县| 绿春县| 临西县| 仙游县| 赫章县|