《基于Java的數據采集(一)》:http://m.survivalescaperooms.com/lichenwei/p/3904715.html
《基于Java的數據采集(二)》:http://m.survivalescaperooms.com/lichenwei/p/3905370.html
《基于Java的數據采集(終結篇)》:http://m.survivalescaperooms.com/lichenwei/p/3910492.html
基于之前2篇Java數據采集入庫,做了下功能整合,實現本地的存讀取,上個效果圖:



直接上代碼吧,本程序只是作為"如何用JAVA抓取頁面簡單采集入庫"的入門,在實際做采集工具的時候,還需考慮許多東西,比如當采集一個頁面發生卡頓時,發生延遲時怎么辦?等一系列的問題,希望這篇文字能夠拋磚引玉。
先看下項目結構:
一共有五個類:
MySql.java --數據庫操作類
RegEX.java --正則匹配類
GetAllData.java --采集類
Action.java --功能實現類
FootBallMain.java --主程序類
其他的,直接結合前面2篇文章外加看代碼注釋吧
MySql.java

1 package com.lcw.curl; 2 3 4 import java.sql.Connection; 5 import java.sql.DriverManager; 6 import java.sql.ResultSet; 7 import java.sql.SQLException; 8 import java.sql.Statement; 9 10 11 /**12 * 數(shù)據(jù)庫操作類,一更新,一查詢13 * @author Balla_兔子14 *15 */16 public class MySql {17 18 //定義MySql驅(qū)動,數(shù)據(jù)庫地址,數(shù)據(jù)庫用戶名 密碼, 執(zhí)行語句和數(shù)據(jù)庫連接 19 public String driver = "com.mysql.jdbc.Driver";20 public String url = "jdbc:mysql://127.0.0.1:3306/football";21 public String user = "root";22 public String passWord = "";23 public Statement stmt = null;24 public Connection conn = null;25 26 //創(chuàng)建一個插入數(shù)據(jù)的方法27 public void datatoMySql(String insertSQl) {28 29 try {30 try {31 Class.forName(driver).newInstance();32 } catch (Exception e) {33 System.out.Mysql.javaRegEX.java

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Thin helper around {@code java.util.regex} that pulls the first match of an
 * expression out of a piece of text.
 */
public class RegEX {

    /**
     * Returns the first substring of {@code content} that matches {@code regex}.
     *
     * <p>Matching ignores case ({@link Pattern#CASE_INSENSITIVE}). The whole match
     * is returned (i.e. {@code Matcher.group()}), not an individual capture group.
     *
     * @param regex
     *            regular expression to search for
     * @param content
     *            text to search; {@code null} is treated as text without a match
     * @return the first match, or the empty string when nothing matches
     */
    public String getData(String regex, String content) {
        // Callers only ever test the result against "", so a missing line is
        // reported the same way as a line without a match instead of failing
        // with a NullPointerException inside Pattern.matcher.
        if (content == null) {
            return "";
        }
        Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
        Matcher matcher = pattern.matcher(content);
        return matcher.find() ? matcher.group() : "";
    }

}

1 package com.lcw.curl; 2 3 import java.io.BufferedReader; 4 import java.io.InputStreamReader; 5 import java.net.URL; 6 7 public class GetAllData { 8 9 /**采集類10 * @param Balla_兔子11 */12 public void getAllData() {13 14 try {15 String address = "http://www.footballresults.org/league.php?league=EngDiv1";16 URL url = new URL(address);17 InputStreamReader inputStreamReader = new InputStreamReader(url18 .openStream(), "utf-8");// 打開地址,以UTF-8編碼的形式返回字節(jié)并轉(zhuǎn)為字符19 BufferedReader bufferedReader = new BufferedReader(20 inputStreamReader);// 從字符輸入流中讀取文本,緩沖各個字符,從而提供字符、數(shù)組和行的高效讀取。21 22 RegEX data = new RegEX();23 MySql mySql = new MySql();24 String content = "";// 用來接受每次讀取的行字符25 int flag = 0;// 標(biāo)志,隊(duì)伍信息剛好在日期信息后面,則正則相同,用于分離數(shù)據(jù)26 String dateRegex = "http://d{1,2}//.//d{1,2}//.//d{4}";// 日期匹配正則表達(dá)式27 String teamRegex = ">[^<>]*</a>";// 隊(duì)伍匹配正則表達(dá)式28 String scoreRegex = ">(//d{1,2}-//d{1,2})</TD>";// 比分正則表達(dá)式29 String tempDate = "";// 存儲臨時比賽時間30 String teama = "";// 存儲臨時主隊(duì)31 String teamb = "";// 存儲臨時客隊(duì)32 String score = "";// 存儲臨時比分33 int i = 0;// 記錄信息條數(shù)34 String sql = "";// 數(shù)據(jù)庫語句35 36 while ((content = bufferedReader.readLine()) != null) {// 每次讀取一行數(shù)據(jù)37 // 獲取比賽日期信息38 String dateInfo = data.getData(dateRegex, content);39 if (!dateInfo.equals("")) {40 // System.out.println("日期:" + dateInfo);41 tempDate = dateInfo;42 flag++;43 }44 // 獲取隊(duì)伍信息,需先讀到日期信息讓標(biāo)志符自增45 String teamInfo = data.getData(teamRegex, content);46 if (!teamInfo.equals("") && flag == 1) {47 teama = teamInfo.substring(1, teamInfo.indexOf("</a>"));48 // System.out.println("主隊(duì):" + teama);49 flag++;50 } else if (!teamInfo.equals("") && flag == 2) {51 teamb = teamInfo.substring(1, teamInfo.indexOf("</a>"));52 // System.out.println("客隊(duì):" + teamb);53 flag = 0;54 }55 // 獲取比分信息56 String scoreInfo = data.getData(scoreRegex, content);57 if (!scoreInfo.equals("")) {58 score = scoreInfo.substring(1, scoreInfo.indexOf("</TD>"));59 // 
System.out.println("比分:" + score);60 // System.out.println();61 i++;62 sql = "insert into football(`date`,`teama`,`teamb`,`score`) values('"63 + tempDate64 + "','"65 + teama66 + "','"67 + teamb68 + "','"69 + score + "')";70 mySql.datatoMySql(sql);71 System.out.println("存儲數(shù)據(jù)成功:" + i + "條");72 }73 74 }75 bufferedReader.close();76 // System.out.println("一共收集到了" + i + "條信息");77 } catch (Exception e) {78 e.printStackTrace();79 }80 81 }82 83 }GetAllData.javaAction.java

1 package com.lcw.curl; 2 3 import java.sql.ResultSet; 4 import java.sql.SQLException; 5 import java.util.ArrayList; 6 import java.util.List; 7 import java.util.Vector; 8 9 public class Action { 10 11 /** 12 * 操作一:初始化數(shù)據(jù)庫數(shù)據(jù) 13 */ 14 public void initData() { 15 String sql = "delete from football"; 16 MySql doMySql = new MySql(); 17 try { 18 doMySql.datatoMySql(sql); 19 System.out.println("數(shù)據(jù)初始化完畢!"); 20 } catch (Exception e) { 21 System.out.println("數(shù)據(jù)初始化失敗!"); 22 } 23 24 } 25 26 /** 27 * 獲取所有隊(duì)伍信息 28 * 29 * @return 30 */ 31 public Vector<String> getAllTeam() { 32 ResultSet rs = null; 33 Vector<String> vector = new Vector<String>(); 34 String sql = "select teama,teamb from football"; 35 MySql doMySql = new MySql(); 36 rs = doMySql.searchMySql(sql); 37 38 try { 39 while (rs.next()) { 40 try { 41 if (!vector.contains(rs.getString("teama"))) { 42 vector.add(rs.getString("teama")); 43 } 44 if (!vector.contains(rs.getString("teamb"))) { 45 vector.add(rs.getString("teamb")); 46 } 47 } catch (SQLException e) { 48 e.printStackTrace(); 49 } 50 } 51 } catch (SQLException e) { 52 e.printStackTrace(); 53 } 54 55 return vector; 56 57 } 58 59 /** 60 * 獲取具體某隊(duì)的比賽信息 61 * 62 * @param team 63 * @return 64 */ 65 public List<String> findTeam(String team) { 66 List<String> list = new ArrayList<String>(); 67 String sql = "select * from football where teama ='" + team 68 + "' or teamb ='" + team + "'"; 69 MySql mysql = new MySql(); 70 ResultSet rs = null; 71 rs = mysql.searchMySql(sql); 72 try { 73 while (rs.next()) { 74 list.add(rs.getString("date")); 75 list.add(rs.getString("teama")); 76 list.add(rs.getString("teamb")); 77 list.add(rs.getString("score")); 78 } 79 } catch (SQLException e) { 80 e.printStackTrace(); 81 } 82 return list; 83 84 } 85 86 public List<String> findGame(String date) { 87 List<String> list = new ArrayList<String>(); 88 ResultSet rs = null; 89 String sql = "select * from football where date ='" + date + "'"; 90 MySql mysql 
= new MySql(); 91 rs = mysql.searchMySql(sql); 92 try { 93 while (rs.next()) { 94 list.add(rs.getString("date")); 95 list.add(rs.getString("teama")); 96 list.add(rs.getString("teamb")); 97 list.add(rs.getString("score")); 98 } 99 } catch (SQLException e) {100 // TODO Auto-generated catch block101 e.printStackTrace();102 }103 return list;104 }105 106 }Action.javaFootBallMain.java

1 package com.lcw.curl; 2 3 import java.util.List; 4 import java.util.Scanner; 5 import java.util.Vector; 6 7 public class FootBallMain { 8 9 /**主程序類10 * @param Balla_兔子11 */12 public static void main(String[] args) {13 GetAllData allData = new GetAllData();14 Action action = new Action();15 16 while (true) {17 System.out.println("①初始化數(shù)據(jù)庫-請按 (1)");18 System.out.println("②自動化采集數(shù)據(jù)-請按(2)");19 System.out.println("③查詢參賽隊(duì)伍-請按(3)");20 System.out.println("④查詢具體球隊(duì)比賽結(jié)果-請按(4)");21 System.out.println("⑤查詢具體某天的比賽詳情-請按(5)");22 Scanner scanner = new Scanner(System.in);23 String input = scanner.next();24 if (input.equals("1")) {25 System.out.println();26 action.initData();27 System.out28 .println("-----------------------------------------------------");29 } else if (input.equals("2")) {30 System.out.println("正在采集數(shù)據(jù)...請稍后");31 allData.getAllData();32 System.out33 .println("-----------------------------------------------------");34 } else if (input.equals("3")) {35 Vector<String> allTeam = action.getAllTeam();36 System.out.println("正在獲取數(shù)據(jù)...請稍后");37 if (allTeam.size() != 0) {38 System.out.println("參賽隊(duì)伍如下:");39 for (int i = 0; i < allTeam.size(); i++) {40 System.out.println(allTeam.get(i));41 }42 }43 System.out44 .println("-----------------------------------------------------");45 } else if (input.equals("4")) {46 System.out.println("請輸入您要查詢的隊(duì)伍名:");47 String team = scanner.next();48 List<String> list = action.findTeam(team);49 System.out.println("比賽日期/t/t/t主隊(duì)/t/t客隊(duì)/t/t/t比賽結(jié)果");50 if (list.size() != 0) {51 for (int i = 0; i < list.size(); i++) {52 System.out.print(list.get(i) + "/t/t");53 }54 } else {55 System.out.println("暫時沒有您所提供隊(duì)伍的比賽信息,敬請關(guān)注...");56 }57 System.out.println();58 System.out59 .println("-----------------------------------------------------");60 } else if (input.equals("5")) {61 System.out.println("請輸入您要查詢?nèi)掌冢ǜ袷饺缦拢簒x.xx.xxxx):");62 String date = scanner.next();63 List<String> info = 
action.findGame(date);64 System.out.println("比賽日期/t/t/t主隊(duì)/t/t客隊(duì)/t/t/t比賽結(jié)果");65 if (info.size() != 0) {66 for (int i = 0; i < info.size(); i++) {67 if (i % 4 == 0 && i != 0) {68 System.out.println();69 }70 System.out.print(info.get(i) + "/t/t");71 }72 } else {73 System.out.println("暫時沒有您所提供的比賽信息,敬請關(guān)注...");74 }75 System.out.println();76 System.out77 .println("------------------------------------------------------------------------");78 } else {79 System.out.println("請輸入正確的對應(yīng)編號..");80 System.out81 .println("------------------------------------------------------------------------");82 }83 }84 }85 86 }FootBallMain.java
新聞熱點
疑難解答
圖片精選