国产探花免费观看_亚洲丰满少妇自慰呻吟_97日韩有码在线_资源在线日韩欧美_一区二区精品毛片,辰东完美世界有声小说,欢乐颂第一季,yy玄幻小说排行榜完本

首頁 > 學院 > 操作系統 > 正文

hadoop學習筆記之倒排索引

2024-06-28 13:24:34
字體:
來源:轉載
供稿:網友
hadoop學習筆記之倒排索引

開發工具:eclipse

目標:對下面文檔phone_numbers進行倒排索引:

13599999999 10086
13899999999 120
13944444444 13800138000
13722222222 13800138000
18800000000 120
13722222222 10086
18944444444 10086

代碼:

 1 import java.io.IOException; 2 import org.apache.hadoop.conf.Configured; 3 import org.apache.hadoop.conf.Configuration; 4 import org.apache.hadoop.fs.Path; 5 import org.apache.hadoop.util.Tool; 6 import org.apache.hadoop.util.ToolRunner; 7 import org.apache.hadoop.io.*; 8 import org.apache.hadoop.maPReduce.*; 9 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;10 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;11 import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;12 13 public class Test_1 extends Configured implements Tool 14 {15     enum Counter16     {17         LINESKIP, // error lines18     }19     20     public static class Map extends Mapper<LongWritable, Text, Text, Text>21     {22         public void map(LongWritable key, Text value, Context context)throws IOException, InterruptedException23         {24             String line = value.toString(); // read original data25             26             try27             {28                 // process data29                 String[] lineSplit = line.split(" ");30                 String anum = lineSplit[0];31                 String bnum = lineSplit[1];32                 33                 context.write(new Text(bnum), new Text(anum)); // map output34             }35             catch(java.lang.ArrayIndexOutOfBoundsException e)36             {37                 context.getCounter(Counter.LINESKIP).increment(1);38                 return;39             }40             41         }42     }43     public static class Reduce extends Reducer<Text, Text, Text, Text>44     {45         public void reduce(Text key, Iterable<Text>values, Context context)throws IOException, InterruptedException46         {47             String valueString;48             String out = "";49             50             for (Text value : values)51             {52                 valueString = value.toString();53                 out += valueString + "|";54             }55             56             
context.write(key, new Text(out));    // reduce output57         }58     }59     public int run(String[] args)throws Exception60     {61         Configuration conf = getConf();62         63         Job job = new Job(conf, "Test_1");  // task name64         job.setJarByClass(Test_1.class);    // specified task65         66         FileInputFormat.addInputPath(job, new Path(args[0]));  // input path67         FileOutputFormat.setOutputPath(job, new Path(args[1])); // output path68         69         job.setMapperClass(Map.class);70         job.setReducerClass(Reduce.class);71         job.setOutputFormatClass(TextOutputFormat.class);72         job.setOutputKeyClass(Text.class);73         job.setOutputValueClass(Text.class);74         75         job.waitForCompletion(true);76         77         return job.isSuccessful() ? 0 : 1;78     }79     80     public static void main(String[] args)throws Exception81     {82         int res = ToolRunner.run(new Configuration(), new Test_1(), args);83         System.exit(res);84     }85 }

運行結果:


發表評論 共有條評論
用戶名: 密碼:
驗證碼: 匿名發表
主站蜘蛛池模板: 永寿县| 玉山县| 馆陶县| 吉木乃县| 霍州市| 平阴县| 兴隆县| 玉田县| 鸡泽县| 沈丘县| 读书| 沂源县| 台湾省| 时尚| 四平市| 南漳县| 黎平县| 广河县| 新闻| 兴文县| 周至县| 花莲市| 云龙县| 遵义县| 保山市| 晋江市| 蒙阴县| 松江区| 保山市| 新乐市| 益阳市| 延长县| 安溪县| 临沂市| 泰兴市| 大竹县| 西峡县| 晋江市| 巫山县| 瑞金市| 榕江县|