package kpi;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class ArrayWritableTest {

    public static void main(String[] args) throws IOException,
            ClassNotFoundException, InterruptedException, URISyntaxException {
        Configuration conf = new Configuration();

        // Remove the output directory if it already exists, otherwise the job fails on startup.
        FileSystem fileSystem = FileSystem.get(new URI("hdfs://hadoop:9000/"), conf);
        fileSystem.delete(new Path("/kpi__data_out_1"), true);

        Job job = new Job(conf, ArrayWritableTest.class.getName());
        job.setJarByClass(ArrayWritableTest.class);

        FileInputFormat.setInputPaths(job, new Path("hdfs://hadoop:9000/kpi_data"));
        FileOutputFormat.setOutputPath(job, new Path("hdfs://hadoop:9000/kpi__data_out_1"));

        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongArrayWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        job.waitForCompletion(true);
    }

    static class MyMapper extends Mapper<LongWritable, Text, Text, LongArrayWritable> {
        private final Text key2 = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Each input line is tab separated; field 1 holds the phone number,
            // fields 6-9 hold the four traffic counters.
            String[] split = value.toString().split("\t");
            key2.set(split[1]);

            String[] traffic = new String[4];
            traffic[0] = split[6];
            traffic[1] = split[7];
            traffic[2] = split[8];
            traffic[3] = split[9];

            LongArrayWritable arrayWritable = new LongArrayWritable(traffic);
            context.write(key2, arrayWritable);
        }
    }

    static class MyReducer extends Reducer<Text, LongArrayWritable, Text, NullWritable> {
        private final Text key3 = new Text();

        @Override
        protected void reduce(Text key2, Iterable<LongArrayWritable> val2s, Context context)
                throws IOException, InterruptedException {
            long sum1 = 0;
            long sum2 = 0;
            long sum3 = 0;
            long sum4 = 0;

            // Accumulate the four traffic counters for this phone number.
            for (LongArrayWritable traffic : val2s) {
                Writable[] writables = traffic.get();
                sum1 += Long.parseLong(writables[0].toString());
                sum2 += Long.parseLong(writables[1].toString());
                sum3 += Long.parseLong(writables[2].toString());
                sum4 += Long.parseLong(writables[3].toString());
            }

            key3.set(key2 + " " + sum1 + " " + sum2 + " " + sum3 + " " + sum4);
            context.write(key3, NullWritable.get());
        }
    }

    // ArrayWritable has no no-arg constructor, but Hadoop instantiates value classes by
    // reflection during deserialization, so it must be subclassed with the element type
    // fixed to LongWritable.
    static class LongArrayWritable extends ArrayWritable {
        public LongArrayWritable() {
            super(LongWritable.class);
        }

        public LongArrayWritable(String[] strings) {
            super(LongWritable.class);
            LongWritable[] longs = new LongWritable[strings.length];
            for (int i = 0; i < longs.length; i++) {
                longs[i] = new LongWritable(Long.parseLong(strings[i]));
            }
            set(longs);
        }
    }
}
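A quick note on running this (a sketch, not verified against any particular cluster): the mapper assumes tab-separated log lines with the phone number at field index 1 and the four traffic counters at field indexes 6 through 9, and the reducer emits one line per phone number with the four summed totals. Assuming the class is packaged into a jar named kpi.jar (a hypothetical name) and the NameNode is reachable at hadoop:9000 as hard-coded above, the job could be submitted with:

hadoop jar kpi.jar kpi.ArrayWritableTest

The results then land in /kpi__data_out_1 on HDFS and can be inspected with hadoop fs -cat /kpi__data_out_1/part-r-00000.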