一、在(一)中,我们已经计算出每个专利被哪些专利所引用;在此基础上统计每个专利被引用的次数,不难实现。我们用两种方法来完成。代码(1)如下:
import java.io.IOException; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; public class PatentCitationsCount extends Configured implements Tool{ public static class PatentCitationsCountMapper extends Mapper<Text, Text, Text, IntWritable> { private final IntWritable length = new IntWritable(); @Override protected void map(Text key, Text value, Context context) throws IOException, InterruptedException { String[] array = value.toString().split("[,]"); length.set(array.length); context.write(key, length); } } public static class PatentCitationsCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> { private final IntWritable sumResult = new IntWritable(); @Override protected void reduce(Text key, Iterable<IntWritable> values,Context context) throws IOException, InterruptedException { int sum = 0; for (IntWritable intWritable : values) { sum += intWritable.get(); } sumResult.set(sum); context.write(key, sumResult); } } @Override public int run(String[] args) throws Exception { Job job = new Job(getConf()); job.setJarByClass(getClass()); job.setJobName("patentcitationscount"); job.setMapperClass(PatentCitationsCountMapper.class); job.setReducerClass(PatentCitationsCountReducer.class); job.setCombinerClass(PatentCitationsCountReducer.class); job.setInputFormatClass(KeyValueTextInputFormat.class); job.setOutputKeyClass(TextOutputFormat.class); FileInputFormat.setInputPaths(job, new 
Path("/patent/test/input/patentcitationscount.txt")); FileOutputFormat.setOutputPath(job, new Path("/patent/test/outnput")); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); boolean success = job.waitForCompletion(true); return success ? 0 : 1; } public static void main(String[] args) throws Exception { int result = ToolRunner.run(new PatentCitationsCount(), args); System.exit(result); } }
代码(2)如下:
import java.io.IOException; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; public class PatentCitationsCountOther extends Configured implements Tool { public static class CountOtherMapper extends Mapper<Text, Text, Text, Text> { @Override protected void map(Text key, Text value, Context context) throws IOException, InterruptedException { context.write(key, value); } } public static class CountOtherReducer extends Reducer<Text, Text, Text, IntWritable> { private IntWritable result = new IntWritable(); @Override protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { int sum = 0; for (Text text : values) { String[] array = text.toString().split("[,]"); sum += array.length; } result.set(sum); context.write(key, result); } } @Override public int run(String[] args) throws Exception { Job job = new Job(getConf()); job.setJarByClass(getClass()); job.setJobName("patentcitationscountother"); job.setMapperClass(CountOtherMapper.class); job.setReducerClass(CountOtherReducer.class); job.setInputFormatClass(KeyValueTextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.setInputPaths(job, new Path( "/patent/test/input/patentcitationscount.txt")); FileOutputFormat.setOutputPath(job, new Path("/patent/test/outnput")); boolean success = 
job.waitForCompletion(true); return success ? 0 : 1; } public static void main(String[] args) throws Exception { int result = ToolRunner.run(new PatentCitationsCountOther(), args); System.exit(result); } }
两种方法都能实现相同的功能,但我不确定哪一种更好?直观上看,代码(1)在 Map 端就完成了计数并设置了 Combiner,Shuffle 阶段传输的数据量更小;代码(2)则把原始引用列表全部传给 Reduce 端再统计。
相关推荐
Hadoop按日期统计访问次数代码实现,以及包含测试用的数据
Hadoop统计单词出现次数.rar
hadoop流量统计程序hadoop流量统计程序hadoop流量统计程序hadoop流量统计程序
对于基站数据,统计用户数,统计用户拨打电话的次数
基于hadoop的词频统计,通过空格作为词频切分,简单统计了哈姆雷特节选的词频数量。
该文件可以帮助我们练习Hadoop的统计功能。
完整的词频统计MapReduce版本。基于Hadoop2.2.0,包含一个十万单词左右的测试文件。请参照 http://blog.csdn.net/zythy/article/details/17888439 获取详细解说。
hadoop词频统计完整版!!!!!!!包含代码以及详细步骤。................................................................................................
Hadoop中单词统计案例运行的代码
Hadoop 分析统计学生考试成绩1
在hadoop平台上,用mapreduce编程实现大数据的词频统计
hadoop统计服务器kpi
第二章(Hadoop大数据处理实战)搭建Hadoop分布式集群.pdf第二章(Hadoop大数据处理实战)搭建Hadoop分布式集群.pdf第二章(Hadoop大数据处理实战)搭建Hadoop分布式集群.pdf第二章(Hadoop大数据处理实战)搭建Hadoop分布式...
mapreduce在hadoop实现词统计和列式统计,mrwordcount工程是统计hadoop文件中的词数,mrflowcount工程是统hadoop文件中的列表
基于Hadoop2.2.0的词频统计的例子。包含一个大概十万以上单词的测试数据文件。重写了Partitioner和Combiner,供学习之用。 访问博文 http://blog.csdn.net/zythy/article/details/17852579 以查看详细讲解。
在windows环境下开发hadoop时,需要配置HADOOP_HOME环境变量,变量值D:\hadoop-common-2.7.3-bin-master,并在Path追加%HADOOP_HOME%\bin,有可能出现如下错误: org.apache.hadoop.io.nativeio.NativeIO$Windows....
Hadoop权威指南中文版(第二版)+Hadoop in Action(英文版) + pro Hadoop(英文版)
本书从Hadoop的缘起开始,由浅入深,结合理论和实践,全方位地介绍Hadoop这一高性能处理海量数据集的理想工具。全书共16章,3个附录,涉及的主题包括:Haddoop简介;MapReduce简介;Hadoop分布式文件系统;Hadoop...
Hadoop课程实验和报告——每年申请美国专利的国家数统计