`
bo_hai
  • 浏览: 554233 次
  • 性别: Icon_minigender_1
  • 来自: 武汉
社区版块
存档分类
最新评论

Hadoop 统计专利被哪些专利所引用(一)

 
阅读更多

一、以下是测试数据:

"CITING","CITED"
3858241,956203
3858241,1324234
3858241,3398406
3858241,3557384
3858241,3634889
3858242,1515701
3858242,3319261
3858242,3668705
3858242,3707004
3858243,2949611
3858243,3146465
3858243,3156927
3858243,3221341
3858243,3574238
3858243,3681785
3858243,3684611
3858244,14040
3858244,17445
3858245,17445

 注:第一列(CITING)是引用方的专利号,第二列(CITED)是被其引用的专利号。

二、Hadoop 代码如下:

 

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueLineRecordReader;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * MapReduce job that inverts a patent citation list.
 *
 * <p>Each input line has the form {@code CITING,CITED}. The job emits one line
 * per cited patent, followed by a comma-separated list of the patents that
 * cite it.
 *
 * <p>Usage: {@code PatentCitations [inputPath [outputPath]]}. Any path that is
 * not supplied falls back to the built-in default.
 *
 * <p>Note: nothing filters the CSV header row, so it is inverted and written
 * to the output like any other record.
 */
public class PatentCitations extends Configured implements Tool {

	/** Input path used when no first command-line argument is given. */
	private static final String DEFAULT_INPUT_PATH = "/patent/test/input/file1.txt";

	/** Output path used when no second command-line argument is given. */
	private static final String DEFAULT_OUTPUT_PATH = "/patent/test/output";

	/** Separator between the two columns of an input line. */
	private static final String FIELD_SEPARATOR = ",";

	/**
	 * Swaps key and value so that records are grouped by the second input
	 * column instead of the first.
	 */
	public static class PatentCitationsMapper extends Mapper<Text, Text, Text, Text> {

		/**
		 * Emits {@code (value, key)} for the incoming {@code (key, value)} pair.
		 *
		 * @param key     first column of the input line
		 * @param value   second column of the input line
		 * @param context sink for the inverted pair
		 */
		@Override
		protected void map(Text key, Text value, Context context)
				throws IOException, InterruptedException {
			context.write(value, key);
		}
	}

	/**
	 * Joins every value received for a key into one comma-separated string.
	 */
	public static class PatentCitationsReduces extends Reducer<Text, Text, Text, Text> {

		// Reused across reduce() calls to avoid allocating a Text per key.
		// Kept per-instance (not static) so reducer instances never share it.
		private final Text joined = new Text();

		/**
		 * Writes {@code key} together with all of its values joined by commas,
		 * in the order the iterable yields them.
		 *
		 * @param key     the grouping key
		 * @param values  all values emitted by the mappers for {@code key}
		 * @param context sink for the joined record
		 */
		@Override
		protected void reduce(Text key, Iterable<Text> values, Context context)
				throws IOException, InterruptedException {
			StringBuilder sb = new StringBuilder();
			for (Text value : values) {
				if (sb.length() > 0) {
					sb.append(FIELD_SEPARATOR);
				}
				sb.append(value.toString());
			}
			joined.set(sb.toString());
			context.write(key, joined);
		}

	}

	/**
	 * Configures and runs the job, blocking until it finishes.
	 *
	 * @param args optional {@code [inputPath [outputPath]]}; missing entries
	 *             fall back to the built-in defaults
	 * @return {@code 0} if the job succeeded, {@code 1} otherwise
	 */
	@Override
	public int run(String[] args) throws Exception {
		String inputPath = DEFAULT_INPUT_PATH;
		String outputPath = DEFAULT_OUTPUT_PATH;
		if (args != null) {
			if (args.length > 0) {
				inputPath = args[0];
			}
			if (args.length > 1) {
				outputPath = args[1];
			}
		}

		// Must be set before the Job is created so the job's configuration
		// carries the separator used to split each line into key and value.
		Configuration conf = getConf();
		conf.set(KeyValueLineRecordReader.KEY_VALUE_SEPERATOR, FIELD_SEPARATOR);

		Job job = Job.getInstance(conf);
		job.setJarByClass(getClass());
		job.setJobName("patentcitations");

		job.setInputFormatClass(KeyValueTextInputFormat.class);
		job.setOutputFormatClass(TextOutputFormat.class);

		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);

		job.setMapperClass(PatentCitationsMapper.class);
		job.setReducerClass(PatentCitationsReduces.class);

		FileInputFormat.setInputPaths(job, new Path(inputPath));
		FileOutputFormat.setOutputPath(job, new Path(outputPath));

		boolean success = job.waitForCompletion(true);
		return success ? 0 : 1;
	}

	/**
	 * Command-line entry point; exits with the status returned by {@link #run}.
	 *
	 * @param args command-line arguments, passed through {@link ToolRunner}
	 */
	public static void main(String[] args) throws Exception {
		int result = ToolRunner.run(new PatentCitations(), args);
		System.exit(result);
	}

}

 三、执行结果如下:

 

 

"CITED"	"CITING"
1324234	3858241
14040	3858244
1515701	3858242
17445	3858245,3858244
2949611	3858243
3146465	3858243
3156927	3858243
3221341	3858243
3319261	3858242
3398406	3858241
3557384	3858241
3574238	3858243
3634889	3858241
3668705	3858242
3681785	3858243
3684611	3858243
3707004	3858242
956203	3858241

 注:17445 分别被 3858245,3858244 所引用。

 

分享到:
评论
1 楼 bo_hai 2013-11-23  

相关推荐

Global site tag (gtag.js) - Google Analytics