`
风过无声
  • 浏览: 87969 次
  • 性别: Icon_minigender_1
  • 来自: 深圳
社区版块
存档分类
最新评论

Hadoop 第一个Hadoop程序

 
阅读更多

1. 旧版API

-- 源代码

MaxTemperatureMapper.java

package com.hadoop.study.chap01;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class MaxTemperatureMapper extends MapReduceBase implements
		Mapper<LongWritable, Text, Text, IntWritable> {
	
	/** Sentinel value used by NCDC records for a missing temperature reading (+9999). */
	private static final int MISSING = 9999;
	
	/**
	 * Parses one fixed-width NCDC weather record and emits (year, airTemperature)
	 * for valid, non-missing readings.
	 *
	 * @param key    byte offset of the line within the input split (unused)
	 * @param value  one raw NCDC record line
	 * @param output collector receiving (year, temperature) pairs
	 * @param report progress reporter (unused)
	 * @throws IOException if the collector fails
	 */
	@Override
	public void map(LongWritable key, Text value,
			OutputCollector<Text, IntWritable> output, Reporter report)
			throws IOException {
		
		String line = value.toString();
		
		// Guard against blank or truncated records: the substring/charAt calls
		// below need at least 93 characters and would otherwise throw
		// StringIndexOutOfBoundsException and fail the whole task.
		if (line.length() < 93) {
			return;
		}
		
		// Year occupies columns 15-18 of the fixed-width record.
		String year = line.substring(15, 19);
		
		// Temperature is a signed field in columns 87-91; Integer.parseInt
		// cannot handle a leading '+', so skip it explicitly.
		int airTemperature;
		if (line.charAt(87) == '+') {
			airTemperature = Integer.parseInt(line.substring(88, 92));
		} else {
			airTemperature = Integer.parseInt(line.substring(87, 92));
		}
		
		// Quality code at column 92: only codes 0,1,4,5,9 mark trusted readings.
		String quality = line.substring(92, 93);
		if (airTemperature != MISSING && quality.matches("[01459]")) {
			output.collect(new Text(year), new IntWritable(airTemperature));
		}
		
	}

}

 MaxTemperatureReducer.java

package com.hadoop.study.chap01;

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

public class MaxTemperatureReducer extends MapReduceBase implements
		Reducer<Text, IntWritable, Text, IntWritable> {

	/**
	 * Emits the maximum temperature observed for the given year (key).
	 *
	 * @param key    the year
	 * @param values all temperatures recorded for that year
	 * @param output collector receiving the single (year, max) pair
	 * @param report progress reporter (unused)
	 * @throws IOException if the collector fails
	 */
	@Override
	public void reduce(Text key, Iterator<IntWritable> values,
			OutputCollector<Text, IntWritable> output, Reporter report)
			throws IOException {
		
		// Fold the value stream into a running maximum.
		int max = Integer.MIN_VALUE;
		while (values.hasNext()) {
			int current = values.next().get();
			if (current > max) {
				max = current;
			}
		}
		
		output.collect(key, new IntWritable(max));
	}

}

 MaxTemperature.java

package com.hadoop.study.chap01;

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;


public class MaxTemperature {
	
	/**
	 * Driver for the max-temperature job using the old (org.apache.hadoop.mapred)
	 * MapReduce API: configures the job via JobConf and submits it with JobClient.
	 *
	 * Expects exactly two arguments: the input path and the output path.
	 */
	public static void main(String[] args) throws IOException {
		
		if (args.length != 2) {
			System.err.println("Usage: MaxTemperature <input path> <output path>");
			System.exit(-1);
		}
		
		JobConf conf = new JobConf(MaxTemperature.class);
		conf.setJobName("Max Temperature");
		
		// Wire up the map/reduce implementations and the output key/value types.
		conf.setMapperClass(MaxTemperatureMapper.class);
		conf.setReducerClass(MaxTemperatureReducer.class);
		conf.setOutputKeyClass(Text.class);
		conf.setOutputValueClass(IntWritable.class);
		
		// Input and output locations come from the command line.
		FileInputFormat.addInputPath(conf, new Path(args[0]));
		FileOutputFormat.setOutputPath(conf, new Path(args[1]));
		
		// Submit the job and block until it completes.
		JobClient.runJob(conf);
		
	}
	
}

-- 执行

1) 将程序打包成hadoop-study.jar

2) 将输入文件1901上传至hadoop的master节点的/home/hadoop/input目录下

3) 将输入文件导入HDFS中

hadoop fs -copyFromLocal /home/hadoop/input input

4) 将jar包上传至hadoop的master节点的/home/hadoop/task目录下

5) 运行代码

hadoop jar /home/hadoop/task/hadoop-study.jar com.hadoop.study.chap01.MaxTemperature input/1901 output

-- 控制台输出

14/02/24 23:03:20 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
14/02/24 23:03:20 INFO util.NativeCodeLoader: Loaded the native-hadoop library
14/02/24 23:03:20 WARN snappy.LoadSnappy: Snappy native library not loaded
14/02/24 23:03:20 INFO mapred.FileInputFormat: Total input paths to process : 1
14/02/24 23:03:21 INFO mapred.JobClient: Running job: job_201402241759_0005
14/02/24 23:03:22 INFO mapred.JobClient:  map 0% reduce 0%
14/02/24 23:03:29 INFO mapred.JobClient:  map 50% reduce 0%
14/02/24 23:03:31 INFO mapred.JobClient:  map 100% reduce 0%
14/02/24 23:03:37 INFO mapred.JobClient:  map 100% reduce 16%
14/02/24 23:04:15 INFO mapred.JobClient: Task Id : attempt_201402241759_0005_m_000001_0, Status : FAILED
Too many fetch-failures
14/02/24 23:04:16 WARN mapred.JobClient: Error reading task outputConnection refused
14/02/24 23:04:16 WARN mapred.JobClient: Error reading task outputConnection refused
14/02/24 23:04:17 INFO mapred.JobClient:  map 50% reduce 16%
14/02/24 23:04:20 INFO mapred.JobClient:  map 100% reduce 16%
14/02/24 23:04:38 INFO mapred.JobClient:  map 100% reduce 33%
14/02/24 23:04:40 INFO mapred.JobClient:  map 100% reduce 100%
14/02/24 23:04:41 INFO mapred.JobClient: Job complete: job_201402241759_0005
14/02/24 23:04:41 INFO mapred.JobClient: Counters: 30
14/02/24 23:04:41 INFO mapred.JobClient:   Job Counters 
14/02/24 23:04:41 INFO mapred.JobClient:     Launched reduce tasks=1
14/02/24 23:04:41 INFO mapred.JobClient:     SLOTS_MILLIS_MAPS=16037
14/02/24 23:04:41 INFO mapred.JobClient:     Total time spent by all reduces waiting after reserving slots (ms)=0
14/02/24 23:04:41 INFO mapred.JobClient:     Total time spent by all maps waiting after reserving slots (ms)=0
14/02/24 23:04:41 INFO mapred.JobClient:     Launched map tasks=3
14/02/24 23:04:41 INFO mapred.JobClient:     Data-local map tasks=3
14/02/24 23:04:41 INFO mapred.JobClient:     SLOTS_MILLIS_REDUCES=69940
14/02/24 23:04:41 INFO mapred.JobClient:   File Input Format Counters 
14/02/24 23:04:41 INFO mapred.JobClient:     Bytes Read=890559
14/02/24 23:04:41 INFO mapred.JobClient:   File Output Format Counters 
14/02/24 23:04:41 INFO mapred.JobClient:     Bytes Written=9
14/02/24 23:04:41 INFO mapred.JobClient:   FileSystemCounters
14/02/24 23:04:41 INFO mapred.JobClient:     FILE_BYTES_READ=72210
14/02/24 23:04:41 INFO mapred.JobClient:     HDFS_BYTES_READ=890763
14/02/24 23:04:41 INFO mapred.JobClient:     FILE_BYTES_WRITTEN=305149
14/02/24 23:04:41 INFO mapred.JobClient:     HDFS_BYTES_WRITTEN=9
14/02/24 23:04:41 INFO mapred.JobClient:   Map-Reduce Framework
14/02/24 23:04:41 INFO mapred.JobClient:     Map output materialized bytes=72216
14/02/24 23:04:41 INFO mapred.JobClient:     Map input records=6565
14/02/24 23:04:41 INFO mapred.JobClient:     Reduce shuffle bytes=72216
14/02/24 23:04:41 INFO mapred.JobClient:     Spilled Records=13128
14/02/24 23:04:41 INFO mapred.JobClient:     Map output bytes=59076
14/02/24 23:04:41 INFO mapred.JobClient:     Total committed heap usage (bytes)=412942336
14/02/24 23:04:41 INFO mapred.JobClient:     CPU time spent (ms)=3780
14/02/24 23:04:41 INFO mapred.JobClient:     Map input bytes=888190
14/02/24 23:04:41 INFO mapred.JobClient:     SPLIT_RAW_BYTES=204
14/02/24 23:04:41 INFO mapred.JobClient:     Combine input records=0
14/02/24 23:04:41 INFO mapred.JobClient:     Reduce input records=6564
14/02/24 23:04:41 INFO mapred.JobClient:     Reduce input groups=1
14/02/24 23:04:41 INFO mapred.JobClient:     Combine output records=0
14/02/24 23:04:41 INFO mapred.JobClient:     Physical memory (bytes) snapshot=333090816
14/02/24 23:04:41 INFO mapred.JobClient:     Reduce output records=1
14/02/24 23:04:41 INFO mapred.JobClient:     Virtual memory (bytes) snapshot=1122193408
14/02/24 23:04:41 INFO mapred.JobClient:     Map output records=6564

2.新版API

-- 源代码

MaxTemperatureMapper.java

package com.hadoop.study.chap01.news;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class MaxTemperatureMapper extends
		Mapper<LongWritable, Text, Text, IntWritable> {
	
	/** Sentinel value used by NCDC records for a missing temperature reading (+9999). */
	private static final int MISSING = 9999;
	
	/**
	 * Parses one fixed-width NCDC weather record and emits (year, airTemperature)
	 * for valid, non-missing readings.
	 *
	 * @param key     byte offset of the line within the input split (unused)
	 * @param value   one raw NCDC record line
	 * @param context output channel for (year, temperature) pairs
	 * @throws IOException          if writing to the context fails
	 * @throws InterruptedException if the task is interrupted
	 */
	@Override
	protected void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {
		
		String line = value.toString();
		
		// Guard against blank or truncated records: the substring/charAt calls
		// below need at least 93 characters and would otherwise throw
		// StringIndexOutOfBoundsException and fail the whole task.
		if (line.length() < 93) {
			return;
		}
		
		// Year occupies columns 15-18 of the fixed-width record.
		String year = line.substring(15, 19);
		
		// Temperature is a signed field in columns 87-91; Integer.parseInt
		// cannot handle a leading '+', so skip it explicitly.
		int airTemperature;
		if (line.charAt(87) == '+') {
			airTemperature = Integer.parseInt(line.substring(88, 92));
		} else {
			airTemperature = Integer.parseInt(line.substring(87, 92));
		}
		
		// Quality code at column 92: only codes 0,1,4,5,9 mark trusted readings.
		String quality = line.substring(92, 93);
		if (airTemperature != MISSING && quality.matches("[01459]")) {
			context.write(new Text(year), new IntWritable(airTemperature));
		}
		
	}
	
}

 MaxTemperatureReducer.java

package com.hadoop.study.chap01.news;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class MaxTemperatureReducer extends
		Reducer<Text, IntWritable, Text, IntWritable> {

	/**
	 * Emits the maximum temperature observed for the given year (key).
	 *
	 * @param key     the year
	 * @param values  all temperatures recorded for that year
	 * @param context output channel for the single (year, max) pair
	 * @throws IOException          if writing to the context fails
	 * @throws InterruptedException if the task is interrupted
	 */
	@Override
	protected void reduce(Text key, Iterable<IntWritable> values, Context context)
			throws IOException, InterruptedException {
		
		int maxAirTemperature = Integer.MIN_VALUE;
		
		for (IntWritable airTemperature : values) {
			maxAirTemperature = Math.max(maxAirTemperature, airTemperature.get());
		}
		
		// Write the incoming key directly; wrapping it in new Text(key)
		// allocated a redundant copy for every group with no effect on output.
		context.write(key, new IntWritable(maxAirTemperature));
	}

}

 MaxTemperature.java

package com.hadoop.study.chap01.news;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MaxTemperature {
	
	/**
	 * Driver for the max-temperature job using the new (org.apache.hadoop.mapreduce)
	 * API: configures a Job and waits for its completion.
	 *
	 * Expects exactly two arguments: the input path and the output path.
	 */
	public static void main(String[] args) throws Exception {
		
		if (args.length != 2) {
			System.err.println("Usage: MaxTemperature <input path> <output path>");
			System.exit(-1);
		}
		
		Job job = new Job();
		
		// Lets Hadoop locate the jar containing this driver on the cluster.
		job.setJarByClass(MaxTemperature.class);
		
		// Wire up the map/reduce implementations and the output key/value types.
		job.setMapperClass(MaxTemperatureMapper.class);
		job.setReducerClass(MaxTemperatureReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(IntWritable.class);
		
		// Input and output locations come from the command line.
		FileInputFormat.addInputPath(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));
		
		// Exit 0 on success, 1 on failure.
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
	
}

-- 执行

参考旧版本执行步骤

-- 控制台输出

14/02/24 23:10:37 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
14/02/24 23:10:37 INFO input.FileInputFormat: Total input paths to process : 1
14/02/24 23:10:37 INFO util.NativeCodeLoader: Loaded the native-hadoop library
14/02/24 23:10:37 WARN snappy.LoadSnappy: Snappy native library not loaded
14/02/24 23:10:38 INFO mapred.JobClient: Running job: job_201402241759_0006
14/02/24 23:10:39 INFO mapred.JobClient:  map 0% reduce 0%
14/02/24 23:10:45 INFO mapred.JobClient:  map 100% reduce 0%
14/02/24 23:10:53 INFO mapred.JobClient:  map 100% reduce 33%
14/02/24 23:10:55 INFO mapred.JobClient:  map 100% reduce 100%
14/02/24 23:10:56 INFO mapred.JobClient: Job complete: job_201402241759_0006
14/02/24 23:10:56 INFO mapred.JobClient: Counters: 29
14/02/24 23:10:56 INFO mapred.JobClient:   Job Counters 
14/02/24 23:10:56 INFO mapred.JobClient:     Launched reduce tasks=1
14/02/24 23:10:56 INFO mapred.JobClient:     SLOTS_MILLIS_MAPS=6900
14/02/24 23:10:56 INFO mapred.JobClient:     Total time spent by all reduces waiting after reserving slots (ms)=0
14/02/24 23:10:56 INFO mapred.JobClient:     Total time spent by all maps waiting after reserving slots (ms)=0
14/02/24 23:10:56 INFO mapred.JobClient:     Launched map tasks=1
14/02/24 23:10:56 INFO mapred.JobClient:     Data-local map tasks=1
14/02/24 23:10:56 INFO mapred.JobClient:     SLOTS_MILLIS_REDUCES=9502
14/02/24 23:10:56 INFO mapred.JobClient:   File Output Format Counters 
14/02/24 23:10:56 INFO mapred.JobClient:     Bytes Written=9
14/02/24 23:10:56 INFO mapred.JobClient:   FileSystemCounters
14/02/24 23:10:56 INFO mapred.JobClient:     FILE_BYTES_READ=72210
14/02/24 23:10:56 INFO mapred.JobClient:     HDFS_BYTES_READ=888304
14/02/24 23:10:56 INFO mapred.JobClient:     FILE_BYTES_WRITTEN=252199
14/02/24 23:10:56 INFO mapred.JobClient:     HDFS_BYTES_WRITTEN=9
14/02/24 23:10:56 INFO mapred.JobClient:   File Input Format Counters 
14/02/24 23:10:56 INFO mapred.JobClient:     Bytes Read=888190
14/02/24 23:10:56 INFO mapred.JobClient:   Map-Reduce Framework
14/02/24 23:10:56 INFO mapred.JobClient:     Map output materialized bytes=72210
14/02/24 23:10:56 INFO mapred.JobClient:     Map input records=6565
14/02/24 23:10:56 INFO mapred.JobClient:     Reduce shuffle bytes=72210
14/02/24 23:10:56 INFO mapred.JobClient:     Spilled Records=13128
14/02/24 23:10:56 INFO mapred.JobClient:     Map output bytes=59076
14/02/24 23:10:56 INFO mapred.JobClient:     CPU time spent (ms)=2050
14/02/24 23:10:56 INFO mapred.JobClient:     Total committed heap usage (bytes)=210173952
14/02/24 23:10:56 INFO mapred.JobClient:     Combine input records=0
14/02/24 23:10:56 INFO mapred.JobClient:     SPLIT_RAW_BYTES=114
14/02/24 23:10:56 INFO mapred.JobClient:     Reduce input records=6564
14/02/24 23:10:56 INFO mapred.JobClient:     Reduce input groups=1
14/02/24 23:10:56 INFO mapred.JobClient:     Combine output records=0
14/02/24 23:10:56 INFO mapred.JobClient:     Physical memory (bytes) snapshot=190836736
14/02/24 23:10:56 INFO mapred.JobClient:     Reduce output records=1
14/02/24 23:10:56 INFO mapred.JobClient:     Virtual memory (bytes) snapshot=748298240
14/02/24 23:10:56 INFO mapred.JobClient:     Map output records=6564

 MaxTemperatureWithCombiner.java

package com.hadoop.study.chap01.news;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MaxTemperatureWithCombiner {
	
	/**
	 * Driver for the max-temperature job with a combiner: because max is
	 * commutative and associative, the reducer class doubles as the combiner,
	 * shrinking map output before the shuffle.
	 *
	 * Expects exactly two arguments: the input path and the output path.
	 */
	public static void main(String[] args) throws Exception {
		
		if (args.length != 2) {
			// Fixed: the message previously named the wrong class (MaxTemperature).
			System.err.println("Usage: MaxTemperatureWithCombiner <input path> <output path>");
			System.exit(-1);
		}
		
		Job job = new Job();
		job.setJobName("Max Temperature");
		job.setJarByClass(MaxTemperatureWithCombiner.class);
		
		FileInputFormat.addInputPath(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));
		
		job.setMapperClass(MaxTemperatureMapper.class);
		job.setReducerClass(MaxTemperatureReducer.class);
		// Run the reducer locally on each map's output to cut shuffle volume.
		job.setCombinerClass(MaxTemperatureReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(IntWritable.class);
		
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
	
}

 -- 控制台输出

14/02/24 23:12:16 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
14/02/24 23:12:17 INFO input.FileInputFormat: Total input paths to process : 1
14/02/24 23:12:17 INFO util.NativeCodeLoader: Loaded the native-hadoop library
14/02/24 23:12:17 WARN snappy.LoadSnappy: Snappy native library not loaded
14/02/24 23:12:17 INFO mapred.JobClient: Running job: job_201402241759_0007
14/02/24 23:12:18 INFO mapred.JobClient:  map 0% reduce 0%
14/02/24 23:12:30 INFO mapred.JobClient:  map 100% reduce 0%
14/02/24 23:12:41 INFO mapred.JobClient:  map 100% reduce 33%
14/02/24 23:12:43 INFO mapred.JobClient:  map 100% reduce 100%
14/02/24 23:12:44 INFO mapred.JobClient: Job complete: job_201402241759_0007
14/02/24 23:12:44 INFO mapred.JobClient: Counters: 29
14/02/24 23:12:44 INFO mapred.JobClient:   Job Counters 
14/02/24 23:12:44 INFO mapred.JobClient:     Launched reduce tasks=1
14/02/24 23:12:44 INFO mapred.JobClient:     SLOTS_MILLIS_MAPS=10591
14/02/24 23:12:44 INFO mapred.JobClient:     Total time spent by all reduces waiting after reserving slots (ms)=0
14/02/24 23:12:44 INFO mapred.JobClient:     Total time spent by all maps waiting after reserving slots (ms)=0
14/02/24 23:12:44 INFO mapred.JobClient:     Launched map tasks=1
14/02/24 23:12:44 INFO mapred.JobClient:     Data-local map tasks=1
14/02/24 23:12:44 INFO mapred.JobClient:     SLOTS_MILLIS_REDUCES=13038
14/02/24 23:12:44 INFO mapred.JobClient:   File Output Format Counters 
14/02/24 23:12:44 INFO mapred.JobClient:     Bytes Written=9
14/02/24 23:12:44 INFO mapred.JobClient:   FileSystemCounters
14/02/24 23:12:44 INFO mapred.JobClient:     FILE_BYTES_READ=17
14/02/24 23:12:44 INFO mapred.JobClient:     HDFS_BYTES_READ=888304
14/02/24 23:12:44 INFO mapred.JobClient:     FILE_BYTES_WRITTEN=108261
14/02/24 23:12:44 INFO mapred.JobClient:     HDFS_BYTES_WRITTEN=9
14/02/24 23:12:44 INFO mapred.JobClient:   File Input Format Counters 
14/02/24 23:12:44 INFO mapred.JobClient:     Bytes Read=888190
14/02/24 23:12:44 INFO mapred.JobClient:   Map-Reduce Framework
14/02/24 23:12:44 INFO mapred.JobClient:     Map output materialized bytes=17
14/02/24 23:12:44 INFO mapred.JobClient:     Map input records=6565
14/02/24 23:12:44 INFO mapred.JobClient:     Reduce shuffle bytes=17
14/02/24 23:12:44 INFO mapred.JobClient:     Spilled Records=2
14/02/24 23:12:44 INFO mapred.JobClient:     Map output bytes=59076
14/02/24 23:12:44 INFO mapred.JobClient:     CPU time spent (ms)=4460
14/02/24 23:12:44 INFO mapred.JobClient:     Total committed heap usage (bytes)=210173952
14/02/24 23:12:44 INFO mapred.JobClient:     Combine input records=6564
14/02/24 23:12:44 INFO mapred.JobClient:     SPLIT_RAW_BYTES=114
14/02/24 23:12:44 INFO mapred.JobClient:     Reduce input records=1
14/02/24 23:12:44 INFO mapred.JobClient:     Reduce input groups=1
14/02/24 23:12:44 INFO mapred.JobClient:     Combine output records=1
14/02/24 23:12:44 INFO mapred.JobClient:     Physical memory (bytes) snapshot=191209472
14/02/24 23:12:44 INFO mapred.JobClient:     Reduce output records=1
14/02/24 23:12:44 INFO mapred.JobClient:     Virtual memory (bytes) snapshot=748470272
14/02/24 23:12:44 INFO mapred.JobClient:     Map output records=6564

3. 对比

--Mapper、Reducer 的实现方式由实现接口(org.apache.hadoop.mapred 包)变成继承抽象类(org.apache.hadoop.mapreduce 包)

--使用Job类来控制作业,而不是JobClient

--控制台输出,旧版本Launched map tasks=3(除去FAILED,数量为2),新版本Launched map tasks=1,使用combiner之后Reduce input records=1

4. 参考资料

Hadoop权威指南

分享到:
评论

相关推荐

    Hadoop权威指南 中文版

    hadoop的i/o、mapreduce应用程序开发;mapreduce的工作机制:mapreduce的类型和格式;mapreduce的特性:如何安装hadoop集群,如何管理hadoop;pig简介:hbase简介:zookeeper简介,最后还提供了丰富的案例分析。  ...

    Hadoop权威指南中文版第一版

    Hadoop的I/O、MapReduce应用程序开发;MapReduce的工作机制;MapReduce的类型和格式;MapReduce的特性;如何安装Hadoop集群,如何管理Hadoop;Pig简介;Hbase简介;ZooKeeper简介,最后还提供了丰富的案例分析。

    Hadoop权威指南 第二版(中文版)

    Hadoop的I/O、MapReduce应用程序开发;MapReduce的工作机制;MapReduce的类型和格式;MapReduce的特性;如何构建Hadoop集群,如何管理Hadoop;Pig简介;Hbase简介;Hive简介;ZooKeeper简介;开源工具Sqoop,最后还...

    新版Hadoop视频教程 段海涛老师Hadoop八天完全攻克Hadoop视频教程 Hadoop开发

    第一天 hadoop的基本概念 伪分布式hadoop集群安装 hdfs mapreduce 演示 01-hadoop职位需求状况.avi 02-hadoop课程安排.avi 03-hadoop应用场景.avi 04-hadoop对海量数据处理的解决思路.avi 05-hadoop版本选择和...

    Hadoop实战中文版

    第一部分 Hadoop——一种分布式编程框架 第1章 Hadoop简介 1.1 为什么写《Hadoop 实战》 1.2 什么是Hadoop 1.3 了解分布式系统和Hadoop 1.4 比较SQL 数据库和Hadoop 1.5 理解MapReduce 1.5.1 动手扩展一个...

    Hadoop开发者第一期入门专刊

    19 在Windows 上使用eclipse 编写Hadoop 应用程序 24 在Windows 中使用Cygwin 安装HBase 28 Nutch 与Hadoop 的整合与部署 31 在Windows eclipse 上单步调试Hive 教程 38 Hive 应用介绍 42 Hive 执行计划解析 50 ...

    Hadoop权威指南_第四版_中英文

    [1] Hadoop实现了一个分布式文件系统(Hadoop Distributed File System),简称HDFS。HDFS有高容错性的特点,并且设计用来部署在低廉的(low-cost)硬件上;而且它提供高吞吐量(high throughput)来访问应用程序的...

    【大数据入门笔记系列】第五节 SpringBoot集成hadoop开发环境(复杂版的WordCount)

    【大数据入门笔记系列】第五节 SpringBoot集成hadoop开发环境(复杂版的WordCount)前言环境清单创建SpringBoot项目创建包创建yml添加集群主机名映射hadoop配置文件环境变量HADOOP_HOME编写代码添加hadoop依赖jar包...

    Hadoop权威指南第四版pdf 英文原版高清

    [1] Hadoop实现了一个分布式文件系统(Hadoop Distributed File System),简称HDFS。HDFS有高容错性的特点,并且设计用来部署在低廉的(low-cost)硬件上;而且它提供高吞吐量(high throughput)来访问应用程序的...

    Hadoop实战-第2版-陆嘉恒.pdf

    MapReduce应用程序5. MapReduce应用案例6. MapReduce工作机制7. Hadoop I/O操作8. 下一代MapReduce: Yarn9. HDFS简介10. HDFS文件结构11. Hive详解12. HBase详解13. Mahout简介14. Pig详解15. ZooKeeper详解16. ...

    Hadoop权威指南_保证第四版_中文版

    [1] Hadoop实现了一个分布式文件系统(Hadoop Distributed File System),简称HDFS。HDFS有高容错性的特点,并且设计用来部署在低廉的(low-cost)硬件上;而且它提供高吞吐量(high throughput)来访问应用程序的...

    Hadoop的单机伪分布式搭建和运行第一个WordCount程序

    Hadoop的单机伪分布式搭建和运行第一个WordCount程序 • 环境: macOs下 Eclipse(Neon)+Hadoop-2.5.2(64位) 注:已经安装的java环境,我的事jdk1.8的

    Hadoop权威指南第三版

    Hadoop实现了一个分布式文件系统(Hadoop Distributed File System),简称HDFS。HDFS有高容错性的特点,并且设计用来部署在低廉的(low-cost)硬件上;而且它提供高吞吐量(high throughput)来访问应用程序的数据...

    Hadoop权威指南第3版+修订版.pdf

    [1] Hadoop实现了一个分布式文件系统(Hadoop Distributed File System),简称HDFS。HDFS有高容错性的特点,并且设计用来部署在低廉的(low-cost)硬件上;而且它提供高吞吐量(high throughput)来访问应用程序的...

    Hadoop学习全程记录-在Eclipse中运行第一个MapReduce程序.docx

    Hadoop学习过程中的记录笔记:如何在Eclipse下写第一个MapReduce程序

    Hadoop权威指南(中文版)2015上传.rar

    第1章 初识Hadoop 数据!数据! 数据存储与分析 与其他系统相比 关系型数据库管理系统 网格计算 志愿计算 1.3.4 Hadoop 发展简史 Apache Hadoop和Hadoop生态圈 第2章 关于MapReduce 一个气象数据集 数据的格式 使用...

    hadoop段海涛老师八天实战视频

    第一天 hadoop的基本概念 伪分布式hadoop集群安装 hdfs mapreduce 演示 01-hadoop职位需求状况.avi 02-hadoop课程安排.avi 03-hadoop应用场景.avi 04-hadoop对海量数据处理的解决思路.avi 05-hadoop版本选择和...

    基于Hadoop技术的大数据就业岗位数据分析.docx

    基于Hadoop技术的大数据就业岗位数据分析 作者:梁天友 邱敏 来源:《电脑知识与技术》2021年第31期 基于Hadoop技术的大数据就业岗位数据分析全文共10页,当前为第1页。 基于Hadoop技术的大数据就业岗位数据分析全文...

    Hadoop实战中文版.PDF

    目录编辑第一部分 Hadoop——一种分布式编程框架第1章 Hadoop简介 21.1 为什么写《Hadoop 实战》 31.2 什么是Hadoop 31.3 了解分布式系统和Hadoop 41.4 比较SQL数据库和Hadoop 51.5 理解MapReduce 61.5...

Global site tag (gtag.js) - Google Analytics