Hadoop's HDFS and MapReduce frameworks are designed primarily for large files. With large numbers of small files, not only is processing inefficient, but each small file also occupies its own block (the default HDFS block size is 64 MB; a small file does not physically consume the full 64 MB of disk, but every file and block costs NameNode memory, which is the real bottleneck at scale). The usual remedy is to pack the small files into a container file and store them together. Hadoop provides two such container types: SequenceFile and MapFile.
1 Using SequenceFile
package org.tony.file;

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;

public class SequenceFileWriter {
    public static String uri = "hdfs://192.168.142.128:9000"; // URI for accessing HDFS
    public static String[] data = { "one,two", "three,four", "five,six",
            "seven,eight", "nine,ten" };

    public static void main(String[] args) throws IOException {
        write();
        read();
    }

    /**
     * @Title: write
     * @Description: (1) create a Configuration; (2) get the FileSystem;
     *               (3) create the output Path; (4) call SequenceFile.createWriter
     *               to obtain a SequenceFile.Writer; (5) call
     *               SequenceFile.Writer.append to append records; (6) close the stream.
     */
    public static void write() throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        Path path = new Path("/tmp.seq");
        // Path path = new Path("/tmp1.seq"); // compressed output
        IntWritable key = new IntWritable();
        Text value = new Text();
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, path,
                key.getClass(), value.getClass());
        // Compressed variant, using the BZip2 codec:
        // SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, path,
        //         key.getClass(), value.getClass(), CompressionType.RECORD,
        //         new BZip2Codec());
        for (int i = 0; i < 100; i++) { // write 100 records
            key.set(100 - i);
            value.set(data[i % data.length]);
            writer.append(key, value);
        }
        IOUtils.closeStream(writer);
    }

    /**
     * @Title: read
     * @Description: (1) create a Configuration; (2) get the FileSystem;
     *               (3) create the input Path; (4) construct a SequenceFile.Reader;
     *               (5) obtain the key and value classes; (6) close the stream.
     */
    public static void read() throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        Path path = new Path("/tmp.seq");
        // Path path = new Path("/tmp1.seq"); // read the compressed file
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
        Writable key = (Writable) ReflectionUtils.newInstance(
                reader.getKeyClass(), conf);
        Writable value = (Writable) ReflectionUtils.newInstance(
                reader.getValueClass(), conf);
        while (reader.next(key, value)) {
            System.out.println("key = " + key);
            System.out.println("value = " + value);
            System.out.println("position = " + reader.getPosition());
        }
        IOUtils.closeStream(reader);
    }
}
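Expanding the commented-out lines in write() into a standalone method: the sketch below writes a record-compressed SequenceFile with the BZip2 codec. The method name writeCompressed is mine, not from the original code; the method slots into the class above and needs two extra imports, org.apache.hadoop.io.SequenceFile.CompressionType and org.apache.hadoop.io.compress.BZip2Codec.

// Sketch only: the compressed variant hinted at in write().
// Requires:
//   import org.apache.hadoop.io.SequenceFile.CompressionType;
//   import org.apache.hadoop.io.compress.BZip2Codec;
public static void writeCompressed() throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create(uri), conf);
    Path path = new Path("/tmp1.seq"); // the compressed path from the comments above
    IntWritable key = new IntWritable();
    Text value = new Text();
    // CompressionType.RECORD compresses each value individually;
    // CompressionType.BLOCK batches many records and usually compresses better.
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, path,
            key.getClass(), value.getClass(), CompressionType.RECORD,
            new BZip2Codec());
    for (int i = 0; i < 100; i++) {
        key.set(100 - i);
        value.set(data[i % data.length]);
        writer.append(key, value);
    }
    IOUtils.closeStream(writer);
}

The read() method above works on the compressed file unchanged: a SequenceFile records its compression type and codec in the file header, so the reader detects them automatically.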
2 Using MapFile
package org.tony.file;

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.util.ReflectionUtils;

public class MapFileTest {
    public static String uri = "hdfs://192.168.142.128:9000"; // URI for accessing HDFS
    public static String[] data = { "one,two", "three,four", "five,six",
            "seven,eight", "nine,ten" };

    public static void main(String[] args) throws Exception {
        // write();
        read();
        // seqToMapFile();
    }

    /**
     * Write a file with MapFile.Writer.
     * @Title: write
     * @Description: (1) create a Configuration; (2) get the FileSystem;
     *               (3) create the output Path; (4) construct a MapFile.Writer;
     *               (5) call MapFile.Writer.append to append records; (6) close the stream.
     */
    public static void write() throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        Path path = new Path("/tmpdata.map");
        IntWritable key = new IntWritable();
        Text value = new Text();
        MapFile.Writer writer = new MapFile.Writer(conf, fs, path.toString(),
                key.getClass(), value.getClass());
        for (int i = 0; i < 100; i++) { // MapFile keys must be appended in sorted order
            key.set(i + 1);
            value.set(data[i % data.length]);
            writer.append(key, value);
        }
        IOUtils.closeStream(writer);
    }

    /**
     * Read a file with MapFile.Reader.
     * @Title: read
     * @Description: (1) create a Configuration; (2) get the FileSystem;
     *               (3) create the input Path; (4) construct a MapFile.Reader;
     *               (5) obtain the key and value classes; (6) close the stream.
     */
    public static void read() throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        Path path = new Path("/tmpdata.map");
        MapFile.Reader reader = new MapFile.Reader(fs, path.toString(), conf);
        WritableComparable key = (WritableComparable) ReflectionUtils
                .newInstance(reader.getKeyClass(), conf);
        Writable value = (Writable) ReflectionUtils.newInstance(
                reader.getValueClass(), conf);
        while (reader.next(key, value)) {
            System.out.println("key = " + key);
            System.out.println("value = " + value);
        }
        IOUtils.closeStream(reader);
    }

    /**
     * Convert a SequenceFile into a MapFile.
     * @Title: seqToMapFile
     * @Description: steps to prepare before running:
     *               1. Create the /tmp1.map directory.
     *               2. Move the tmp1.seq SequenceFile into the /tmp1.map directory
     *                  and rename it "data": $ ./hadoop fs -mv /tmp1.seq /tmp1.map/data
     *               3. Run this program.
     */
    public static void seqToMapFile() throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        Path map = new Path("/tmp1.map"); // the MapFile directory
        // MapFile.DATA_FILE_NAME ("data") is the name the seq file was given
        // when it was moved under /tmp1.map
        Path mapData = new Path(map, MapFile.DATA_FILE_NAME);
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, mapData, conf);
        Class key = reader.getKeyClass();
        Class value = reader.getValueClass();
        reader.close();
        // MapFile.fix rebuilds the missing index beside an existing data file
        long entries = MapFile.fix(fs, map, key, value, false, conf);
        System.out.printf("Created MapFile %s with %d entries\n", map, entries);
    }
}
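The sequential read() above does not show the feature that distinguishes MapFile from SequenceFile: because the data file is sorted by key and paired with an index file, individual keys can be looked up directly. Below is a minimal sketch of a point lookup against the /tmpdata.map written by write() above; the method name getByKey and the key 42 are illustrative choices of mine, not from the original code.

// Sketch: random access via the MapFile index. getByKey is an illustrative
// name; key 42 exists because write() appended keys 1..100.
public static void getByKey() throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create(uri), conf);
    MapFile.Reader reader = new MapFile.Reader(fs, "/tmpdata.map", conf);
    Text value = new Text();
    // get() binary-searches the in-memory index, seeks into the data file,
    // and returns null if the key is not present.
    Writable result = reader.get(new IntWritable(42), value);
    if (result != null) {
        System.out.println("value for key 42 = " + value);
    }
    IOUtils.closeStream(reader);
}

One caveat on the conversion path: MapFile assumes its data file is sorted by ascending key, but the SequenceFile example in section 1 writes keys in descending order (100 down to 1), so a file converted with seqToMapFile() would need its keys written in ascending order for lookups to behave correctly.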