[hadoop2.7.1]I/O之序列化（WritableSerialization）示例

tenght

浏览: 47068 次

最近访客更多访客>>

jxqc_job

汽车城路

极品拖拉机

aubdiy

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

首先引入hadoop2.7.1中的一个工具类GenericsUtil。顾名思义，它是用来处理Java泛型的工具类，这个类很简单，但很实用，不多说，直接上源码：

package org.apache.hadoop.util;

import java.lang.reflect.Array;
import java.util.List;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;

/**
 * Contains utility methods for dealing with Java Generics. 
 */
@InterfaceAudience.Private
@InterfaceStability.Unstable
public class GenericsUtil {

  /**
   * Returns the runtime class of the given object, typed as
   * {@code Class<T>}.
   *
   * <p>The cast is unchecked: the value returned is {@code t.getClass()},
   * which may denote a subclass of the static type {@code T}.
   *
   * @param <T> the static type of the argument
   * @param t the object whose class is wanted; must not be {@code null}
   * @return the runtime class of {@code t}
   * @throws NullPointerException if {@code t} is {@code null}
   */
  public static <T> Class<T> getClass(T t) {
    @SuppressWarnings("unchecked")
    Class<T> clazz = (Class<T>) t.getClass();
    return clazz;
  }

  /**
   * Converts the given {@code List<T>} to an array of {@code T[]}.
   *
   * <p>Works for empty lists as well, because the component type is
   * supplied explicitly instead of being derived from an element.
   *
   * @param <T> the element type
   * @param c the Class object of the items in the list; used as the
   *          component type of the new array
   * @param list the list to convert
   * @return a newly allocated array of length {@code list.size()} holding
   *         the elements of {@code list} in iteration order
   * @throws ArrayStoreException if an element of {@code list} is not an
   *         instance of {@code c}
   */
  public static <T> T[] toArray(Class<T> c, List<T> list) {
    @SuppressWarnings("unchecked")
    T[] ta = (T[]) Array.newInstance(c, list.size());
    // Delegate the copy to the list itself: the array is sized to fit, so
    // toArray fills and returns it, and lists without fast random access
    // (e.g. LinkedList) are copied in one pass rather than via get(i).
    return list.toArray(ta);
  }

  /**
   * Converts the given {@code List<T>} to an array of {@code T[]}, using
   * the runtime class of the first element as the array's component type.
   *
   * <p>Because the component type comes from the first element, a list
   * whose later elements are not instances of that class cannot be stored
   * in the resulting array. Use {@link #toArray(Class, List)} for lists
   * with mixed element types.
   *
   * @param <T> the element type
   * @param list the list to convert; must be non-empty
   * @return a newly allocated array holding the elements of {@code list}
   * @throws IndexOutOfBoundsException if the list is empty.
   *         Use {@link #toArray(Class, List)} if the list may be empty.
   * @throws NullPointerException if the first element is {@code null}
   * @throws ArrayStoreException if an element is not an instance of the
   *         first element's runtime class
   */
  public static <T> T[] toArray(List<T> list) {
    return toArray(getClass(list.get(0)), list);
  }

}

利用上面的工具类，下面给出一个TestWritableSerialization示例，源码如下：

import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.EnumSet;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.EnumSetWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.io.serializer.Deserializer;
import org.apache.hadoop.io.serializer.SerializationFactory;
import org.apache.hadoop.io.serializer.Serializer;
import org.apache.hadoop.util.GenericsUtil;

import org.junit.Test;
import static org.junit.Assert.*;

/**
 * Round-trips several Writable values through the serializer and
 * deserializer that {@link SerializationFactory} returns for their class,
 * and checks that each value read back equals the value written.
 */
public class THT_TestWritableSerialization {

	private static final Configuration conf = new Configuration();

	/** Small enum used as the element type of the EnumSetWritable case. */
	enum TestEnumSet {
		CREATE, OVERWRITE, APPEND;
	}

	/**
	 * Serializes and deserializes an IntWritable, a BytesWritable, one
	 * element of an ArrayWritable built from strings, an EnumSetWritable and
	 * a Text, asserting equality of each value before and after.
	 */
	@Test
	public void testWritableSerialization() throws Exception {
		IntWritable before1 = new IntWritable(123456789);

		// Explicit charset so the payload bytes do not depend on the
		// platform default encoding.
		byte[] hadoop = "hadoop".getBytes(StandardCharsets.UTF_8);
		BytesWritable before2 = new BytesWritable(hadoop);

		// Take the first element of the ArrayWritable; its concrete type is
		// whatever ArrayWritable creates for string input.
		String[] arraywr = { "zero", "one", "two" };
		ArrayWritable arraywrA = new ArrayWritable(arraywr);
		Writable[] arraywrW = arraywrA.get();
		Writable before3 = arraywrW[0];

		EnumSetWritable<TestEnumSet> before4 = new EnumSetWritable<TestEnumSet>(
				EnumSet.of(TestEnumSet.APPEND));
		Text before5 = new Text("test for you");

		IntWritable after1 = testSerialization(conf, before1);
		BytesWritable after2 = testSerialization(conf, before2);
		Writable after3 = testSerialization(conf, before3);
		EnumSetWritable<TestEnumSet> after4 = testSerialization(conf, before4);
		Text after5 = testSerialization(conf, before5);

		assertEquals(before1, after1);
		assertEquals(before2, after2);
		assertEquals(before3, after3);
		assertEquals(before4, after4);
		assertEquals(before5, after5);
	}

	/**
	 * Serializes {@code before} into an in-memory buffer and deserializes a
	 * new value from those bytes.
	 *
	 * @param <K> the static type of the value
	 * @param conf configuration handed to the {@link SerializationFactory}
	 * @param before the value to round-trip; its runtime class selects the
	 *               serializer and deserializer
	 * @return the value produced by the deserializer
	 * @throws Exception if opening, serializing, deserializing or closing fails
	 */
	public static <K> K testSerialization(Configuration conf, K before)
			throws Exception {

		// Resolve the runtime class once and reuse it for both lookups.
		Class<K> type = GenericsUtil.getClass(before);

		SerializationFactory factory = new SerializationFactory(conf);
		Serializer<K> serializer = factory.getSerializer(type);
		Deserializer<K> deserializer = factory.getDeserializer(type);

		// Report a missing (de)serializer by name instead of failing later
		// with a bare NullPointerException, should the factory return null.
		assertNotNull("no serializer for " + type.getName(), serializer);
		assertNotNull("no deserializer for " + type.getName(), deserializer);

		DataOutputBuffer out = new DataOutputBuffer();
		serializer.open(out);
		try {
			serializer.serialize(before);
		} finally {
			// Close even when serialize throws.
			serializer.close();
		}

		// Read back only the bytes actually written (getLength), not the
		// whole backing array returned by getData.
		DataInputBuffer in = new DataInputBuffer();
		in.reset(out.getData(), out.getLength());
		deserializer.open(in);
		try {
			// NOTE(review): null presumably asks the deserializer to
			// allocate a fresh instance rather than reuse one; confirm
			// against the Deserializer documentation.
			return deserializer.deserialize(null);
		} finally {
			// Close even when deserialize throws.
			deserializer.close();
		}
	}
}

分享到：