在序列化中带有Avro NullPointerException的MRUnit [英] MRUnit with Avro NullPointerException in Serialization
问题描述
我试图使用MRUnit测试Hadoop .mapreduce Avro作业。我收到一个NullPointerException,如下所示。我附加了一部分的pom和源代码。任何援助将不胜感激。
谢谢
我收到的错误是:
java.lang.NullPointerException
at org.apache.hadoop.mrunit.internal.io.Serialization.copy(Serialization.java:73)
at org.apache.hadoop.mrunit.internal.io.Serialization.copy(Serialization.java:91)
at org.apache.hadoop.mrunit.internal.io.Serialization.copyWithConf(Serialization.java:104)
at org.apache.hadoop.mrunit.TestDriver.copy(TestDriver.java:608)
at org.apache.hadoop.mrunit.MapDriverBase.setInputKey(MapDriverBase.java:64)
at org.apache.hadoop.mrunit.MapDriverBase.setInput(MapDriverBase.java:104)
at org.apache.hadoop.mrunit.MapDriverBase.withInput(MapDriverBase.java:218)
at org.lab41.project.mapreduce.ParseMetadataAsTextIntoAvroTest.testMap(ParseMetadataAsTextIntoAvroTest.java:115)
.....
pom snippet:
<dependency>
<groupId>org.apache.mrunit</groupId>
<artifactId>mrunit</artifactId>
<version>0.9.0-incubating</version>
<classifier>hadoop2</classifier>
<scope>test</scope>
</dependency>
<avro.version>1.7.4</avro.version>
<hadoop.version>2.0.0-mr1-cdh4.1.3</hadoop.version>
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
<version>${avro.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
<version>${hadoop.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro-mapred</artifactId>
<version>${avro.version}</version>
<classifier>hadoop2</classifier>
</dependency>
以下是测试的摘录:
import static org.junit.Assert.*;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.hadoop.io.AvroSerialization;
import org.apache.avro.mapred.AvroValue;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.apache.hadoop.mrunit.types.Pair;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.lab41.project.domain.DataRecord;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class ParseMetadataAsTextIntoAvroTest {
Logger logger = LoggerFactory
.getLogger(ParseMetadataAsTextIntoAvroTest.class);
私有MapDriver< LongWritable,Text,AvroKey< Long>,AvroValue< DataRecord>> mapDriver;
@BeforeClass
public static void setUpClass(){
}
@AfterClass
public static void tearDownClass(){
}
@Before
public void setUp()throws IOException {
ParseMetadataAsTextIntoAvroMapper mapper = new ParseMetadataAsTextIntoAvroMapper();
mapDriver = new MapDriver< LongWritable,Text,AvroKey< Long>,AvroValue< DataRecord>>();
mapDriver.setMapper(mapper);
mapDriver.getConfiguration()。setStrings(io.serializations,new String [] {
AvroSerialization.class.getName()
});
$ b @Test
public void testMap()throws ParseException,IOException {
Text testInputText = new Text(test0);
DataRecord record = new DataRecord();
...。
AvroKey< Long> expectedPivot =新的AvroKey< Long>(1L);
AvroValue< DataRecord> expectedRecord = new AvroValue< DataRecord>(record); (新对< LongWritable,Text>(new LongWritable(1),testInputText));
mapDriver.withInput
mapDriver.withOutput(new Pair< AvroKey< Long>,AvroValue< DataRecord>>(expectedPivot,expectedRecord));
mapDriver.runTest();
$ b
为了使其正常工作,你必须把 AvroSerialization 添加到默认的 io.serializations 中。你还必须配置 AvroSerialization(指定键和值的写入器模式)。
@Before
public void setUp()throws IOException {
ParseMetadataAsTextIntoAvroMapper mapper = new ParseMetadataAsTextIntoAvroMapper();
mapDriver = new MapDriver< LongWritable,Text,AvroKey< Long>,AvroValue< NetworkRecord>>();
mapDriver.setMapper(mapper);
//复制默认的io.serializations。如果你不这样做,那么你将
//不能够反序列化映射器的输入
String [] strings = mapDriver.getConfiguration()。getStrings(io.serializations);
String [] newStrings = new String [strings.length +1];
System.arraycopy(字符串,0,newStrings,0,strings.length);
newStrings [newStrings.length-1] = AvroSerialization.class.getName();
//现在您必须通过选择关键
//编写器模式和值编写器模式来配置AvroSerialization。
mapDriver.getConfiguration()。setStrings(io.serializations,newStrings);
mapDriver.getConfiguration()。setStrings(avro.serialization.key.writer.schema,Schema.create(Schema.Type.LONG).toString(true));
mapDriver.getConfiguration()。setStrings(avro.serialization.value.writer.schema,NetworkRecord.SCHEMA $ .toString(true));
}
I'm trying to test a Hadoop .mapreduce Avro job using MRUnit. I am receiving a NullPointerException as seen below. I've attached a portion of the pom and source code. Any assistance would be appreciated.
Thanks
The error I'm getting is :
java.lang.NullPointerException
at org.apache.hadoop.mrunit.internal.io.Serialization.copy(Serialization.java:73)
at org.apache.hadoop.mrunit.internal.io.Serialization.copy(Serialization.java:91)
at org.apache.hadoop.mrunit.internal.io.Serialization.copyWithConf(Serialization.java:104)
at org.apache.hadoop.mrunit.TestDriver.copy(TestDriver.java:608)
at org.apache.hadoop.mrunit.MapDriverBase.setInputKey(MapDriverBase.java:64)
at org.apache.hadoop.mrunit.MapDriverBase.setInput(MapDriverBase.java:104)
at org.apache.hadoop.mrunit.MapDriverBase.withInput(MapDriverBase.java:218)
at org.lab41.project.mapreduce.ParseMetadataAsTextIntoAvroTest.testMap(ParseMetadataAsTextIntoAvroTest.java:115)
.....
pom snippet:
<dependency>
<groupId>org.apache.mrunit</groupId>
<artifactId>mrunit</artifactId>
<version>0.9.0-incubating</version>
<classifier>hadoop2</classifier>
<scope>test</scope>
</dependency>
<avro.version>1.7.4</avro.version>
<hadoop.version>2.0.0-mr1-cdh4.1.3</hadoop.version>
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
<version>${avro.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
<version>${hadoop.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro-mapred</artifactId>
<version>${avro.version}</version>
<classifier>hadoop2</classifier>
</dependency>
Here is an excerpt of the test :
import static org.junit.Assert.*;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.hadoop.io.AvroSerialization;
import org.apache.avro.mapred.AvroValue;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.apache.hadoop.mrunit.types.Pair;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.lab41.project.domain.DataRecord;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * MRUnit test for {@code ParseMetadataAsTextIntoAvroMapper}, as posted in the question.
 *
 * <p>NOTE(review): this is the failing version that produces the NullPointerException in the
 * stack trace quoted earlier in the post; the code is left exactly as the asker wrote it.
 */
public class ParseMetadataAsTextIntoAvroTest {
Logger logger = LoggerFactory
.getLogger(ParseMetadataAsTextIntoAvroTest.class);
/** Driver that feeds (LongWritable, Text) input to the mapper and checks its Avro output. */
private MapDriver<LongWritable, Text, AvroKey<Long>, AvroValue<DataRecord>> mapDriver;
@BeforeClass
public static void setUpClass() {
}
@AfterClass
public static void tearDownClass() {
}
/**
 * Creates the mapper and driver, then sets {@code io.serializations} to AvroSerialization only.
 *
 * <p>NOTE(review): this overwrites the whole serialization list rather than appending to it,
 * which appears to be why MRUnit cannot copy the LongWritable/Text input in
 * {@code withInput(...)} -- confirm against the MRUnit sources.
 */
@Before
public void setUp() throws IOException {
ParseMetadataAsTextIntoAvroMapper mapper = new ParseMetadataAsTextIntoAvroMapper();
mapDriver = new MapDriver<LongWritable, Text, AvroKey<Long>, AvroValue<DataRecord>>();
mapDriver.setMapper(mapper);
mapDriver.getConfiguration().setStrings("io.serializations", new String[]{
AvroSerialization.class.getName()
});
}
/** Runs the mapper on one (1, text) input pair and expects one (1L, record) Avro output pair. */
@Test
public void testMap() throws ParseException, IOException {
// test0 is not defined anywhere in this excerpt.
Text testInputText = new Text(test0);
DataRecord record = new DataRecord();
// The next line is the asker's placeholder for elided code that populates the record.
….
AvroKey<Long> expectedPivot = new AvroKey<Long>(1L);
AvroValue<DataRecord> expectedRecord = new AvroValue<DataRecord>(record);
// The stack trace in the post points at this withInput call (via setInputKey -> copy).
mapDriver.withInput(new Pair<LongWritable, Text>(new LongWritable(1), testInputText));
mapDriver.withOutput(new Pair<AvroKey<Long>, AvroValue<DataRecord>>(expectedPivot, expectedRecord));
mapDriver.runTest();
}
}
In order to get this to work you have to add AvroSerialization to the default io.serializations rather than replacing them. You also have to configure AvroSerialization with the key and value writer schemas.
/**
 * Builds the MapDriver under test and registers Avro serialization with its configuration.
 *
 * <p>AvroSerialization is added to the existing {@code io.serializations} list instead of
 * replacing it; if the existing entries are dropped, the inputs to the mapper can no longer be
 * deserialized. The key and value writer schemas are then registered so that AvroSerialization
 * is configured for the AvroKey/AvroValue outputs.
 *
 * @throws IOException declared by the original post
 */
@Before
public void setUp() throws IOException {
    ParseMetadataAsTextIntoAvroMapper mapper = new ParseMetadataAsTextIntoAvroMapper();
    mapDriver = new MapDriver<LongWritable, Text, AvroKey<Long>, AvroValue<NetworkRecord>>();
    mapDriver.setMapper(mapper);

    Configuration conf = mapDriver.getConfiguration();
    // Appends AvroSerialization to io.serializations, keeping the entries already configured.
    AvroSerialization.addToConfiguration(conf);
    // Stores the schemas under avro.serialization.{key,value}.writer.schema.
    AvroSerialization.setKeyWriterSchema(conf, Schema.create(Schema.Type.LONG));
    AvroSerialization.setValueWriterSchema(conf, NetworkRecord.SCHEMA$);
}
这篇关于在序列化中带有Avro NullPointerException的MRUnit的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!