MapReduce Old API - 将命令行参数传递给Map [英] MapReduce Old API - Passing Command Line Argument to Map
问题描述
我正在使用旧 API 编写一个 MapReduce 作业,用于在存储于 HDFS 的输入文件中统计搜索字符串(通过命令行参数传入)的出现次数。
下面是我的Driver类 -
public class StringSearchDriver
{
public static void main (String [] args)抛出IOException
{
JobConf jc = new JobConf(StringSearchDriver.class);
jc.set(SearchWord,args [2]);
jc.setJobName(String Search);
FileInputFormat.addInputPath(jc,new Path(args [0]));
FileOutputFormat.setOutputPath(jc,new Path(args [1]));
jc.setMapperClass(StringSearchMap.class);
jc.setReducerClass(StringSearchReduce.class);
jc.setOutputKeyClass(Text.class);
jc.setOutputValueClass(IntWritable.class);
JobClient.runJob(jc);
以下是我的Mapper类 -
public class StringSearchMap扩展MapReduceBase实现
Mapper< LongWritable,Text,Text,IntWritable>
{
字符串searchWord;
public void configure(JobConf jc)
{
searchWord = jc.get(SearchWord);
$ b @Override
public void map(LongWritable key,Text value,
OutputCollector< Text,IntWritable> ; out,Reporter记者)
抛出IOException
{
String [] input = value.toString()。split();
for(String word:input)
{
if(word.equalsIgnoreCase(searchWord))
out.collect(new Text(word),new IntWritable 1));
$ b
运行作业时(传入的命令行字符串是 "hi"),我得到下面的错误 -
14/09/21 22:35:41 INFO mapred.JobClient: Task Id : attempt_201409212134_0005_m_000001_2, Status : FAILED
java.lang.ClassCastException: interface javax.xml.soap.Text
at java.lang.Class.asSubclass(Class.java:3129)
at org.apache.hadoop.mapred.JobConf.getOutputKeyComparator(JobConf.java:795)
at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.<init>(MapTask.java:964)
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:422)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:366)
at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:416)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1190)
at org.apache.hadoop.mapred.Child.main(Child.java:249)
请建议。
您的 IDE 自动导入了错误的类。
您导入的是 javax.xml.soap.Text,而不是 org.apache.hadoop.io.Text
您可以在这篇博客中找到一个错误导入的示例。
另外一点,最好采用新 API
编辑
我使用了新的API
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/ **
* @author Unmesha sreeveni
* @Date 23 sep 2014
* /
public class StringSearchDriver extends Configured implements Tool {
public static class Map扩展
Mapper< LongWritable,Text,Text,IntWritable> {
private static static IntWritable one = new IntWritable(1);
私人文字=新文字();
public void map(LongWritable key,Text value,Context context)
throws IOException,InterruptedException {
Configuration conf = context.getConfiguration();
String line = value.toString();
String searchString = conf.get(word);
StringTokenizer tokenizer = new StringTokenizer(line);
while(tokenizer.hasMoreTokens()){
String token = tokenizer.nextToken();
if(token.equals(searchString)){
word.set(token);
context.write(word,one);
$ b public static class Reduce extends
Reducer< Text,IntWritable,Text,IntWritable> ; {
$ b $ public void reduce(Text key,Iterable< IntWritable> values,
Context context)throws IOException,InterruptedException {
int sum = 0; (IntWritable val:values)
{
sum + = val.get();
}
context.write(key,new IntWritable(sum));
$ b $ public static void main(String [] args)throws Exception {
Configuration conf = new Configuration();
int res = ToolRunner.run(conf,new StringSearchDriver(),args);
System.exit(res);
$ b @Override
public int run(String [] args)throws Exception {
// TODO自动生成的方法存根
if(args .length!= 3){
System.out
.printf(Usage:Search String< input dir>< output dir>< search word> \\\
);
System.exit(-1);
}
String source = args [0];
String dest = args [1];
字符串searchword = args [2];
Configuration conf = new Configuration();
conf.set(word,searchword);
工作作业=新作业(conf,搜索字符串);
job.setJarByClass(StringSearchDriver.class);
FileSystem fs = FileSystem.get(conf);
Path in = new Path(source);
Path out = new Path(dest);
if(fs.exists(out)){
fs.delete(out,true);
}
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job,in);
FileOutputFormat.setOutputPath(job,out);
boolean sucess = job.waitForCompletion(true);
return(成功?0:1);
}
}
这是可行的。
I am coding a MapReduce job for finding the occurrence of a search string (passed through Command Line argument) in an input file stored in HDFS using old API.
Below is my Driver class -
public class StringSearchDriver
{
public static void main(String[] args) throws IOException
{
JobConf jc = new JobConf(StringSearchDriver.class);
jc.set("SearchWord", args[2]);
jc.setJobName("String Search");
FileInputFormat.addInputPath(jc, new Path(args[0]));
FileOutputFormat.setOutputPath(jc, new Path(args[1]));
jc.setMapperClass(StringSearchMap.class);
jc.setReducerClass(StringSearchReduce.class);
jc.setOutputKeyClass(Text.class);
jc.setOutputValueClass(IntWritable.class);
JobClient.runJob(jc);
}
}
Below is my Mapper Class -
/**
 * Old-API mapper that emits {@code (token, 1)} for every whitespace-separated
 * token of the input line that equals the configured search word, ignoring case.
 */
public class StringSearchMap extends MapReduceBase implements
        Mapper<LongWritable, Text, Text, IntWritable>
{
    /** Word to look for; read from the job configuration in {@link #configure}. */
    String searchWord;

    /**
     * Reads the search word stored under the {@code "SearchWord"} key.
     *
     * @param jc job configuration supplied by the framework
     */
    @Override
    public void configure(JobConf jc)
    {
        searchWord = jc.get("SearchWord");
    }

    /**
     * Emits one count per matching token of {@code value}.
     *
     * @param key      input key (unused)
     * @param value    one line of input text
     * @param out      collector receiving {@code (token, 1)} pairs
     * @param reporter progress reporter (unused)
     * @throws IOException propagated from {@code out.collect}
     */
    @Override
    public void map(LongWritable key, Text value,
            OutputCollector<Text, IntWritable> out, Reporter reporter)
            throws IOException
    {
        // Split on whitespace; splitting on "" would yield single characters,
        // so a multi-character search word could never match.
        String[] input = value.toString().split("\\s+");
        for (String word : input)
        {
            // equalsIgnoreCase(null) is false, so a missing search word matches nothing.
            if (word.equalsIgnoreCase(searchWord))
            {
                out.collect(new Text(word), new IntWritable(1));
            }
        }
    }
}
On running the job (command line string passed is "hi"), I am getting the below error -
14/09/21 22:35:41 INFO mapred.JobClient: Task Id : attempt_201409212134_0005_m_000001_2, Status : FAILED
java.lang.ClassCastException: interface javax.xml.soap.Text
at java.lang.Class.asSubclass(Class.java:3129)
at org.apache.hadoop.mapred.JobConf.getOutputKeyComparator(JobConf.java:795)
at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.<init>(MapTask.java:964)
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:422)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:366)
at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:416)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1190)
at org.apache.hadoop.mapred.Child.main(Child.java:249)
Please suggest.
Your IDE auto-imported the wrong class: you imported javax.xml.soap.Text instead of org.apache.hadoop.io.Text.
You can find an example of this wrong import in this blog.
One more point: it is better to adopt the new API.
EDIT
I used New Api
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * Counts how often a given word occurs in the input, using the new
 * ({@code org.apache.hadoop.mapreduce}) API.
 *
 * <p>Usage: {@code StringSearchDriver <input dir> <output dir> <search word>}
 *
 * @author Unmesha sreeveni (23 sep 2014)
 */
public class StringSearchDriver extends Configured implements Tool {

    /** Configuration key under which the search word is passed to the map tasks. */
    private static final String SEARCH_WORD_KEY = "word";

    /** Emits {@code (token, 1)} for every token that equals the search word exactly. */
    public static class Map extends
            Mapper<LongWritable, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private final Text word = new Text();
        /** Search word, read once per task instead of once per record. */
        private String searchString;

        @Override
        protected void setup(Context context) {
            searchString = context.getConfiguration().get(SEARCH_WORD_KEY);
        }

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer tokenizer = new StringTokenizer(value.toString());
            while (tokenizer.hasMoreTokens()) {
                String token = tokenizer.nextToken();
                // equals(null) is false, so a missing search word matches nothing.
                if (token.equals(searchString)) {
                    word.set(token);
                    context.write(word, one);
                }
            }
        }
    }

    /** Sums the counts emitted for each key. */
    public static class Reduce extends
            Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        public void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    /**
     * Runs the tool through {@link ToolRunner} and exits with its return code.
     *
     * @param args input dir, output dir, search word
     * @throws Exception propagated from {@link #run}
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        int res = ToolRunner.run(conf, new StringSearchDriver(), args);
        System.exit(res);
    }

    /**
     * Configures and runs the search job, waiting for it to finish.
     *
     * @param args {@code args[0]} input dir, {@code args[1]} output dir,
     *             {@code args[2]} search word
     * @return 0 on success, 1 if the job failed, -1 on bad usage
     * @throws Exception propagated from file-system access or job execution
     */
    @Override
    public int run(String[] args) throws Exception {
        if (args.length != 3) {
            System.err
                    .printf("Usage: Search String <input dir> <output dir> <search word> \n");
            // Return rather than System.exit so callers of run() keep control;
            // main() still exits with this code.
            return -1;
        }
        String source = args[0];
        String dest = args[1];
        String searchword = args[2];

        // Reuse the configuration ToolRunner populated (generic options such as -D);
        // fall back to a fresh one when run() is invoked without ToolRunner.
        Configuration conf = getConf() != null ? getConf() : new Configuration();
        conf.set(SEARCH_WORD_KEY, searchword);
        // NOTE: new Job(conf, name) is deprecated in Hadoop 2+ (Job.getInstance);
        // kept so the class still builds against older releases.
        Job job = new Job(conf, "Search String");
        job.setJarByClass(StringSearchDriver.class);

        Path in = new Path(source);
        Path out = new Path(dest);
        // Resolve the file system from the path itself so a non-default scheme works.
        FileSystem fs = out.getFileSystem(conf);
        // An existing output directory would make the job fail, so remove it first.
        if (fs.exists(out)) {
            fs.delete(out, true);
        }

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job, in);
        FileOutputFormat.setOutputPath(job, out);

        boolean success = job.waitForCompletion(true);
        return (success ? 0 : 1);
    }
}
This works.
这篇关于MapReduce Old API - 将命令行参数传递给Map的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!