Mapreduce Combiner


Problem Description

I have a simple MapReduce job with a mapper, a reducer, and a combiner. The mapper's output is passed to the combiner, but the output that reaches the reducer is the mapper's output, not the combiner's.

Kindly help

Code:

package Combiner;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Mapper.Context;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class AverageSalary
{
public static class Map extends  Mapper<LongWritable, Text, Text, DoubleWritable> 
{
    public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException 
    {    
        String[] empDetails= value.toString().split(",");
        Text unit_key = new Text(empDetails[1]);      
        DoubleWritable salary_value = new DoubleWritable(Double.parseDouble(empDetails[2]));
        context.write(unit_key,salary_value);    

    }  
}
public static class Combiner extends Reducer<Text,DoubleWritable, Text,Text> 
{
    public void reduce(final Text key, final Iterable<DoubleWritable> values, final Context context)
    {
        String val;
        double sum=0;
        int len=0;
        while (values.iterator().hasNext())
        {
            sum+=values.iterator().next().get();
            len++;
        }
        val=String.valueOf(sum)+":"+String.valueOf(len);
        try {
            context.write(key,new Text(val));
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (InterruptedException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }
}
public static class Reduce extends Reducer<Text,Text, Text,Text> 
{
    public void reduce (final Text key, final Text values, final Context context)
    {
        //String[] sumDetails=values.toString().split(":");
        //double average;
        //average=Double.parseDouble(sumDetails[0]);
        try {
            context.write(key,values);
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (InterruptedException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }
}
public static void main(String args[])
{
    Configuration conf = new Configuration();
    try
    {
     String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();    
     if (otherArgs.length != 2) {      
         System.err.println("Usage: Main <in> <out>");      
         System.exit(-1);    }    
     Job job = new Job(conf, "Average salary");    
     //job.setInputFormatClass(KeyValueTextInputFormat.class);    
     FileInputFormat.addInputPath(job, new Path(otherArgs[0]));    
     FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));    
     job.setJarByClass(AverageSalary.class);    
     job.setMapperClass(Map.class);    
     job.setCombinerClass(Combiner.class);
     job.setReducerClass(Reduce.class);    
     job.setOutputKeyClass(Text.class);    
     job.setOutputValueClass(Text.class);    

        System.exit(job.waitForCompletion(true) ? 0 : -1);
    } catch (ClassNotFoundException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    } catch (InterruptedException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
}

}

Solution

It seems that you forgot about an important property of a combiner:

the input key/value types and the output key/value types need to be the same.

You can't take in Text/DoubleWritable and return Text/Text. I suggest you use Text instead of DoubleWritable as the map output value type, and do the parsing inside the Combiner.
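A minimal sketch of that suggestion (the class name AverageSalaryFixed, the Combine class name, and the "sum:count" encoding are my own; the field positions follow the question's comma-separated input, with the unit in field 1 and the salary in field 2): the mapper emits each salary as the Text value "salary:1", the combiner merges these partial "sum:count" pairs into one Text value of the same shape, and the reducer parses the final pairs and emits the average. Since every key and value is Text, the combiner's input and output types match, and the driver from the question can be reused unchanged.

package Combiner;

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

public class AverageSalaryFixed
{
    // Emit each salary as a partial "sum:count" pair so the combiner can
    // consume and produce the same Text value type.
    public static class Map extends Mapper<LongWritable, Text, Text, Text>
    {
        @Override
        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException
        {
            String[] empDetails = value.toString().split(",");
            context.write(new Text(empDetails[1]), new Text(empDetails[2] + ":1"));
        }
    }

    // Text in, Text out: merging partial pairs is associative, so Hadoop may
    // run this combiner zero, one, or many times without changing the result.
    public static class Combine extends Reducer<Text, Text, Text, Text>
    {
        @Override
        public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException
        {
            double sum = 0;
            long count = 0;
            for (Text v : values) {
                String[] parts = v.toString().split(":");
                sum += Double.parseDouble(parts[0]);
                count += Long.parseLong(parts[1]);
            }
            context.write(key, new Text(sum + ":" + count));
        }
    }

    // Parse the final "sum:count" pairs and emit the average as Text, so the
    // driver's setOutputValueClass(Text.class) still holds.
    public static class Reduce extends Reducer<Text, Text, Text, Text>
    {
        @Override
        public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException
        {
            double sum = 0;
            long count = 0;
            for (Text v : values) {
                String[] parts = v.toString().split(":");
                sum += Double.parseDouble(parts[0]);
                count += Long.parseLong(parts[1]);
            }
            context.write(key, new Text(String.valueOf(sum / count)));
        }
    }
}

One more detail worth noting in the original code: the question's Reduce.reduce(Text, Text, Context) takes a single Text rather than Iterable&lt;Text&gt;, so it never actually overrides Reducer.reduce() and Hadoop silently falls back to the identity reducer. Adding @Override, as in the sketch above, lets the compiler catch that kind of signature mismatch.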
