Hadoop 2 IOException only when trying to open supposed cache files


Problem description

I recently updated to Hadoop 2.2 (using this tutorial).

My main job class looks like this, and throws an IOException:

import java.io.*;
import java.net.*;
import java.util.*;
import java.util.regex.*;

import org.apache.hadoop.conf.*;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.chain.*;
import org.apache.hadoop.mapreduce.lib.input.*;
import org.apache.hadoop.mapreduce.lib.output.*;
import org.apache.hadoop.mapreduce.lib.reduce.*;

public class UFOLocation2
{
    public static class MapClass extends Mapper<LongWritable, Text, Text, LongWritable>
    {
        private final static LongWritable one = new LongWritable(1);
        private static Pattern locationPattern = Pattern.compile("[a-zA-Z]{2}[^a-zA-Z]*$");

        private Map<String, String> stateNames;

        @Override
        public void setup(Context context)
        {
            try
            {
                URI[] cacheFiles = context.getCacheFiles();
                setupStateMap(cacheFiles[0].toString());
            }
            catch (IOException ioe)
            {
                System.err.println("Error reading state file.");
                System.exit(1);
            }
        }

        public void map(LongWritable key, Text value, Context context)
                        throws IOException, InterruptedException
        {
            String line = value.toString();
            String[] fields = line.split("\t");
            String location = fields[2].trim();

            if (location.length() >= 2)
            {
                Matcher matcher = locationPattern.matcher(location);
                if (matcher.find())
                {
                    int start = matcher.start();
                    String state = location.substring(start, start + 2);
                    context.write(new Text(lookupState(state.toUpperCase())), one);
                }
            }
        }

        private void setupStateMap(String filename) throws IOException
        {
            Map<String, String> states = new HashMap<String, String>();
            BufferedReader reader = new BufferedReader(new FileReader(filename));
            String line = reader.readLine();
            while (line != null)
            {
                String[] split = line.split("\t");
                states.put(split[0], split[1]);
                line = reader.readLine();
            }
            stateNames = states;
        }

        private String lookupState(String state)
        {
            String fullName = stateNames.get(state);
            return fullName == null ? "Other" : fullName;
        }
    }

    public static void main(String[] args) throws Exception
    {
        Configuration config = new Configuration();
        Job job = Job.getInstance(config, "UFO Location 2");
        job.setJarByClass(UFOLocation2.class);

        job.addCacheFile(new URI("/user/kevin/data/states.txt"));

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        Configuration mapconf1 = new Configuration(false);
        ChainMapper.addMapper(job, UFORecordValidationMapper.class, LongWritable.class,
                    Text.class, LongWritable.class, Text.class, mapconf1);

        Configuration mapconf2 = new Configuration(false);
        ChainMapper.addMapper(job, MapClass.class, LongWritable.class,
                    Text.class, Text.class, LongWritable.class, mapconf2);

        job.setMapperClass(ChainMapper.class);
        job.setCombinerClass(LongSumReducer.class);
        job.setReducerClass(LongSumReducer.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

I get an IOException because it can't find the file "/user/kevin/data/states.txt" when it tries to instantiate the BufferedReader in the method setupStateMap().

Solution

Yes, the old DistributedCache API is deprecated; Job.addCacheFile() should be used to add the files, and in your tasks (map or reduce) the files can be accessed with Context.getCacheFiles().
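For reference, here is a minimal sketch of that API in use. The class names, the "#states" symlink fragment, and the loop body are illustrative assumptions, not part of the original answer:

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;

public class CacheFileSketch
{
    public static class CacheMapper extends Mapper<LongWritable, Text, Text, LongWritable>
    {
        @Override
        protected void setup(Context context) throws IOException
        {
            // Context.getCacheFiles() returns the URIs registered with
            // Job.addCacheFile() in the driver.
            URI[] cacheFiles = context.getCacheFiles();
            if (cacheFiles == null || cacheFiles.length == 0)
            {
                throw new IOException("No cache files registered for this job.");
            }

            // The framework localizes each cached file into the task's
            // working directory; a "#name" fragment on the URI sets the
            // symlink name, so the file can be opened with a local path
            // instead of the HDFS URI returned by getCacheFiles().
            BufferedReader reader = new BufferedReader(new FileReader("./states"));
            try
            {
                String line;
                while ((line = reader.readLine()) != null)
                {
                    // ... parse each line as needed ...
                }
            }
            finally
            {
                reader.close();
            }
        }
    }

    public static void main(String[] args) throws Exception
    {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "Cache File Sketch");
        job.setJarByClass(CacheFileSketch.class);

        // The "#states" fragment is an illustrative symlink name.
        job.addCacheFile(new URI("/user/kevin/data/states.txt#states"));

        // ... configure mappers, reducer, input/output paths, then submit ...
    }
}

On YARN the cached file is copied to the task's working directory, so opening it through the local symlink name avoids passing the raw HDFS URI straight to FileReader, which is what triggers the IOException above.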

