InstantiationException in hadoop map reduce program


Problem description

I am new to the Hadoop framework. I was trying to write a program that reads an XML file from HDFS, parses it using JDOM, and sends it to a database. The following is the Java file:

package JDOMprs;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.input.SAXBuilder;

import com.vertica.hadoop.VerticaOutputFormat;
import com.vertica.hadoop.VerticaRecord;

public class ExampleParser extends Configured implements Tool {
    public static class Map extends Mapper<LongWritable, Text, Text, DoubleWritable> {
        private final static DoubleWritable one = new DoubleWritable(1);
        private Text word = new Text();
        private List mylist;

        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            context.write(value, one);
        }
    }

    public static class Reduce extends Reducer<Text, DoubleWritable, Text, VerticaRecord> {
        VerticaRecord record = null;
        String src_name;
        String comment;
        String rev_by;
        String rev_dt;
        String com_title;

        public void setup(Context context) throws IOException, InterruptedException {
            super.setup(context);
            try {
                record = new VerticaRecord(context.getConfiguration());
            } catch (Exception e) {
                throw new IOException(e);
            }
        }

        public void reduce(Text key, Iterable<DoubleWritable> values,
                Context context) throws IOException, InterruptedException {
            if (record == null) {
                throw new IOException("No output record found");
            }

            /******************** JDOM PARSER ***************************/
            SAXBuilder builder = new SAXBuilder();
            // File xmlFile = new
            // File("C:/Users/Administrator/workspace/VerticaHadoop/src/JDOMprs/HadoopXML.xml");

            try {
                Document document = (Document) builder.build(key.toString());
                Element rootNode = document.getRootElement();
                List list = rootNode.getChildren("source");
                // List ls= new ArrayList();
                // Jdomparse jp= new Jdomparse();
                // ls=jp.getParse(key);
                //
                for (int i = 0; i < list.size(); i++) {

                    Element node = (Element) list.get(i);

                    // System.out.println("Source Name : " +
                    // node.getChildText("source-name"));
                    // System.out.println("comment : " +
                    // node.getChildText("comment"));
                    // System.out.println("review by : " +
                    // node.getChildText("review-by"));
                    // System.out.println("review date : " +
                    // node.getChildText("review-date"));
                    // System.out.println("comment-title : " +
                    // node.getChildText("comment-title"));

                    record.set(0, node.getChildText("source-name").toString());
                    record.set(0, node.getChildText("comment").toString());
                    record.set(0, node.getChildText("review-by").toString());
                    record.set(0, node.getChildText("review-date").toString());
                    record.set(0, node.getChildText("comment-title").toString());
                }

            } catch (IOException io) {
                System.out.println(io.getMessage());
            } catch (JDOMException jdomex) {
                System.out.println(jdomex.getMessage());
            }
            /****************** END OF PARSER *****************************/

            context.write(new Text("reviewtbl"), record);
        }
    }

    @Override
    public int run(String[] args) throws Exception {
        // Set up the configuration and job objects
        Configuration conf = getConf();
        Job job = new Job(conf);
        conf = job.getConfiguration();
        conf.set("mapreduce.job.tracker", "local");
        job.setJobName("vertica test");

        job.setInputFormatClass(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.class);
        FileInputFormat.addInputPath(job, new Path("/user/cloudera/input"));

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(DoubleWritable.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(VerticaRecord.class);
        job.setOutputFormatClass(VerticaOutputFormat.class);
        job.setJarByClass(ExampleParser.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);

        VerticaOutputFormat.setOutput(job, "reviewtbl", true, "source varchar",
                "comment varchar", "rev_by varchar", "rev_dt varchar",
                "com_title varchar");
        job.waitForCompletion(true);
        return 0;
    }

    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new ExampleParser(), args);
        System.exit(res);
    }
}

But I am getting the following exception:

12/12/20 02:41:34 INFO mapred.JobClient: Cleaning up the staging area hdfs://0.0.0.0/var/lib/hadoop-0.20/cache/mapred/mapred/staging/root/.staging/job_201212191356_0006
Exception in thread "main" java.lang.RuntimeException: java.lang.InstantiationException
        at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:115)
        at org.apache.hadoop.mapred.JobClient.writeNewSplits(JobClient.java:947)
        at org.apache.hadoop.mapred.JobClient.writeSplits(JobClient.java:967)
        at org.apache.hadoop.mapred.JobClient.access$500(JobClient.java:170)
        at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:880)
        at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:833)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:396)
        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1177)
        at org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:833)
        at org.apache.hadoop.mapreduce.Job.submit(Job.java:476)
        at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:506)
        at ExampleParser.run(ExampleParser.java:148)
        at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:65)
        at ExampleParser.main(ExampleParser.java:153)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
        at java.lang.reflect.Method.invoke(Method.java:597)
        at org.apache.hadoop.util.RunJar.main(RunJar.java:197)
Caused by: java.lang.InstantiationException
        at sun.reflect.InstantiationExceptionConstructorAccessorImpl.newInstance(InstantiationExceptionConstructorAccessorImpl.java:30)
        at java.lang.reflect.Constructor.newInstance(Constructor.java:513)
        at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:113)
        ... 19 more

Solution

job.setInputFormatClass(
      org.apache.hadoop.mapreduce.lib.input.FileInputFormat.class);

You can't use / instantiate the FileInputFormat class: it's an abstract class.
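
For example, if plain line-oriented input were enough (which is what the Mapper<LongWritable, Text, ...> signature in the question already implies), a concrete subclass such as TextInputFormat could be set instead. This only illustrates choosing a non-abstract class; it does not by itself solve the XML parsing:

import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

// TextInputFormat is a concrete subclass of FileInputFormat, so the job
// client can instantiate it when it computes the input splits.
job.setInputFormatClass(TextInputFormat.class);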

If you want to parse the XML yourself, you'll need to write your own InputFormat that extends FileInputFormat, whose record reader passes the entire file contents to the mapper as the value. I think Hadoop: The Definitive Guide has an example called WholeFileInputFormat, or something similar, and a web search will probably turn up others; a sketch is given below.
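
Here is a minimal sketch of such an input format, along the lines of the book's WholeFileInputFormat example. The class and field names are illustrative; it never splits the file and reads each one into a single BytesWritable value so the mapper can hand a complete XML document to JDOM:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

// One record per input file: the key is unused and the value is the whole
// file, so a complete XML document reaches the mapper in one piece.
public class WholeFileInputFormat extends FileInputFormat<NullWritable, BytesWritable> {

    @Override
    protected boolean isSplitable(JobContext context, Path file) {
        // Never split the file; each mapper must see a complete document.
        return false;
    }

    @Override
    public RecordReader<NullWritable, BytesWritable> createRecordReader(
            InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        return new WholeFileRecordReader();
    }

    public static class WholeFileRecordReader extends RecordReader<NullWritable, BytesWritable> {
        private FileSplit fileSplit;
        private Configuration conf;
        private final BytesWritable value = new BytesWritable();
        private boolean processed = false;

        @Override
        public void initialize(InputSplit split, TaskAttemptContext context)
                throws IOException, InterruptedException {
            this.fileSplit = (FileSplit) split;
            this.conf = context.getConfiguration();
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            if (processed) {
                return false;
            }
            // Read the entire file backing this split into the value.
            byte[] contents = new byte[(int) fileSplit.getLength()];
            Path file = fileSplit.getPath();
            FileSystem fs = file.getFileSystem(conf);
            FSDataInputStream in = null;
            try {
                in = fs.open(file);
                IOUtils.readFully(in, contents, 0, contents.length);
                value.set(contents, 0, contents.length);
            } finally {
                IOUtils.closeStream(in);
            }
            processed = true;
            return true;
        }

        @Override
        public NullWritable getCurrentKey() {
            return NullWritable.get();
        }

        @Override
        public BytesWritable getCurrentValue() {
            return value;
        }

        @Override
        public float getProgress() {
            return processed ? 1.0f : 0.0f;
        }

        @Override
        public void close() throws IOException {
            // Nothing to close; the stream is closed in nextKeyValue().
        }
    }
}

It would then be registered in the driver with job.setInputFormatClass(WholeFileInputFormat.class), with the mapper's input key/value types changed to NullWritable/BytesWritable to match.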
