Joda DateTime format causes null pointer error in Spark RDD functions


Problem description

The exception message is as follows:


User class threw exception: Job aborted due to stage failure: Task 0 in stage 1.0 failed 4 times, most recent failure: Lost task 0.3 in stage 1.0 (TID 11, 10.215.155.82): java.lang.NullPointerException
  at org.joda.time.tz.CachedDateTimeZone.getInfo(CachedDateTimeZone.java:143)
  at org.joda.time.tz.CachedDateTimeZone.getOffset(CachedDateTimeZone.java:103)
  at org.joda.time.format.DateTimeFormatter.printTo(DateTimeFormatter.java:676)
  at org.joda.time.format.DateTimeFormatter.printTo(DateTimeFormatter.java:521)
  at org.joda.time.format.DateTimeFormatter.print(DateTimeFormatter.java:625)
  at org.joda.time.base.AbstractDateTime.toString(AbstractDateTime.java:328)
  at com.tencent.ieg.face.demo.DateTimeNullReferenceReappear$$anonfun$3$$anonfun$apply$1.apply(DateTimeNullReferenceReappear.scala:41)
  at com.tencent.ieg.face.demo.DateTimeNullReferenceReappear$$anonfun$3$$anonfun$apply$1.apply(DateTimeNullReferenceReappear.scala:41)
  at scala.collection.TraversableLike$$anonfun$groupBy$1.apply(TraversableLike.scala:328)
  at scala.collection.TraversableLike$$anonfun$groupBy$1.apply(TraversableLike.scala:327)
  at scala.collection.Iterator$class.foreach(Iterator.scala:727)
  at org.apache.spark.util.collection.CompactBuffer$$anon$1.foreach(CompactBuffer.scala:113)
  at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
  at org.apache.spark.util.collection.CompactBuffer.foreach(CompactBuffer.scala:28)
  at scala.collection.TraversableLike$class.groupBy(TraversableLike.scala:327)
  at org.apache.spark.util.collection.CompactBuffer.groupBy(CompactBuffer.scala:28)
  at com.tencent.ieg.face.demo.DateTimeNullReferenceReappear$$anonfun$3.apply(DateTimeNullReferenceReappear.scala:41)
  at com.tencent.ieg.face.demo.DateTimeNullReferenceReappear$$anonfun$3.apply(DateTimeNullReferenceReappear.scala:40)
  at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
  at scala.collection.Iterator$$anon$10.next(Iterator.scala:312)
  at scala.collection.Iterator$class.foreach(Iterator.scala:727)
  at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
  at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48)
  at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103)
  at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47)
  at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:273)
  at scala.collection.AbstractIterator.to(Iterator.scala:1157)
  at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265)
  at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157)
  at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252)
  at scala.collection.AbstractIterator.toArray(Iterator.scala:1157)
  at org.apache.spark.rdd.RDD$$anonfun$26.apply(RDD.scala:1081)
  at org.apache.spark.rdd.RDD$$anonfun$26.apply(RDD.scala:1081)
  at org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1314)
  at org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1314)
  at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61)
  at org.apache.spark.scheduler.Task.run(Task.scala:56)
  at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:196)
  at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
  at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
  at java.lang.Thread.run(Thread.java:744)

My code is as follows:

package com.tencent.ieg.face.demo

import org.apache.hadoop.conf.Configuration
import org.apache.spark.rdd.RDD
import org.apache.spark.SparkContext._
import org.apache.spark.{ SparkConf, SparkContext }
import org.joda.time.DateTime
import org.joda.time.format.{ DateTimeFormat, DateTimeFormatter }

import com.tencent.ieg.face.{ FaceConf, TestConf }
import com.tencent.ieg.face.{ FaceTest, FaceDrive, MainApp }
import com.tencent.ieg.face.game.Login
import com.tencent.ieg.face.utils.RDDImplicits._
import com.tencent.ieg.face.utils.TdwUtils

import com.tencent.tdw.spark.api.TDWSparkContext

object DateTimeNullReferenceReappear extends App {

  case class Record(uin: String = "", date: DateTime = null, value: Double = 0.0) 

  val cfg = new Configuration
  val sparkConf = new SparkConf()
  sparkConf.setAppName("bourne_exception_reappear")
  val sc = new SparkContext(sparkConf)

  val data = TDWSparkContext.tdwTable(   // this function just reads data from a data warehouse
    sc,
    tdwuser = FaceConf.TDW_USER,
    tdwpasswd = FaceConf.TDW_PASSWORD,
    dbName = "my_db",
    tblName = "my_table",
    parts = Array("p_20150323", "p_20150324", "p_20150325", "p_20150326", "p_20150327", "p_20150328", "p_20150329"))
    .map(row => {
      Record(uin = row(2),
        date = DateTimeFormat.forPattern("yyyyMMdd").parseDateTime(row(0)),
        value = row(4).toDouble)
    }).map(x => (x.uin, (x.date, x.value)))
    .groupByKey
    .map(x => {
      x._2.groupBy(_._1.toString("yyyyMMdd")).mapValues(_.map(_._2).sum)   // throws the exception here
    })

//      val data = TDWSparkContext.tdwTable(  // It works, as I don't use DateTime.toString in the groupBy
//      sc,
//      tdwuser = FaceConf.TDW_USER,
//      tdwpasswd = FaceConf.TDW_PASSWORD,
//      dbName = "hy",
//      tblName = "t_dw_cf_oss_tblogin",
//      parts = Array("p_20150323", "p_20150324", "p_20150325", "p_20150326", "p_20150327", "p_20150328", "p_20150329"))
//      .map(row => {
//        Record(uin = row(2),
//          date = DateTimeFormat.forPattern("yyyyMMdd").parseDateTime(row(0)),
//          value = row(4).toDouble)
//      }).map(x => (x.uin, (x.date.toString("yyyyMMdd"), x.value)))
//      .groupByKey
//      .map(x => {
//        x._2.groupBy(_._1).mapValues(_.map(_._2).sum)
//      })

  data.take(10).foreach(println)

}


So it seems that calling toString inside the groupBy causes the exception. Can anybody explain it?

Thanks

Recommended answer


You need to either disable Kryo, use the Kryo JodaTime serializers, or avoid serializing the DateTime object altogether, i.e. pass around Longs. The likely root cause: Kryo instantiates objects without running their constructors and skips transient fields, so the CachedDateTimeZone carried by each DateTime arrives on the executor with a null internal cache, and the first toString call dereferences it.
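
A minimal sketch of the serializer-registration route, assuming the de.javakaffee:kryo-serializers artifact is on the classpath (JodaDateTimeSerializer comes from that library; the registrator class name and its package below are hypothetical):

import com.esotericsoftware.kryo.Kryo
import de.javakaffee.kryoserializers.jodatime.JodaDateTimeSerializer
import org.apache.spark.SparkConf
import org.apache.spark.serializer.KryoRegistrator
import org.joda.time.DateTime

// Teach Kryo to serialize DateTime by value (instant + zone) instead of blindly copying its fields
class JodaKryoRegistrator extends KryoRegistrator {
  override def registerClasses(kryo: Kryo): Unit = {
    kryo.register(classOf[DateTime], new JodaDateTimeSerializer)
  }
}

// Point Spark at the registrator when building the conf
val sparkConf = new SparkConf()
  .setAppName("bourne_exception_reappear")
  .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
  .set("spark.kryo.registrator", "com.tencent.ieg.face.demo.JodaKryoRegistrator")

If you would rather not serialize DateTime at all, ship the epoch millis across the shuffle and rebuild the DateTime locally before formatting, e.g.:

.map(x => (x.uin, (x.date.getMillis, x.value)))
.groupByKey
.map(x => {
  x._2.groupBy(p => new DateTime(p._1).toString("yyyyMMdd")).mapValues(_.map(_._2).sum)
})

The commented-out variant in the question works for the same reason: the date is turned into a plain String before the shuffle, so no DateTime object is ever serialized.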

