在 Spark RDD 函数中使用 joda DateTime 格式化导致空指针错误 [英] joda DateTime format cause null pointer error in spark RDD functions
问题描述
的异常信息如下
User 类抛出异常:Job aborted due to stage failure:阶段 1.0 中的任务 0 失败了 4 次,最近一次失败:
阶段 1.0 中丢失任务 0.3(TID 11,10.215.155.82):java.lang.NullPointerException,位于
org.joda.time.tz.CachedDateTimeZone.getInfo(CachedDateTimeZone.java:143)
在
org.joda.time.tz.CachedDateTimeZone.getOffset(CachedDateTimeZone.java:103)
在
org.joda.time.format.DateTimeFormatter.printTo(DateTimeFormatter.java:676)
在
org.joda.time.format.DateTimeFormatter.printTo(DateTimeFormatter.java:521)
在
org.joda.time.format.DateTimeFormatter.print(DateTimeFormatter.java:625)
在
org.joda.time.base.AbstractDateTime.toString(AbstractDateTime.java:328)
在
com.tencent.ieg.face.demo.DateTimeNullReferenceReappear$$anonfun$3$$anonfun$apply$1.apply(DateTimeNullReferenceReappear.scala:41)
在
com.tencent.ieg.face.demo.DateTimeNullReferenceReappear$$anonfun$3$$anonfun$apply$1.apply(DateTimeNullReferenceReappear.scala:41)
在
scala.collection.TraversableLike$$anonfun$groupBy$1.apply(TraversableLike.scala:328)
在
scala.collection.TraversableLike$$anonfun$groupBy$1.apply(TraversableLike.scala:327)
在 scala.collection.Iterator$class.foreach(Iterator.scala:727) 在
org.apache.spark.util.collection.CompactBuffer$$anon$1.foreach(CompactBuffer.scala:113)
在 scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
在
org.apache.spark.util.collection.CompactBuffer.foreach(CompactBuffer.scala:28)
在
scala.collection.TraversableLike$class.groupBy(TraversableLike.scala:327)
在
org.apache.spark.util.collection.CompactBuffer.groupBy(CompactBuffer.scala:28)
在
com.tencent.ieg.face.demo.DateTimeNullReferenceReappear$$anonfun$3.apply(DateTimeNullReferenceReappear.scala:41)
在
com.tencent.ieg.face.demo.DateTimeNullReferenceReappear$$anonfun$3.apply(DateTimeNullReferenceReappear.scala:40)
在 scala.collection.Iterator$$anon$11.next(Iterator.scala:328) 在
scala.collection.Iterator$$anon$10.next(Iterator.scala:312) 在
scala.collection.Iterator$class.foreach(Iterator.scala:727) 在
scala.collection.AbstractIterator.foreach(Iterator.scala:1157) 在
scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48)
在
scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103)
在
scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47)
在
scala.collection.TraversableOnce$class.to(TraversableOnce.scala:273)
在 scala.collection.AbstractIterator.to(Iterator.scala:1157) 在
scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265)
在 scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157) 在
scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252)
在 scala.collection.AbstractIterator.toArray(Iterator.scala:1157) 在
org.apache.spark.rdd.RDD$$anonfun$26.apply(RDD.scala:1081) 在
org.apache.spark.rdd.RDD$$anonfun$26.apply(RDD.scala:1081) 在
org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1314)
在
org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1314)
在 org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61)
在 org.apache.spark.scheduler.Task.run(Task.scala:56) 在
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:196)
在
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
在
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
在 java.lang.Thread.run(Thread.java:744)
User class threw exception: Job aborted due to stage failure: Task 0 in stage 1.0 failed 4 times, most recent failure: Lost task 0.3 in stage 1.0 (TID 11, 10.215.155.82): java.lang.NullPointerException at org.joda.time.tz.CachedDateTimeZone.getInfo(CachedDateTimeZone.java:143) at org.joda.time.tz.CachedDateTimeZone.getOffset(CachedDateTimeZone.java:103) at org.joda.time.format.DateTimeFormatter.printTo(DateTimeFormatter.java:676) at org.joda.time.format.DateTimeFormatter.printTo(DateTimeFormatter.java:521) at org.joda.time.format.DateTimeFormatter.print(DateTimeFormatter.java:625) at org.joda.time.base.AbstractDateTime.toString(AbstractDateTime.java:328) at com.tencent.ieg.face.demo.DateTimeNullReferenceReappear$$anonfun$3$$anonfun$apply$1.apply(DateTimeNullReferenceReappear.scala:41) at com.tencent.ieg.face.demo.DateTimeNullReferenceReappear$$anonfun$3$$anonfun$apply$1.apply(DateTimeNullReferenceReappear.scala:41) at scala.collection.TraversableLike$$anonfun$groupBy$1.apply(TraversableLike.scala:328) at scala.collection.TraversableLike$$anonfun$groupBy$1.apply(TraversableLike.scala:327) at scala.collection.Iterator$class.foreach(Iterator.scala:727) at org.apache.spark.util.collection.CompactBuffer$$anon$1.foreach(CompactBuffer.scala:113) at scala.collection.IterableLike$class.foreach(IterableLike.scala:72) at org.apache.spark.util.collection.CompactBuffer.foreach(CompactBuffer.scala:28) at scala.collection.TraversableLike$class.groupBy(TraversableLike.scala:327) at org.apache.spark.util.collection.CompactBuffer.groupBy(CompactBuffer.scala:28) at com.tencent.ieg.face.demo.DateTimeNullReferenceReappear$$anonfun$3.apply(DateTimeNullReferenceReappear.scala:41) at com.tencent.ieg.face.demo.DateTimeNullReferenceReappear$$anonfun$3.apply(DateTimeNullReferenceReappear.scala:40) at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) at scala.collection.Iterator$$anon$10.next(Iterator.scala:312) at scala.collection.Iterator$class.foreach(Iterator.scala:727) at 
scala.collection.AbstractIterator.foreach(Iterator.scala:1157) at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48) at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103) at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47) at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:273) at scala.collection.AbstractIterator.to(Iterator.scala:1157) at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265) at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157) at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252) at scala.collection.AbstractIterator.toArray(Iterator.scala:1157) at org.apache.spark.rdd.RDD$$anonfun$26.apply(RDD.scala:1081) at org.apache.spark.rdd.RDD$$anonfun$26.apply(RDD.scala:1081) at org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1314) at org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1314) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61) at org.apache.spark.scheduler.Task.run(Task.scala:56) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:196) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:744)
我的代码如下:
package com.tencent.ieg.face.demo
import org.apache.hadoop.conf.Configuration
import org.apache.spark.rdd.RDD
import org.apache.spark.SparkContext._
import org.apache.spark.{ SparkConf, SparkContext }
import org.joda.time.DateTime
import org.joda.time.format.{ DateTimeFormat, DateTimeFormatter }
import com.tencent.ieg.face.{ FaceConf, TestConf }
import com.tencent.ieg.face.{ FaceTest, FaceDrive, MainApp }
import com.tencent.ieg.face.game.Login
import com.tencent.ieg.face.utils.RDDImplicits._
import com.tencent.ieg.face.utils.TdwUtils
import com.tencent.tdw.spark.api.TDWSparkContext
object DateTimeNullReferenceReappear extends App {
case class Record(uin: String = "", date: DateTime = null, value: Double = 0.0)
val cfg = new Configuration
val sparkConf = new SparkConf()
sparkConf.setAppName("bourne_exception_reappear")
val sc = new SparkContext(sparkConf)
val data = TDWSparkContext.tdwTable( // this function just read data from an data warehouse
sc,
tdwuser = FaceConf.TDW_USER,
tdwpasswd = FaceConf.TDW_PASSWORD,
dbName = "my_db",
tblName = "my_table",
parts = Array("p_20150323", "p_20150324", "p_20150325", "p_20150326", "p_20150327", "p_20150328", "p_20150329"))
.map(row => {
Record(uin = row(2),
date = DateTimeFormat.forPattern("yyyyMMdd").parseDateTime(row(0)),
value = row(4).toDouble)
}).map(x => (x.uin, (x.date, x.value)))
.groupByKey
.map(x => {
x._2.groupBy(_._1.toString("yyyyMMdd")).mapValues(_.map(_._2).sum) // throw exception here
})
// val data = TDWSparkContext.tdwTable( // It works, as I don't user datetime toString in the groupBy
// sc,
// tdwuser = FaceConf.TDW_USER,
// tdwpasswd = FaceConf.TDW_PASSWORD,
// dbName = "hy",
// tblName = "t_dw_cf_oss_tblogin",
// parts = Array("p_20150323", "p_20150324", "p_20150325", "p_20150326", "p_20150327", "p_20150328", "p_20150329"))
// .map(row => {
// Record(uin = row(2),
// date = DateTimeFormat.forPattern("yyyyMMdd").parseDateTime(row(0)),
// value = row(4).toDouble)
// }).map(x => (x.uin, (x.date.toString("yyyyMMdd"), x.value)))
// .groupByKey
// .map(x => {
// x._2.groupBy(_._1).mapValues(_.map(_._2).sum)
// })
data.take(10).map(println)
}
所以,似乎是在 groupBy 中调用 toString 导致了异常,有人能解释一下吗?
So, it seems that call toString in the groupBy cause the exception, so can anybody explain it?
感谢
推荐答案
您需要或者禁用 Kryo,或者使用 Kryo 的 JodaTime 序列化器,或者避免序列化 DateTime 对象,即改为传递 Long(时间戳)值。
You need to either disable Kryo, use Kryo JodaTime Serializers, or avoid serializing the DateTime object, i.e. pass around Longs.
这篇关于在 Spark RDD 函数中使用 joda DateTime 格式化导致空指针错误的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持 IT 屋!