Notes on a SparkSQL 2.4 ArrayIndexOutOfBoundsException error


1. Background

While testing some Spark ML code, an ArrayIndexOutOfBoundsException was thrown. The error log is as follows:

19/11/15 11:02:21 ERROR Instrumentation: com.thoughtworks.paranamer.BytecodeReadingParanamer$ClassReader.accept(BytecodeReadingParanamer.java:563)
com.thoughtworks.paranamer.BytecodeReadingParanamer$ClassReader.access$200(BytecodeReadingParanamer.java:338)
com.thoughtworks.paranamer.BytecodeReadingParanamer.lookupParameterNames(BytecodeReadingParanamer.java:103)
com.thoughtworks.paranamer.CachingParanamer.lookupParameterNames(CachingParanamer.java:90)
com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.getCtorParams(BeanIntrospector.scala:44)
com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.$anonfun$apply$1(BeanIntrospector.scala:58)
com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.$anonfun$apply$1$adapted(BeanIntrospector.scala:58)
scala.collection.TraversableLike.$anonfun$flatMap$1(TraversableLike.scala:244)
scala.collection.Iterator.foreach(Iterator.scala:941)
scala.collection.Iterator.foreach$(Iterator.scala:941)
scala.collection.AbstractIterator.foreach(Iterator.scala:1429)
scala.collection.IterableLike.foreach(IterableLike.scala:74)
scala.collection.IterableLike.foreach$(IterableLike.scala:73)
scala.collection.AbstractIterable.foreach(Iterable.scala:56)
scala.collection.TraversableLike.flatMap(TraversableLike.scala:244)
scala.collection.TraversableLike.flatMap$(TraversableLike.scala:241)
scala.collection.AbstractTraversable.flatMap(Traversable.scala:108)
com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.findConstructorParam$1(BeanIntrospector.scala:58)
com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.$anonfun$apply$19(BeanIntrospector.scala:176)
scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:237)
scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33)
scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:198)
scala.collection.TraversableLike.map(TraversableLike.scala:237)
scala.collection.TraversableLike.map$(TraversableLike.scala:230)
scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:198)
com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.$anonfun$apply$14(BeanIntrospector.scala:170)
com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.$anonfun$apply$14$adapted(BeanIntrospector.scala:169)
scala.collection.TraversableLike.$anonfun$flatMap$1(TraversableLike.scala:244)
scala.collection.immutable.List.foreach(List.scala:392)
scala.collection.TraversableLike.flatMap(TraversableLike.scala:244)
scala.collection.TraversableLike.flatMap$(TraversableLike.scala:241)
scala.collection.immutable.List.flatMap(List.scala:355)
com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.apply(BeanIntrospector.scala:169)
com.fasterxml.jackson.module.scala.introspect.ScalaAnnotationIntrospector$._descriptorFor(ScalaAnnotationIntrospectorModule.scala:22)
com.fasterxml.jackson.module.scala.introspect.ScalaAnnotationIntrospector$.fieldName(ScalaAnnotationIntrospectorModule.scala:30)
com.fasterxml.jackson.module.scala.introspect.ScalaAnnotationIntrospector$.findImplicitPropertyName(ScalaAnnotationIntrospectorModule.scala:78)
com.fasterxml.jackson.databind.introspect.AnnotationIntrospectorPair.findImplicitPropertyName(AnnotationIntrospectorPair.java:467)
com.fasterxml.jackson.databind.introspect.POJOPropertiesCollector._addFields(POJOPropertiesCollector.java:351)
com.fasterxml.jackson.databind.introspect.POJOPropertiesCollector.collectAll(POJOPropertiesCollector.java:283)
com.fasterxml.jackson.databind.introspect.POJOPropertiesCollector.getJsonValueMethod(POJOPropertiesCollector.java:169)
com.fasterxml.jackson.databind.introspect.BasicBeanDescription.findJsonValueMethod(BasicBeanDescription.java:223)
com.fasterxml.jackson.databind.ser.BasicSerializerFactory.findSerializerByAnnotations(BasicSerializerFactory.java:348)
com.fasterxml.jackson.databind.ser.BeanSerializerFactory._createSerializer2(BeanSerializerFactory.java:210)
com.fasterxml.jackson.databind.ser.BeanSerializerFactory.createSerializer(BeanSerializerFactory.java:153)
com.fasterxml.jackson.databind.SerializerProvider._createUntypedSerializer(SerializerProvider.java:1203)
com.fasterxml.jackson.databind.SerializerProvider._createAndCacheUntypedSerializer(SerializerProvider.java:1157)
com.fasterxml.jackson.databind.SerializerProvider.findValueSerializer(SerializerProvider.java:481)
com.fasterxml.jackson.databind.SerializerProvider.findTypedValueSerializer(SerializerProvider.java:679)
com.fasterxml.jackson.databind.ser.DefaultSerializerProvider.serializeValue(DefaultSerializerProvider.java:107)
com.fasterxml.jackson.databind.ObjectMapper._configAndWriteValue(ObjectMapper.java:3559)
com.fasterxml.jackson.databind.ObjectMapper.writeValueAsString(ObjectMapper.java:2927)
org.apache.spark.rdd.RDDOperationScope.toJson(RDDOperationScope.scala:52)
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:142)
org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:80)
org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:80)
org.apache.spark.sql.Dataset.rdd$lzycompute(Dataset.scala:3038)
org.apache.spark.sql.Dataset.rdd(Dataset.scala:3036)
org.apache.spark.ml.classification.LogisticRegression.$anonfun$train$1(LogisticRegression.scala:497)
org.apache.spark.ml.util.Instrumentation$.$anonfun$instrumented$1(Instrumentation.scala:183)
scala.util.Try$.apply(Try.scala:213)
org.apache.spark.ml.util.Instrumentation$.instrumented(Instrumentation.scala:183)
org.apache.spark.ml.classification.LogisticRegression.train(LogisticRegression.scala:494)
org.apache.spark.ml.classification.LogisticRegression.train(LogisticRegression.scala:489)
org.apache.spark.ml.classification.LogisticRegression.train(LogisticRegression.scala:279)
org.apache.spark.ml.Predictor.fit(Predictor.scala:118)
hnbian.spark.ml.pipeline.EstimatorTransformerParamExam$.main(EstimatorTransformerParamExam.scala:42)
hnbian.spark.ml.pipeline.EstimatorTransformerParamExam.main(EstimatorTransformerParamExam.scala)
Exception in thread "main" java.lang.ArrayIndexOutOfBoundsException: 10582
at com.thoughtworks.paranamer.BytecodeReadingParanamer$ClassReader.accept(BytecodeReadingParanamer.java:563)
at com.thoughtworks.paranamer.BytecodeReadingParanamer$ClassReader.access$200(BytecodeReadingParanamer.java:338)
at com.thoughtworks.paranamer.BytecodeReadingParanamer.lookupParameterNames(BytecodeReadingParanamer.java:103)
at com.thoughtworks.paranamer.CachingParanamer.lookupParameterNames(CachingParanamer.java:90)
at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.getCtorParams(BeanIntrospector.scala:44)
at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.$anonfun$apply$1(BeanIntrospector.scala:58)
at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.$anonfun$apply$1$adapted(BeanIntrospector.scala:58)
at scala.collection.TraversableLike.$anonfun$flatMap$1(TraversableLike.scala:244)
at scala.collection.Iterator.foreach(Iterator.scala:941)
at scala.collection.Iterator.foreach$(Iterator.scala:941)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1429)
at scala.collection.IterableLike.foreach(IterableLike.scala:74)
at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
at scala.collection.TraversableLike.flatMap(TraversableLike.scala:244)
at scala.collection.TraversableLike.flatMap$(TraversableLike.scala:241)
at scala.collection.AbstractTraversable.flatMap(Traversable.scala:108)
at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.findConstructorParam$1(BeanIntrospector.scala:58)
at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.$anonfun$apply$19(BeanIntrospector.scala:176)
at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:237)
at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:198)
at scala.collection.TraversableLike.map(TraversableLike.scala:237)
at scala.collection.TraversableLike.map$(TraversableLike.scala:230)
at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:198)
at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.$anonfun$apply$14(BeanIntrospector.scala:170)
at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.$anonfun$apply$14$adapted(BeanIntrospector.scala:169)
at scala.collection.TraversableLike.$anonfun$flatMap$1(TraversableLike.scala:244)
at scala.collection.immutable.List.foreach(List.scala:392)
at scala.collection.TraversableLike.flatMap(TraversableLike.scala:244)
at scala.collection.TraversableLike.flatMap$(TraversableLike.scala:241)
at scala.collection.immutable.List.flatMap(List.scala:355)
at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.apply(BeanIntrospector.scala:169)
at com.fasterxml.jackson.module.scala.introspect.ScalaAnnotationIntrospector$._descriptorFor(ScalaAnnotationIntrospectorModule.scala:22)
at com.fasterxml.jackson.module.scala.introspect.ScalaAnnotationIntrospector$.fieldName(ScalaAnnotationIntrospectorModule.scala:30)
at com.fasterxml.jackson.module.scala.introspect.ScalaAnnotationIntrospector$.findImplicitPropertyName(ScalaAnnotationIntrospectorModule.scala:78)
at com.fasterxml.jackson.databind.introspect.AnnotationIntrospectorPair.findImplicitPropertyName(AnnotationIntrospectorPair.java:467)
at com.fasterxml.jackson.databind.introspect.POJOPropertiesCollector._addFields(POJOPropertiesCollector.java:351)
at com.fasterxml.jackson.databind.introspect.POJOPropertiesCollector.collectAll(POJOPropertiesCollector.java:283)
at com.fasterxml.jackson.databind.introspect.POJOPropertiesCollector.getJsonValueMethod(POJOPropertiesCollector.java:169)
at com.fasterxml.jackson.databind.introspect.BasicBeanDescription.findJsonValueMethod(BasicBeanDescription.java:223)
at com.fasterxml.jackson.databind.ser.BasicSerializerFactory.findSerializerByAnnotations(BasicSerializerFactory.java:348)
at com.fasterxml.jackson.databind.ser.BeanSerializerFactory._createSerializer2(BeanSerializerFactory.java:210)
at com.fasterxml.jackson.databind.ser.BeanSerializerFactory.createSerializer(BeanSerializerFactory.java:153)
at com.fasterxml.jackson.databind.SerializerProvider._createUntypedSerializer(SerializerProvider.java:1203)
at com.fasterxml.jackson.databind.SerializerProvider._createAndCacheUntypedSerializer(SerializerProvider.java:1157)
at com.fasterxml.jackson.databind.SerializerProvider.findValueSerializer(SerializerProvider.java:481)
at com.fasterxml.jackson.databind.SerializerProvider.findTypedValueSerializer(SerializerProvider.java:679)
at com.fasterxml.jackson.databind.ser.DefaultSerializerProvider.serializeValue(DefaultSerializerProvider.java:107)
at com.fasterxml.jackson.databind.ObjectMapper._configAndWriteValue(ObjectMapper.java:3559)
at com.fasterxml.jackson.databind.ObjectMapper.writeValueAsString(ObjectMapper.java:2927)
at org.apache.spark.rdd.RDDOperationScope.toJson(RDDOperationScope.scala:52)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:142)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:80)
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:80)
at org.apache.spark.sql.Dataset.rdd$lzycompute(Dataset.scala:3038)
at org.apache.spark.sql.Dataset.rdd(Dataset.scala:3036)
at org.apache.spark.ml.classification.LogisticRegression.$anonfun$train$1(LogisticRegression.scala:497)
at org.apache.spark.ml.util.Instrumentation$.$anonfun$instrumented$1(Instrumentation.scala:183)
at scala.util.Try$.apply(Try.scala:213)
at org.apache.spark.ml.util.Instrumentation$.instrumented(Instrumentation.scala:183)
at org.apache.spark.ml.classification.LogisticRegression.train(LogisticRegression.scala:494)
at org.apache.spark.ml.classification.LogisticRegression.train(LogisticRegression.scala:489)
at org.apache.spark.ml.classification.LogisticRegression.train(LogisticRegression.scala:279)
at org.apache.spark.ml.Predictor.fit(Predictor.scala:118)
at hnbian.spark.ml.pipeline.EstimatorTransformerParamExam$.main(EstimatorTransformerParamExam.scala:42)
at hnbian.spark.ml.pipeline.EstimatorTransformerParamExam.main(EstimatorTransformerParamExam.scala)

Process finished with exit code 1
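
Reading the trace from the bottom up shows that the failure has nothing to do with the ML code itself: LogisticRegression.fit converts the training Dataset to an RDD, Spark wraps that conversion in an RDDOperationScope and serializes the scope to JSON with Jackson, jackson-module-scala's BeanIntrospector asks paranamer for the constructor parameter names, and paranamer's BytecodeReadingParanamer throws while parsing the class file. The failing call can be isolated from Spark ML entirely; below is a minimal sketch (the object name and the reflective lookup are my own, not from the original run) that makes the same paranamer call on the same class:

import com.thoughtworks.paranamer.BytecodeReadingParanamer

object ParanamerRepro {
  def main(args: Array[String]): Unit = {
    // The class Jackson was introspecting in the trace above. It is
    // private[spark], so load it reflectively rather than via classOf.
    val cls = Class.forName("org.apache.spark.rdd.RDDOperationScope")
    val ctor = cls.getDeclaredConstructors.head
    // With the paranamer 2.7 that Spark 2.4.0 resolves transitively, this
    // throws ArrayIndexOutOfBoundsException on Scala 2.12 bytecode; with
    // paranamer 2.8 it returns the constructor parameter names.
    val names = new BytecodeReadingParanamer().lookupParameterNames(ctor)
    println(names.mkString(", "))
  }
}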

2. Dependencies

The project pulls in the Scala 2.12 builds of Spark 2.4.0:

<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-core_2.12</artifactId>
    <version>2.4.0</version>
</dependency>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_2.12</artifactId>
    <version>2.4.0</version>
</dependency>

<!-- Spark machine learning (MLlib) dependency -->
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-mllib_2.12</artifactId>
    <version>2.4.0</version>
</dependency>

3. Code that triggers the error

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.sql.SparkSession

object EstimatorTransformerParamExam {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("EstimatorTransformerParamExam")
    // Run in local mode with 4 simulated worker threads.
    conf.setMaster("local[4]")
    // Create the SparkContext and the SparkSession.
    val sc = new SparkContext(conf)
    val spark = SparkSession.builder().getOrCreate()
    sc.setLogLevel("ERROR")

    // Prepare training data from a list of (label, features) tuples.
    val training = spark.createDataFrame(Seq(
      (1.0, Vectors.dense(0.0, 1.1, 0.1)),
      (0.0, Vectors.dense(2.0, 1.0, -1.0)),
      (0.0, Vectors.dense(2.0, 1.3, 1.0)),
      (1.0, Vectors.dense(0.0, 1.2, -0.5))
    )).toDF("label", "features")

    training.show()

    // Create a LogisticRegression instance. This instance is an Estimator.
    val lr = new LogisticRegression()
    // Print the parameters, their documentation, and any default values.
    println("LogisticRegression parameters:\n" + lr.explainParams() + "\n")

    // Parameters can be set with setter methods.
    lr.setMaxIter(5)     // maximum number of iterations (default: 100)
      .setRegParam(0.01) // regularization parameter (default: 0.0)

    // Fit a LogisticRegression model using the parameters stored in lr.
    // The exception above is thrown here, when the first job runs.
    val model1 = lr.fit(training)
  }
}
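
Note that the crash is not specific to LogisticRegression. As the trace frames show (Dataset.rdd → QueryExecution.toRdd → SparkPlan.execute → RDDOperationScope.withScope → toJson), it fires when a Dataset is converted to an RDD, because that conversion JSON-serializes an RDDOperationScope. A self-contained sketch of my own (names are illustrative) that should hit the same path without any ML code:

import org.apache.spark.sql.SparkSession

object RddConversionRepro {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("RddConversionRepro")
      .master("local[2]")
      .getOrCreate()
    import spark.implicits._

    // Converting any Dataset to an RDD runs inside
    // RDDOperationScope.withScope, which serializes the scope to JSON
    // with Jackson and so exercises the same paranamer code path.
    val rdd = Seq(1, 2, 3).toDF("n").rdd
    println(rdd.count())
  }
}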

4. Solution

The root cause is the paranamer version: Spark 2.4.0 resolves com.thoughtworks.paranamer:paranamer to 2.7 transitively (pulled in via avro), and paranamer 2.7's bytecode reader cannot parse class files emitted by the Scala 2.12 compiler, which is exactly where the trace dies. Forcing version 2.8 fixes the crash. Add the following dependency to pom.xml:


<dependency>
    <groupId>com.thoughtworks.paranamer</groupId>
    <artifactId>paranamer</artifactId>
    <version>2.8</version>
</dependency>
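
Declaring paranamer 2.8 directly makes Maven's nearest-wins resolution pick it over the transitive 2.7. To confirm the upgrade actually took effect at runtime, a quick check of my own (the object name is illustrative) is to print which jar the paranamer classes were loaded from:

import com.thoughtworks.paranamer.BytecodeReadingParanamer

object ParanamerVersionCheck {
  def main(args: Array[String]): Unit = {
    // The jar the paranamer classes were loaded from; after the fix
    // this should point at paranamer-2.8.jar.
    val location = classOf[BytecodeReadingParanamer]
      .getProtectionDomain.getCodeSource.getLocation
    println(location)
  }
}

At build time, mvn dependency:tree -Dincludes=com.thoughtworks.paranamer gives the same answer.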
