1. 背景
测试spark ml 代码的时候报了一个数组下标越界的异常,报错日志如下:
19/11/15 11:02:21 ERROR Instrumentation: com.thoughtworks.paranamer.BytecodeReadingParanamer$ClassReader.accept(BytecodeReadingParanamer.java:563)
com.thoughtworks.paranamer.BytecodeReadingParanamer$ClassReader.access$200(BytecodeReadingParanamer.java:338)
com.thoughtworks.paranamer.BytecodeReadingParanamer.lookupParameterNames(BytecodeReadingParanamer.java:103)
com.thoughtworks.paranamer.CachingParanamer.lookupParameterNames(CachingParanamer.java:90)
com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.getCtorParams(BeanIntrospector.scala:44)
com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.$anonfun$apply$1(BeanIntrospector.scala:58)
com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.$anonfun$apply$1$adapted(BeanIntrospector.scala:58)
scala.collection.TraversableLike.$anonfun$flatMap$1(TraversableLike.scala:244)
scala.collection.Iterator.foreach(Iterator.scala:941)
scala.collection.Iterator.foreach$(Iterator.scala:941)
scala.collection.AbstractIterator.foreach(Iterator.scala:1429)
scala.collection.IterableLike.foreach(IterableLike.scala:74)
scala.collection.IterableLike.foreach$(IterableLike.scala:73)
scala.collection.AbstractIterable.foreach(Iterable.scala:56)
scala.collection.TraversableLike.flatMap(TraversableLike.scala:244)
scala.collection.TraversableLike.flatMap$(TraversableLike.scala:241)
scala.collection.AbstractTraversable.flatMap(Traversable.scala:108)
com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.findConstructorParam$1(BeanIntrospector.scala:58)
com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.$anonfun$apply$19(BeanIntrospector.scala:176)
scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:237)
scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33)
scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:198)
scala.collection.TraversableLike.map(TraversableLike.scala:237)
scala.collection.TraversableLike.map$(TraversableLike.scala:230)
scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:198)
com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.$anonfun$apply$14(BeanIntrospector.scala:170)
com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.$anonfun$apply$14$adapted(BeanIntrospector.scala:169)
scala.collection.TraversableLike.$anonfun$flatMap$1(TraversableLike.scala:244)
scala.collection.immutable.List.foreach(List.scala:392)
scala.collection.TraversableLike.flatMap(TraversableLike.scala:244)
scala.collection.TraversableLike.flatMap$(TraversableLike.scala:241)
scala.collection.immutable.List.flatMap(List.scala:355)
com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.apply(BeanIntrospector.scala:169)
com.fasterxml.jackson.module.scala.introspect.ScalaAnnotationIntrospector$._descriptorFor(ScalaAnnotationIntrospectorModule.scala:22)
com.fasterxml.jackson.module.scala.introspect.ScalaAnnotationIntrospector$.fieldName(ScalaAnnotationIntrospectorModule.scala:30)
com.fasterxml.jackson.module.scala.introspect.ScalaAnnotationIntrospector$.findImplicitPropertyName(ScalaAnnotationIntrospectorModule.scala:78)
com.fasterxml.jackson.databind.introspect.AnnotationIntrospectorPair.findImplicitPropertyName(AnnotationIntrospectorPair.java:467)
com.fasterxml.jackson.databind.introspect.POJOPropertiesCollector._addFields(POJOPropertiesCollector.java:351)
com.fasterxml.jackson.databind.introspect.POJOPropertiesCollector.collectAll(POJOPropertiesCollector.java:283)
com.fasterxml.jackson.databind.introspect.POJOPropertiesCollector.getJsonValueMethod(POJOPropertiesCollector.java:169)
com.fasterxml.jackson.databind.introspect.BasicBeanDescription.findJsonValueMethod(BasicBeanDescription.java:223)
com.fasterxml.jackson.databind.ser.BasicSerializerFactory.findSerializerByAnnotations(BasicSerializerFactory.java:348)
com.fasterxml.jackson.databind.ser.BeanSerializerFactory._createSerializer2(BeanSerializerFactory.java:210)
com.fasterxml.jackson.databind.ser.BeanSerializerFactory.createSerializer(BeanSerializerFactory.java:153)
com.fasterxml.jackson.databind.SerializerProvider._createUntypedSerializer(SerializerProvider.java:1203)
com.fasterxml.jackson.databind.SerializerProvider._createAndCacheUntypedSerializer(SerializerProvider.java:1157)
com.fasterxml.jackson.databind.SerializerProvider.findValueSerializer(SerializerProvider.java:481)
com.fasterxml.jackson.databind.SerializerProvider.findTypedValueSerializer(SerializerProvider.java:679)
com.fasterxml.jackson.databind.ser.DefaultSerializerProvider.serializeValue(DefaultSerializerProvider.java:107)
com.fasterxml.jackson.databind.ObjectMapper._configAndWriteValue(ObjectMapper.java:3559)
com.fasterxml.jackson.databind.ObjectMapper.writeValueAsString(ObjectMapper.java:2927)
org.apache.spark.rdd.RDDOperationScope.toJson(RDDOperationScope.scala:52)
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:142)
org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:80)
org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:80)
org.apache.spark.sql.Dataset.rdd$lzycompute(Dataset.scala:3038)
org.apache.spark.sql.Dataset.rdd(Dataset.scala:3036)
org.apache.spark.ml.classification.LogisticRegression.$anonfun$train$1(LogisticRegression.scala:497)
org.apache.spark.ml.util.Instrumentation$.$anonfun$instrumented$1(Instrumentation.scala:183)
scala.util.Try$.apply(Try.scala:213)
org.apache.spark.ml.util.Instrumentation$.instrumented(Instrumentation.scala:183)
org.apache.spark.ml.classification.LogisticRegression.train(LogisticRegression.scala:494)
org.apache.spark.ml.classification.LogisticRegression.train(LogisticRegression.scala:489)
org.apache.spark.ml.classification.LogisticRegression.train(LogisticRegression.scala:279)
org.apache.spark.ml.Predictor.fit(Predictor.scala:118)
hnbian.spark.ml.pipeline.EstimatorTransformerParamExam$.main(EstimatorTransformerParamExam.scala:42)
hnbian.spark.ml.pipeline.EstimatorTransformerParamExam.main(EstimatorTransformerParamExam.scala)
Exception in thread “main” java.lang.ArrayIndexOutOfBoundsException: 10582
at com.thoughtworks.paranamer.BytecodeReadingParanamer$ClassReader.accept(BytecodeReadingParanamer.java:563)
at com.thoughtworks.paranamer.BytecodeReadingParanamer$ClassReader.access$200(BytecodeReadingParanamer.java:338)
at com.thoughtworks.paranamer.BytecodeReadingParanamer.lookupParameterNames(BytecodeReadingParanamer.java:103)
at com.thoughtworks.paranamer.CachingParanamer.lookupParameterNames(CachingParanamer.java:90)
at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.getCtorParams(BeanIntrospector.scala:44)
at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.$anonfun$apply$1(BeanIntrospector.scala:58)
at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.$anonfun$apply$1$adapted(BeanIntrospector.scala:58)
at scala.collection.TraversableLike.$anonfun$flatMap$1(TraversableLike.scala:244)
at scala.collection.Iterator.foreach(Iterator.scala:941)
at scala.collection.Iterator.foreach$(Iterator.scala:941)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1429)
at scala.collection.IterableLike.foreach(IterableLike.scala:74)
at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
at scala.collection.TraversableLike.flatMap(TraversableLike.scala:244)
at scala.collection.TraversableLike.flatMap$(TraversableLike.scala:241)
at scala.collection.AbstractTraversable.flatMap(Traversable.scala:108)
at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.findConstructorParam$1(BeanIntrospector.scala:58)
at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.$anonfun$apply$19(BeanIntrospector.scala:176)
at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:237)
at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:198)
at scala.collection.TraversableLike.map(TraversableLike.scala:237)
at scala.collection.TraversableLike.map$(TraversableLike.scala:230)
at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:198)
at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.$anonfun$apply$14(BeanIntrospector.scala:170)
at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.$anonfun$apply$14$adapted(BeanIntrospector.scala:169)
at scala.collection.TraversableLike.$anonfun$flatMap$1(TraversableLike.scala:244)
at scala.collection.immutable.List.foreach(List.scala:392)
at scala.collection.TraversableLike.flatMap(TraversableLike.scala:244)
at scala.collection.TraversableLike.flatMap$(TraversableLike.scala:241)
at scala.collection.immutable.List.flatMap(List.scala:355)
at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.apply(BeanIntrospector.scala:169)
at com.fasterxml.jackson.module.scala.introspect.ScalaAnnotationIntrospector$._descriptorFor(ScalaAnnotationIntrospectorModule.scala:22)
at com.fasterxml.jackson.module.scala.introspect.ScalaAnnotationIntrospector$.fieldName(ScalaAnnotationIntrospectorModule.scala:30)
at com.fasterxml.jackson.module.scala.introspect.ScalaAnnotationIntrospector$.findImplicitPropertyName(ScalaAnnotationIntrospectorModule.scala:78)
at com.fasterxml.jackson.databind.introspect.AnnotationIntrospectorPair.findImplicitPropertyName(AnnotationIntrospectorPair.java:467)
at com.fasterxml.jackson.databind.introspect.POJOPropertiesCollector._addFields(POJOPropertiesCollector.java:351)
at com.fasterxml.jackson.databind.introspect.POJOPropertiesCollector.collectAll(POJOPropertiesCollector.java:283)
at com.fasterxml.jackson.databind.introspect.POJOPropertiesCollector.getJsonValueMethod(POJOPropertiesCollector.java:169)
at com.fasterxml.jackson.databind.introspect.BasicBeanDescription.findJsonValueMethod(BasicBeanDescription.java:223)
at com.fasterxml.jackson.databind.ser.BasicSerializerFactory.findSerializerByAnnotations(BasicSerializerFactory.java:348)
at com.fasterxml.jackson.databind.ser.BeanSerializerFactory._createSerializer2(BeanSerializerFactory.java:210)
at com.fasterxml.jackson.databind.ser.BeanSerializerFactory.createSerializer(BeanSerializerFactory.java:153)
at com.fasterxml.jackson.databind.SerializerProvider._createUntypedSerializer(SerializerProvider.java:1203)
at com.fasterxml.jackson.databind.SerializerProvider._createAndCacheUntypedSerializer(SerializerProvider.java:1157)
at com.fasterxml.jackson.databind.SerializerProvider.findValueSerializer(SerializerProvider.java:481)
at com.fasterxml.jackson.databind.SerializerProvider.findTypedValueSerializer(SerializerProvider.java:679)
at com.fasterxml.jackson.databind.ser.DefaultSerializerProvider.serializeValue(DefaultSerializerProvider.java:107)
at com.fasterxml.jackson.databind.ObjectMapper._configAndWriteValue(ObjectMapper.java:3559)
at com.fasterxml.jackson.databind.ObjectMapper.writeValueAsString(ObjectMapper.java:2927)
at org.apache.spark.rdd.RDDOperationScope.toJson(RDDOperationScope.scala:52)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:142)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:80)
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:80)
at org.apache.spark.sql.Dataset.rdd$lzycompute(Dataset.scala:3038)
at org.apache.spark.sql.Dataset.rdd(Dataset.scala:3036)
at org.apache.spark.ml.classification.LogisticRegression.$anonfun$train$1(LogisticRegression.scala:497)
at org.apache.spark.ml.util.Instrumentation$.$anonfun$instrumented$1(Instrumentation.scala:183)
at scala.util.Try$.apply(Try.scala:213)
at org.apache.spark.ml.util.Instrumentation$.instrumented(Instrumentation.scala:183)
at org.apache.spark.ml.classification.LogisticRegression.train(LogisticRegression.scala:494)
at org.apache.spark.ml.classification.LogisticRegression.train(LogisticRegression.scala:489)
at org.apache.spark.ml.classification.LogisticRegression.train(LogisticRegression.scala:279)
at org.apache.spark.ml.Predictor.fit(Predictor.scala:118)
at hnbian.spark.ml.pipeline.EstimatorTransformerParamExam$.main(EstimatorTransformerParamExam.scala:42)
at hnbian.spark.ml.pipeline.EstimatorTransformerParamExam.main(EstimatorTransformerParamExam.scala)
Process finished with exit code 1
2. 引用依赖
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.12</artifactId>
<version>2.4.0</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.12</artifactId>
<version>2.4.0</version>
</dependency>
<!--添加spark机器学习依赖-->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-mllib_2.12</artifactId>
<version>2.4.0</version>
</dependency>
3. 报错代码
object EstimatorTransformerParamExam {
def main(args: Array[String]): Unit = {
val conf = new SparkConf().setAppName("EstimatorTransformerParamExam")
//设置master local[4] 指定本地模式开启模拟worker线程数
conf.setMaster("local[4]")
//创建sparkContext文件
val sc = new SparkContext(conf)
val spark = SparkSession.builder().getOrCreate()
sc.setLogLevel("Error")
// 从(标签,特征)元组列表中准备训练数据。
val training = spark.createDataFrame(Seq(
(1.0, Vectors.dense(0.0, 1.1, 0.1)),
(0.0, Vectors.dense(2.0, 1.0, -1.0)),
(0.0, Vectors.dense(2.0, 1.3, 1.0)),
(1.0, Vectors.dense(0.0, 1.2, -0.5))
)).toDF("label", "features")
training.show()
// 创建一个LogisticRegression实例。 这个实例是一个Estimator。
val lr = new LogisticRegression()
// 打印出参数,文档和任何默认值
println("LogisticRegression parameters:\n" + lr.explainParams() + "\n")
// 我们可以使用setter方法来设置参数。
lr.setMaxIter(5) //设置最大迭代次数。默认100
.setRegParam(0.01) //设置正则化参数。默认0
// 学习LogisticRegression模型。 这使用存储在lr中的参数。
val model1 = lr.fit(training)
}
}
4. 解决办法
在pom.xml 中增加如下依赖
<dependency>
<groupId>com.thoughtworks.paranamer</groupId>
<artifactId>paranamer</artifactId>
<version>2.8</version>
</dependency>