I'm trying to create a Dataset of geo data using Spark and ESRI. If Foo only has a Point field, it works, but if I add some other fields beyond the Point, I get an ArrayIndexOutOfBoundsException.
import com.esri.core.geometry.Point
import org.apache.spark.sql.{Encoder, Encoders, SQLContext}
import org.apache.spark.{SparkConf, SparkContext}

object Main {

  case class Foo(position: Point, name: String)

  object MyEncoders {
    implicit def pointEncoder: Encoder[Point] = Encoders.kryo[Point]
    implicit def fooEncoder: Encoder[Foo] = Encoders.kryo[Foo]
  }

  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("app").setMaster("local"))
    val sqlContext = new SQLContext(sc)

    import MyEncoders.{fooEncoder, pointEncoder}
    import sqlContext.implicits._

    Seq(new Foo(new Point(0, 0), "bar")).toDS.show
  }
}
Exception in thread "main" java.lang.ArrayIndexOutOfBoundsException: 1
	at org.apache.spark.sql.execution.Queryable$$anonfun$formatString$1$$anonfun$apply$2.apply(Queryable.scala:71)
	at org.apache.spark.sql.execution.Queryable$$anonfun$formatString$1$$anonfun$apply$2.apply(Queryable.scala:70)
	at scala.collection.TraversableLike$WithFilter$$anonfun$foreach$1.apply(TraversableLike.scala:772)
	at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
	at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
	at scala.collection.TraversableLike$WithFilter.foreach(TraversableLike.scala:771)
	at org.apache.spark.sql.execution.Queryable$$anonfun$formatString$1.apply(Queryable.scala:70)
	at org.apache.spark.sql.execution.Queryable$$anonfun$formatString$1.apply(Queryable.scala:69)
	at scala.collection.mutable.ArraySeq.foreach(ArraySeq.scala:73)
	at org.apache.spark.sql.execution.Queryable$class.formatString(Queryable.scala:69)
	at org.apache.spark.sql.Dataset.formatString(Dataset.scala:65)
	at org.apache.spark.sql.Dataset.showString(Dataset.scala:263)
	at org.apache.spark.sql.Dataset.show(Dataset.scala:230)
	at org.apache.spark.sql.Dataset.show(Dataset.scala:193)
	at org.apache.spark.sql.Dataset.show(Dataset.scala:201)
	at Main$.main(Main.scala:24)
	at Main.main(Main.scala)
Kryo creates encoders for complex data types in terms of Spark SQL data types. Check the schema of the encoder that Kryo creates:
val enc: Encoder[Foo] = Encoders.kryo[Foo]
println(enc.schema)  // StructType(StructField(value,BinaryType,true))
val numCols = enc.schema.fieldNames.length  // 1
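For contrast, here is a minimal sketch of the other case: the product encoder that Spark derives for a case class made only of supported types exposes one column per field, so show has a real multi-column schema to work with. The Simple class is purely illustrative:

import org.apache.spark.sql.{Encoder, Encoders}

case class Simple(x: Double, name: String)

// Encoders.product derives the encoder from the case class fields,
// yielding one column per field instead of a single binary blob.
val simpleEnc: Encoder[Simple] = Encoders.product[Simple]
println(simpleEnc.schema)
// StructType(StructField(x,DoubleType,false), StructField(name,StringType,true))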
So with the Kryo encoder the Dataset has only one column of data, and it is in binary format. It is strange that Spark attempts to show the Dataset in more than one column (which is where the error occurs). To fix this, upgrade your Spark version to 2.0.0.
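For reference, a minimal sketch of what the upgraded program could look like, assuming Spark 2.0.0 where SQLContext is superseded by SparkSession; the structure mirrors the code from the question:

import com.esri.core.geometry.Point
import org.apache.spark.sql.{Encoder, Encoders, SparkSession}

object Main {

  case class Foo(position: Point, name: String)

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("app")
      .master("local")
      .getOrCreate()

    // The locally scoped implicit is more specific than the generic product
    // encoder from spark.implicits._, so Foo is still Kryo-serialized into
    // a single binary "value" column.
    implicit val fooEncoder: Encoder[Foo] = Encoders.kryo[Foo]
    import spark.implicits._

    // Per the answer above, show on 2.0.0 prints the one binary column
    // instead of throwing ArrayIndexOutOfBoundsException.
    Seq(Foo(new Point(0, 0), "bar")).toDS.show()
  }
}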
Even with Spark 2.0.0, you still have the problem of column data types. Hopefully writing a manual schema works, provided you can write a StructType for the ESRI Point class:
val schema = StructType(
  Seq(
    StructField("point", StructType(...), true),
    StructField("name", StringType, true)
  )
)
val rdd = sc.parallelize(Seq(Row(new Point(0, 0), "bar")))
sqlContext.createDataFrame(rdd, schema).toDS
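The StructType for Point is left elided above. One possible way to fill it in, as a sketch only: since ESRI's Point is not a Spark SQL type, represent it as a nested struct of its coordinates and flatten each Point into a Row by hand. The field names "x" and "y" are my choice, not mandated by the ESRI API:

import com.esri.core.geometry.Point
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{DoubleType, StringType, StructField, StructType}

// Represent a Point as a nested struct of its coordinates.
val pointSchema = StructType(Seq(
  StructField("x", DoubleType, nullable = false),
  StructField("y", DoubleType, nullable = false)
))

val schema = StructType(Seq(
  StructField("point", pointSchema, nullable = true),
  StructField("name", StringType, nullable = true)
))

// Flatten each Point into a nested Row matching pointSchema.
def toRow(p: Point, name: String): Row = Row(Row(p.getX, p.getY), name)

val rdd = sc.parallelize(Seq(toRow(new Point(0, 0), "bar")))
sqlContext.createDataFrame(rdd, schema).show()

With this layout, show renders the point as a nested [x, y] struct, and you can map the coordinates back into a Point whenever you need the geometry API.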