remove Scala/Java API for now
Davies Liu committed Feb 11, 2015
1 parent c80a7a9 commit 08469c1
Showing 2 changed files with 0 additions and 87 deletions.
60 changes: 0 additions & 60 deletions sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -266,66 +266,6 @@ class SQLContext(@transient val sparkContext: SparkContext)
    createDataFrame(rowRDD.rdd, schema)
  }

  /**
   * Creates a [[DataFrame]] from an [[RDD]] containing [[Row]]s by applying
   * a sequence of column names to this RDD; the data type of each column is
   * inferred from the first row.
   *
   * It does not support nested StructType; use createDataFrame(rdd, schema) instead.
   *
   * For example:
   *
   * {{{
   *  val sqlContext = new org.apache.spark.sql.SQLContext(sc)
   *
   *  val people = sc.textFile("examples/src/main/resources/people.txt").map(
   *    _.split(",")).map(p => Row(p(0), p(1).trim.toInt))
   *  val dataFrame = sqlContext.createDataFrame(people, Seq("name", "age"))
   *  dataFrame.printSchema
   *  // root
   *  // |-- name: string (nullable = true)
   *  // |-- age: integer (nullable = true)
   * }}}
   *
   * @param rowRDD an RDD of [[Row]]s
   * @param columns the names of the columns
   * @return a DataFrame with the given column names and inferred column types
   */
  def createDataFrame(rowRDD: RDD[Row], columns: Seq[String]): DataFrame = {
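    // Infers the Catalyst DataType of a runtime value: primitives are handled
    // by ScalaReflection.typeOfObject; Scala/Java maps, seqs, and arrays
    // recurse into their first element, failing on empty containers.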
    def inferType: PartialFunction[Any, DataType] = ScalaReflection.typeOfObject orElse {
      case map: Map[_, _] =>
        if (map.isEmpty) {
          throw new Exception("Cannot infer type from empty Map")
        }
        val (k, v) = map.head
        MapType(inferType(k), inferType(v), true)
      case map: java.util.Map[_, _] =>
        if (map.isEmpty) {
          throw new Exception("Cannot infer type from empty Map")
        }
        val (k, v) = map.head
        MapType(inferType(k), inferType(v), true)
      case seq: Seq[Any] =>
        if (seq.isEmpty) {
          throw new Exception("Cannot infer type from empty seq")
        }
        ArrayType(inferType(seq.head), true)
      case arr: Array[Any] =>
        if (arr.isEmpty) {
          throw new Exception("Cannot infer type from empty array")
        }
        ArrayType(inferType(arr.head), true)
      case other =>
        throw new Exception(s"Cannot infer type from $other")
    }

    val first = rowRDD.first()
    val types = first.toSeq.map(inferType)
    val fields = columns.zip(types).map(x => new StructField(x._1, x._2, true))
    val schema = StructType(fields)
    createDataFrame(rowRDD, schema)
  }

  /**
   * Creates a [[DataFrame]] from a [[JavaRDD]] containing [[Row]]s by applying
   * a sequence of column names to this RDD; the data type for each column will
…
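With this overload removed, callers build the schema themselves and go through createDataFrame(rowRDD, schema), the code path the remaining overload above already delegates to. Below is a minimal sketch of the people.txt example from the deleted Scaladoc rewritten against the explicit-schema API; the import paths assume the Spark 1.3-era org.apache.spark.sql.types package, and sc is an existing SparkContext:

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Row, SQLContext}
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

val sqlContext = new SQLContext(sc)

// Same parsing as the removed example: "name, age" lines into Rows.
val people: RDD[Row] = sc.textFile("examples/src/main/resources/people.txt")
  .map(_.split(","))
  .map(p => Row(p(0), p(1).trim.toInt))

// Spell out what the removed overload would have inferred from the first row.
val schema = StructType(Seq(
  StructField("name", StringType, nullable = true),
  StructField("age", IntegerType, nullable = true)))

val dataFrame = sqlContext.createDataFrame(people, schema)
dataFrame.printSchema()
// root
//  |-- name: string (nullable = true)
//  |-- age: integer (nullable = true)

An explicit schema also avoids the rowRDD.first() job that the removed overload had to run before the DataFrame could be built.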
27 changes: 0 additions & 27 deletions sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -736,33 +736,6 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll {
      Row(4, 2147483644) :: Nil)
  }

test("create data frame with names") {
    val rowRDD1 = unparsedStrings.map { r =>
      val values = r.split(",").map(_.trim)
      val v4 = try values(3).toInt catch {
        case _: NumberFormatException => null
      }
      Row(values(0).toInt, values(1), values(2).toBoolean, v4)
    }
    val columns = Seq("f1", "f2", "f3", "f4")

    val df1 = sqlCtx.createDataFrame(rowRDD1, columns)
    df1.registerTempTable("applySchema1")
    checkAnswer(
      sql("SELECT * FROM applySchema1"),
      Row(1, "A1", true, null) ::
      Row(2, "B2", false, null) ::
      Row(3, "C3", true, null) ::
      Row(4, "D4", true, 2147483644) :: Nil)

    checkAnswer(
      sql("SELECT f1, f4 FROM applySchema1"),
      Row(1, null) ::
      Row(2, null) ::
      Row(3, null) ::
      Row(4, 2147483644) :: Nil)
  }

test("SPARK-3423 BETWEEN") {
checkAnswer(
sql("SELECT key, value FROM testData WHERE key BETWEEN 5 and 7"),
…
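For reference, the deleted test fixture above could be recreated against the explicit-schema overload roughly as follows. This is a sketch only, reusing rowRDD1 and sqlCtx as defined in the suite and assuming the same types package as above:

import org.apache.spark.sql.types._

// f1..f4 match the Row(Int, String, Boolean, Int-or-null) built by rowRDD1.
val schema = StructType(Seq(
  StructField("f1", IntegerType, nullable = true),
  StructField("f2", StringType, nullable = true),
  StructField("f3", BooleanType, nullable = true),
  StructField("f4", IntegerType, nullable = true)))

val df1 = sqlCtx.createDataFrame(rowRDD1, schema)
df1.registerTempTable("applySchema1")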
