diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index df7d21dde05b1..90b42d03a320e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -266,66 +266,6 @@ class SQLContext(@transient val sparkContext: SparkContext)
     createDataFrame(rowRDD.rdd, schema)
   }
 
-  /**
-   * Creates a [[DataFrame]] from an [[RDD]] containing [[Row]]s by applying
-   * a seq of names of columns to this RDD, the data type for each column will
-   * be inferred by the first row.
-   *
-   * It does not support nested StructType, use createDataFrame(rdd, schema) instead.
-   *
-   * For example:
-   *
-   * {{{
-   *  val sqlContext = new org.apache.spark.sql.SQLContext(sc)
-   *
-   *  val people = sc.textFile("examples/src/main/resources/people.txt").map(
-   *    _.split(",")).map(p => Row(p(0), p(1).trim.toInt))
-   *  val dataFrame = sqlContext.createDataFrame(people, Seq("name", "age"))
-   *  dataFrame.printSchema
-   *  // root
-   *  // |-- name: string (nullable = false)
-   *  // |-- age: integer (nullable = true)
-   * }}}
-   *
-   * @param rowRDD an RDD of Row
-   * @param columns names for each column
-   * @return DataFrame
-   */
-  def createDataFrame(rowRDD: RDD[Row], columns: Seq[String]): DataFrame = {
-    def inferType: PartialFunction[Any, DataType] = ScalaReflection.typeOfObject orElse {
-      case map: Map[_, _] =>
-        if (map.isEmpty) {
-          throw new Exception("Cannot infer type from empty Map")
-        }
-        val (k, v) = map.head
-        MapType(inferType(k), inferType(v), true)
-      case map: java.util.Map[_, _] =>
-        if (map.isEmpty) {
-          throw new Exception("Cannot infer type from empty Map")
-        }
-        val (k, v) = map.head
-        MapType(inferType(k), inferType(v), true)
-      case seq: Seq[Any] =>
-        if (seq.isEmpty) {
-          throw new Exception("Cannot infer type from empty seq")
-        }
-        ArrayType(inferType(seq.head), true)
-      case arr: Array[Any] =>
-        if (arr.isEmpty) {
-          throw new Exception("Cannot infer type from empty array")
-        }
-        ArrayType(inferType(arr.head), true)
-      case other =>
-        throw new Exception(s"Cannot infer type from $other")
-    }
-
-    val first = rowRDD.first()
-    val types = first.toSeq.map(inferType)
-    val fields = columns.zip(types).map(x => new StructField(x._1, x._2, true))
-    val schema = StructType(fields)
-    createDataFrame(rowRDD, schema)
-  }
-
   /**
    * Creates a [[DataFrame]] from an [[JavaRDD]] containing [[Row]]s by applying
    * a seq of names of columns to this RDD, the data type for each column will
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 0afb6342b8335..575e4e2f4ef50 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -736,33 +736,6 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll {
       Row(4, 2147483644) :: Nil)
   }
 
-  test("create data frame with names") {
-    val rowRDD1 = unparsedStrings.map { r =>
-      val values = r.split(",").map(_.trim)
-      val v4 = try values(3).toInt catch {
-        case _: NumberFormatException => null
-      }
-      Row(values(0).toInt, values(1), values(2).toBoolean, v4)
-    }
-    val columns = Seq("f1", "f2", "f3", "f4")
-
-    val df1 = sqlCtx.createDataFrame(rowRDD1, columns)
-    df1.registerTempTable("applySchema1")
-    checkAnswer(
-      sql("SELECT * FROM applySchema1"),
-      Row(1, "A1", true, null) ::
-      Row(2, "B2", false, null) ::
-      Row(3, "C3", true, null) ::
-      Row(4, "D4", true, 2147483644) :: Nil)
-
-    checkAnswer(
-      sql("SELECT f1, f4 FROM applySchema1"),
-      Row(1, null) ::
-      Row(2, null) ::
-      Row(3, null) ::
-      Row(4, 2147483644) :: Nil)
-  }
-
   test("SPARK-3423 BETWEEN") {
     checkAnswer(
       sql("SELECT key, value FROM testData WHERE key BETWEEN 5 and 7"),
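
For reference, the scaladoc removed above already points callers at createDataFrame(rdd, schema) with an explicit StructType instead of first-row inference. Below is a minimal sketch of that path, assuming a local SparkContext and the org.apache.spark.sql.types package; the object name, app name, and master setting are illustrative and not part of this patch.

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.{Row, SQLContext}
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

object ExplicitSchemaExample {
  def main(args: Array[String]): Unit = {
    // Local context only for the sketch; any existing SparkContext works.
    val sc = new SparkContext(new SparkConf().setAppName("explicit-schema").setMaster("local[2]"))
    val sqlContext = new SQLContext(sc)

    // Same "name, age" lines as the people.txt example in the removed scaladoc.
    val people = sc.textFile("examples/src/main/resources/people.txt")
      .map(_.split(","))
      .map(p => Row(p(0), p(1).trim.toInt))

    // The schema the removed overload used to infer from the first row,
    // now written out explicitly.
    val schema = StructType(Seq(
      StructField("name", StringType, nullable = true),
      StructField("age", IntegerType, nullable = true)))

    val dataFrame = sqlContext.createDataFrame(people, schema)
    dataFrame.printSchema()
    dataFrame.registerTempTable("people")
    sqlContext.sql("SELECT name, age FROM people").collect().foreach(println)

    sc.stop()
  }
}

The explicit schema carries the same name/age columns the removed overload would have inferred, but types and nullability are stated up front, so an empty RDD, an empty collection, or an unrecognized value in the first row no longer causes the inference exceptions seen in the deleted inferType helper.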