diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index a2443ed3d60b6..4f133078200a8 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -257,8 +257,8 @@ def isLocal(self):
     def isStreaming(self):
         """Returns true if this :class:`Dataset` contains one or more sources that continuously
         return data as it arrives. A :class:`Dataset` that reads data from a streaming source
-        must be executed as a :class:`StreamingQuery` using the :func:`startStream` method in
-        :class:`DataFrameWriter`. Methods that return a single answer, (e.g., :func:`count` or
+        must be executed as a :class:`StreamingQuery` using the :func:`start` method in
+        :class:`DataStreamWriter`. Methods that return a single answer, (e.g., :func:`count` or
         :func:`collect`) will throw an :class:`AnalysisException` when there is a streaming
         source present.

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala
index 689e016a5a1d9..f6e32e29ebca8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala
@@ -30,7 +30,7 @@ object UnsupportedOperationChecker {
   def checkForBatch(plan: LogicalPlan): Unit = {
     plan.foreachUp {
       case p if p.isStreaming =>
-        throwError("Queries with streaming sources must be executed with write.startStream()")(p)
+        throwError("Queries with streaming sources must be executed with writeStream.start()")(p)

       case _ =>
     }
@@ -40,7 +40,7 @@ object UnsupportedOperationChecker {

     if (!plan.isStreaming) {
       throwError(
-        "Queries without streaming sources cannot be executed with write.startStream()")(plan)
+        "Queries without streaming sources cannot be executed with writeStream.start()")(plan)
     }

     // Disallow multiple streaming aggregations
@@ -154,7 +154,7 @@ object UnsupportedOperationChecker {

       case ReturnAnswer(child) if child.isStreaming =>
         throwError("Cannot return immediate result on streaming DataFrames/Dataset. Queries " +
-          "with streaming DataFrames/Datasets must be executed with write.startStream().")
+          "with streaming DataFrames/Datasets must be executed with writeStream.start().")

       case _ =>
     }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala
index c21ad5e03a48d..6df47acaba85b 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala
@@ -53,12 +53,12 @@ class UnsupportedOperationsSuite extends SparkFunSuite {

   assertNotSupportedInBatchPlan(
     "streaming source",
     streamRelation,
-    Seq("with streaming source", "startStream"))
+    Seq("with streaming source", "start"))

   assertNotSupportedInBatchPlan(
     "select on streaming source",
     streamRelation.select($"count(*)"),
-    Seq("with streaming source", "startStream"))
+    Seq("with streaming source", "start"))

   /*
@@ -70,7 +70,7 @@ class UnsupportedOperationsSuite extends SparkFunSuite {
   // Batch plan in streaming query
   testError(
     "streaming plan - no streaming source",
-    Seq("without streaming source", "startStream")) {
+    Seq("without streaming source", "start")) {
     UnsupportedOperationChecker.checkForStreaming(batchRelation.select($"count(*)"), Append)
   }

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index 9997162f7cfcf..df9f1888ee8b7 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -463,8 +463,8 @@ class Dataset[T] private[sql](
   /**
    * Returns true if this Dataset contains one or more sources that continuously
    * return data as it arrives. A Dataset that reads data from a streaming source
-   * must be executed as a [[StreamingQuery]] using the `startStream()` method in
-   * [[DataFrameWriter]]. Methods that return a single answer, e.g. `count()` or
+   * must be executed as a [[StreamingQuery]] using the `start()` method in
+   * [[DataStreamWriter]]. Methods that return a single answer, e.g. `count()` or
    * `collect()`, will throw an [[AnalysisException]] when there is a streaming
    * source present.
    *
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala
index d4b0a3cca2402..d38e3e58125d9 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala
@@ -109,7 +109,7 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) {

   /**
    * :: Experimental ::
-   * Specifies the name of the [[StreamingQuery]] that can be started with `startStream()`.
+   * Specifies the name of the [[StreamingQuery]] that can be started with `start()`.
    * This name must be unique among all the currently active queries in the associated SQLContext.
    *
    * @since 2.0.0
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQuery.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQuery.scala
index 19d1ecf740d0a..91f0a1e3446a1 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQuery.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQuery.scala
@@ -31,8 +31,8 @@ trait StreamingQuery {

  /**
   * Returns the name of the query. This name is unique across all active queries. This can be
-  * set in the[[org.apache.spark.sql.DataFrameWriter DataFrameWriter]] as
-  * `dataframe.write().queryName("query").startStream()`.
+  * set in the [[org.apache.spark.sql.DataStreamWriter DataStreamWriter]] as
+  * `dataframe.writeStream.queryName("query").start()`.
   * @since 2.0.0
   */
  def name: String
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala
index c43de58faa80c..3b3cead3a66de 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala
@@ -35,9 +35,9 @@ abstract class StreamingQueryListener {
  /**
   * Called when a query is started.
   * @note This is called synchronously with
-  *       [[org.apache.spark.sql.DataFrameWriter `DataFrameWriter.startStream()`]],
+  *       [[org.apache.spark.sql.DataStreamWriter `DataStreamWriter.start()`]],
   *       that is, `onQueryStart` will be called on all listeners before
-  *       `DataFrameWriter.startStream()` returns the corresponding [[StreamingQuery]]. Please
+  *       `DataStreamWriter.start()` returns the corresponding [[StreamingQuery]]. Please
   *       don't block this method as it will block your query.
   * @since 2.0.0
   */
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala
index c4a894b6816ac..28170f30646ab 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala
@@ -120,12 +120,12 @@ class StreamSuite extends StreamTest {
     }

     // Running streaming plan as a batch query
-    assertError("startStream" :: Nil) {
+    assertError("start" :: Nil) {
       streamInput.toDS.map { i => i }.count()
     }

     // Running non-streaming plan with as a streaming query
-    assertError("without streaming sources" :: "startStream" :: Nil) {
+    assertError("without streaming sources" :: "start" :: Nil) {
       val ds = batchInput.map { i => i }
       testStream(ds)()
     }
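
For reference, here is a minimal sketch of how the renamed API reads once this patch is applied. It is illustrative only and not part of the change: the socket source, console sink, host/port, local master, and the query name "q" are placeholder choices.

```scala
import org.apache.spark.sql.SparkSession

object StartRenameSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("start-rename-sketch")
      .master("local[2]")
      .getOrCreate()

    // A streaming Dataset: isStreaming is true, so batch actions such as
    // count() or collect() throw an AnalysisException (see the checker above).
    val lines = spark.readStream
      .format("socket")
      .option("host", "localhost")
      .option("port", "9999")
      .load()
    assert(lines.isStreaming)

    // Old spelling (pre-patch):  lines.write.queryName("q").startStream()
    // New spelling (this patch): streaming writes go through writeStream/start().
    val query = lines.writeStream
      .queryName("q") // must be unique among active queries in this session
      .format("console")
      .start()

    query.awaitTermination()
  }
}
```

Note that the name passed to `queryName` is what `StreamingQuery.name` later returns, matching the updated Scaladoc above.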