apache · WweiL · Apr 6, 2023 · Apr 7, 2023 · Apr 7, 2023 · Apr 10, 2023
diff --git a/connector/connect/common/src/main/protobuf/spark/connect/commands.proto b/connector/connect/common/src/main/protobuf/spark/connect/commands.proto
@@ -259,15 +259,21 @@ message StreamingQueryCommand {
     bool process_all_available = 6;
     // explain() API. Returns logical and physical plans.
     ExplainCommand explain = 7;
-
-    // TODO(SPARK-42960) Add more commands: await_termination(), exception() etc.
+    // exception() API. Returns the exception in the query if any.
+    bool exception = 8;
+    // awaitTermination() API. Waits for the termination of the query.
+    AwaitTerminationCommand await_termination = 9;
   }
 
   message ExplainCommand {
     // TODO: Consider reusing Explain from AnalyzePlanRequest message.
     //       We can not do this right now since it base.proto imports this file.
     bool extended = 1;
   }
+
+  message AwaitTerminationCommand {
+    optional int64 timeout_ms = 2;
+  }
 }
 
 // Response for commands on a streaming query.
@@ -279,6 +285,8 @@ message StreamingQueryCommandResult {
     StatusResult status = 2;
     RecentProgressResult recent_progress = 3;
     ExplainResult explain = 4;
+    ExceptionResult exception = 5;
+    AwaitTerminationResult await_termination = 6;
   }
 
   message StatusResult {
@@ -298,6 +306,15 @@ message StreamingQueryCommandResult {
     // Logical and physical plans as string
     string result = 1;
   }
+
+  message ExceptionResult {
+    // Exception message as string
+    optional string exception_message = 1;
+  }
+
+  message AwaitTerminationResult {
+    optional bool terminated = 1;
+  }
 }
 
 // Command to get the output of 'SparkContext.resources'

diff --git a/...nect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala b/...nect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
@@ -52,7 +52,7 @@ import org.apache.spark.sql.connect.artifact.SparkConnectArtifactManager
 import org.apache.spark.sql.connect.common.{DataTypeProtoConverter, InvalidPlanInput, LiteralValueProtoConverter, StorageLevelProtoConverter, UdfPacket}
 import org.apache.spark.sql.connect.config.Connect.CONNECT_GRPC_ARROW_MAX_BATCH_SIZE
 import org.apache.spark.sql.connect.plugin.SparkConnectPluginRegistry
-import org.apache.spark.sql.connect.service.SparkConnectStreamHandler
+import org.apache.spark.sql.connect.service.{SparkConnectService, SparkConnectStreamHandler}
 import org.apache.spark.sql.errors.QueryCompilationErrors
 import org.apache.spark.sql.execution.QueryExecution
 import org.apache.spark.sql.execution.arrow.ArrowConverters
@@ -2255,6 +2255,23 @@ class SparkConnectPlanner(val session: SparkSession) {
           .build()
         respBuilder.setExplain(explain)
 
+      case StreamingQueryCommand.CommandCase.EXCEPTION =>
+        val result = query.exception
+        result.foreach(e =>
+          respBuilder.getExceptionBuilder
+            .setExceptionMessage(SparkConnectService.extractErrorMessage(e)))
+
+      case StreamingQueryCommand.CommandCase.AWAIT_TERMINATION =>
+        if (command.getAwaitTermination.hasTimeoutMs) {
+          val terminated = query.awaitTermination(command.getAwaitTermination.getTimeoutMs)
+          respBuilder.getAwaitTerminationBuilder
+            .setTerminated(terminated)
+        } else {
+          query.awaitTermination()
+          respBuilder.getAwaitTerminationBuilder
+            .setTerminated(true)
+        }
+
       case StreamingQueryCommand.CommandCase.COMMAND_NOT_SET =>
         throw new IllegalArgumentException("Missing command in StreamingQueryCommand")
     }

diff --git a/...nect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectService.scala b/...nect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectService.scala
@@ -76,7 +76,6 @@ class SparkConnectService(debug: Boolean)
   }
 
   private def buildStatusFromThrowable(st: Throwable, stackTraceEnabled: Boolean): RPCStatus = {
-    val message = StringUtils.abbreviate(st.getMessage, 2048)
     val errorInfo = ErrorInfo
       .newBuilder()
       .setReason(st.getClass.getName)
@@ -95,7 +94,7 @@ class SparkConnectService(debug: Boolean)
       .newBuilder()
       .setCode(RPCCode.INTERNAL_VALUE)
       .addDetails(ProtoAny.pack(withStackTrace.build()))
-      .setMessage(if (message != null) message else "")
+      .setMessage(SparkConnectService.extractErrorMessage(st))
       .build()
   }
 
@@ -341,4 +340,13 @@ object SparkConnectService {
       }
     }
   }
+
+  def extractErrorMessage(st: Throwable): String = {
+    val message = StringUtils.abbreviate(st.getMessage, 2048)
+    if (message != null) {
+      message
+    } else {
+      ""
+    }
+  }
 }
diff --git a/python/pyspark/sql/connect/proto/commands_pb2.py b/python/pyspark/sql/connect/proto/commands_pb2.py
diff --git a/python/pyspark/sql/connect/proto/commands_pb2.pyi b/python/pyspark/sql/connect/proto/commands_pb2.pyi
@@ -876,13 +876,41 @@ class StreamingQueryCommand(google.protobuf.message.Message):
             self, field_name: typing_extensions.Literal["extended", b"extended"]
         ) -> None: ...
 
+    class AwaitTerminationCommand(google.protobuf.message.Message):
+        DESCRIPTOR: google.protobuf.descriptor.Descriptor
+
+        TIMEOUT_MS_FIELD_NUMBER: builtins.int
+        timeout_ms: builtins.int
+        def __init__(
+            self,
+            *,
+            timeout_ms: builtins.int | None = ...,
+        ) -> None: ...
+        def HasField(
+            self,
+            field_name: typing_extensions.Literal[
+                "_timeout_ms", b"_timeout_ms", "timeout_ms", b"timeout_ms"
+            ],
+        ) -> builtins.bool: ...
+        def ClearField(
+            self,
+            field_name: typing_extensions.Literal[
+                "_timeout_ms", b"_timeout_ms", "timeout_ms", b"timeout_ms"
+            ],
+        ) -> None: ...
+        def WhichOneof(
+            self, oneof_group: typing_extensions.Literal["_timeout_ms", b"_timeout_ms"]
+        ) -> typing_extensions.Literal["timeout_ms"] | None: ...
+
     QUERY_ID_FIELD_NUMBER: builtins.int
     STATUS_FIELD_NUMBER: builtins.int
     LAST_PROGRESS_FIELD_NUMBER: builtins.int
     RECENT_PROGRESS_FIELD_NUMBER: builtins.int
     STOP_FIELD_NUMBER: builtins.int
     PROCESS_ALL_AVAILABLE_FIELD_NUMBER: builtins.int
     EXPLAIN_FIELD_NUMBER: builtins.int
+    EXCEPTION_FIELD_NUMBER: builtins.int
+    AWAIT_TERMINATION_FIELD_NUMBER: builtins.int
     @property
     def query_id(self) -> global___StreamingQueryInstanceId:
         """(Required) Query instance. See `StreamingQueryInstanceId`."""
@@ -899,6 +927,11 @@ class StreamingQueryCommand(google.protobuf.message.Message):
     @property
     def explain(self) -> global___StreamingQueryCommand.ExplainCommand:
         """explain() API. Returns logical and physical plans."""
+    exception: builtins.bool
+    """exception() API. Returns the exception in the query if any."""
+    @property
+    def await_termination(self) -> global___StreamingQueryCommand.AwaitTerminationCommand:
+        """awaitTermination() API. Waits for the termination of the query."""
     def __init__(
         self,
         *,
@@ -909,12 +942,18 @@ class StreamingQueryCommand(google.protobuf.message.Message):
         stop: builtins.bool = ...,
         process_all_available: builtins.bool = ...,
         explain: global___StreamingQueryCommand.ExplainCommand | None = ...,
+        exception: builtins.bool = ...,
+        await_termination: global___StreamingQueryCommand.AwaitTerminationCommand | None = ...,
     ) -> None: ...
     def HasField(
         self,
         field_name: typing_extensions.Literal[
+            "await_termination",
+            b"await_termination",
             "command",
             b"command",
+            "exception",
+            b"exception",
             "explain",
             b"explain",
             "last_progress",
@@ -934,8 +973,12 @@ class StreamingQueryCommand(google.protobuf.message.Message):
     def ClearField(
         self,
         field_name: typing_extensions.Literal[
+            "await_termination",
+            b"await_termination",
             "command",
             b"command",
+            "exception",
+            b"exception",
             "explain",
             b"explain",
             "last_progress",
@@ -955,7 +998,14 @@ class StreamingQueryCommand(google.protobuf.message.Message):
     def WhichOneof(
         self, oneof_group: typing_extensions.Literal["command", b"command"]
     ) -> typing_extensions.Literal[
-        "status", "last_progress", "recent_progress", "stop", "process_all_available", "explain"
+        "status",
+        "last_progress",
+        "recent_progress",
+        "stop",
+        "process_all_available",
+        "explain",
+        "exception",
+        "await_termination",
     ] | None: ...
 
 global___StreamingQueryCommand = StreamingQueryCommand
@@ -1033,10 +1083,72 @@ class StreamingQueryCommandResult(google.protobuf.message.Message):
             self, field_name: typing_extensions.Literal["result", b"result"]
         ) -> None: ...
 
+    class ExceptionResult(google.protobuf.message.Message):
+        DESCRIPTOR: google.protobuf.descriptor.Descriptor
+
+        EXCEPTION_MESSAGE_FIELD_NUMBER: builtins.int
+        exception_message: builtins.str
+        """Exception message as string"""
+        def __init__(
+            self,
+            *,
+            exception_message: builtins.str | None = ...,
+        ) -> None: ...
+        def HasField(
+            self,
+            field_name: typing_extensions.Literal[
+                "_exception_message",
+                b"_exception_message",
+                "exception_message",
+                b"exception_message",
+            ],
+        ) -> builtins.bool: ...
+        def ClearField(
+            self,
+            field_name: typing_extensions.Literal[
+                "_exception_message",
+                b"_exception_message",
+                "exception_message",
+                b"exception_message",
+            ],
+        ) -> None: ...
+        def WhichOneof(
+            self,
+            oneof_group: typing_extensions.Literal["_exception_message", b"_exception_message"],
+        ) -> typing_extensions.Literal["exception_message"] | None: ...
+
+    class AwaitTerminationResult(google.protobuf.message.Message):
+        DESCRIPTOR: google.protobuf.descriptor.Descriptor
+
+        TERMINATED_FIELD_NUMBER: builtins.int
+        terminated: builtins.bool
+        def __init__(
+            self,
+            *,
+            terminated: builtins.bool | None = ...,
+        ) -> None: ...
+        def HasField(
+            self,
+            field_name: typing_extensions.Literal[
+                "_terminated", b"_terminated", "terminated", b"terminated"
+            ],
+        ) -> builtins.bool: ...
+        def ClearField(
+            self,
+            field_name: typing_extensions.Literal[
+                "_terminated", b"_terminated", "terminated", b"terminated"
+            ],
+        ) -> None: ...
+        def WhichOneof(
+            self, oneof_group: typing_extensions.Literal["_terminated", b"_terminated"]
+        ) -> typing_extensions.Literal["terminated"] | None: ...
+
     QUERY_ID_FIELD_NUMBER: builtins.int
     STATUS_FIELD_NUMBER: builtins.int
     RECENT_PROGRESS_FIELD_NUMBER: builtins.int
     EXPLAIN_FIELD_NUMBER: builtins.int
+    EXCEPTION_FIELD_NUMBER: builtins.int
+    AWAIT_TERMINATION_FIELD_NUMBER: builtins.int
     @property
     def query_id(self) -> global___StreamingQueryInstanceId:
         """(Required) Query instance id. See `StreamingQueryInstanceId`."""
@@ -1046,17 +1158,27 @@ class StreamingQueryCommandResult(google.protobuf.message.Message):
     def recent_progress(self) -> global___StreamingQueryCommandResult.RecentProgressResult: ...
     @property
     def explain(self) -> global___StreamingQueryCommandResult.ExplainResult: ...
+    @property
+    def exception(self) -> global___StreamingQueryCommandResult.ExceptionResult: ...
+    @property
+    def await_termination(self) -> global___StreamingQueryCommandResult.AwaitTerminationResult: ...
     def __init__(
         self,
         *,
         query_id: global___StreamingQueryInstanceId | None = ...,
         status: global___StreamingQueryCommandResult.StatusResult | None = ...,
         recent_progress: global___StreamingQueryCommandResult.RecentProgressResult | None = ...,
         explain: global___StreamingQueryCommandResult.ExplainResult | None = ...,
+        exception: global___StreamingQueryCommandResult.ExceptionResult | None = ...,
+        await_termination: global___StreamingQueryCommandResult.AwaitTerminationResult | None = ...,
     ) -> None: ...
     def HasField(
         self,
         field_name: typing_extensions.Literal[
+            "await_termination",
+            b"await_termination",
+            "exception",
+            b"exception",
             "explain",
             b"explain",
             "query_id",
@@ -1072,6 +1194,10 @@ class StreamingQueryCommandResult(google.protobuf.message.Message):
     def ClearField(
         self,
         field_name: typing_extensions.Literal[
+            "await_termination",
+            b"await_termination",
+            "exception",
+            b"exception",
             "explain",
             b"explain",
             "query_id",
@@ -1086,7 +1212,9 @@ class StreamingQueryCommandResult(google.protobuf.message.Message):
     ) -> None: ...
     def WhichOneof(
         self, oneof_group: typing_extensions.Literal["result_type", b"result_type"]
-    ) -> typing_extensions.Literal["status", "recent_progress", "explain"] | None: ...
+    ) -> typing_extensions.Literal[
+        "status", "recent_progress", "explain", "exception", "await_termination"
+    ] | None: ...
 
 global___StreamingQueryCommandResult = StreamingQueryCommandResult
 

diff --git a/python/pyspark/sql/connect/streaming/query.py b/python/pyspark/sql/connect/streaming/query.py
@@ -24,6 +24,9 @@
 from pyspark.sql.streaming.query import (
     StreamingQuery as PySparkStreamingQuery,
 )
+from pyspark.errors.exceptions.connect import (
+    StreamingQueryException as CapturedStreamingQueryException,
+)
 
 __all__ = [
     "StreamingQuery",  # TODO(SPARK-43032): "StreamingQueryManager"
@@ -66,11 +69,21 @@ def isActive(self) -> bool:
 
     isActive.__doc__ = PySparkStreamingQuery.isActive.__doc__
 
-    # TODO (SPARK-42960): Implement and uncomment the doc
     def awaitTermination(self, timeout: Optional[int] = None) -> Optional[bool]:
-        raise NotImplementedError()
+        cmd = pb2.StreamingQueryCommand()
+        if timeout is not None:
+            if not isinstance(timeout, (int, float)) or timeout <= 0:
+                raise ValueError("timeout must be a positive integer or float. Got %s" % timeout)
+            cmd.await_termination.timeout_ms = int(timeout * 1000)
+            terminated = self._execute_streaming_query_cmd(cmd).await_termination.terminated
+            return terminated
+        else:
+            await_termination_cmd = pb2.StreamingQueryCommand.AwaitTerminationCommand()
+            cmd.await_termination.CopyFrom(await_termination_cmd)
+            self._execute_streaming_query_cmd(cmd)
+            return None
 
-    # awaitTermination.__doc__ = PySparkStreamingQuery.awaitTermination.__doc__
+    awaitTermination.__doc__ = PySparkStreamingQuery.awaitTermination.__doc__
 
     @property
     def status(self) -> Dict[str, Any]:
@@ -127,9 +140,14 @@ def explain(self, extended: bool = False) -> None:
 
     explain.__doc__ = PySparkStreamingQuery.explain.__doc__
 
-    # TODO (SPARK-42960): Implement and uncomment the doc
     def exception(self) -> Optional[StreamingQueryException]:
-        raise NotImplementedError()
+        cmd = pb2.StreamingQueryCommand()
+        cmd.exception = True
+        exception = self._execute_streaming_query_cmd(cmd).exception
+        if exception.HasField("exception_message"):
+            return CapturedStreamingQueryException(exception.exception_message)
+        else:
+            return None
 
     exception.__doc__ = PySparkStreamingQuery.exception.__doc__
 

diff --git a/python/pyspark/sql/streaming/query.py b/python/pyspark/sql/streaming/query.py
@@ -196,7 +196,7 @@ def awaitTermination(self, timeout: Optional[int] = None) -> Optional[bool]:
         >>> sq.stop()
         """
         if timeout is not None:
-            if not isinstance(timeout, (int, float)) or timeout < 0:
+            if not isinstance(timeout, (int, float)) or timeout <= 0:
                 raise ValueError("timeout must be a positive integer or float. Got %s" % timeout)
             return self._jsq.awaitTermination(int(timeout * 1000))
         else: