Skip to content

Commit

Permalink
unify the two traits
Browse files — browse the repository at this point in the history
  • Loading branch information
eason-yuchen-liu committed Jun 27, 2024
1 parent cd6a39b commit 7c6cdad
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,11 @@ class StateStoreChangeDataPartitionReader(
schema: StructType) extends StatePartitionReader(storeConf, hadoopConf, partition, schema) {

private lazy val cdcReader: StateStoreChangeDataReader = {
provider.asInstanceOf[SupportsStateStoreChangeDataFeed]
if (!provider.isInstanceOf[SupportsFineGrainedReplay]) {
throw StateStoreErrors.stateStoreProviderDoesNotSupportFineGrainedReplay(
provider.getClass.toString)
}
provider.asInstanceOf[SupportsFineGrainedReplay]
.getStateStoreChangeDataReader(
partition.sourceOptions.cdcStartBatchID.get + 1,
partition.sourceOptions.cdcEndBatchId.get + 1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -440,9 +440,9 @@ object StateStoreProvider {
}

/**
* This is an optional trait to be implemented by [[StateStoreProvider]]s that can read fine
* grained state data which is replayed from a specific snapshot version. It is used by the
* snapshotStartBatchId option in state data source.
* This is an optional trait to be implemented by [[StateStoreProvider]]s that can read the change
* of state store over batches. This is used by State Data Source with additional options like
* snapshotStartBatchId or readChangeFeed.
*/
trait SupportsFineGrainedReplay {
/**
Expand All @@ -469,6 +469,15 @@ trait SupportsFineGrainedReplay {
/**
 * Replay state from the given snapshot version up to `endVersion` and expose the result as a
 * read-only store by wrapping it in a [[WrappedReadStateStore]].
 */
def replayReadStateFromSnapshot(snapshotVersion: Long, endVersion: Long): ReadStateStore = {
  val replayedStore = replayStateFromSnapshot(snapshotVersion, endVersion)
  new WrappedReadStateStore(replayedStore)
}

/**
 * Return a reader over the changes committed to the state store between the given versions.
 * This backs the readChangeFeed option of the State Data Source.
 *
 * NOTE(review): callers appear to pass batchId + 1 as the version — confirm the exact
 * batch-to-version offset and whether the bounds are inclusive against the implementations.
 *
 * @param startVersion first state store version whose changes should be read
 * @param endVersion last state store version whose changes should be read
 * @return a [[StateStoreChangeDataReader]] over the requested version range
 */
def getStateStoreChangeDataReader(startVersion: Long, endVersion: Long):
StateStoreChangeDataReader
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,6 @@ import org.apache.spark.sql.execution.streaming.CheckpointFileManager
import org.apache.spark.sql.types.StructType
import org.apache.spark.util.NextIterator

/**
 * Optional trait for [[StateStoreProvider]]s that are able to serve row-level state change
 * data. Backs the readChangeFeed option of the State Data Source.
 */
trait SupportsStateStoreChangeDataFeed {

  /** Return a reader over state changes for the given version range. */
  def getStateStoreChangeDataReader(
      startVersion: Long,
      endVersion: Long): StateStoreChangeDataReader
}

/**
* Base class for state store changelog reader
* @param fm - checkpoint file manager used to manage streaming query checkpoint
Expand Down

0 comments on commit 7c6cdad

Please sign in to comment.