-
Notifications
You must be signed in to change notification settings - Fork 28.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[SPARK-14176][SQL] Add DataFrameWriter.trigger to set the stream batch period
## What changes were proposed in this pull request?
Add a processing time trigger to control the batch processing speed
## How was this patch tested?
Unit tests
Author: Shixiong Zhu <[email protected]>
Closes #11976 from zsxwing/trigger.
- Loading branch information
Showing
9 changed files
with
413 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
133 changes: 133 additions & 0 deletions
133
sql/core/src/main/scala/org/apache/spark/sql/Trigger.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.spark.sql | ||
|
||
import java.util.concurrent.TimeUnit | ||
|
||
import scala.concurrent.duration.Duration | ||
|
||
import org.apache.commons.lang3.StringUtils | ||
|
||
import org.apache.spark.annotation.Experimental | ||
import org.apache.spark.unsafe.types.CalendarInterval | ||
|
||
/**
 * :: Experimental ::
 * Marker type describing how often a [[ContinuousQuery]] should produce results.
 * The trait is sealed, so all of its direct subtypes are declared in this source file.
 */
@Experimental
sealed trait Trigger
|
||
/**
 * :: Experimental ::
 * A trigger that runs a query periodically based on the processing time. If `intervalMs` is 0,
 * the query will run as fast as possible.
 *
 * Scala Example:
 * {{{
 *   df.write.trigger(ProcessingTime("10 seconds"))
 *
 *   import scala.concurrent.duration._
 *   df.write.trigger(ProcessingTime(10.seconds))
 * }}}
 *
 * Java Example:
 * {{{
 *   df.write().trigger(ProcessingTime.create("10 seconds"))
 *
 *   import java.util.concurrent.TimeUnit
 *   df.write().trigger(ProcessingTime.create(10, TimeUnit.SECONDS))
 * }}}
 *
 * @param intervalMs the trigger interval in milliseconds; must not be negative
 * @throws IllegalArgumentException if `intervalMs` is negative
 */
@Experimental
case class ProcessingTime(intervalMs: Long) extends Trigger {
  // Validated at construction time, so every instance carries a non-negative interval.
  require(intervalMs >= 0, "the interval of trigger should not be negative")
}
|
||
/**
 * :: Experimental ::
 * Used to create [[ProcessingTime]] triggers for [[ContinuousQuery]]s.
 */
@Experimental
object ProcessingTime {

  /**
   * Create a [[ProcessingTime]] from an interval string. If the resulting interval is 0, the
   * query will run as fast as possible.
   *
   * The string is parsed with `CalendarInterval.fromString`; when it does not already start with
   * "interval", that keyword is prepended before parsing. The parsed microseconds are converted
   * to milliseconds with integer division, so any sub-millisecond remainder is dropped.
   *
   * Example:
   * {{{
   *   df.write.trigger(ProcessingTime("10 seconds"))
   * }}}
   *
   * @param interval the interval text, e.g. "10 seconds" or "interval 1 minute"
   * @throws IllegalArgumentException if `interval` is null or blank, cannot be parsed, contains
   *                                  a month or year component, or yields a negative number of
   *                                  milliseconds
   */
  def apply(interval: String): ProcessingTime = {
    if (StringUtils.isBlank(interval)) {
      throw new IllegalArgumentException(
        "interval cannot be null or blank.")
    }
    val cal = if (interval.startsWith("interval")) {
      CalendarInterval.fromString(interval)
    } else {
      CalendarInterval.fromString("interval " + interval)
    }
    // `fromString` signals an unparsable string by returning null rather than throwing.
    if (cal == null) {
      throw new IllegalArgumentException(s"Invalid interval: $interval")
    }
    // Months and years have no fixed length in milliseconds. Compare against zero (not just
    // `> 0`) so that a negative month/year component is rejected as well instead of being
    // silently ignored and turned into a 0 ms trigger.
    if (cal.months != 0) {
      throw new IllegalArgumentException(s"Doesn't support month or year interval: $interval")
    }
    new ProcessingTime(cal.microseconds / 1000)
  }

  /**
   * Create a [[ProcessingTime]] from a Scala `Duration`, converted with `Duration.toMillis`.
   * If the resulting interval is 0, the query will run as fast as possible.
   *
   * Example:
   * {{{
   *   import scala.concurrent.duration._
   *   df.write.trigger(ProcessingTime(10.seconds))
   * }}}
   *
   * @param interval the trigger interval
   * @throws IllegalArgumentException if the interval is negative once converted to milliseconds
   */
  def apply(interval: Duration): ProcessingTime = {
    new ProcessingTime(interval.toMillis)
  }

  /**
   * Create a [[ProcessingTime]] from an interval string. This is a named alias of
   * `apply(interval: String)` for callers that cannot use Scala's `apply` syntax. If the
   * resulting interval is 0, the query will run as fast as possible.
   *
   * Example:
   * {{{
   *   df.write().trigger(ProcessingTime.create("10 seconds"))
   * }}}
   *
   * @param interval the interval text, e.g. "10 seconds" or "interval 1 minute"
   * @throws IllegalArgumentException under the same conditions as `apply(interval: String)`
   */
  def create(interval: String): ProcessingTime = {
    apply(interval)
  }

  /**
   * Create a [[ProcessingTime]] from a length and a `TimeUnit`, converted with
   * `TimeUnit.toMillis`. If the resulting interval is 0, the query will run as fast as possible.
   *
   * Example:
   * {{{
   *   import java.util.concurrent.TimeUnit
   *   df.write().trigger(ProcessingTime.create(10, TimeUnit.SECONDS))
   * }}}
   *
   * @param interval the length of the trigger interval, expressed in `unit`
   * @param unit the unit of `interval`
   * @throws IllegalArgumentException if the interval is negative once converted to milliseconds
   */
  def create(interval: Long, unit: TimeUnit): ProcessingTime = {
    new ProcessingTime(unit.toMillis(interval))
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
72 changes: 72 additions & 0 deletions
72
sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/TriggerExecutor.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.spark.sql.execution.streaming | ||
|
||
import org.apache.spark.internal.Logging | ||
import org.apache.spark.sql.ProcessingTime | ||
import org.apache.spark.util.{Clock, SystemClock} | ||
|
||
/**
 * Drives the repeated execution of a batch function.
 */
trait TriggerExecutor {

  /**
   * Execute batches using `batchRunner`. If `batchRunner` returns `false`, terminate the
   * execution.
   *
   * @param batchRunner runs one batch; its result tells the executor whether to keep going
   */
  def execute(batchRunner: () => Boolean): Unit
}
|
||
/**
 * A trigger executor that runs a batch every `intervalMs` milliseconds. When the interval is 0,
 * batches are run back to back without waiting in between.
 *
 * @param processingTime supplies the trigger interval in milliseconds
 * @param clock used to time each batch and to wait for the next trigger time
 */
case class ProcessingTimeExecutor(processingTime: ProcessingTime, clock: Clock = new SystemClock())
  extends TriggerExecutor with Logging {

  private val intervalMs = processingTime.intervalMs

  /**
   * Runs `batchRunner` repeatedly until it returns `false`. With a positive interval, each batch
   * is timed, a warning is raised when it took longer than the interval, and the executor waits
   * for the time given by `nextBatchTime` before starting the following batch.
   */
  override def execute(batchRunner: () => Boolean): Unit = {
    var keepRunning = true
    while (keepRunning) {
      val startedAtMs = clock.getTimeMillis()
      keepRunning = batchRunner()
      if (intervalMs > 0) {
        val finishedAtMs = clock.getTimeMillis()
        val tookMs = finishedAtMs - startedAtMs
        // The final batch is still checked for lateness before the loop ends.
        if (tookMs > intervalMs) {
          notifyBatchFallingBehind(tookMs)
        }
        // Only wait when another batch is going to run.
        if (keepRunning) {
          clock.waitTillTime(nextBatchTime(finishedAtMs))
        }
      }
    }
  }

  /** Called when a batch takes longer than the trigger interval. Exposed for testing only. */
  def notifyBatchFallingBehind(realElapsedTimeMs: Long): Unit = {
    val configured = s"${intervalMs} milliseconds"
    val spent = s"${realElapsedTimeMs} milliseconds"
    logWarning("Current batch is falling behind. The trigger interval is " +
      configured + ", but spent " + spent)
  }

  /**
   * Return the next multiple of `intervalMs`. For `now >= 1` this is the smallest multiple that
   * is greater than or equal to `now`, so an exact multiple maps to itself; `now == 0` maps to
   * `intervalMs`. Divides by `intervalMs`, so it must not be called when the interval is 0.
   */
  def nextBatchTime(now: Long): Long = {
    val completedIntervals = (now - 1) / intervalMs
    (completedIntervals + 1) * intervalMs
  }
}
40 changes: 40 additions & 0 deletions
40
sql/core/src/test/scala/org/apache/spark/sql/ProcessingTimeSuite.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.spark.sql | ||
|
||
import java.util.concurrent.TimeUnit | ||
|
||
import scala.concurrent.duration._ | ||
|
||
import org.apache.spark.SparkFunSuite | ||
|
||
/** Checks the factory methods of [[ProcessingTime]] and the inputs they must reject. */
class ProcessingTimeSuite extends SparkFunSuite {

  test("create") {
    // Every supported way of building a trigger must yield the expected milliseconds.
    val tenSecondsMs = 10 * 1000
    val oneMinuteMs = 60 * 1000
    assert(ProcessingTime(10.seconds).intervalMs === tenSecondsMs)
    assert(ProcessingTime.create(10, TimeUnit.SECONDS).intervalMs === tenSecondsMs)
    assert(ProcessingTime("1 minute").intervalMs === oneMinuteMs)
    assert(ProcessingTime("interval 1 minute").intervalMs === oneMinuteMs)

    // Null, blank, unparsable and month/year intervals are all rejected.
    val rejected = Seq[String](null, "", "invalid", "1 month", "1 year")
    rejected.foreach { badInterval =>
      intercept[IllegalArgumentException] { ProcessingTime(badInterval) }
    }
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.