Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add option to set partition expiration on tables #198

Merged
merged 2 commits into from
Oct 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion build.sbt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import com.typesafe.tools.mima.core._

// https://typelevel.org/sbt-typelevel/faq.html#what-is-a-base-version-anyway
ThisBuild / tlBaseVersion := "0.10" // your current series x.y
ThisBuild / tlBaseVersion := "0.11" // your current series x.y

ThisBuild / organization := "no.nrk.bigquery"
ThisBuild / organizationName := "NRK"
Expand Down
8 changes: 6 additions & 2 deletions core/src/main/scala/no/nrk/bigquery/TableOptions.scala
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,16 @@

package no.nrk.bigquery

import scala.concurrent.duration.FiniteDuration

/** Table-level options carried alongside a table definition.
  *
  * @param partitionFilterRequired
  *   flag mirrored from `TableInfo.getRequirePartitionFilter` by `GoogleTypeHelper.toTableOptions`
  * @param partitionExpiration
  *   optional partition expiration; `PartitionTypeHelper.timePartitioned` passes it to
  *   `TimePartitioning.Builder.setExpirationMs` in milliseconds, and `None` is passed as `null`
  */
case class TableOptions(
partitionFilterRequired: Boolean
partitionFilterRequired: Boolean,
partitionExpiration: Option[FiniteDuration]
)

/** Companion holding the default [[TableOptions]] value. */
object TableOptions {
/** Options with every setting switched off: no required partition filter and no partition expiration. */
val Empty: TableOptions = TableOptions(
partitionFilterRequired = false
partitionFilterRequired = false,
partitionExpiration = None
)
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@
package no.nrk.bigquery
package internal

import com.google.cloud.bigquery.{DatasetId, TableId, TableInfo}
import com.google.cloud.bigquery.{DatasetId, StandardTableDefinition, TableDefinition, TableId, TableInfo}
import no.nrk.bigquery.TableLabels.Empty

import java.util.concurrent.TimeUnit
import scala.concurrent.duration.FiniteDuration
import scala.jdk.CollectionConverters._

object GoogleTypeHelper {
Expand All @@ -19,7 +21,14 @@ object GoogleTypeHelper {
TableId.of(tableId.dataset.project.value, tableId.dataset.id, tableId.tableName)

/** Derives [[TableOptions]] from a Google `TableInfo`.
  *
  * `partitionFilterRequired` is true only when `getRequirePartitionFilter` is non-null and true.
  * The partition expiration is read from the time partitioning of a `StandardTableDefinition`
  * (`getExpirationMs`, interpreted as milliseconds). A missing time partitioning, a null
  * expiration, or any other definition type yields `None`.
  *
  * @param tableInfo
  *   the table description to read the options from
  * @return
  *   the options reflected by `tableInfo`
  */
def toTableOptions(tableInfo: TableInfo): TableOptions = TableOptions(
partitionFilterRequired = Option(tableInfo.getRequirePartitionFilter).exists(_.booleanValue())
// Option(...) turns a null `getRequirePartitionFilter` into None, so `exists` yields false.
partitionFilterRequired = Option(tableInfo.getRequirePartitionFilter).exists(_.booleanValue()),
// NOTE(review): passed positionally as the second constructor argument, i.e. `partitionExpiration`.
tableInfo.getDefinition[TableDefinition] match {
case definition: StandardTableDefinition =>
// Both the time partitioning and its expiration may be null, hence the two Option wrappers.
Option(definition.getTimePartitioning)
.flatMap(tp => Option(tp.getExpirationMs))
.map(expires => FiniteDuration(expires, TimeUnit.MILLISECONDS))
case _ => None
}
)

def unsafeTableIdFromGoogle(dataset: BQDataset, tableId: TableId): BQTableId = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,13 @@ import com.google.cloud.bigquery.{
object PartitionTypeHelper {
// Accepts any number of values and returns Unit: it builds the tuple `(a, ())` and selects its second element.
// NOTE(review): presumably exists only to mark otherwise-unused values as used — confirm against callers.
private def assertIsUsed(a: Any*): Unit = (a, ())._2

def timePartitioned(bqtype: BQPartitionType[Any]): Option[TimePartitioning] =
def timePartitioned(bqtype: BQPartitionType[Any], tableOptions: TableOptions): Option[TimePartitioning] =
bqtype match {
case BQPartitionType.DatePartitioned(field) =>
Some(
TimePartitioning
.newBuilder(TimePartitioning.Type.DAY)
.setExpirationMs(tableOptions.partitionExpiration.map(exp => Long.box(exp.toMillis)).orNull)
.setField(field.value)
.build()
)
Expand All @@ -31,6 +32,7 @@ object PartitionTypeHelper {
Some(
TimePartitioning
.newBuilder(TimePartitioning.Type.MONTH)
.setExpirationMs(tableOptions.partitionExpiration.map(exp => Long.box(exp.toMillis)).orNull)
.setField(field.value)
.build()
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ object TableUpdateOperation {
StandardTableDefinition.newBuilder
.setSchema(SchemaHelper.toSchema(schema))
.setTimePartitioning(
PartitionTypeHelper.timePartitioned(partitioning).orNull
PartitionTypeHelper.timePartitioned(partitioning, tableOptions).orNull
)
.setRangePartitioning(
PartitionTypeHelper.rangepartitioned(partitioning).orNull
Expand Down Expand Up @@ -196,7 +196,7 @@ object TableUpdateOperation {
tableId.underlying,
StandardTableDefinition.newBuilder
.setSchema(SchemaHelper.toSchema(schema))
.setTimePartitioning(PartitionTypeHelper.timePartitioned(partitionType).orNull)
.setTimePartitioning(PartitionTypeHelper.timePartitioned(partitionType, tableOptions).orNull)
.setRangePartitioning(PartitionTypeHelper.rangepartitioned(partitionType).orNull)
.setClustering(clusteringFrom(clustering).orNull)
.build,
Expand Down Expand Up @@ -225,7 +225,7 @@ object TableUpdateOperation {
.setEnableRefresh(enableRefresh)
// .setSchema(schema.toSchema) // not possible for now
.setRefreshIntervalMs(refreshIntervalMs)
.setTimePartitioning(PartitionTypeHelper.timePartitioned(partitionType).orNull)
.setTimePartitioning(PartitionTypeHelper.timePartitioned(partitionType, tableOptions).orNull)
.setRangePartitioning(PartitionTypeHelper.rangepartitioned(partitionType).orNull)
.build(),
tableOptions,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import munit.FunSuite
import no.nrk.bigquery.syntax._
import no.nrk.bigquery._
import GoogleTypeHelper._
import scala.concurrent.duration._

class TableUpdateOperationTest extends FunSuite {

Expand Down Expand Up @@ -400,7 +401,7 @@ class TableUpdateOperationTest extends FunSuite {
description = None,
clustering = Nil,
TableLabels.Empty,
tableOptions = TableOptions(partitionFilterRequired = filter)
tableOptions = TableOptions.Empty.copy(partitionFilterRequired = filter)
)
def remote(filter: Boolean) = Some(
TableInfo
Expand Down Expand Up @@ -437,4 +438,58 @@ class TableUpdateOperationTest extends FunSuite {

}

// Verifies that a difference in partition expiration between the local table definition and the
// remote TableInfo yields an UpdateTable operation, and that identical values yield a Noop.
test("updating partitionExpiration should result in update") {
// Local table definition: date-partitioned on `date`, carrying the given partition expiration.
def testTable(expiration: Option[FiniteDuration]) = BQTableDef.Table(
tableId,
BQSchema.of(a),
BQPartitionType.DatePartitioned(Ident("date")),
description = None,
clustering = Nil,
TableLabels.Empty,
tableOptions = TableOptions.Empty.copy(partitionExpiration = expiration)
)

// Remote table as a Google TableInfo: DAY time partitioning on `date`, with the expiration set in
// milliseconds, or null when `expiration` is None.
def remote(expiration: Option[FiniteDuration]) = Some(
TableInfo
.newBuilder(
tableId.underlying,
StandardTableDefinition.newBuilder
.setSchema(SchemaHelper.toSchema(BQSchema.of(a)))
.setTimePartitioning(
TimePartitioning
.newBuilder(Type.DAY)
.setField("date")
.setExpirationMs(expiration.map(exp => Long.box(exp.toMillis)).orNull)
.build()
)
.build()
)
.build()
)

// Expiration set locally but absent remotely: expect an update carrying 1 day in milliseconds.
TableUpdateOperation.from(testTable(Some(1.day)), remote(None)) match {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe add one more test case where we remove expiration from remote?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point. I amended it to the original commit.

case UpdateOperation.UpdateTable(_, _, table) =>
assert(table.getDefinition[StandardTableDefinition].getTimePartitioning.getExpirationMs == 1.day.toMillis)
case other => fail(other.toString)
}
// Expiration absent locally but present remotely: expect an update whose expiration is null.
TableUpdateOperation.from(testTable(None), remote(Some(1.day))) match {
case UpdateOperation.UpdateTable(_, _, table) =>
assert(Option(table.getDefinition[StandardTableDefinition].getTimePartitioning.getExpirationMs).isEmpty)
case other => fail(other.toString)
}
// Expiration differs between local and remote: expect an update carrying the local value.
TableUpdateOperation.from(testTable(Some(2.day)), remote(Some(1.day))) match {
case UpdateOperation.UpdateTable(_, _, table) =>
assert(table.getDefinition[StandardTableDefinition].getTimePartitioning.getExpirationMs == 2.day.toMillis)
case other => fail(other.toString)
}
// Same expiration on both sides: expect no operation.
TableUpdateOperation.from(testTable(Some(2.day)), remote(Some(2.day))) match {
case UpdateOperation.Noop(_) =>
case other => fail(other.toString)
}
// No expiration on either side: expect no operation.
TableUpdateOperation.from(testTable(None), remote(None)) match {
case UpdateOperation.Noop(_) =>
case other => fail(other.toString)
}
}

}