Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPARK-50795][SQL] Store timestamp as long type in describe LinkedHashMap #49513

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.catalog

import java.net.URI
import java.time.{ZoneId, ZoneOffset}
import java.util.Date

import scala.collection.mutable
import scala.util.control.NonFatal
Expand All @@ -27,7 +28,7 @@ import com.fasterxml.jackson.annotation.JsonInclude.Include
import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper}
import com.fasterxml.jackson.module.scala.{ClassTagExtensions, DefaultScalaModule}
import org.apache.commons.lang3.StringUtils
import org.json4s.JsonAST.{JArray, JBool, JDouble, JInt, JNull, JObject, JString, JValue}
import org.json4s.JsonAST.{JArray, JBool, JDouble, JInt, JLong, JNull, JObject, JString, JValue}
import org.json4s.jackson.JsonMethods._

import org.apache.spark.SparkException
Expand Down Expand Up @@ -63,6 +64,7 @@ trait MetadataMapSupport {
protected def jsonToString(
jsonMap: mutable.LinkedHashMap[String, JValue]): mutable.LinkedHashMap[String, String] = {
val map = new mutable.LinkedHashMap[String, String]()
val timestampKeys = Set("Created Time", "Last Access")
jsonMap.foreach { case (key, jValue) =>
val stringValue = jValue match {
case JString(value) => value
Expand All @@ -80,20 +82,18 @@ trait MetadataMapSupport {
.mkString("[", ", ", "]")
case JInt(value) => value.toString
case JDouble(value) => value.toString
case JLong(value) =>
if (timestampKeys.contains(key)) {
new Date(value).toString
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you use Iso8601TimestampFormatter instead of the legacy class Date.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the behavior for DESCRIBE TABLE without AS JSON, see 36d23ef#diff-f2a04f920c41d18a7d387216f86405bfdc6fb09c44ebe1bb09312ba7dde55333L136

Here we just revert it back.

} else {
value.toString
}
case _ => jValue.values.toString
}
map.put(key, stringValue)
}
map
}

val timestampFormatter = new Iso8601TimestampFormatter(
pattern = "yyyy-MM-dd'T'HH:mm:ss'Z'",
zoneId = ZoneId.of("UTC"),
locale = DateFormatter.defaultLocale,
legacyFormat = LegacyDateFormats.LENIENT_SIMPLE_DATE_FORMAT,
isParsing = true
)
}


Expand Down Expand Up @@ -191,12 +191,10 @@ case class CatalogTablePartition(
map += ("Partition Parameters" -> paramsJson)
}

map += ("Created Time" -> JString(
timestampFormatter.format(DateTimeUtils.millisToMicros(createTime))))
map += ("Created Time" -> JLong(createTime))

val lastAccess = if (lastAccessTime <= 0) JString("UNKNOWN")
else JString(
timestampFormatter.format(DateTimeUtils.millisToMicros(createTime)))
else JLong(lastAccessTime)
map += ("Last Access" -> lastAccess)

stats.foreach(s => map += ("Partition Statistics" -> JString(s.simpleString)))
Expand Down Expand Up @@ -605,7 +603,7 @@ case class CatalogTable(

val lastAccess: JValue =
if (lastAccessTime <= 0) JString("UNKNOWN")
else JString(timestampFormatter.format(DateTimeUtils.millisToMicros(createTime)))
else JLong(lastAccessTime)

val viewQueryOutputColumns: JValue =
if (viewQueryColumnNames.nonEmpty) JArray(viewQueryColumnNames.map(JString).toList)
Expand All @@ -617,8 +615,7 @@ case class CatalogTable(
if (identifier.database.isDefined) map += "Database" -> JString(identifier.database.get)
map += "Table" -> JString(identifier.table)
if (Option(owner).exists(_.nonEmpty)) map += "Owner" -> JString(owner)
map += "Created Time" ->
JString(timestampFormatter.format(DateTimeUtils.millisToMicros(createTime)))
map += "Created Time" -> JLong(createTime)
if (lastAccess != JNull) map += "Last Access" -> lastAccess
map += "Created By" -> JString(s"Spark $createVersion")
map += "Type" -> JString(tableType.name)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

package org.apache.spark.sql.execution.command

import java.time.ZoneId

import scala.collection.mutable

import org.json4s._
Expand All @@ -29,7 +31,13 @@ import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType, Se
import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.util.quoteIfNeeded
import org.apache.spark.sql.catalyst.util.{
quoteIfNeeded,
DateFormatter,
DateTimeUtils,
Iso8601TimestampFormatter,
LegacyDateFormats
}
import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
import org.apache.spark.sql.connector.catalog.V1Table
import org.apache.spark.sql.errors.QueryCompilationErrors
Expand All @@ -50,6 +58,13 @@ case class DescribeRelationJsonCommand(
nullable = false,
new MetadataBuilder().putString("comment", "JSON metadata of the table").build())()
)) extends UnaryRunnableCommand {
private lazy val timestampFormatter = new Iso8601TimestampFormatter(
pattern = "yyyy-MM-dd'T'HH:mm:ss'Z'",
zoneId = ZoneId.of("UTC"),
locale = DateFormatter.defaultLocale,
legacyFormat = LegacyDateFormats.LENIENT_SIMPLE_DATE_FORMAT,
isParsing = true
)

override def run(sparkSession: SparkSession): Seq[Row] = {
val jsonMap = mutable.LinkedHashMap[String, JValue]()
Expand Down Expand Up @@ -106,11 +121,22 @@ case class DescribeRelationJsonCommand(
"outputformat" -> "output_format"
)

val timestampKeys = Set("created_time", "last_access")

val normalizedKey = key.toLowerCase().replace(" ", "_")
val renamedKey = renames.getOrElse(normalizedKey, normalizedKey)

if (!jsonMap.contains(renamedKey) && !excludedKeys.contains(renamedKey)) {
jsonMap += renamedKey -> value
val formattedValue = if (timestampKeys.contains(renamedKey)) {
value match {
case JLong(timestamp) =>
JString(timestampFormatter.format(DateTimeUtils.millisToMicros(timestamp)))
case _ => value
}
} else {
value
}
jsonMap += renamedKey -> formattedValue
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -657,6 +657,21 @@ class DescribeTableSuite extends DescribeTableSuiteBase with CommandSuiteBase {
Row("Table Properties", "[bar=baz]", ""),
Row("Location", "file:/tmp/testcat/table_name", ""),
Row("Partition Provider", "Catalog", "")))

// example date format: Mon Nov 01 12:00:00 UTC 2021
val dayOfWeek = raw"[A-Z][a-z]{2}"
val month = raw"[A-Z][a-z]{2}"
val day = raw"\s?[0-9]{1,2}"
val time = raw"[0-9]{2}:[0-9]{2}:[0-9]{2}"
val timezone = raw"[A-Z]{3,4}"
val year = raw"[0-9]{4}"

val timeRegex = raw"""$dayOfWeek $month $day $time $timezone $year""".r

val createdTimeValue = descriptionDf.filter("col_name = 'Created Time'")
.collect().head.getString(1).trim

assert(timeRegex.matches(createdTimeValue))
}
}

Expand Down
Loading