Skip to content

Commit

Permalink
fix comment
Browse files Browse the repository at this point in the history
  • Loading branch information
FANNG1 committed Mar 27, 2024
1 parent 21e092d commit 6bd0b1b
Show file tree
Hide file tree
Showing 6 changed files with 107 additions and 67 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,10 @@ public enum TableType {
VIRTUAL_INDEX,
}

enum StorageFormat {
// In embedded test mode, HiveTablePropertiesMetadata will be loaded by the Spark connector, which
// uses a different classloader than the Hive catalog. If StorageFormat were package scope, it
// could not be accessed by Hive catalog related classes in the same package, so we make it public.
public enum StorageFormat {
SEQUENCEFILE(
SEQUENCEFILE_INPUT_FORMAT_CLASS, SEQUENCEFILE_OUTPUT_FORMAT_CLASS, LAZY_SIMPLE_SERDE_CLASS),
TEXTFILE(TEXT_INPUT_FORMAT_CLASS, IGNORE_KEY_OUTPUT_FORMAT_CLASS, LAZY_SIMPLE_SERDE_CLASS),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import com.datastrato.gravitino.integration.test.util.spark.SparkTableInfo;
import com.datastrato.gravitino.integration.test.util.spark.SparkTableInfo.SparkColumnInfo;
import com.datastrato.gravitino.integration.test.util.spark.SparkTableInfoChecker;
import com.datastrato.gravitino.spark.connector.hive.HivePropertyConstants;
import com.datastrato.gravitino.spark.connector.hive.HivePropertiesConstants;
import com.google.common.collect.ImmutableMap;
import java.util.ArrayList;
import java.util.Arrays;
Expand Down Expand Up @@ -166,12 +166,12 @@ void testHiveDefaultFormat() {
.withName(tableName)
.withTableProperties(
ImmutableMap.of(
HivePropertyConstants.SPARK_HIVE_INPUT_FORMAT,
HivePropertyConstants.TEXT_INPUT_FORMAT_CLASS,
HivePropertyConstants.SPARK_HIVE_OUTPUT_FORMAT,
HivePropertyConstants.IGNORE_KEY_OUTPUT_FORMAT_CLASS,
HivePropertyConstants.SPARK_HIVE_SERDE_LIB,
HivePropertyConstants.LAZY_SIMPLE_SERDE_CLASS));
HivePropertiesConstants.SPARK_HIVE_INPUT_FORMAT,
HivePropertiesConstants.TEXT_INPUT_FORMAT_CLASS,
HivePropertiesConstants.SPARK_HIVE_OUTPUT_FORMAT,
HivePropertiesConstants.IGNORE_KEY_OUTPUT_FORMAT_CLASS,
HivePropertiesConstants.SPARK_HIVE_SERDE_LIB,
HivePropertiesConstants.LAZY_SIMPLE_SERDE_CLASS));
checker.check(tableInfo);
checkTableReadWrite(tableInfo);
}
Expand All @@ -190,12 +190,12 @@ void testHiveFormatWithStoredAs() {
.withName(tableName)
.withTableProperties(
ImmutableMap.of(
HivePropertyConstants.SPARK_HIVE_INPUT_FORMAT,
HivePropertyConstants.PARQUET_INPUT_FORMAT_CLASS,
HivePropertyConstants.SPARK_HIVE_OUTPUT_FORMAT,
HivePropertyConstants.PARQUET_OUTPUT_FORMAT_CLASS,
HivePropertyConstants.SPARK_HIVE_SERDE_LIB,
HivePropertyConstants.PARQUET_SERDE_CLASS));
HivePropertiesConstants.SPARK_HIVE_INPUT_FORMAT,
HivePropertiesConstants.PARQUET_INPUT_FORMAT_CLASS,
HivePropertiesConstants.SPARK_HIVE_OUTPUT_FORMAT,
HivePropertiesConstants.PARQUET_OUTPUT_FORMAT_CLASS,
HivePropertiesConstants.SPARK_HIVE_SERDE_LIB,
HivePropertiesConstants.PARQUET_SERDE_CLASS));
checker.check(tableInfo);
checkTableReadWrite(tableInfo);
checkParquetFile(tableInfo);
Expand All @@ -215,12 +215,12 @@ void testHiveFormatWithUsing() {
.withName(tableName)
.withTableProperties(
ImmutableMap.of(
HivePropertyConstants.SPARK_HIVE_INPUT_FORMAT,
HivePropertyConstants.PARQUET_INPUT_FORMAT_CLASS,
HivePropertyConstants.SPARK_HIVE_OUTPUT_FORMAT,
HivePropertyConstants.PARQUET_OUTPUT_FORMAT_CLASS,
HivePropertyConstants.SPARK_HIVE_SERDE_LIB,
HivePropertyConstants.PARQUET_SERDE_CLASS));
HivePropertiesConstants.SPARK_HIVE_INPUT_FORMAT,
HivePropertiesConstants.PARQUET_INPUT_FORMAT_CLASS,
HivePropertiesConstants.SPARK_HIVE_OUTPUT_FORMAT,
HivePropertiesConstants.PARQUET_OUTPUT_FORMAT_CLASS,
HivePropertiesConstants.SPARK_HIVE_SERDE_LIB,
HivePropertiesConstants.PARQUET_SERDE_CLASS));
checker.check(tableInfo);
checkTableReadWrite(tableInfo);
checkParquetFile(tableInfo);
Expand All @@ -235,9 +235,9 @@ void testHivePropertiesWithSerdeRowFormat() {
String.format(
"%s ROW FORMAT SERDE '%s' WITH SERDEPROPERTIES ('serialization.format'='@', 'field.delim' = ',') STORED AS INPUTFORMAT '%s' OUTPUTFORMAT '%s'",
createTableSql,
HivePropertyConstants.PARQUET_SERDE_CLASS,
HivePropertyConstants.PARQUET_INPUT_FORMAT_CLASS,
HivePropertyConstants.PARQUET_OUTPUT_FORMAT_CLASS);
HivePropertiesConstants.PARQUET_SERDE_CLASS,
HivePropertiesConstants.PARQUET_INPUT_FORMAT_CLASS,
HivePropertiesConstants.PARQUET_OUTPUT_FORMAT_CLASS);
sql(createTableSql);
SparkTableInfo tableInfo = getTableInfo(tableName);

Expand All @@ -250,12 +250,12 @@ void testHivePropertiesWithSerdeRowFormat() {
"@",
TableCatalog.OPTION_PREFIX + "field.delim",
",",
HivePropertyConstants.SPARK_HIVE_INPUT_FORMAT,
HivePropertyConstants.PARQUET_INPUT_FORMAT_CLASS,
HivePropertyConstants.SPARK_HIVE_OUTPUT_FORMAT,
HivePropertyConstants.PARQUET_OUTPUT_FORMAT_CLASS,
HivePropertyConstants.SPARK_HIVE_SERDE_LIB,
HivePropertyConstants.PARQUET_SERDE_CLASS));
HivePropertiesConstants.SPARK_HIVE_INPUT_FORMAT,
HivePropertiesConstants.PARQUET_INPUT_FORMAT_CLASS,
HivePropertiesConstants.SPARK_HIVE_OUTPUT_FORMAT,
HivePropertiesConstants.PARQUET_OUTPUT_FORMAT_CLASS,
HivePropertiesConstants.SPARK_HIVE_SERDE_LIB,
HivePropertiesConstants.PARQUET_SERDE_CLASS));
checker.check(tableInfo);
checkTableReadWrite(tableInfo);
checkParquetFile(tableInfo);
Expand Down Expand Up @@ -297,12 +297,12 @@ void testHivePropertiesWithDelimitedRowFormat() {
",",
TableCatalog.OPTION_PREFIX + "colelction.delim",
"@",
HivePropertyConstants.SPARK_HIVE_INPUT_FORMAT,
HivePropertyConstants.TEXT_INPUT_FORMAT_CLASS,
HivePropertyConstants.SPARK_HIVE_OUTPUT_FORMAT,
HivePropertyConstants.IGNORE_KEY_OUTPUT_FORMAT_CLASS,
HivePropertyConstants.SPARK_HIVE_SERDE_LIB,
HivePropertyConstants.LAZY_SIMPLE_SERDE_CLASS));
HivePropertiesConstants.SPARK_HIVE_INPUT_FORMAT,
HivePropertiesConstants.TEXT_INPUT_FORMAT_CLASS,
HivePropertiesConstants.SPARK_HIVE_OUTPUT_FORMAT,
HivePropertiesConstants.IGNORE_KEY_OUTPUT_FORMAT_CLASS,
HivePropertiesConstants.SPARK_HIVE_SERDE_LIB,
HivePropertiesConstants.LAZY_SIMPLE_SERDE_CLASS));
checker.check(tableInfo);
checkTableReadWrite(tableInfo);

Expand Down
1 change: 1 addition & 0 deletions spark-connector/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ dependencies {
implementation("org.apache.spark:spark-catalyst_$scalaVersion:$sparkVersion")
implementation("org.apache.spark:spark-sql_$scalaVersion:$sparkVersion")
implementation("org.scala-lang.modules:scala-java8-compat_$scalaVersion:$scalaJava8CompatVersion")
implementation(project(mapOf("path" to ":catalogs:catalog-hive")))

annotationProcessor(libs.lombok)
compileOnly(libs.lombok)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
package com.datastrato.gravitino.spark.connector.hive;

import com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata;
import com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.StorageFormat;
import com.google.common.annotations.VisibleForTesting;

public class HivePropertyConstants {
public class HivePropertiesConstants {
public static final String GRAVITINO_HIVE_FORMAT = HiveTablePropertiesMetadata.FORMAT;
public static final String GRAVITINO_HIVE_INPUT_FORMAT = HiveTablePropertiesMetadata.INPUT_FORMAT;
public static final String GRAVITINO_HIVE_OUTPUT_FORMAT =
Expand All @@ -16,23 +18,43 @@ public class HivePropertyConstants {
public static final String GRAVITINO_HIVE_SERDE_PARAMETER_PREFIX =
HiveTablePropertiesMetadata.SERDE_PARAMETER_PREFIX;

public static final String GRAVITINO_HIVE_FORMAT_PARQUET = StorageFormat.PARQUET.toString();
public static final String GRAVITINO_HIVE_FORMAT_SEQUENCEFILE =
StorageFormat.SEQUENCEFILE.toString();
public static final String GRAVITINO_HIVE_FORMAT_ORC = StorageFormat.ORC.toString();
public static final String GRAVITINO_HIVE_FORMAT_RCFILE = StorageFormat.RCFILE.toString();
public static final String GRAVITINO_HIVE_FORMAT_TEXTFILE = StorageFormat.TEXTFILE.toString();
public static final String GRAVITINO_HIVE_FORMAT_AVRO = StorageFormat.AVRO.toString();
public static final String GRAVITINO_HIVE_FORMAT_JSON = StorageFormat.JSON.toString();
public static final String GRAVITINO_HIVE_FORMAT_CSV = StorageFormat.CSV.toString();

public static final String SPARK_HIVE_STORED_AS = "hive.stored-as";
public static final String SPARK_HIVE_INPUT_FORMAT = "input-format";
public static final String SPARK_HIVE_OUTPUT_FORMAT = "output-format";
public static final String SPARK_HIVE_SERDE_LIB = "serde-lib";

@VisibleForTesting
public static final String TEXT_INPUT_FORMAT_CLASS =
HiveTablePropertiesMetadata.TEXT_INPUT_FORMAT_CLASS;

@VisibleForTesting
public static final String IGNORE_KEY_OUTPUT_FORMAT_CLASS =
HiveTablePropertiesMetadata.IGNORE_KEY_OUTPUT_FORMAT_CLASS;

@VisibleForTesting
public static final String LAZY_SIMPLE_SERDE_CLASS =
HiveTablePropertiesMetadata.LAZY_SIMPLE_SERDE_CLASS;

@VisibleForTesting
public static final String PARQUET_INPUT_FORMAT_CLASS =
HiveTablePropertiesMetadata.PARQUET_INPUT_FORMAT_CLASS;

@VisibleForTesting
public static final String PARQUET_OUTPUT_FORMAT_CLASS =
HiveTablePropertiesMetadata.PARQUET_OUTPUT_FORMAT_CLASS;

@VisibleForTesting
public static final String PARQUET_SERDE_CLASS = HiveTablePropertiesMetadata.PARQUET_SERDE_CLASS;

private HivePropertyConstants() {}
private HivePropertiesConstants() {}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,39 +21,46 @@ public class HivePropertiesConverter implements PropertiesConverter {
// Transform Spark hive file format to Gravitino hive file format
static final Map<String, String> fileFormatMap =
ImmutableMap.of(
"sequencefile", "SEQUENCEFILE",
"rcfile", "RCFILE",
"orc", "ORC",
"parquet", "PARQUET",
"textfile", "TEXTFILE",
"avro", "AVRO");
"sequencefile", HivePropertiesConstants.GRAVITINO_HIVE_FORMAT_SEQUENCEFILE,
"rcfile", HivePropertiesConstants.GRAVITINO_HIVE_FORMAT_RCFILE,
"orc", HivePropertiesConstants.GRAVITINO_HIVE_FORMAT_ORC,
"parquet", HivePropertiesConstants.GRAVITINO_HIVE_FORMAT_PARQUET,
"textfile", HivePropertiesConstants.GRAVITINO_HIVE_FORMAT_TEXTFILE,
"json", HivePropertiesConstants.GRAVITINO_HIVE_FORMAT_JSON,
"csv", HivePropertiesConstants.GRAVITINO_HIVE_FORMAT_CSV,
"avro", HivePropertiesConstants.GRAVITINO_HIVE_FORMAT_AVRO);

static final Map<String, String> sparkToGravitinoPropertyMap =
ImmutableMap.of(
"hive.output-format",
HivePropertyConstants.GRAVITINO_HIVE_OUTPUT_FORMAT,
HivePropertiesConstants.GRAVITINO_HIVE_OUTPUT_FORMAT,
"hive.input-format",
HivePropertyConstants.GRAVITINO_HIVE_INPUT_FORMAT,
HivePropertiesConstants.GRAVITINO_HIVE_INPUT_FORMAT,
"hive.serde",
HivePropertyConstants.GRAVITINO_HIVE_SERDE_LIB);
HivePropertiesConstants.GRAVITINO_HIVE_SERDE_LIB);

/**
* CREATE TABLE xxx STORED AS PARQUET will save "hive.stored.as" = "PARQUET" in property. CREATE
* TABLE xxx USING PARQUET will save "provider" = "PARQUET" in property. CREATE TABLE xxx ROW
* FORMAT SERDE xx STORED AS INPUTFORMAT xx OUTPUTFORMAT xx will save "hive.input-format",
* "hive.output-format", "hive.serde" in property. CREATE TABLE xxx ROW FORMAT DELIMITED FIELDS
* TERMINATED xx will save "option.xx" in property.
 * CREATE TABLE xxx STORED AS PARQUET will save "hive.stored-as" = "PARQUET" in property.
*
* <p>CREATE TABLE xxx USING PARQUET will save "provider" = "PARQUET" in property.
*
* <p>CREATE TABLE xxx ROW FORMAT SERDE xx STORED AS INPUTFORMAT xx OUTPUTFORMAT xx will save
* "hive.input-format", "hive.output-format", "hive.serde" in property.
*
* <p>CREATE TABLE xxx ROW FORMAT DELIMITED FIELDS TERMINATED xx will save "option.xx" in
* property.
*/
@Override
public Map<String, String> toGravitinoTableProperties(Map<String, String> properties) {
Map<String, String> gravitinoTableProperties = fromOptionProperties(properties);
String provider = gravitinoTableProperties.get(TableCatalog.PROP_PROVIDER);
String storeAs = gravitinoTableProperties.get(HivePropertyConstants.SPARK_HIVE_STORED_AS);
String storeAs = gravitinoTableProperties.get(HivePropertiesConstants.SPARK_HIVE_STORED_AS);
String fileFormat = Optional.ofNullable(storeAs).orElse(provider);
if (fileFormat != null) {
String gravitinoFormat = fileFormatMap.get(fileFormat.toLowerCase(Locale.ROOT));
if (gravitinoFormat != null) {
gravitinoTableProperties.put(HivePropertyConstants.GRAVITINO_HIVE_FORMAT, gravitinoFormat);
gravitinoTableProperties.put(
HivePropertiesConstants.GRAVITINO_HIVE_FORMAT, gravitinoFormat);
} else {
throw new NotSupportedException("Doesn't support hive file format: " + fileFormat);
}
Expand Down Expand Up @@ -82,10 +89,11 @@ static Map<String, String> toOptionProperties(Map<String, String> properties) {
Collectors.toMap(
entry -> {
String key = entry.getKey();
if (key.startsWith(HivePropertyConstants.GRAVITINO_HIVE_SERDE_PARAMETER_PREFIX)) {
if (key.startsWith(
HivePropertiesConstants.GRAVITINO_HIVE_SERDE_PARAMETER_PREFIX)) {
return TableCatalog.OPTION_PREFIX
+ key.substring(
HivePropertyConstants.GRAVITINO_HIVE_SERDE_PARAMETER_PREFIX.length());
HivePropertiesConstants.GRAVITINO_HIVE_SERDE_PARAMETER_PREFIX.length());
} else {
return key;
}
Expand All @@ -102,7 +110,7 @@ static Map<String, String> fromOptionProperties(Map<String, String> properties)
entry -> {
String key = entry.getKey();
if (key.startsWith(TableCatalog.OPTION_PREFIX)) {
return HivePropertyConstants.GRAVITINO_HIVE_SERDE_PARAMETER_PREFIX
return HivePropertiesConstants.GRAVITINO_HIVE_SERDE_PARAMETER_PREFIX
+ key.substring(TableCatalog.OPTION_PREFIX.length());
} else {
return key;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,21 +23,21 @@ void testTableFormat() {
// stored as
Map<String, String> hiveProperties =
hivePropertiesConverter.toGravitinoTableProperties(
ImmutableMap.of(HivePropertyConstants.SPARK_HIVE_STORED_AS, "PARQUET"));
ImmutableMap.of(HivePropertiesConstants.SPARK_HIVE_STORED_AS, "PARQUET"));
Assertions.assertEquals(
hiveProperties.get(HivePropertyConstants.GRAVITINO_HIVE_FORMAT), "PARQUET");
hiveProperties.get(HivePropertiesConstants.GRAVITINO_HIVE_FORMAT), "PARQUET");
Assertions.assertThrowsExactly(
NotSupportedException.class,
() ->
hivePropertiesConverter.toGravitinoTableProperties(
ImmutableMap.of(HivePropertyConstants.SPARK_HIVE_STORED_AS, "notExists")));
ImmutableMap.of(HivePropertiesConstants.SPARK_HIVE_STORED_AS, "notExists")));

// using
hiveProperties =
hivePropertiesConverter.toGravitinoTableProperties(
ImmutableMap.of(TableCatalog.PROP_PROVIDER, "PARQUET"));
Assertions.assertEquals(
hiveProperties.get(HivePropertyConstants.GRAVITINO_HIVE_FORMAT), "PARQUET");
hiveProperties.get(HivePropertiesConstants.GRAVITINO_HIVE_FORMAT), "PARQUET");
Assertions.assertThrowsExactly(
NotSupportedException.class,
() ->
Expand All @@ -51,11 +51,11 @@ void testTableFormat() {
"hive.input-format", "a", "hive.output-format", "b", "hive.serde", "c"));
Assertions.assertEquals(
ImmutableMap.of(
HivePropertyConstants.GRAVITINO_HIVE_INPUT_FORMAT,
HivePropertiesConstants.GRAVITINO_HIVE_INPUT_FORMAT,
"a",
HivePropertyConstants.GRAVITINO_HIVE_OUTPUT_FORMAT,
HivePropertiesConstants.GRAVITINO_HIVE_OUTPUT_FORMAT,
"b",
HivePropertyConstants.GRAVITINO_HIVE_SERDE_LIB,
HivePropertiesConstants.GRAVITINO_HIVE_SERDE_LIB,
"c"),
hiveProperties);

Expand All @@ -64,13 +64,16 @@ void testTableFormat() {
ImmutableMap.of(TableCatalog.OPTION_PREFIX + "a", "a", "b", "b"));
Assertions.assertEquals(
ImmutableMap.of(
HivePropertyConstants.GRAVITINO_HIVE_SERDE_PARAMETER_PREFIX + "a", "a", "b", "b"),
HivePropertiesConstants.GRAVITINO_HIVE_SERDE_PARAMETER_PREFIX + "a", "a", "b", "b"),
hiveProperties);

hiveProperties =
hivePropertiesConverter.toSparkTableProperties(
ImmutableMap.of(
HivePropertyConstants.GRAVITINO_HIVE_SERDE_PARAMETER_PREFIX + "a", "a", "b", "b"));
HivePropertiesConstants.GRAVITINO_HIVE_SERDE_PARAMETER_PREFIX + "a",
"a",
"b",
"b"));
Assertions.assertEquals(
ImmutableMap.of(TableCatalog.OPTION_PREFIX + "a", "a", "b", "b"), hiveProperties);
}
Expand All @@ -82,13 +85,16 @@ void testOptionProperties() {
ImmutableMap.of(TableCatalog.OPTION_PREFIX + "a", "1", "b", "2"));
Assertions.assertEquals(
ImmutableMap.of(
HivePropertyConstants.GRAVITINO_HIVE_SERDE_PARAMETER_PREFIX + "a", "1", "b", "2"),
HivePropertiesConstants.GRAVITINO_HIVE_SERDE_PARAMETER_PREFIX + "a", "1", "b", "2"),
properties);

properties =
HivePropertiesConverter.toOptionProperties(
ImmutableMap.of(
HivePropertyConstants.GRAVITINO_HIVE_SERDE_PARAMETER_PREFIX + "a", "1", "b", "2"));
HivePropertiesConstants.GRAVITINO_HIVE_SERDE_PARAMETER_PREFIX + "a",
"1",
"b",
"2"));
Assertions.assertEquals(
ImmutableMap.of(TableCatalog.OPTION_PREFIX + "a", "1", "b", "2"), properties);
}
Expand Down

0 comments on commit 6bd0b1b

Please sign in to comment.