Skip to content

Commit

Permalink
[apache#1455] improvement(all) Refactor catalog property mapping to e…
Browse files Browse the repository at this point in the history
…ngine property (apache#1458)

### What changes were proposed in this pull request?

Use the new `PropertyConverter` in `catalog-common` to replace the old
one.

### Why are the changes needed?

We must provide a uniform property mapping system between Gravitino and
the query engine.
Fix: apache#1455 

### Does this PR introduce _any_ user-facing change?

N/A.

### How was this patch tested?

Existing test can cover this change.
  • Loading branch information
yuqi1129 authored and mchades committed Jan 24, 2024
1 parent d602ab3 commit 9db7245
Show file tree
Hide file tree
Showing 20 changed files with 241 additions and 61 deletions.
1 change: 1 addition & 0 deletions catalogs/bundled-catalog/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ dependencies {
implementation(project(":catalogs:catalog-lakehouse-iceberg"))
implementation(project(":catalogs:catalog-jdbc-mysql"))
implementation(project(":catalogs:catalog-jdbc-postgresql"))
implementation(libs.slf4j.api)
}

tasks.withType<ShadowJar>(ShadowJar::class.java) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
/*
* Copyright 2024 Datastrato Pvt Ltd.
* This software is licensed under the Apache License version 2.
*/

package com.datastrato.catalog.property;

import com.datastrato.gravitino.catalog.PropertyEntry;
import com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata;
import java.util.HashMap;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/** Transforming between gravitino schema/table/column property and engine property. */
public abstract class PropertyConverter {

private static final Logger LOG = LoggerFactory.getLogger(PropertyConverter.class);
/**
* Mapping that maps engine properties to Gravitino properties. It will return a map that holds
* the mapping between engine and gravitino properties.
*
* @return a map that holds the mapping from engine to Gravitino properties.
*/
public abstract Map<String, String> engineToGravitinoMapping();

/**
* Get the property metadata for the catalog. for more please see {@link
* HiveTablePropertiesMetadata#propertyEntries()} or {@link
* com.datastrato.gravitino.catalog.lakehouse.iceberg.IcebergTablePropertiesMetadata#propertyEntries()}
*/
public abstract Map<String, PropertyEntry<?>> gravitinoPropertyMeta();

Map<String, String> revsereMap(Map<String, String> map) {
Map<String, String> res = new HashMap<>();
for (Map.Entry<String, String> entry : map.entrySet()) {
res.put(entry.getValue(), entry.getKey());
}

return res;
}

/** Convert Gravitino properties to engine properties. */
public Map<String, String> gravitinoToEngineProperties(Map<String, String> gravitinoProperties) {
Map<String, String> engineProperties = new HashMap<>();
Map<String, String> gravitinoToEngineMapping = revsereMap(engineToGravitinoMapping());
for (Map.Entry<String, String> entry : gravitinoProperties.entrySet()) {
String engineKey = gravitinoToEngineMapping.get(entry.getKey());
if (engineKey != null) {
engineProperties.put(engineKey, entry.getValue());
} else {
LOG.info("Property {} is not supported by engine", entry.getKey());
}
}
return engineProperties;
}

/**
* Convert engine properties to Gravitino properties.
*
* <p>If different engine has different behavior about error handling, you can override this
* method.
*/
public Map<String, Object> engineToGravitinoProperties(Map<String, Object> engineProperties) {
Map<String, Object> gravitinoProperties = new HashMap<>();
Map<String, String> engineToGravitinoMapping = engineToGravitinoMapping();

Map<String, PropertyEntry<?>> propertyEntryMap = gravitinoPropertyMeta();
for (Map.Entry<String, Object> entry : engineProperties.entrySet()) {
String gravitinoKey = engineToGravitinoMapping.get(entry.getKey());
if (gravitinoKey != null) {
PropertyEntry<?> propertyEntry = propertyEntryMap.get(gravitinoKey);
if (propertyEntry != null) {
// Check value is valid.
propertyEntry.decode(entry.getValue().toString());
}
gravitinoProperties.put(gravitinoKey, entry.getValue());
} else {
LOG.info("Property {} is not supported by Gravitino", entry.getKey());
}
}

// Check the required properties.
for (Map.Entry<String, PropertyEntry<?>> propertyEntry : propertyEntryMap.entrySet()) {
if (propertyEntry.getValue().isRequired()
&& !gravitinoProperties.containsKey(propertyEntry.getKey())) {
throw new IllegalArgumentException(
"Property " + propertyEntry.getKey() + " is required, you should ");
}
}

return gravitinoProperties;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,14 @@ void testHiveTableCreatedByTrino() {
Assertions.assertEquals(
"hdfs://localhost:9000/user/hive/warehouse/hive_schema.db/hive_table",
table.properties().get("location"));
Assertions.assertEquals("MANAGED_TABLE", table.properties().get("table-type"));
Assertions.assertEquals(
"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", table.properties().get("serde-lib"));
Assertions.assertEquals(
"org.apache.hadoop.mapred.TextInputFormat", table.properties().get("input-format"));
Assertions.assertEquals(
"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
table.properties().get("output-format"));
}

@Test
Expand Down Expand Up @@ -728,6 +736,9 @@ void testHiveTableCreatedByGravitino() throws InterruptedException {
Assertions.assertTrue(
data.contains(
"location = 'hdfs://localhost:9000/user/hive/warehouse/hive_schema.db/hive_table'"));
Assertions.assertTrue(
data.contains("input_format = 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'"));
Assertions.assertTrue(data.contains("serde_lib = 'org.apache.hadoop.hive.ql.io.orc.OrcSerde'"));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,15 @@

package com.datastrato.gravitino.trino.connector.catalog;

import com.datastrato.gravitino.catalog.PropertyEntry;
import com.datastrato.gravitino.shaded.org.apache.commons.collections4.bidimap.TreeBidiMap;
import java.util.HashMap;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/** Transforming between gravitino schema/table/column property and Trino property. */
@Deprecated
public abstract class PropertyConverter {

private static final Logger LOG = LoggerFactory.getLogger(PropertyConverter.class);
Expand All @@ -22,12 +24,12 @@ public abstract class PropertyConverter {
*
* @return a map that holds the mapping from Trino to Gravitino properties.
*/
public abstract TreeBidiMap<String, String> trinoPropertyKeyToGravitino();
public abstract TreeBidiMap<String, String> engineToGravitino();

/** Convert Trino properties to Gravitino properties. */
public Map<String, String> toTrinoProperties(Map<String, String> properties) {
public Map<String, String> fromGravitinoProperties(Map<String, String> properties) {
Map<String, String> trinoProperties = new HashMap<>();
Map<String, String> gravitinoToTrinoMapping = trinoPropertyKeyToGravitino().inverseBidiMap();
Map<String, String> gravitinoToTrinoMapping = engineToGravitino().inverseBidiMap();
for (Map.Entry<String, String> entry : properties.entrySet()) {
String trinoKey = gravitinoToTrinoMapping.get(entry.getKey());
if (trinoKey != null) {
Expand All @@ -42,7 +44,7 @@ public Map<String, String> toTrinoProperties(Map<String, String> properties) {
/** Convert Gravitino properties to Trino properties. */
public Map<String, Object> toGravitinoProperties(Map<String, Object> properties) {
Map<String, Object> gravitinoProperties = new HashMap<>();
Map<String, String> trinoToGravitinoMapping = trinoPropertyKeyToGravitino();
Map<String, String> trinoToGravitinoMapping = engineToGravitino();
for (Map.Entry<String, Object> entry : properties.entrySet()) {
String gravitinoKey = trinoToGravitinoMapping.get(entry.getKey());
if (gravitinoKey != null) {
Expand All @@ -53,4 +55,6 @@ public Map<String, Object> toGravitinoProperties(Map<String, Object> properties)
}
return gravitinoProperties;
}

public abstract Map<String, PropertyEntry<?>> gravitinoPropertyMeta();
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@

package com.datastrato.gravitino.trino.connector.catalog.hive;

import com.datastrato.catalog.property.PropertyConverter;
import com.datastrato.gravitino.catalog.PropertyEntry;
import com.datastrato.gravitino.shaded.org.apache.commons.collections4.bidimap.TreeBidiMap;
import com.datastrato.gravitino.trino.connector.catalog.PropertyConverter;
import com.google.common.collect.ImmutableMap;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand All @@ -23,7 +25,7 @@ public class HiveCatalogPropertyConverter extends PropertyConverter {
.put("hive.storage-format", "hive.storage-format")
.put("hive.compression-codec", "hive.compression-codec")
.put("hive.config.resources", "hive.config.resources")
.put("hive.recursive-directories", "hive.ignore-absent-partitions")
.put("hive.recursive-directories", "hive.recursive-directories")
.put("hive.ignore-absent-partitions", "hive.ignore-absent-partitions")
.put("hive.force-local-scheduling", "hive.force-local-scheduling")
.put("hive.respect-table-format", "hive.respect-table-format")
Expand All @@ -41,7 +43,12 @@ public class HiveCatalogPropertyConverter extends PropertyConverter {
.build());

@Override
public TreeBidiMap<String, String> trinoPropertyKeyToGravitino() {
public TreeBidiMap<String, String> engineToGravitinoMapping() {
return TRINO_KEY_TO_GRAVITINO_KEY;
}

@Override
public Map<String, PropertyEntry<?>> gravitinoPropertyMeta() {
return ImmutableMap.of();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
*/
package com.datastrato.gravitino.trino.connector.catalog.hive;

import com.datastrato.catalog.property.PropertyConverter;
import com.datastrato.gravitino.trino.connector.catalog.CatalogConnectorAdapter;
import com.datastrato.gravitino.trino.connector.catalog.CatalogConnectorMetadataAdapter;
import com.datastrato.gravitino.trino.connector.catalog.HasPropertyMeta;
import com.datastrato.gravitino.trino.connector.catalog.PropertyConverter;
import com.datastrato.gravitino.trino.connector.metadata.GravitinoCatalog;
import com.google.common.collect.Maps;
import io.trino.spi.session.PropertyMetadata;
Expand Down Expand Up @@ -38,7 +38,8 @@ public Map<String, Object> buildInternalConnectorConfig(GravitinoCatalog catalog

Map<String, Object> properties = new HashMap<>();
properties.put("hive.metastore.uri", catalog.getRequiredProperty("metastore.uris"));
Map<String, String> trinoProperty = catalogConverter.toTrinoProperties(catalog.getProperties());
Map<String, String> trinoProperty =
catalogConverter.gravitinoToEngineProperties(catalog.getProperties());

// Trino only supports properties that define in catalogPropertyMeta, the name of entries in
// catalogPropertyMeta is in the format of "catalogName_propertyName", so we need to replace
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
*/
package com.datastrato.gravitino.trino.connector.catalog.hive;

import com.datastrato.catalog.property.PropertyConverter;
import com.datastrato.gravitino.trino.connector.catalog.CatalogConnectorMetadataAdapter;
import com.datastrato.gravitino.trino.connector.catalog.PropertyConverter;
import io.trino.spi.session.PropertyMetadata;
import java.util.List;
import java.util.Map;
Expand All @@ -27,25 +27,25 @@ public HiveMetadataAdapter(

@Override
public Map<String, Object> toTrinoTableProperties(Map<String, String> properties) {
Map<String, String> objectMap = tableConverter.toTrinoProperties(properties);
Map<String, String> objectMap = tableConverter.gravitinoToEngineProperties(properties);
return super.toTrinoTableProperties(objectMap);
}

@Override
public Map<String, Object> toTrinoSchemaProperties(Map<String, String> properties) {
Map<String, String> objectMap = schemaConverter.toTrinoProperties(properties);
Map<String, String> objectMap = schemaConverter.gravitinoToEngineProperties(properties);
return super.toTrinoSchemaProperties(objectMap);
}

@Override
public Map<String, String> toGravitinoTableProperties(Map<String, Object> properties) {
Map<String, Object> stringMap = tableConverter.toGravitinoProperties(properties);
Map<String, Object> stringMap = tableConverter.engineToGravitinoProperties(properties);
return super.toGravitinoTableProperties(stringMap);
}

@Override
public Map<String, String> toGravitinoSchemaProperties(Map<String, Object> properties) {
Map<String, Object> stringMap = schemaConverter.toGravitinoProperties(properties);
Map<String, Object> stringMap = schemaConverter.engineToGravitinoProperties(properties);
return super.toGravitinoSchemaProperties(stringMap);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,33 @@
*/
package com.datastrato.gravitino.trino.connector.catalog.hive;

import com.datastrato.catalog.property.PropertyConverter;
import com.datastrato.gravitino.catalog.BasePropertiesMetadata;
import com.datastrato.gravitino.catalog.PropertyEntry;
import com.datastrato.gravitino.catalog.hive.HiveSchemaPropertiesMetadata;
import com.datastrato.gravitino.shaded.org.apache.commons.collections4.bidimap.TreeBidiMap;
import com.datastrato.gravitino.trino.connector.catalog.PropertyConverter;
import com.google.common.collect.ImmutableMap;
import java.util.Map;

public class HiveSchemaPropertyConverter extends PropertyConverter {
private final BasePropertiesMetadata hiveSchemaPropertiesMetadata =
new HiveSchemaPropertiesMetadata();

// Trino property key does not allow upper case character and '-', so we need to map it to
// Gravitino
private static final TreeBidiMap<String, String> TRINO_KEY_TO_GRAVITINO_KEY =
new TreeBidiMap<>(
new ImmutableMap.Builder<String, String>().put("location", "location").build());
new ImmutableMap.Builder<String, String>()
.put("location", HiveSchemaPropertiesMetadata.LOCATION)
.build());

@Override
public TreeBidiMap<String, String> trinoPropertyKeyToGravitino() {
public TreeBidiMap<String, String> engineToGravitinoMapping() {
return TRINO_KEY_TO_GRAVITINO_KEY;
}

@Override
public Map<String, PropertyEntry<?>> gravitinoPropertyMeta() {
return hiveSchemaPropertiesMetadata.propertyEntries();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,33 +4,43 @@
*/
package com.datastrato.gravitino.trino.connector.catalog.hive;

import com.datastrato.catalog.property.PropertyConverter;
import com.datastrato.gravitino.catalog.BasePropertiesMetadata;
import com.datastrato.gravitino.catalog.PropertyEntry;
import com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata;
import com.datastrato.gravitino.shaded.org.apache.commons.collections4.bidimap.TreeBidiMap;
import com.datastrato.gravitino.trino.connector.catalog.PropertyConverter;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableMap;
import java.util.Map;

public class HiveTablePropertyConverter extends PropertyConverter {

private final BasePropertiesMetadata hiveTablePropertiesMetadata =
new HiveTablePropertiesMetadata();
// Trino property key does not allow upper case character and '-', so we need to map it to
// Gravitino
@VisibleForTesting
static final TreeBidiMap<String, String> TRINO_KEY_TO_GRAVITINO_KEY =
new TreeBidiMap<>(
new ImmutableMap.Builder<String, String>()
.put("format", "format")
.put("total_size", "totalSize")
.put("num_files", "numFiles")
.put("external", "external")
.put("location", "location")
.put("table_type", "table-type")
.put("input_format", "input-format")
.put("output_format", "output-format")
.put("serde_lib", "serde-lib")
.put("serde_name", "serde-name")
.put("format", HiveTablePropertiesMetadata.FORMAT)
.put("total_size", HiveTablePropertiesMetadata.TOTAL_SIZE)
.put("num_files", HiveTablePropertiesMetadata.NUM_FILES)
.put("external", HiveTablePropertiesMetadata.EXTERNAL)
.put("location", HiveTablePropertiesMetadata.LOCATION)
.put("table_type", HiveTablePropertiesMetadata.TABLE_TYPE)
.put("input_format", HiveTablePropertiesMetadata.INPUT_FORMAT)
.put("output_format", HiveTablePropertiesMetadata.OUTPUT_FORMAT)
.put("serde_lib", HiveTablePropertiesMetadata.SERDE_LIB)
.put("serde_name", HiveTablePropertiesMetadata.SERDE_NAME)
.build());

@Override
public TreeBidiMap<String, String> trinoPropertyKeyToGravitino() {
public TreeBidiMap<String, String> engineToGravitinoMapping() {
return TRINO_KEY_TO_GRAVITINO_KEY;
}

@Override
public Map<String, PropertyEntry<?>> gravitinoPropertyMeta() {
return hiveTablePropertiesMetadata.propertyEntries();
}
}
Loading

0 comments on commit 9db7245

Please sign in to comment.