Skip to content

Commit

Permalink
[#3966] feat(core): add cloud and region property for catalog (#3967)
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?

Add the `cloud.name` and `cloud.region-code` properties for catalogs.

### Why are the changes needed?

Fix: #3966 

### Does this PR introduce _any_ user-facing change?

Yes, this introduces restrictions on the allowed values of the `cloud.name` property.

### How was this patch tested?

tests added
  • Loading branch information
mchades authored Jun 26, 2024
1 parent f958335 commit f4e43a8
Show file tree
Hide file tree
Showing 11 changed files with 89 additions and 7 deletions.
30 changes: 30 additions & 0 deletions api/src/main/java/com/datastrato/gravitino/Catalog.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,24 @@ enum Type {
UNSUPPORTED
}

/** The cloud that the catalog is running on. Used by the catalog property `cloud.name`. */
enum CloudName {
/** Amazon Web Services */
AWS,

/** Microsoft Azure */
AZURE,

/** Google Cloud Platform */
GCP,

/** Not running on cloud */
ON_PREMISE,

/** Other cloud providers */
OTHER
}

/**
* A reserved property to specify the package location of the catalog. The "package" is a string
* of path to the folder where all the catalog related dependencies is located. The dependencies
Expand All @@ -44,6 +62,18 @@ enum Type {
*/
String PROPERTY_PACKAGE = "package";

  /**
   * The property to specify the cloud that the catalog is running on. The value must be one of the
   * {@link CloudName} values (case-insensitive, e.g. {@code "aws"}); any other value is rejected.
   * This property is optional and immutable once the catalog is created.
   */
  String CLOUD_NAME = "cloud.name";

  /**
   * The property to specify the region code of the cloud that the catalog is running on. The value
   * should be the region code defined by the cloud provider (e.g. {@code "us-west-2"} on AWS). This
   * property is optional and free-form; it is not validated against {@link #CLOUD_NAME}.
   */
  String CLOUD_REGION_CODE = "cloud.region-code";

/** @return The name of the catalog. */
String name();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

package com.datastrato.gravitino.catalog.hive;

import static com.datastrato.gravitino.Catalog.CLOUD_NAME;
import static com.datastrato.gravitino.Catalog.CLOUD_REGION_CODE;
import static com.datastrato.gravitino.catalog.hive.HiveCatalogPropertiesMeta.CHECK_INTERVAL_SEC;
import static com.datastrato.gravitino.catalog.hive.HiveCatalogPropertiesMeta.CLIENT_POOL_CACHE_EVICTION_INTERVAL_MS;
import static com.datastrato.gravitino.catalog.hive.HiveCatalogPropertiesMeta.CLIENT_POOL_SIZE;
Expand Down Expand Up @@ -68,7 +70,7 @@ void testPropertyMeta() {
Map<String, PropertyEntry<?>> propertyEntryMap =
HIVE_PROPERTIES_METADATA.catalogPropertiesMetadata().propertyEntries();

Assertions.assertEquals(12, propertyEntryMap.size());
Assertions.assertEquals(14, propertyEntryMap.size());
Assertions.assertTrue(propertyEntryMap.containsKey(METASTORE_URIS));
Assertions.assertTrue(propertyEntryMap.containsKey(Catalog.PROPERTY_PACKAGE));
Assertions.assertTrue(propertyEntryMap.containsKey(BaseCatalog.CATALOG_OPERATION_IMPL));
Expand All @@ -86,6 +88,8 @@ void testPropertyMeta() {
Assertions.assertFalse(propertyEntryMap.get(PRINCIPAL).isRequired());
Assertions.assertFalse(propertyEntryMap.get(CHECK_INTERVAL_SEC).isRequired());
Assertions.assertFalse(propertyEntryMap.get(FETCH_TIMEOUT_SEC).isRequired());
Assertions.assertFalse(propertyEntryMap.get(CLOUD_NAME).isRequired());
Assertions.assertFalse(propertyEntryMap.get(CLOUD_REGION_CODE).isRequired());
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,18 @@

package com.datastrato.gravitino.connector;

import static com.datastrato.gravitino.Catalog.CLOUD_NAME;
import static com.datastrato.gravitino.Catalog.CLOUD_REGION_CODE;
import static com.datastrato.gravitino.Catalog.PROPERTY_PACKAGE;

import com.datastrato.gravitino.Catalog;
import com.datastrato.gravitino.annotation.Evolving;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Maps;
import java.util.Map;

@Evolving
public abstract class BaseCatalogPropertiesMetadata extends BasePropertiesMetadata {

protected static final Map<String, PropertyEntry<?>> BASIC_CATALOG_PROPERTY_ENTRIES =
Maps.uniqueIndex(
ImmutableList.of(
Expand All @@ -31,6 +33,21 @@ public abstract class BaseCatalogPropertiesMetadata extends BasePropertiesMetada
false,
null,
false,
false)),
false),
PropertyEntry.enumPropertyEntry(
CLOUD_NAME,
"The cloud that the catalog is running on",
false /* required */,
true /* immutable */,
Catalog.CloudName.class,
null /* The default value does not work because if the user does not set it, this property will not be displayed */,
false /* hidden */,
false /* reserved */),
PropertyEntry.stringOptionalPropertyEntry(
CLOUD_REGION_CODE,
"The region code of the cloud that the catalog is running on",
false /* required */,
null /* The default value does not work because if the user does not set it, this property will not be displayed */,
false /* hidden */)),
PropertyEntry::getName);
}
2 changes: 2 additions & 0 deletions docs/apache-hive-catalog.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ The Hive catalog supports creating, updating, and deleting databases and tables

### Catalog properties

Besides the [common catalog properties](./gravitino-server-config.md#gravitino-catalog-properties-configuration), the Hive catalog has the following properties:

| Property Name | Description | Default Value | Required | Since Version |
|------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|------------------------------|---------------|
| `metastore.uris` | The Hive metastore service URIs, separate multiple addresses with commas. Such as `thrift://127.0.0.1:9083` | (none) | Yes | 0.2.0 |
Expand Down
8 changes: 5 additions & 3 deletions docs/gravitino-server-config.md
Original file line number Diff line number Diff line change
Expand Up @@ -160,9 +160,11 @@ These rules only apply to the catalog properties and don't affect the schema or

Below is a list of catalog properties that will be used by all Gravitino catalogs:

| Configuration item | Description | Default value | Required | Since version |
|--------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|----------|---------------|
| `package` | The path of the catalog package, Gravitino leverages this path to load the related catalog libs and configurations. The package should consist two folders, `conf` (for catalog related configurations) and `libs` (for catalog related dependencies/jars) | (none) | No | 0.5.0 |
| Configuration item | Description | Default value | Required | Since version |
|---------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|----------|---------------|
| `package` | The path of the catalog package, Gravitino leverages this path to load the related catalog libs and configurations. The package should consist two folders, `conf` (for catalog related configurations) and `libs` (for catalog related dependencies/jars) | (none) | No | 0.5.0 |
| `cloud.name` | The property to specify the cloud that the catalog is running on. The valid values are `aws`, `azure`, `gcp`, `on_premise` and `other`. | (none) | No | 0.6.0 |
| `cloud.region-code` | The property to specify the region code of the cloud that the catalog is running on. | (none) | No | 0.6.0 |


The following table lists the catalog specific properties and their default paths:
Expand Down
2 changes: 2 additions & 0 deletions docs/hadoop-catalog.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ Hadoop 3. If there's any compatibility issue, please create an [issue](https://g

### Catalog properties

Besides the [common catalog properties](./gravitino-server-config.md#gravitino-catalog-properties-configuration), the Hadoop catalog has the following properties:

| Property Name | Description | Default Value | Required | Since Version |
|----------------------------------------------------|------------------------------------------------------------------------------------------------|---------------|-------------------------------------------------------------|---------------|
| `location` | The storage location managed by Hadoop catalog. | (none) | No | 0.5.0 |
Expand Down
2 changes: 1 addition & 1 deletion docs/jdbc-doris-catalog.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ You can check the relevant data source configuration in
[data source properties](https://commons.apache.org/proper/commons-dbcp/configuration.html) for
more details.

Here are the catalog properties defined in Gravitino for Doris catalog:
Besides the [common catalog properties](./gravitino-server-config.md#gravitino-catalog-properties-configuration), the Doris catalog has the following properties:

| Configuration item | Description | Default value | Required | Since Version |
|----------------------|-------------------------------------------------------------------------------------|---------------|----------|---------------|
Expand Down
1 change: 1 addition & 0 deletions docs/jdbc-mysql-catalog.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ Check the relevant data source configuration in [data source properties](https:/
When you use the Gravitino with Trino. You can pass the Trino MySQL connector configuration using prefix `trino.bypass.`. For example, using `trino.bypass.join-pushdown.strategy` to pass the `join-pushdown.strategy` to the Gravitino MySQL catalog in Trino runtime.

If you use a JDBC catalog, you must provide `jdbc-url`, `jdbc-driver`, `jdbc-user` and `jdbc-password` to catalog properties.
Besides the [common catalog properties](./gravitino-server-config.md#gravitino-catalog-properties-configuration), the MySQL catalog has the following properties:

| Configuration item | Description | Default value | Required | Since Version |
|----------------------|--------------------------------------------------------------------------------------------------------|---------------|----------|---------------|
Expand Down
1 change: 1 addition & 0 deletions docs/jdbc-postgresql-catalog.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ You can check the relevant data source configuration in [data source properties]
When you use the Gravitino with Trino. You can pass the Trino PostgreSQL connector configuration using prefix `trino.bypass.`. For example, using `trino.bypass.join-pushdown.strategy` to pass the `join-pushdown.strategy` to the Gravitino PostgreSQL catalog in Trino runtime.

If you use JDBC catalog, you must provide `jdbc-url`, `jdbc-driver`, `jdbc-database`, `jdbc-user` and `jdbc-password` to catalog properties.
Besides the [common catalog properties](./gravitino-server-config.md#gravitino-catalog-properties-configuration), the PostgreSQL catalog has the following properties:

| Configuration item | Description | Default value | Required | Since Version |
|----------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|----------|---------------|
Expand Down
2 changes: 2 additions & 0 deletions docs/kafka-catalog.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ One Kafka catalog corresponds to one Kafka cluster.

### Catalog properties

Besides the [common catalog properties](./gravitino-server-config.md#gravitino-catalog-properties-configuration), the Kafka catalog has the following properties:

| Property Name | Description | Default Value | Required | Since Version |
|---------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|----------|---------------|
| `bootstrap.servers` | The Kafka broker(s) to connect to, allowing for multiple brokers by comma-separating them. | (none) | Yes | 0.5.0 |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import com.datastrato.gravitino.integration.test.container.HiveContainer;
import com.datastrato.gravitino.integration.test.util.AbstractIT;
import com.datastrato.gravitino.integration.test.util.GravitinoITUtils;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import java.io.File;
import java.util.Collections;
Expand Down Expand Up @@ -121,7 +122,27 @@ public void testCreateCatalogWithoutProperties() {
Assertions.assertEquals("hadoop", catalog.provider());
Assertions.assertEquals("catalog comment", catalog.comment());
Assertions.assertTrue(catalog.properties().isEmpty());
metalake.dropCatalog(catalogName);

// test cloud related properties
ImmutableMap<String, String> illegalProps = ImmutableMap.of("cloud.name", "myCloud");
IllegalArgumentException exception =
Assertions.assertThrows(
IllegalArgumentException.class,
() ->
metalake.createCatalog(
catalogName, Catalog.Type.FILESET, "hadoop", "catalog comment", illegalProps));
Assertions.assertTrue(exception.getMessage().contains("Invalid value [myCloud]"));

ImmutableMap<String, String> props =
ImmutableMap.of("cloud.name", "aws", "cloud.region-code", "us-west-2");
catalog =
metalake.createCatalog(
catalogName, Catalog.Type.FILESET, "hadoop", "catalog comment", props);
Assertions.assertTrue(metalake.catalogExists(catalogName));
Assertions.assertFalse(catalog.properties().isEmpty());
Assertions.assertEquals("aws", catalog.properties().get("cloud.name"));
Assertions.assertEquals("us-west-2", catalog.properties().get("cloud.region-code"));
metalake.dropCatalog(catalogName);
}

Expand Down

0 comments on commit f4e43a8

Please sign in to comment.