Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[#1449] feat(catalogs): Introduce new module bundled-catalog for query engine. #1454

Merged
merged 18 commits into from
Jan 15, 2024
Merged
5 changes: 3 additions & 2 deletions build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -452,7 +452,7 @@ tasks {
subprojects.forEach() {
if (!it.name.startsWith("catalog") &&
!it.name.startsWith("client") && it.name != "trino-connector" &&
it.name != "integration-test"
it.name != "integration-test" && it.name != "bundled-catalog"
) {
from(it.configurations.runtimeClasspath)
into("distribution/package/libs")
Expand All @@ -465,7 +465,8 @@ tasks {
if (!it.name.startsWith("catalog") &&
!it.name.startsWith("client") &&
it.name != "trino-connector" &&
it.name != "integration-test"
it.name != "integration-test" &&
it.name != "bundled-catalog"
) {
dependsOn("${it.name}:build")
from("${it.name}/build/libs")
Expand Down
76 changes: 76 additions & 0 deletions catalogs/bundled-catalog/build.gradle.kts
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/*
* Copyright 2024 Datastrato Pvt Ltd.
* This software is licensed under the Apache License version 2.
*/

import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar

plugins {
id("java")
alias(libs.plugins.shadow)
}

// Projects whose classes are candidates for bundling: the core module plus every catalog
// implementation. They are declared as `implementation` so that they appear on the
// `compileClasspath`, which is the configuration handed to the shadow jar task below.
dependencies {
implementation(project(":core"))
implementation(project(":catalogs:catalog-hive"))
implementation(project(":catalogs:catalog-lakehouse-iceberg"))
implementation(project(":catalogs:catalog-jdbc-mysql"))
implementation(project(":catalogs:catalog-jdbc-postgresql"))
}

// Configures the shadow (fat) jar that bundles the catalog classes needed by the query engine.
tasks.withType<ShadowJar>(ShadowJar::class.java) {
// Use the ZIP64 format so the archive is not limited to 65535 entries.
isZip64 = true
// Bundle what is on the compile classpath (the `implementation` projects and their dependencies).
configurations = listOf(project.configurations.compileClasspath.get())
// No classifier: the shadow jar takes the module's default artifact name.
archiveClassifier.set("")

// NOTE(review): Shadow's dependency filter is normally used as `exclude(dependency("group:name"))`.
// Confirm that these plain string patterns filter dependencies as intended rather than resolving to
// the task-level file-pattern `exclude(...)`.
dependencies {
exclude("org.*")
exclude("javax.*")
}

// File-pattern excludes: drop third-party packages, metadata, resources and native libraries so
// that only the project's own classes remain in the bundle.
exclude("**/package-info.class")
exclude("**/*.properties")
exclude("**/*.html")
exclude("org/**")
exclude("META-INF")
exclude("META-INF/**")
exclude("module-info.class")
exclude("com/google/**")
exclude("com/fasterxml/**")
exclude("javax/**")
exclude("schema/**")
exclude("fr/**")
exclude("google/**")
exclude("groovy/**")
exclude("images/**")
exclude("**/*.conf")
exclude("**/*.so")
// NOTE(review): `.sxd` sits right next to `.xsd`; verify it is intentional and not a typo.
exclude("**/*.sxd")
exclude("**/*.xsd")
exclude("*.ddl")
exclude("**/*.txt")
exclude("**/*.md")
exclude("**/*.dtd")
exclude("**/*.thrift")
exclude("**/*.jdo")
exclude("**/LICENSE")
exclude("**/*.MF")
exclude("**/*.xml")
exclude("*.proto")
exclude("*.template")
exclude("webapps")
exclude("license/*")
exclude("*.xml")
exclude("*.css")
exclude("*.jnilib")
exclude("*.dll")
exclude("*.jocl")
exclude("NOTICE")

// Let Shadow remove dependency classes that this project's own code does not reference.
minimize()
}

// The plain `jar` task depends on `shadowJar` (so the shadow jar is always built first) and is
// given the "empty" classifier, which keeps its file name distinct from the shadow jar's
// classifier-less name.
tasks.jar {
dependsOn(tasks.named("shadowJar"))
archiveClassifier.set("empty")
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
* Copyright 2024 Datastrato Pvt Ltd.
* This software is licensed under the Apache License version 2.
*/

package com.datastrato.catalog.common;

import com.datastrato.gravitino.catalog.BasePropertiesMetadata;
import com.datastrato.gravitino.catalog.PropertiesMetadata;
import com.datastrato.gravitino.catalog.PropertyEntry;
import com.datastrato.gravitino.catalog.hive.HiveCatalogPropertiesMeta;
import com.datastrato.gravitino.catalog.hive.HiveSchemaPropertiesMetadata;
import com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata;
import com.datastrato.gravitino.catalog.lakehouse.iceberg.IcebergCatalogPropertiesMetadata;
import com.datastrato.gravitino.catalog.lakehouse.iceberg.IcebergSchemaPropertiesMetadata;
import com.datastrato.gravitino.catalog.lakehouse.iceberg.IcebergTablePropertiesMetadata;
import java.util.HashSet;
import java.util.Set;

/**
 * The {@link ClassProvider} class serves as a container for the necessary classes used by the
 * Gravitino query engine, with a primary focus on classes related to property metadata.
 *
 * <p>Purpose of this module and class:
 *
 * <ul>
 *   <li>Catalog-related classes are essential for the query engine to directly access catalog
 *       information.
 *   <li>The query engine should be able to detect catalog changes and automatically reload
 *       catalog-related information to ensure synchronization.
 *   <li>Including catalog-related jar packages directly is suboptimal for query engines as it may
 *       introduce unnecessary content.
 * </ul>
 *
 * <p>Therefore, this module is used to store the required classes for the query engine's
 * functionality.
 *
 * <p>The sets declared here are never read inside this class; they only hold class literals.
 * NOTE(review): presumably these references exist so that build-time dependency analysis (for
 * example the shadow plugin's {@code minimize()}) keeps the listed classes in the bundled jar;
 * confirm before removing any of them.
 */
public class ClassProvider {

  /** Catalog-independent property metadata classes. */
  private static final Set<Class<?>> BASE_CLASS =
      classSet(BasePropertiesMetadata.class, PropertyEntry.class, PropertiesMetadata.class);

  /** Property metadata classes of the Hive catalog. */
  private static final Set<Class<?>> HIVE_NEED_CLASS =
      classSet(
          HiveTablePropertiesMetadata.class,
          HiveSchemaPropertiesMetadata.class,
          HiveCatalogPropertiesMeta.class);

  /** Property metadata classes of the Iceberg catalog. */
  private static final Set<Class<?>> ICEBERG_NEED_CLASS =
      classSet(
          IcebergTablePropertiesMetadata.class,
          IcebergSchemaPropertiesMetadata.class,
          IcebergCatalogPropertiesMetadata.class);

  // TODO: list the classes required for the MySQL catalog.
  private static final Set<Class<?>> MYSQL_NEED_CLASS = classSet();

  // TODO: list the classes required for the PostgreSQL catalog.
  private static final Set<Class<?>> PG_NEED_CLASS = classSet();

  /**
   * Builds a plain {@link HashSet} from the given class literals.
   *
   * <p>Used instead of double-brace initialization, which would generate one anonymous {@code
   * HashSet} subclass per field.
   *
   * @param classes the classes to collect; may be empty
   * @return a new set containing exactly the given classes
   */
  private static Set<Class<?>> classSet(Class<?>... classes) {
    Set<Class<?>> result = new HashSet<>();
    for (Class<?> clazz : classes) {
      result.add(clazz);
    }
    return result;
  }
}
8 changes: 6 additions & 2 deletions catalogs/catalog-hive/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -77,12 +77,16 @@ dependencies {
tasks {
val copyDepends by registering(Copy::class) {
from(configurations.runtimeClasspath)
into("build/libs")
// Why copy the dependencies into `build/libs_all` instead of `build/libs`? `build/libs` is also the
// output directory of the `build` task. Task `shadowJar` of project `bundled-catalog` consumes the
// output of `build`; if `copyDepends` wrote into the same directory, Gradle would consider that
// `shadowJar` also depends on `copyDepends` and fail the build.
// The same applies to `catalog-lakehouse-iceberg`, `catalog-jdbc-mysql` and `catalog-jdbc-postgresql`.
into("build/libs_all")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the purpose of this change?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The `build` and `copyDepends` tasks of catalog-hive use the same output directory, `build/libs`, which is also consumed by the `shadowJar` task in bundled-catalog. As a result, Gradle fails with an error saying that "task shadowJar depends on task copyDepends".

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a comment in the file that explains this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added.

}

val copyCatalogLibs by registering(Copy::class) {
dependsOn(copyDepends, "build")
from("build/libs")
from("build/libs_all", "build/libs")
into("$rootDir/distribution/package/catalogs/hive/libs")
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import com.datastrato.gravitino.Namespace;
import com.datastrato.gravitino.catalog.CatalogOperations;
import com.datastrato.gravitino.catalog.PropertiesMetadata;
import com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.TableType;
import com.datastrato.gravitino.catalog.hive.converter.ToHiveType;
import com.datastrato.gravitino.exceptions.NoSuchCatalogException;
import com.datastrato.gravitino.exceptions.NoSuchSchemaException;
Expand Down Expand Up @@ -52,7 +53,6 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,11 @@
import static com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.SERDE_NAME;
import static com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.SERDE_PARAMETER_PREFIX;
import static com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.TABLE_TYPE;
import static com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.TableType.EXTERNAL_TABLE;
import static com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.TableType.MANAGED_TABLE;
import static com.datastrato.gravitino.rel.expressions.transforms.Transforms.identity;
import static org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE;
import static org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE;

import com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.TableType;
import com.datastrato.gravitino.catalog.hive.converter.FromHiveType;
import com.datastrato.gravitino.catalog.hive.converter.ToHiveType;
import com.datastrato.gravitino.catalog.rel.BaseTable;
Expand Down Expand Up @@ -45,7 +46,6 @@
import lombok.ToString;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Order;
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import static com.datastrato.gravitino.catalog.PropertyEntry.stringImmutablePropertyEntry;
import static com.datastrato.gravitino.catalog.PropertyEntry.stringReservedPropertyEntry;
import static com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.StorageFormat.TEXTFILE;
import static org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE;
import static com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.TableType.MANAGED_TABLE;

import com.datastrato.gravitino.catalog.BasePropertiesMetadata;
import com.datastrato.gravitino.catalog.PropertyEntry;
Expand All @@ -19,7 +19,6 @@
import com.google.common.collect.Maps;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hive.metastore.TableType;

public class HiveTablePropertiesMetadata extends BasePropertiesMetadata {
public static final String COMMENT = "comment";
Expand Down Expand Up @@ -82,6 +81,14 @@ public class HiveTablePropertiesMetadata extends BasePropertiesMetadata {

private static final String REGEX_SERDE_CLASS = "org.apache.hadoop.hive.serde2.RegexSerDe";

/**
 * Table type constants used by the Hive table properties metadata.
 *
 * <p>NOTE(review): the constant names match the ones callers previously imported from {@code
 * org.apache.hadoop.hive.metastore.TableType}; presumably the names must stay in sync with Hive.
 * Confirm before renaming or reordering.
 */
public enum TableType {
  MANAGED_TABLE,
  EXTERNAL_TABLE,
  VIRTUAL_VIEW,
  INDEX_TABLE,
  VIRTUAL_INDEX;
}

enum StorageFormat {
SEQUENCEFILE(
SEQUENCEFILE_INPUT_FORMAT_CLASS, SEQUENCEFILE_OUTPUT_FORMAT_CLASS, LAZY_SIMPLE_SERDE_CLASS),
Expand Down
4 changes: 2 additions & 2 deletions catalogs/catalog-jdbc-mysql/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,11 @@ dependencies {
tasks {
val copyDepends by registering(Copy::class) {
from(configurations.runtimeClasspath)
into("build/libs")
into("build/libs_all")
}
val copyCatalogLibs by registering(Copy::class) {
dependsOn(copyDepends, "build")
from("build/libs")
from("build/libs_all", "build/libs")
into("$rootDir/distribution/package/catalogs/jdbc-mysql/libs")
}

Expand Down
4 changes: 2 additions & 2 deletions catalogs/catalog-jdbc-postgresql/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@ dependencies {
tasks {
val copyDepends by registering(Copy::class) {
from(configurations.runtimeClasspath)
into("build/libs")
into("build/libs_all")
}
val copyCatalogLibs by registering(Copy::class) {
dependsOn(copyDepends, "build")
from("build/libs")
from("build/libs_all", "build/libs")
into("$rootDir/distribution/package/catalogs/jdbc-postgresql/libs")
}

Expand Down
4 changes: 2 additions & 2 deletions catalogs/catalog-lakehouse-iceberg/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,11 @@ dependencies {
tasks {
val copyDepends by registering(Copy::class) {
from(configurations.runtimeClasspath)
into("build/libs")
into("build/libs_all")
}
val copyCatalogLibs by registering(Copy::class) {
dependsOn(copyDepends, "build")
from("build/libs")
from("build/libs_all", "build/libs")
into("$rootDir/distribution/package/catalogs/lakehouse-iceberg/libs")
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -304,4 +304,19 @@ public static <T extends Enum<T>> PropertyEntry<T> enumImmutablePropertyEntry(
return enumPropertyEntry(
name, description, required, true, javaType, defaultValue, hidden, reserved);
}

/**
 * Returns a textual representation of this entry that lists its name, description, required and
 * immutable flags, Java type, default value, and hidden and reserved flags.
 */
@Override
public String toString() {
  return "PropertyEntry{"
      + "name='" + name + '\''
      + ", description='" + description + '\''
      + ", required=" + required
      + ", immutable=" + immutable
      + ", javaType=" + javaType
      + ", defaultValue=" + defaultValue
      + ", hidden=" + hidden
      + ", reserved=" + reserved
      + '}';
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
import static com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.TEXT_INPUT_FORMAT_CLASS;
import static com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.TOTAL_SIZE;
import static com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.TRANSIENT_LAST_DDL_TIME;
import static org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE;
import static org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE;
import static com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.TableType.EXTERNAL_TABLE;
import static com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.TableType.MANAGED_TABLE;
import static org.apache.hadoop.hive.serde.serdeConstants.DATE_TYPE_NAME;
import static org.apache.hadoop.hive.serde.serdeConstants.INT_TYPE_NAME;
import static org.apache.hadoop.hive.serde.serdeConstants.STRING_TYPE_NAME;
Expand All @@ -35,6 +35,7 @@
import com.datastrato.gravitino.catalog.hive.HiveClientPool;
import com.datastrato.gravitino.catalog.hive.HiveSchemaPropertiesMetadata;
import com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata;
import com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.TableType;
import com.datastrato.gravitino.client.GravitinoMetaLake;
import com.datastrato.gravitino.dto.rel.ColumnDTO;
import com.datastrato.gravitino.dto.rel.expressions.FieldReferenceDTO;
Expand Down Expand Up @@ -77,7 +78,6 @@
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
Expand Down
2 changes: 1 addition & 1 deletion settings.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ plugins {
rootProject.name = "gravitino"

include("api", "common", "core", "meta", "server", "integration-test", "server-common")
include("catalogs:catalog-hive", "catalogs:catalog-lakehouse-iceberg", "catalogs:catalog-jdbc-common", "catalogs:catalog-jdbc-mysql", "catalogs:catalog-jdbc-postgresql")
include("catalogs:bundled-catalog", "catalogs:catalog-hive", "catalogs:catalog-lakehouse-iceberg", "catalogs:catalog-jdbc-common", "catalogs:catalog-jdbc-mysql", "catalogs:catalog-jdbc-postgresql")
include("clients:client-java", "clients:client-java-runtime")
include("trino-connector")
include("web")
Expand Down
1 change: 1 addition & 0 deletions trino-connector/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ repositories {

dependencies {
implementation(project(":clients:client-java-runtime", configuration = "shadow"))
implementation(project(":catalogs:bundled-catalog", configuration = "shadow"))
implementation(libs.jackson.databind)
implementation(libs.jackson.annotations)
implementation(libs.guava)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,15 @@

import com.datastrato.gravitino.shaded.org.apache.commons.collections4.bidimap.TreeBidiMap;
import com.datastrato.gravitino.trino.connector.catalog.PropertyConverter;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableMap;

public class HiveTablePropertyConverter extends PropertyConverter {

// Trino property keys do not allow upper-case characters or '-', so we need to map them to the
// corresponding Gravitino keys.
private static final TreeBidiMap<String, String> TRINO_KEY_TO_GRAVITINO_KEY =
@VisibleForTesting
static final TreeBidiMap<String, String> TRINO_KEY_TO_GRAVITINO_KEY =
new TreeBidiMap<>(
new ImmutableMap.Builder<String, String>()
.put("format", "format")
Expand Down
Loading
Loading