From 96014e08e3e1dd29af680e92c7f9f9b3768b60c3 Mon Sep 17 00:00:00 2001 From: cai can <94670132+caican00@users.noreply.github.com> Date: Fri, 30 Aug 2024 20:11:59 +0800 Subject: [PATCH] [#4814] Improvement (catalog-lakehouse-paimon): use purgeTable for Paimon instead of dropTable inGravitino (#4806) ### What changes were proposed in this pull request? the dropTable in Paimon will both delete the metadata and data and skip the trash, as discussed in https://github.com/apache/gravitino/issues/1436 , Gravitino Paimon catalog should use purgeTable not dropTable ### Why are the changes needed? https://github.com/apache/gravitino/issues/4814 ### Does this PR introduce any user-facing change? yes, updated the doc. ### How was this patch tested? existing ITs and UTs. --- .../paimon/PaimonCatalogOperations.java | 21 ++++++++++--------- .../paimon/ops/PaimonCatalogOps.java | 2 +- .../paimon/TestGravitinoPaimonTable.java | 4 ++-- .../integration/test/CatalogPaimonBaseIT.java | 4 ++-- .../CatalogPaimonKerberosFilesystemIT.java | 4 ++-- .../paimon/ops/TestPaimonCatalogOps.java | 4 ++-- docs/lakehouse-paimon-catalog.md | 19 ++++++++++------- 7 files changed, 31 insertions(+), 27 deletions(-) diff --git a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogOperations.java b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogOperations.java index 99487fe87c1..fbe6f4be49d 100644 --- a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogOperations.java +++ b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogOperations.java @@ -438,15 +438,8 @@ public GravitinoPaimonTable alterTable(NameIdentifier identifier, TableChange... */ @Override public boolean dropTable(NameIdentifier identifier) { - try { - NameIdentifier tableIdentifier = buildPaimonNameIdentifier(identifier); - paimonCatalogOps.dropTable(tableIdentifier.toString()); - } catch (Catalog.TableNotExistException e) { - LOG.warn("Paimon table {} does not exist.", identifier); - return false; - } - LOG.info("Dropped Paimon table {}.", identifier); - return true; + throw new UnsupportedOperationException( + "Paimon dropTable will both remove the metadata and data, please use purgeTable instead in Gravitino."); } /** @@ -458,7 +451,15 @@ public boolean dropTable(NameIdentifier identifier) { */ @Override public boolean purgeTable(NameIdentifier identifier) throws UnsupportedOperationException { - throw new UnsupportedOperationException("purgeTable is unsupported now for Paimon Catalog."); + try { + NameIdentifier tableIdentifier = buildPaimonNameIdentifier(identifier); + paimonCatalogOps.purgeTable(tableIdentifier.toString()); + } catch (Catalog.TableNotExistException e) { + LOG.warn("Paimon table {} does not exist.", identifier); + return false; + } + LOG.info("Purged Paimon table {}.", identifier); + return true; } @Override diff --git a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/ops/PaimonCatalogOps.java b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/ops/PaimonCatalogOps.java index d009c8bb64c..a829b322a37 100644 --- a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/ops/PaimonCatalogOps.java +++ b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/ops/PaimonCatalogOps.java @@ -90,7 +90,7 @@ public void createTable(String tableName, Schema schema) catalog.createTable(tableIdentifier(tableName), schema, false); } - public void dropTable(String tableName) throws TableNotExistException { + public void purgeTable(String tableName) throws TableNotExistException { catalog.dropTable(tableIdentifier(tableName), false); } diff --git a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/TestGravitinoPaimonTable.java b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/TestGravitinoPaimonTable.java index 3a522822adb..05e36219d99 100644 --- a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/TestGravitinoPaimonTable.java +++ b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/TestGravitinoPaimonTable.java @@ -101,7 +101,7 @@ void resetSchema() { return NameIdentifier.of( Namespace.of(levels[levels.length - 1]), nameIdentifier.name()); }) - .forEach(nameIdentifier -> paimonCatalogOperations.dropTable(nameIdentifier)); + .forEach(nameIdentifier -> paimonCatalogOperations.purgeTable(nameIdentifier)); } paimonCatalogOperations.dropSchema(schemaIdent, false); initPaimonSchema(); @@ -358,7 +358,7 @@ void testDropPaimonTable() { new SortOrder[0]); Assertions.assertTrue(paimonCatalogOperations.tableExists(tableIdentifier)); - paimonCatalogOperations.dropTable(tableIdentifier); + paimonCatalogOperations.purgeTable(tableIdentifier); Assertions.assertFalse(paimonCatalogOperations.tableExists(tableIdentifier)); } diff --git a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonBaseIT.java b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonBaseIT.java index a8f321f4243..e2cb6d6e168 100644 --- a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonBaseIT.java +++ b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonBaseIT.java @@ -595,13 +595,13 @@ void testListAndDropPaimonTable() throws DatabaseNotExistException { Assertions.assertEquals("table_1", tableIdentifiers.get(0)); Assertions.assertEquals("table_2", tableIdentifiers.get(1)); - Assertions.assertDoesNotThrow(() -> tableCatalog.dropTable(table1)); + Assertions.assertDoesNotThrow(() -> tableCatalog.purgeTable(table1)); nameIdentifiers = tableCatalog.listTables(Namespace.of(schemaName)); Assertions.assertEquals(1, nameIdentifiers.length); Assertions.assertEquals("table_2", nameIdentifiers[0].name()); - Assertions.assertDoesNotThrow(() -> tableCatalog.dropTable(table2)); + Assertions.assertDoesNotThrow(() -> tableCatalog.purgeTable(table2)); Namespace schemaNamespace = Namespace.of(schemaName); nameIdentifiers = tableCatalog.listTables(schemaNamespace); Assertions.assertEquals(0, nameIdentifiers.length); diff --git a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonKerberosFilesystemIT.java b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonKerberosFilesystemIT.java index 233c8aff764..f9f31ceadfe 100644 --- a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonKerberosFilesystemIT.java +++ b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonKerberosFilesystemIT.java @@ -266,8 +266,8 @@ void testPaimonWithKerberos() { Table loadTable = catalog.asTableCatalog().loadTable(tableNameIdentifier); Assertions.assertEquals(loadTable.name(), tableNameIdentifier.name()); - // Drop table - catalog.asTableCatalog().dropTable(tableNameIdentifier); + // Purge table + catalog.asTableCatalog().purgeTable(tableNameIdentifier); Assertions.assertFalse(catalog.asTableCatalog().tableExists(tableNameIdentifier)); // Drop schema diff --git a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/ops/TestPaimonCatalogOps.java b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/ops/TestPaimonCatalogOps.java index dbae9c71321..ca741c67b60 100644 --- a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/ops/TestPaimonCatalogOps.java +++ b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/ops/TestPaimonCatalogOps.java @@ -153,10 +153,10 @@ void testLoadListAndDropTableOperations() throws Exception { assertEquals(OPTIONS.get(BUCKET.key()), table.options().get(BUCKET.key())); // drop table - Assertions.assertDoesNotThrow(() -> paimonCatalogOps.dropTable(IDENTIFIER.toString())); + Assertions.assertDoesNotThrow(() -> paimonCatalogOps.purgeTable(IDENTIFIER.toString())); Assertions.assertThrowsExactly( Catalog.TableNotExistException.class, - () -> paimonCatalogOps.dropTable(IDENTIFIER.toString())); + () -> paimonCatalogOps.purgeTable(IDENTIFIER.toString())); // list table again Assertions.assertEquals( diff --git a/docs/lakehouse-paimon-catalog.md b/docs/lakehouse-paimon-catalog.md index 2b669ff0be6..6eabd3e8fcd 100644 --- a/docs/lakehouse-paimon-catalog.md +++ b/docs/lakehouse-paimon-catalog.md @@ -59,9 +59,9 @@ Please refer to [Manage Relational Metadata Using Gravitino](./manage-relational ### Schema properties -- Doesn't support specify location and store any schema properties when createSchema for FilesystemCatalog now. -- Doesn't return any schema properties when loadSchema for FilesystemCatalog now. -- Doesn't support store schema comment for FilesystemCatalog now. +- Doesn't support specify location and store any schema properties when createSchema for FilesystemCatalog. +- Doesn't return any schema properties when loadSchema for FilesystemCatalog. +- Doesn't support store schema comment for FilesystemCatalog. ### Schema operations @@ -71,20 +71,23 @@ Please refer to [Manage Relational Metadata Using Gravitino](./manage-relational ### Table capabilities -- Supporting createTable, dropTable, alterTable, loadTable and listTable. -``` -dropTable will delete the table location directly, similar with purgeTable. -``` +- Supporting createTable, purgeTable, alterTable, loadTable and listTable. - Supporting Column default value through table properties, such as `fields.{columnName}.default-value`, not column expression. - + +- Doesn't support dropTable. - Doesn't support table distribution and sort orders. +:::info +Gravitino Paimon Catalog does not support dropTable, because the dropTable in Paimon will both remove the table metadata and the table location from the file system and skip the trash, we should use purgeTable instead in Gravitino. +::: + :::info Paimon does not support auto increment column. ::: #### Table changes +- RenameTable - AddColumn - DeleteColumn - RenameColumn