From 92f439e13bb560610d6d7b8449d971a7a873769b Mon Sep 17 00:00:00 2001 From: Jibing-Li <64681310+Jibing-Li@users.noreply.github.com> Date: Wed, 28 Aug 2024 22:11:13 +0800 Subject: [PATCH] [improve](statistics)Clean expired TableStatsMeta. (#39779) Drop catalog and older version of drop database (before https://github.com/apache/doris/pull/39685), will not remove table stats object in memory. Need a background thread to clean the garbage. --- fe/fe-core/src/main/cup/sql_parser.cup | 11 ++- .../doris/analysis/ShowTableStatsStmt.java | 39 ++++++-- .../org/apache/doris/qe/ShowExecutor.java | 11 +++ .../doris/statistics/AnalysisManager.java | 5 ++ .../doris/statistics/StatisticsCleaner.java | 56 ++++++++++++ .../doris/statistics/TableStatsMeta.java | 25 ++++++ .../doris/statistics/AnalysisManagerTest.java | 8 ++ .../hive/test_drop_expired_table_stats.groovy | 90 +++++++++++++++++++ 8 files changed, 235 insertions(+), 10 deletions(-) create mode 100644 regression-test/suites/external_table_p0/hive/test_drop_expired_table_stats.groovy diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup index 1cbd28ee43f5f4..a7f4326047258f 100644 --- a/fe/fe-core/src/main/cup/sql_parser.cup +++ b/fe/fe-core/src/main/cup/sql_parser.cup @@ -4364,14 +4364,19 @@ show_param ::= RESULT = new ShowSyncJobStmt(dbName); :} /* show table stats */ - | KW_TABLE opt_cached:cached KW_STATS table_name:tbl opt_partition_names:partitionNames + | KW_TABLE KW_STATS table_name:tbl opt_partition_names:partitionNames {: - RESULT = new ShowTableStatsStmt(tbl, partitionNames, cached, null); + RESULT = new ShowTableStatsStmt(tbl, partitionNames, null); + :} + /* show table id stats */ + | KW_TABLE KW_STATS INTEGER_LITERAL:tableId + {: + RESULT = new ShowTableStatsStmt(tableId); :} /* show index stats */ | KW_INDEX KW_STATS table_name:tbl ident:id {: - RESULT = new ShowTableStatsStmt(tbl, null, false, id); + RESULT = new ShowTableStatsStmt(tbl, null, id); :} /* show column stats */ | KW_COLUMN opt_cached:cached KW_STATS table_name:tbl opt_col_list:cols opt_partition_names:partitionNames diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java index fc0860dfd13d07..8a1feab28507b2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java @@ -68,16 +68,26 @@ public class ShowTableStatsStmt extends ShowStmt { private final TableName tableName; private final PartitionNames partitionNames; - private final boolean cached; private final String indexName; + private final long tableId; + private final boolean useTableId; private TableIf table; - public ShowTableStatsStmt(TableName tableName, PartitionNames partitionNames, boolean cached, String indexName) { + public ShowTableStatsStmt(long tableId) { + this.tableName = null; + this.partitionNames = null; + this.indexName = null; + this.tableId = tableId; + this.useTableId = true; + } + + public ShowTableStatsStmt(TableName tableName, PartitionNames partitionNames, String indexName) { this.tableName = tableName; this.partitionNames = partitionNames; - this.cached = cached; this.indexName = indexName; + this.tableId = -1; + this.useTableId = false; } public TableName getTableName() { @@ -87,6 +97,13 @@ public TableName getTableName() { @Override public void analyze(Analyzer analyzer) throws UserException { super.analyze(analyzer); + if (useTableId) { + if (!Env.getCurrentEnv().getAccessManager().checkGlobalPriv(ConnectContext.get(), PrivPredicate.SHOW)) { + ErrorReport.reportAnalysisException(ErrorCode.ERR_TABLEACCESS_DENIED_ERROR, "Permission denied", + ConnectContext.get().getQualifiedUser(), ConnectContext.get().getRemoteIP()); + } + return; + } tableName.analyze(analyzer); if (partitionNames != null) { partitionNames.analyze(analyzer); @@ -142,6 +159,14 @@ public TableIf getTable() { return table; } + public boolean isUseTableId() { + return useTableId; + } + + public long getTableId() { + return tableId; + } + public ShowResultSet constructResultSet(TableStatsMeta tableStatistic) { if (indexName != null) { return constructIndexResultSet(tableStatistic); @@ -149,6 +174,10 @@ public ShowResultSet constructResultSet(TableStatsMeta tableStatistic) { return constructTableResultSet(tableStatistic); } + public ShowResultSet constructEmptyResultSet() { + return new ShowResultSet(getMetaData(), new ArrayList<>()); + } + public ShowResultSet constructResultSet(long rowCount) { List> result = Lists.newArrayList(); List row = Lists.newArrayList(); @@ -208,8 +237,4 @@ public ShowResultSet constructIndexResultSet(TableStatsMeta tableStatistic) { result.add(row); return new ShowResultSet(getMetaData(), result); } - - public boolean isCached() { - return cached; - } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java index f8d73108b17d8b..683e7a7fc1e595 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java @@ -2512,6 +2512,17 @@ private void handleShowDataSkew() throws AnalysisException { private void handleShowTableStats() { ShowTableStatsStmt showTableStatsStmt = (ShowTableStatsStmt) stmt; TableIf tableIf = showTableStatsStmt.getTable(); + // Handle use table id to show table stats. Mainly for online debug. + if (showTableStatsStmt.isUseTableId()) { + long tableId = showTableStatsStmt.getTableId(); + TableStatsMeta tableStats = Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(tableId); + if (tableStats == null) { + resultSet = showTableStatsStmt.constructEmptyResultSet(); + } else { + resultSet = showTableStatsStmt.constructResultSet(tableStats); + } + return; + } TableStatsMeta tableStats = Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(tableIf.getId()); /* tableStats == null means it's not analyzed, in this case show the estimated row count. diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index d9cd312aba2243..204b60cfd03afb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -84,6 +84,7 @@ import java.util.Collections; import java.util.Comparator; import java.util.HashMap; +import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -1103,6 +1104,10 @@ public void removeTableStats(long tableId) { } } + public Set getIdToTblStatsKeys() { + return new HashSet<>(idToTblStats.keySet()); + } + public ColStatsMeta findColStatsMeta(long tblId, String indexName, String colName) { TableStatsMeta tableStats = findTableStatsStatus(tblId); if (tableStats == null) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java index 08db363d2db134..2dbdca39b58000 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java @@ -17,6 +17,7 @@ package org.apache.doris.statistics; +import org.apache.doris.catalog.Database; import org.apache.doris.catalog.DatabaseIf; import org.apache.doris.catalog.Env; import org.apache.doris.catalog.MaterializedIndexMeta; @@ -27,6 +28,7 @@ import org.apache.doris.common.util.MasterDaemon; import org.apache.doris.datasource.CatalogIf; import org.apache.doris.datasource.InternalCatalog; +import org.apache.doris.persist.TableStatsDeletionLog; import org.apache.doris.statistics.util.StatisticsUtil; import com.google.common.collect.Maps; @@ -74,6 +76,7 @@ protected void runAfterCatalogReady() { } public synchronized void clear() { + clearTableStats(); try { if (!init()) { return; @@ -100,6 +103,59 @@ private void clearStats(OlapTable statsTbl) { } while (!expiredStats.isEmpty()); } + private void clearTableStats() { + AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager(); + Set tableIds = analysisManager.getIdToTblStatsKeys(); + InternalCatalog internalCatalog = Env.getCurrentInternalCatalog(); + for (long id : tableIds) { + try { + TableStatsMeta stats = analysisManager.findTableStatsStatus(id); + if (stats == null) { + continue; + } + // If ctlName, dbName and tblName exist, it means the table stats is created under new version. + // First try to find the table by the given names. If table exists, means the tableMeta is valid, + // it should be kept in memory. + try { + StatisticsUtil.findTable(stats.ctlName, stats.dbName, stats.tblName); + continue; + } catch (Exception e) { + LOG.debug("Table {}.{}.{} not found.", stats.ctlName, stats.dbName, stats.tblName); + } + // If we couldn't find table by names, try to find it in internal catalog. This is to support older + // version which the tableStats object doesn't store the names but only table id. + // We may remove external table's tableStats here, but it's not a big problem. + // Because the stats in column_statistics table is still available, + // the only disadvantage is auto analyze may be triggered for this table. + // But it only happens once, the new table stats object will have all the catalog, db and table names. + if (tableExistInInternalCatalog(internalCatalog, id)) { + continue; + } + LOG.info("Table {}.{}.{} with id {} not exist, remove its table stats record.", + stats.ctlName, stats.dbName, stats.tblName, id); + analysisManager.removeTableStats(id); + Env.getCurrentEnv().getEditLog().logDeleteTableStats(new TableStatsDeletionLog(id)); + } catch (Exception e) { + LOG.info(e); + } + } + } + + private boolean tableExistInInternalCatalog(InternalCatalog internalCatalog, long tableId) { + List dbIds = internalCatalog.getDbIds(); + for (long dbId : dbIds) { + Database database = internalCatalog.getDbNullable(dbId); + if (database == null) { + continue; + } + TableIf table = database.getTableNullable(tableId); + if (table != null) { + return true; + } + } + return false; + } + private boolean init() { try { String dbName = FeConstants.INTERNAL_DB_NAME; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java index 0f71ff691b0407..aa3d1e6a4e53a4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java @@ -44,9 +44,24 @@ public class TableStatsMeta implements Writable, GsonPostProcessable { + @SerializedName("ctlId") + public final long ctlId; + + @SerializedName("ctln") + public final String ctlName; + + @SerializedName("dbId") + public final long dbId; + + @SerializedName("dbn") + public final String dbName; + @SerializedName("tblId") public final long tblId; + @SerializedName("tbln") + public final String tblName; + @SerializedName("idxId") public final long idxId; @SerializedName("updatedRows") @@ -84,14 +99,24 @@ public class TableStatsMeta implements Writable, GsonPostProcessable { @VisibleForTesting public TableStatsMeta() { + ctlId = 0; + ctlName = null; + dbId = 0; + dbName = null; tblId = 0; + tblName = null; idxId = 0; } // It's necessary to store these fields separately from AnalysisInfo, since the lifecycle between AnalysisInfo // and TableStats is quite different. public TableStatsMeta(long rowCount, AnalysisInfo analyzedJob, TableIf table) { + this.ctlId = table.getDatabase().getCatalog().getId(); + this.ctlName = table.getDatabase().getCatalog().getName(); + this.dbId = table.getDatabase().getId(); + this.dbName = table.getDatabase().getFullName(); this.tblId = table.getId(); + this.tblName = table.getName(); this.idxId = -1; this.rowCount = rowCount; update(analyzedJob, table); diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java index 012ab383fd7484..6b4738a31055f1 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java @@ -23,6 +23,8 @@ import org.apache.doris.analysis.ShowAnalyzeStmt; import org.apache.doris.analysis.TableName; import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.Database; +import org.apache.doris.catalog.DatabaseIf; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.common.Config; @@ -262,6 +264,7 @@ public void logCreateAnalysisJob(AnalysisInfo analysisJob) { @Test public void testReAnalyze() { + Database db = new Database(); new MockUp() { int count = 0; @@ -283,6 +286,11 @@ public List getColumns() { return Lists.newArrayList(c); } + @Mock + public DatabaseIf getDatabase() { + return db; + } + }; OlapTable olapTable = new OlapTable(); TableStatsMeta stats0 = new TableStatsMeta( diff --git a/regression-test/suites/external_table_p0/hive/test_drop_expired_table_stats.groovy b/regression-test/suites/external_table_p0/hive/test_drop_expired_table_stats.groovy new file mode 100644 index 00000000000000..dbbe014e28bb6b --- /dev/null +++ b/regression-test/suites/external_table_p0/hive/test_drop_expired_table_stats.groovy @@ -0,0 +1,90 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_drop_expired_table_stats", "p0,external,hive,external_docker,external_docker_hive") { + String enabled = context.config.otherConfigs.get("enableHiveTest") + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("disable Hive test.") + return + } + + for (String hivePrefix : ["hive2", "hive3"]) { + String extHiveHmsHost = context.config.otherConfigs.get("externalEnvIp") + String extHiveHmsPort = context.config.otherConfigs.get(hivePrefix + "HmsPort") + String catalog_name = hivePrefix + "_test_drop_expired_table_stats" + sql """drop catalog if exists ${catalog_name};""" + sql """ + create catalog if not exists ${catalog_name} properties ( + 'type'='hms', + 'hadoop.username' = 'hadoop', + 'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}' + ); + """ + logger.info("catalog " + catalog_name + " created") + sql """switch ${catalog_name};""" + + + sql """use stats_test""" + sql """analyze table employee_gz with sync""" + def result = sql """show table stats employee_gz""" + assertEquals(1, result.size()) + + def ctlId + def dbId + def tblId + result = sql """show catalogs""" + + for (int i = 0; i < result.size(); i++) { + if (result[i][1] == catalog_name) { + ctlId = result[i][0] + } + } + logger.info("catalog id is " + ctlId) + result = sql """show proc '/catalogs/$ctlId'""" + for (int i = 0; i < result.size(); i++) { + if (result[i][1] == 'stats_test') { + dbId = result[i][0] + } + } + logger.info("db id is " + dbId) + result = sql """show proc '/catalogs/$ctlId/$dbId'""" + for (int i = 0; i < result.size(); i++) { + if (result[i][1] == 'employee_gz') { + tblId = result[i][0] + } + } + logger.info("table id is " + tblId) + result = sql """show table stats $tblId""" + logger.info("Table stats " + result) + assertEquals(1, result.size()) + + sql """drop catalog ${catalog_name}""" + result = sql """show table stats $tblId""" + logger.info("Table stats " + result) + assertEquals(1, result.size()) + + try { + sql """drop expired stats""" + } catch (Exception e) { + logger.info("Drop expired stats exception. " + e.getMessage()) + } + result = sql """show table stats $tblId""" + logger.info("Table stats " + result) + assertEquals(0, result.size()) + } +} +