Skip to content

Commit

Permalink
[improve](statistics)Clean expired TableStatsMeta. (apache#39779)
Browse files Browse the repository at this point in the history
Drop catalog and older version of drop database (before
apache#39685), will not remove table stats
object in memory. Need a background thread to clean the garbage.
  • Loading branch information
Jibing-Li committed Sep 2, 2024
1 parent 98e039a commit 92f439e
Show file tree
Hide file tree
Showing 8 changed files with 235 additions and 10 deletions.
11 changes: 8 additions & 3 deletions fe/fe-core/src/main/cup/sql_parser.cup
Original file line number Diff line number Diff line change
Expand Up @@ -4364,14 +4364,19 @@ show_param ::=
RESULT = new ShowSyncJobStmt(dbName);
:}
/* show table stats */
| KW_TABLE opt_cached:cached KW_STATS table_name:tbl opt_partition_names:partitionNames
| KW_TABLE KW_STATS table_name:tbl opt_partition_names:partitionNames
{:
RESULT = new ShowTableStatsStmt(tbl, partitionNames, cached, null);
RESULT = new ShowTableStatsStmt(tbl, partitionNames, null);
:}
/* show table id stats */
| KW_TABLE KW_STATS INTEGER_LITERAL:tableId
{:
RESULT = new ShowTableStatsStmt(tableId);
:}
/* show index stats */
| KW_INDEX KW_STATS table_name:tbl ident:id
{:
RESULT = new ShowTableStatsStmt(tbl, null, false, id);
RESULT = new ShowTableStatsStmt(tbl, null, id);
:}
/* show column stats */
| KW_COLUMN opt_cached:cached KW_STATS table_name:tbl opt_col_list:cols opt_partition_names:partitionNames
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,16 +68,26 @@ public class ShowTableStatsStmt extends ShowStmt {

private final TableName tableName;
private final PartitionNames partitionNames;
private final boolean cached;
private final String indexName;
private final long tableId;
private final boolean useTableId;

private TableIf table;

public ShowTableStatsStmt(TableName tableName, PartitionNames partitionNames, boolean cached, String indexName) {
public ShowTableStatsStmt(long tableId) {
this.tableName = null;
this.partitionNames = null;
this.indexName = null;
this.tableId = tableId;
this.useTableId = true;
}

public ShowTableStatsStmt(TableName tableName, PartitionNames partitionNames, String indexName) {
this.tableName = tableName;
this.partitionNames = partitionNames;
this.cached = cached;
this.indexName = indexName;
this.tableId = -1;
this.useTableId = false;
}

public TableName getTableName() {
Expand All @@ -87,6 +97,13 @@ public TableName getTableName() {
@Override
public void analyze(Analyzer analyzer) throws UserException {
super.analyze(analyzer);
if (useTableId) {
if (!Env.getCurrentEnv().getAccessManager().checkGlobalPriv(ConnectContext.get(), PrivPredicate.SHOW)) {
ErrorReport.reportAnalysisException(ErrorCode.ERR_TABLEACCESS_DENIED_ERROR, "Permission denied",
ConnectContext.get().getQualifiedUser(), ConnectContext.get().getRemoteIP());
}
return;
}
tableName.analyze(analyzer);
if (partitionNames != null) {
partitionNames.analyze(analyzer);
Expand Down Expand Up @@ -142,13 +159,25 @@ public TableIf getTable() {
return table;
}

public boolean isUseTableId() {
return useTableId;
}

public long getTableId() {
return tableId;
}

public ShowResultSet constructResultSet(TableStatsMeta tableStatistic) {
if (indexName != null) {
return constructIndexResultSet(tableStatistic);
}
return constructTableResultSet(tableStatistic);
}

public ShowResultSet constructEmptyResultSet() {
return new ShowResultSet(getMetaData(), new ArrayList<>());
}

public ShowResultSet constructResultSet(long rowCount) {
List<List<String>> result = Lists.newArrayList();
List<String> row = Lists.newArrayList();
Expand Down Expand Up @@ -208,8 +237,4 @@ public ShowResultSet constructIndexResultSet(TableStatsMeta tableStatistic) {
result.add(row);
return new ShowResultSet(getMetaData(), result);
}

public boolean isCached() {
return cached;
}
}
11 changes: 11 additions & 0 deletions fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
Original file line number Diff line number Diff line change
Expand Up @@ -2512,6 +2512,17 @@ private void handleShowDataSkew() throws AnalysisException {
private void handleShowTableStats() {
ShowTableStatsStmt showTableStatsStmt = (ShowTableStatsStmt) stmt;
TableIf tableIf = showTableStatsStmt.getTable();
// Handle use table id to show table stats. Mainly for online debug.
if (showTableStatsStmt.isUseTableId()) {
long tableId = showTableStatsStmt.getTableId();
TableStatsMeta tableStats = Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(tableId);
if (tableStats == null) {
resultSet = showTableStatsStmt.constructEmptyResultSet();
} else {
resultSet = showTableStatsStmt.constructResultSet(tableStats);
}
return;
}
TableStatsMeta tableStats = Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(tableIf.getId());
/*
tableStats == null means it's not analyzed, in this case show the estimated row count.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -1103,6 +1104,10 @@ public void removeTableStats(long tableId) {
}
}

public Set<Long> getIdToTblStatsKeys() {
return new HashSet<>(idToTblStats.keySet());
}

public ColStatsMeta findColStatsMeta(long tblId, String indexName, String colName) {
TableStatsMeta tableStats = findTableStatsStatus(tblId);
if (tableStats == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

package org.apache.doris.statistics;

import org.apache.doris.catalog.Database;
import org.apache.doris.catalog.DatabaseIf;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.MaterializedIndexMeta;
Expand All @@ -27,6 +28,7 @@
import org.apache.doris.common.util.MasterDaemon;
import org.apache.doris.datasource.CatalogIf;
import org.apache.doris.datasource.InternalCatalog;
import org.apache.doris.persist.TableStatsDeletionLog;
import org.apache.doris.statistics.util.StatisticsUtil;

import com.google.common.collect.Maps;
Expand Down Expand Up @@ -74,6 +76,7 @@ protected void runAfterCatalogReady() {
}

public synchronized void clear() {
clearTableStats();
try {
if (!init()) {
return;
Expand All @@ -100,6 +103,59 @@ private void clearStats(OlapTable statsTbl) {
} while (!expiredStats.isEmpty());
}

private void clearTableStats() {
AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager();
Set<Long> tableIds = analysisManager.getIdToTblStatsKeys();
InternalCatalog internalCatalog = Env.getCurrentInternalCatalog();
for (long id : tableIds) {
try {
TableStatsMeta stats = analysisManager.findTableStatsStatus(id);
if (stats == null) {
continue;
}
// If ctlName, dbName and tblName exist, it means the table stats is created under new version.
// First try to find the table by the given names. If table exists, means the tableMeta is valid,
// it should be kept in memory.
try {
StatisticsUtil.findTable(stats.ctlName, stats.dbName, stats.tblName);
continue;
} catch (Exception e) {
LOG.debug("Table {}.{}.{} not found.", stats.ctlName, stats.dbName, stats.tblName);
}
// If we couldn't find table by names, try to find it in internal catalog. This is to support older
// version which the tableStats object doesn't store the names but only table id.
// We may remove external table's tableStats here, but it's not a big problem.
// Because the stats in column_statistics table is still available,
// the only disadvantage is auto analyze may be triggered for this table.
// But it only happens once, the new table stats object will have all the catalog, db and table names.
if (tableExistInInternalCatalog(internalCatalog, id)) {
continue;
}
LOG.info("Table {}.{}.{} with id {} not exist, remove its table stats record.",
stats.ctlName, stats.dbName, stats.tblName, id);
analysisManager.removeTableStats(id);
Env.getCurrentEnv().getEditLog().logDeleteTableStats(new TableStatsDeletionLog(id));
} catch (Exception e) {
LOG.info(e);
}
}
}

private boolean tableExistInInternalCatalog(InternalCatalog internalCatalog, long tableId) {
List<Long> dbIds = internalCatalog.getDbIds();
for (long dbId : dbIds) {
Database database = internalCatalog.getDbNullable(dbId);
if (database == null) {
continue;
}
TableIf table = database.getTableNullable(tableId);
if (table != null) {
return true;
}
}
return false;
}

private boolean init() {
try {
String dbName = FeConstants.INTERNAL_DB_NAME;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,24 @@

public class TableStatsMeta implements Writable, GsonPostProcessable {

@SerializedName("ctlId")
public final long ctlId;

@SerializedName("ctln")
public final String ctlName;

@SerializedName("dbId")
public final long dbId;

@SerializedName("dbn")
public final String dbName;

@SerializedName("tblId")
public final long tblId;

@SerializedName("tbln")
public final String tblName;

@SerializedName("idxId")
public final long idxId;
@SerializedName("updatedRows")
Expand Down Expand Up @@ -84,14 +99,24 @@ public class TableStatsMeta implements Writable, GsonPostProcessable {

@VisibleForTesting
public TableStatsMeta() {
ctlId = 0;
ctlName = null;
dbId = 0;
dbName = null;
tblId = 0;
tblName = null;
idxId = 0;
}

// It's necessary to store these fields separately from AnalysisInfo, since the lifecycle between AnalysisInfo
// and TableStats is quite different.
public TableStatsMeta(long rowCount, AnalysisInfo analyzedJob, TableIf table) {
this.ctlId = table.getDatabase().getCatalog().getId();
this.ctlName = table.getDatabase().getCatalog().getName();
this.dbId = table.getDatabase().getId();
this.dbName = table.getDatabase().getFullName();
this.tblId = table.getId();
this.tblName = table.getName();
this.idxId = -1;
this.rowCount = rowCount;
update(analyzedJob, table);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
import org.apache.doris.analysis.ShowAnalyzeStmt;
import org.apache.doris.analysis.TableName;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Database;
import org.apache.doris.catalog.DatabaseIf;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.common.Config;
Expand Down Expand Up @@ -262,6 +264,7 @@ public void logCreateAnalysisJob(AnalysisInfo analysisJob) {

@Test
public void testReAnalyze() {
Database db = new Database();
new MockUp<OlapTable>() {

int count = 0;
Expand All @@ -283,6 +286,11 @@ public List<Column> getColumns() {
return Lists.newArrayList(c);
}

@Mock
public DatabaseIf getDatabase() {
return db;
}

};
OlapTable olapTable = new OlapTable();
TableStatsMeta stats0 = new TableStatsMeta(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

suite("test_drop_expired_table_stats", "p0,external,hive,external_docker,external_docker_hive") {
String enabled = context.config.otherConfigs.get("enableHiveTest")
if (enabled == null || !enabled.equalsIgnoreCase("true")) {
logger.info("disable Hive test.")
return
}

for (String hivePrefix : ["hive2", "hive3"]) {
String extHiveHmsHost = context.config.otherConfigs.get("externalEnvIp")
String extHiveHmsPort = context.config.otherConfigs.get(hivePrefix + "HmsPort")
String catalog_name = hivePrefix + "_test_drop_expired_table_stats"
sql """drop catalog if exists ${catalog_name};"""
sql """
create catalog if not exists ${catalog_name} properties (
'type'='hms',
'hadoop.username' = 'hadoop',
'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}'
);
"""
logger.info("catalog " + catalog_name + " created")
sql """switch ${catalog_name};"""


sql """use stats_test"""
sql """analyze table employee_gz with sync"""
def result = sql """show table stats employee_gz"""
assertEquals(1, result.size())

def ctlId
def dbId
def tblId
result = sql """show catalogs"""

for (int i = 0; i < result.size(); i++) {
if (result[i][1] == catalog_name) {
ctlId = result[i][0]
}
}
logger.info("catalog id is " + ctlId)
result = sql """show proc '/catalogs/$ctlId'"""
for (int i = 0; i < result.size(); i++) {
if (result[i][1] == 'stats_test') {
dbId = result[i][0]
}
}
logger.info("db id is " + dbId)
result = sql """show proc '/catalogs/$ctlId/$dbId'"""
for (int i = 0; i < result.size(); i++) {
if (result[i][1] == 'employee_gz') {
tblId = result[i][0]
}
}
logger.info("table id is " + tblId)
result = sql """show table stats $tblId"""
logger.info("Table stats " + result)
assertEquals(1, result.size())

sql """drop catalog ${catalog_name}"""
result = sql """show table stats $tblId"""
logger.info("Table stats " + result)
assertEquals(1, result.size())

try {
sql """drop expired stats"""
} catch (Exception e) {
logger.info("Drop expired stats exception. " + e.getMessage())
}
result = sql """show table stats $tblId"""
logger.info("Table stats " + result)
assertEquals(0, result.size())
}
}

0 comments on commit 92f439e

Please sign in to comment.