Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[improve](statistics)Clean expired TableStatsMeta. #39779

Merged
merged 1 commit into from
Aug 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions fe/fe-core/src/main/cup/sql_parser.cup
Original file line number Diff line number Diff line change
Expand Up @@ -4604,14 +4604,19 @@ show_param ::=
RESULT = new ShowSyncJobStmt(dbName);
:}
/* show table stats */
| KW_TABLE opt_cached:cached KW_STATS table_name:tbl opt_partition_names:partitionNames opt_col_list:cols
Jibing-Li marked this conversation as resolved.
Show resolved Hide resolved
| KW_TABLE KW_STATS table_name:tbl opt_partition_names:partitionNames opt_col_list:cols
{:
RESULT = new ShowTableStatsStmt(tbl, cols, partitionNames, cached, null);
RESULT = new ShowTableStatsStmt(tbl, cols, partitionNames, null);
:}
/* show table id stats */
| KW_TABLE KW_STATS INTEGER_LITERAL:tableId
{:
RESULT = new ShowTableStatsStmt(tableId);
:}
/* show index stats */
| KW_INDEX KW_STATS table_name:tbl ident:id
{:
RESULT = new ShowTableStatsStmt(tbl, null, null, false, id);
RESULT = new ShowTableStatsStmt(tbl, null, null, id);
:}
/* show column stats */
| KW_COLUMN opt_cached:cached KW_STATS table_name:tbl opt_col_list:cols opt_partition_names:partitionNames
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,18 +89,29 @@ public class ShowTableStatsStmt extends ShowStmt {
private final TableName tableName;
private final List<String> columnNames;
private final PartitionNames partitionNames;
private final boolean cached;
private final String indexName;
private final long tableId;
private final boolean useTableId;

private TableIf table;

/**
 * Builds a statement that addresses the target table by its numeric id.
 * Table name, column names, partition names and index name are all left null.
 *
 * @param tableId id of the table whose stats should be shown
 */
public ShowTableStatsStmt(long tableId) {
    // Id-based addressing: remember the id and flag this statement accordingly.
    this.useTableId = true;
    this.tableId = tableId;
    // None of the name-based fields apply in this mode.
    this.tableName = null;
    this.columnNames = null;
    this.partitionNames = null;
    this.indexName = null;
}

/**
 * Builds a statement that addresses the target table by name.
 * The table id is set to -1 and useTableId to false, i.e. id-based addressing is off.
 *
 * NOTE(review): two parameter-list continuation lines (one with {@code boolean cached}, one without)
 * and a {@code this.cached} assignment appear together below. This looks like two revisions of the
 * signature interleaved - confirm which one is current before relying on it.
 */
public ShowTableStatsStmt(TableName tableName, List<String> columnNames,
PartitionNames partitionNames, boolean cached, String indexName) {
PartitionNames partitionNames, String indexName) {
this.tableName = tableName;
this.columnNames = columnNames;
this.partitionNames = partitionNames;
this.cached = cached;
this.indexName = indexName;
// Sentinel values: this statement does not carry a table id.
this.tableId = -1;
this.useTableId = false;
}

public TableName getTableName() {
Expand All @@ -110,6 +121,13 @@ public TableName getTableName() {
@Override
public void analyze(Analyzer analyzer) throws UserException {
super.analyze(analyzer);
if (useTableId) {
if (!Env.getCurrentEnv().getAccessManager().checkGlobalPriv(ConnectContext.get(), PrivPredicate.SHOW)) {
ErrorReport.reportAnalysisException(ErrorCode.ERR_TABLEACCESS_DENIED_ERROR, "Permission denied",
ConnectContext.get().getQualifiedUser(), ConnectContext.get().getRemoteIP());
}
return;
}
tableName.analyze(analyzer);
if (partitionNames != null) {
partitionNames.analyze(analyzer);
Expand Down Expand Up @@ -171,6 +189,14 @@ public TableIf getTable() {
return table;
}

/**
 * @return true when this statement was built from a table id, false when it was built from a table name
 */
public boolean isUseTableId() {
    return this.useTableId;
}

/**
 * @return the table id stored at construction time (-1 when the statement was built from a table name)
 */
public long getTableId() {
    return this.tableId;
}

public ShowResultSet constructResultSet(TableStatsMeta tableStatistic) {
if (indexName != null) {
return constructIndexResultSet(tableStatistic);
Expand All @@ -185,6 +211,10 @@ public ShowResultSet constructResultSet(TableStatsMeta tableStatistic) {
}
}

/**
 * Builds a result set that carries this statement's metadata and contains no rows.
 *
 * @return a ShowResultSet backed by a new, empty row list
 */
public ShowResultSet constructEmptyResultSet() {
    List<List<String>> noRows = new ArrayList<>();
    return new ShowResultSet(getMetaData(), noRows);
}

public ShowResultSet constructResultSet(long rowCount) {
List<List<String>> result = Lists.newArrayList();
if (partitionNames != null) {
Expand Down Expand Up @@ -313,8 +343,4 @@ public ShowResultSet constructColumnPartitionResultSet(TableStatsMeta tableStati
}
return new ShowResultSet(getMetaData(), result);
}

/**
 * @return the value of the {@code cached} field
 */
public boolean isCached() {
    return this.cached;
}
}
11 changes: 11 additions & 0 deletions fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
Original file line number Diff line number Diff line change
Expand Up @@ -2712,6 +2712,17 @@ private void handleShowDataSkew() throws AnalysisException {
private void handleShowTableStats() {
ShowTableStatsStmt showTableStatsStmt = (ShowTableStatsStmt) stmt;
TableIf tableIf = showTableStatsStmt.getTable();
// Show table stats by table id. Mainly for online debugging.
if (showTableStatsStmt.isUseTableId()) {
long tableId = showTableStatsStmt.getTableId();
TableStatsMeta tableStats = Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(tableId);
if (tableStats == null) {
resultSet = showTableStatsStmt.constructEmptyResultSet();
} else {
resultSet = showTableStatsStmt.constructResultSet(tableStats);
}
return;
}
TableStatsMeta tableStats = Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(tableIf.getId());
/*
tableStats == null means it's not analyzed, in this case show the estimated row count.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1381,6 +1381,10 @@ public void removeTableStats(long tableId) {
}
}

/**
 * Returns a copy of the keys of idToTblStats.
 * The result is a fresh HashSet, so changes to it do not affect the underlying map.
 *
 * @return a new set holding every key currently in idToTblStats
 */
public Set<Long> getIdToTblStatsKeys() {
    Set<Long> tableIds = new HashSet<>(idToTblStats.keySet());
    return tableIds;
}

public ColStatsMeta findColStatsMeta(long tblId, String indexName, String colName) {
TableStatsMeta tableStats = findTableStatsStatus(tblId);
if (tableStats == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

package org.apache.doris.statistics;

import org.apache.doris.catalog.Database;
import org.apache.doris.catalog.DatabaseIf;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.MaterializedIndexMeta;
Expand All @@ -27,6 +28,7 @@
import org.apache.doris.common.util.MasterDaemon;
import org.apache.doris.datasource.CatalogIf;
import org.apache.doris.datasource.InternalCatalog;
import org.apache.doris.persist.TableStatsDeletionLog;
import org.apache.doris.statistics.util.StatisticsUtil;

import com.google.common.collect.Maps;
Expand Down Expand Up @@ -74,6 +76,7 @@ protected void runAfterCatalogReady() {
}

public synchronized void clear() {
clearTableStats();
try {
if (!init()) {
return;
Expand All @@ -99,6 +102,59 @@ private void clearStats(OlapTable statsTbl, boolean isTableColumnStats) {
} while (!expiredStats.isEmpty());
}

/**
 * Removes table stats records whose table can no longer be found.
 *
 * For every table id returned by AnalysisManager.getIdToTblStatsKeys():
 * the record is kept if StatisticsUtil.findTable succeeds for its catalog/db/table names,
 * or if any database of the internal catalog still contains a table with that id.
 * Otherwise the record is removed from the AnalysisManager and a TableStatsDeletionLog
 * is written to the edit log.
 *
 * A failure while handling one id is logged and does not stop processing of the remaining ids.
 */
private void clearTableStats() {
    AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager();
    // Iterate over a copy of the keys so records can be removed while looping.
    Set<Long> tableIds = analysisManager.getIdToTblStatsKeys();
    InternalCatalog internalCatalog = Env.getCurrentInternalCatalog();
    for (long id : tableIds) {
        try {
            TableStatsMeta stats = analysisManager.findTableStatsStatus(id);
            if (stats == null) {
                continue;
            }
            // If ctlName, dbName and tblName exist, the table stats record was created under the new version.
            // First try to find the table by these names. If the table exists, the record is still valid
            // and must be kept in memory.
            try {
                StatisticsUtil.findTable(stats.ctlName, stats.dbName, stats.tblName);
                continue;
            } catch (Exception e) {
                LOG.debug("Table {}.{}.{} not found.", stats.ctlName, stats.dbName, stats.tblName);
            }
            // If the table couldn't be found by name, look it up by id in the internal catalog. This supports
            // records from older versions, which store only the table id and no names.
            // This may remove an external table's record, but that is not a big problem:
            // the stats in the column_statistics table are still available, and the only disadvantage is
            // that auto analyze may be triggered for this table. That happens only once, because the new
            // table stats object will carry the catalog, db and table names.
            if (tableExistInInternalCatalog(internalCatalog, id)) {
                continue;
            }
            LOG.info("Table {}.{}.{} with id {} not exist, remove its table stats record.",
                    stats.ctlName, stats.dbName, stats.tblName, id);
            analysisManager.removeTableStats(id);
            Env.getCurrentEnv().getEditLog().logDeleteTableStats(new TableStatsDeletionLog(id));
        } catch (Exception e) {
            // Log which table id failed and keep the stack trace; then continue with the next id.
            LOG.warn("Failed to clear table stats for table id {}.", id, e);
        }
    }
}

/**
 * Checks whether any database of the given internal catalog contains a table with the given id.
 *
 * @param internalCatalog catalog whose databases are scanned
 * @param tableId id of the table to look for
 * @return true as soon as one database returns a non-null table for the id, false otherwise
 */
private boolean tableExistInInternalCatalog(InternalCatalog internalCatalog, long tableId) {
    for (long dbId : internalCatalog.getDbIds()) {
        Database db = internalCatalog.getDbNullable(dbId);
        // A null database is skipped; otherwise a non-null table lookup ends the search.
        if (db != null && db.getTableNullable(tableId) != null) {
            return true;
        }
    }
    return false;
}

private boolean init() {
try {
String dbName = FeConstants.INTERNAL_DB_NAME;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,24 @@

public class TableStatsMeta implements Writable, GsonPostProcessable {

@SerializedName("ctlId")
public final long ctlId;

@SerializedName("ctln")
public final String ctlName;

@SerializedName("dbId")
public final long dbId;

@SerializedName("dbn")
public final String dbName;

@SerializedName("tblId")
public final long tblId;

@SerializedName("tbln")
public final String tblName;

@SerializedName("idxId")
public final long idxId;
@SerializedName("updatedRows")
Expand Down Expand Up @@ -93,14 +108,24 @@ public class TableStatsMeta implements Writable, GsonPostProcessable {

/**
 * No-argument constructor, annotated as visible for testing.
 * Every id field is set to 0 and every name field to null.
 */
@VisibleForTesting
public TableStatsMeta() {
    // Numeric identifiers.
    this.ctlId = 0;
    this.dbId = 0;
    this.tblId = 0;
    this.idxId = 0;
    // Names.
    this.ctlName = null;
    this.dbName = null;
    this.tblName = null;
}

// It's necessary to store these fields separately from AnalysisInfo, since the lifecycles of AnalysisInfo
// and TableStats are quite different.
public TableStatsMeta(long rowCount, AnalysisInfo analyzedJob, TableIf table) {
this.ctlId = table.getDatabase().getCatalog().getId();
this.ctlName = table.getDatabase().getCatalog().getName();
this.dbId = table.getDatabase().getId();
this.dbName = table.getDatabase().getFullName();
this.tblId = table.getId();
this.tblName = table.getName();
this.idxId = -1;
this.rowCount = rowCount;
update(analyzedJob, table);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// Regression suite: analyzes a table in a Hive catalog, looks its stats up by table id,
// drops the catalog, and checks that the stats record is still shown until `drop expired stats`
// has been issued, after which `show table stats <id>` is expected to return no rows.
suite("test_drop_expired_table_stats", "p0,external,hive,external_docker,external_docker_hive") {
// Only run when the "enableHiveTest" config entry is "true" (case-insensitive).
String enabled = context.config.otherConfigs.get("enableHiveTest")
if (enabled == null || !enabled.equalsIgnoreCase("true")) {
logger.info("disable Hive test.")
return
}

// Repeat the whole scenario once per Hive prefix.
for (String hivePrefix : ["hive2", "hive3"]) {
String extHiveHmsHost = context.config.otherConfigs.get("externalEnvIp")
String extHiveHmsPort = context.config.otherConfigs.get(hivePrefix + "HmsPort")
String catalog_name = hivePrefix + "_test_drop_expired_table_stats"
// Start from a clean state: drop any catalog of the same name, then create it.
sql """drop catalog if exists ${catalog_name};"""
sql """
create catalog if not exists ${catalog_name} properties (
'type'='hms',
'hadoop.username' = 'hadoop',
'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}'
);
"""
logger.info("catalog " + catalog_name + " created")
sql """switch ${catalog_name};"""


// Analyze the table synchronously, then expect exactly one row when showing its stats by name.
sql """use stats_test"""
sql """analyze table employee_gz with sync"""
def result = sql """show table stats employee_gz"""
assertEquals(1, result.size())

def ctlId
def dbId
def tblId
// Resolve the catalog id: column 1 is compared with the catalog name, column 0 is taken as the id.
result = sql """show catalogs"""

for (int i = 0; i < result.size(); i++) {
if (result[i][1] == catalog_name) {
ctlId = result[i][0]
}
}
logger.info("catalog id is " + ctlId)
// Resolve the database id for 'stats_test' under that catalog, using the same column layout.
result = sql """show proc '/catalogs/$ctlId'"""
for (int i = 0; i < result.size(); i++) {
if (result[i][1] == 'stats_test') {
dbId = result[i][0]
}
}
logger.info("db id is " + dbId)
// Resolve the table id for 'employee_gz' under that database.
result = sql """show proc '/catalogs/$ctlId/$dbId'"""
for (int i = 0; i < result.size(); i++) {
if (result[i][1] == 'employee_gz') {
tblId = result[i][0]
}
}
logger.info("table id is " + tblId)
// Showing stats by table id must return one row while the catalog still exists.
result = sql """show table stats $tblId"""
logger.info("Table stats " + result)
assertEquals(1, result.size())

// Dropping the catalog alone is expected to leave the stats record in place.
sql """drop catalog ${catalog_name}"""
result = sql """show table stats $tblId"""
logger.info("Table stats " + result)
assertEquals(1, result.size())

// Any exception from `drop expired stats` is only logged; the assertion below still runs.
try {
sql """drop expired stats"""
} catch (Exception e) {
logger.info("Drop expired stats exception. " + e.getMessage())
}
// After `drop expired stats`, the stats record for the dropped catalog's table must be gone.
result = sql """show table stats $tblId"""
logger.info("Table stats " + result)
assertEquals(0, result.size())
}
}

Loading