Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BugFix] add some defensive codes to table metrics #55839

Merged
merged 2 commits into from
Feb 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions be/src/common/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -1563,4 +1563,6 @@ CONF_mInt32(json_parse_many_batch_size, "1000000");
CONF_mBool(enable_dynamic_batch_size_for_json_parse_many, "true");
CONF_mInt32(put_combined_txn_log_thread_pool_num_max, "64");
CONF_mBool(enable_put_combinded_txn_log_parallel, "false");
// used to control whether the metrics/ interface collects table metrics
CONF_mBool(enable_collect_table_metrics, "true");
} // namespace starrocks::config
1 change: 0 additions & 1 deletion be/src/exec/tablet_sink.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@
#include "util/compression/compression_utils.h"
#include "util/defer_op.h"
#include "util/stack_util.h"
#include "util/starrocks_metrics.h"
#include "util/thread.h"
#include "util/thrift_rpc_helper.h"
#include "util/uid_util.h"
Expand Down
7 changes: 7 additions & 0 deletions be/src/http/action/metrics_action.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ const std::string SimpleCoreMetricsVisitor::MAX_DISK_IO_UTIL_PERCENT = "max_disk
const std::string SimpleCoreMetricsVisitor::MAX_NETWORK_SEND_BYTES_RATE = "max_network_send_bytes_rate";
const std::string SimpleCoreMetricsVisitor::MAX_NETWORK_RECEIVE_BYTES_RATE = "max_network_receive_bytes_rate";

// Name prefix used to recognise table-level metrics. The metrics visitors skip
// any metric whose name starts with this prefix when
// config::enable_collect_table_metrics is false.
const std::string TableMetricsPrefix = "table_";
void PrometheusMetricsVisitor::visit(const std::string& prefix, const std::string& name, MetricCollector* collector) {
if (collector->empty() || name.empty()) {
return;
Expand All @@ -127,6 +128,9 @@ void PrometheusMetricsVisitor::visit(const std::string& prefix, const std::strin
} else {
metric_name = prefix + "_" + name;
}
if (!config::enable_collect_table_metrics && name.starts_with(TableMetricsPrefix)) {
return;
}
// Output metric type
_ss << "# TYPE " << metric_name << " " << collector->type() << "\n";
switch (collector->type()) {
Expand Down Expand Up @@ -290,6 +294,9 @@ void JsonMetricsVisitor::visit(const std::string& prefix, const std::string& nam
if (collector->empty() || name.empty()) {
return;
}
if (!config::enable_collect_table_metrics && name.starts_with(TableMetricsPrefix)) {
return;
}

rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
switch (collector->type()) {
Expand Down
15 changes: 13 additions & 2 deletions be/src/service/staros_worker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,21 @@ StarOSWorker::StarOSWorker() : _mtx(), _shards(), _fs_cache(new_lru_cache(1024))

StarOSWorker::~StarOSWorker() = default;

// Sentinel table id used by StarOSWorker::get_table_id() for shards whose
// "tableId" property is missing or cannot be parsed.
static const uint64_t kUnknownTableId = UINT64_MAX;
// Extracts the table id from the shard's "tableId" property.
//
// Returns the parsed id on success. Returns kUnknownTableId when the property
// is absent or std::stoull() rejects its value; both cases also trip a DCHECK
// so that the inconsistency is surfaced instead of silently ignored.
uint64_t StarOSWorker::get_table_id(const ShardInfo& shard) {
    const auto& properties = shard.properties;
    const auto iter = properties.find("tableId");
    if (iter == properties.end()) {
        DCHECK(false) << "tableId doesn't exist in shard properties";
        return kUnknownTableId;
    }
    // Reuse the iterator from the lookup above instead of searching the map again.
    const auto& tableId = iter->second;
    try {
        return std::stoull(tableId);
    } catch (const std::exception& e) {
        // std::stoull reports bad input via std::invalid_argument / std::out_of_range.
        DCHECK(false) << "failed to parse tableId: " << tableId << ", " << e.what();
        return kUnknownTableId;
    }
}

absl::Status StarOSWorker::add_shard(const ShardInfo& shard) {
Expand Down
12 changes: 10 additions & 2 deletions be/src/util/table_metrics.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,11 @@ class TableMetricsManager {

// Returns the metrics registered for `table_id`.
//
// When no entry exists for the id, the shared `_blackhole_metrics` instance is
// returned instead, so callers always receive a usable pointer. The lookup is
// performed under a shared (reader) lock on `_mu`.
TableMetricsPtr get_table_metrics(uint64_t table_id) {
    std::shared_lock l(_mu);
    // A single find() replaces the former contains()/at() pair, which asserted
    // on a missing id instead of falling back.
    if (auto iter = _metrics_map.find(table_id); iter != _metrics_map.end()) {
        return iter->second;
    }
    return _blackhole_metrics;
}

void cleanup();
Expand All @@ -79,6 +82,11 @@ class TableMetricsManager {
MetricRegistry* _metrics;
std::shared_mutex _mu;
phmap::flat_hash_map<uint64_t, TableMetricsPtr> _metrics_map;
// In some cases the metrics for a given table id cannot be found.
// For example, when a tablet drop and a data load run concurrently,
// the tablets may have been deleted before the load begins, and the table metrics may already have been cleared.
// In such a scenario, we return blackhole metrics so that subsequent processing can continue to work.
TableMetricsPtr _blackhole_metrics = std::make_shared<TableMetrics>();
// used for cleanup
int64_t _last_cleanup_ts = 0;

Expand Down
Loading