Skip to content

Commit

Permalink
add some protective codes to table metrics
Browse files Browse the repository at this point in the history
Signed-off-by: silverbullet233 <[email protected]>
  • Loading branch information
silverbullet233 committed Feb 12, 2025
1 parent ca8ee31 commit 9ca8ad7
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 4 deletions.
2 changes: 2 additions & 0 deletions be/src/common/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -1566,4 +1566,6 @@ CONF_mInt32(json_parse_many_batch_size, "1000000");
CONF_mBool(enable_dynamic_batch_size_for_json_parse_many, "true");
CONF_mInt32(put_combined_txn_log_thread_pool_num_max, "64");
CONF_mBool(enable_put_combinded_txn_log_parallel, "false");
// Controls whether the metrics HTTP interface collects and reports table-level metrics.
CONF_mBool(enable_collect_table_metrics, "true");
} // namespace starrocks::config
1 change: 0 additions & 1 deletion be/src/exec/tablet_sink.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@
#include "util/compression/compression_utils.h"
#include "util/defer_op.h"
#include "util/stack_util.h"
#include "util/starrocks_metrics.h"
#include "util/thread.h"
#include "util/thrift_rpc_helper.h"
#include "util/uid_util.h"
Expand Down
7 changes: 7 additions & 0 deletions be/src/http/action/metrics_action.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ const std::string SimpleCoreMetricsVisitor::MAX_DISK_IO_UTIL_PERCENT = "max_disk
const std::string SimpleCoreMetricsVisitor::MAX_NETWORK_SEND_BYTES_RATE = "max_network_send_bytes_rate";
const std::string SimpleCoreMetricsVisitor::MAX_NETWORK_RECEIVE_BYTES_RATE = "max_network_receive_bytes_rate";

// Name prefix used to recognise table-level metrics. The metrics visitors in this
// file skip any metric whose name starts with this prefix when
// config::enable_collect_table_metrics is false.
const std::string TableMetricsPrefix = "table_";
void PrometheusMetricsVisitor::visit(const std::string& prefix, const std::string& name, MetricCollector* collector) {
if (collector->empty() || name.empty()) {
return;
Expand All @@ -127,6 +128,9 @@ void PrometheusMetricsVisitor::visit(const std::string& prefix, const std::strin
} else {
metric_name = prefix + "_" + name;
}
if (!config::enable_collect_table_metrics && name.starts_with(TableMetricsPrefix)) {
return;
}
// Output metric type
_ss << "# TYPE " << metric_name << " " << collector->type() << "\n";
switch (collector->type()) {
Expand Down Expand Up @@ -290,6 +294,9 @@ void JsonMetricsVisitor::visit(const std::string& prefix, const std::string& nam
if (collector->empty() || name.empty()) {
return;
}
if (!config::enable_collect_table_metrics && name.starts_with(TableMetricsPrefix)) {
return;
}

rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
switch (collector->type()) {
Expand Down
7 changes: 6 additions & 1 deletion be/src/service/staros_worker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,14 @@ StarOSWorker::StarOSWorker() : _mtx(), _shards(), _fs_cache(new_lru_cache(1024))

StarOSWorker::~StarOSWorker() = default;

// Sentinel returned by get_table_id() when a shard carries no "tableId" property.
static const uint64_t kUnknownTableId = UINT64_MAX;

// Extracts the table id stored under the "tableId" key of the shard's properties.
//
// @param shard  shard whose `properties` map is inspected.
// @return the parsed table id, or kUnknownTableId when the property is absent.
//
// A missing property is reported through DCHECK (expected to be active only in
// debug builds) and otherwise tolerated, instead of aborting the process with a
// hard CHECK. The value is parsed with std::stoull, which throws
// std::invalid_argument / std::out_of_range on malformed input; that behaviour
// is unchanged.
uint64_t StarOSWorker::get_table_id(const ShardInfo& shard) {
    const auto& properties = shard.properties;
    const auto iter = properties.find("tableId");
    if (iter == properties.end()) {
        DCHECK(false) << "tableId doesn't exist in shard properties";
        return kUnknownTableId;
    }
    // Reuse the iterator from find() rather than looking the key up a second time.
    return std::stoull(iter->second);
}

Expand Down
12 changes: 10 additions & 2 deletions be/src/util/table_metrics.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,11 @@ class TableMetricsManager {

// Returns the metrics object registered for `table_id`.
//
// The lookup is done while holding a shared lock on `_mu`. When no entry exists
// for `table_id`, the shared `_blackhole_metrics` instance is returned instead,
// so callers always receive a usable pointer and never hit a failed `.at()`
// lookup.
TableMetricsPtr get_table_metrics(uint64_t table_id) {
    std::shared_lock l(_mu);
    const auto iter = _metrics_map.find(table_id);
    if (iter != _metrics_map.end()) {
        return iter->second;
    }
    return _blackhole_metrics;
}

void cleanup();
Expand All @@ -79,6 +82,11 @@ class TableMetricsManager {
MetricRegistry* _metrics;
std::shared_mutex _mu;
phmap::flat_hash_map<uint64_t, TableMetricsPtr> _metrics_map;
// In some cases the metrics for a given table id cannot be found.
// For example, when a tablet drop and a data load run concurrently,
// the tablets may have been deleted before the load begins, and the table metrics may already have been cleared.
// In such a scenario we return blackhole metrics so that subsequent processing can still work.
TableMetricsPtr _blackhole_metrics = std::make_shared<TableMetrics>();
// used for cleanup
int64_t _last_cleanup_ts = 0;

Expand Down

0 comments on commit 9ca8ad7

Please sign in to comment.