Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

libcommon: Provide RSS details metrics #9218

Merged
merged 4 commits into from
Aug 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 58 additions & 31 deletions dbms/src/Common/ProcessCollector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,44 +18,71 @@
namespace DB
{

// Capture the process start time exactly once at construction; Collect() only
// re-reads the live metrics (cpu/rss/vsize) on every scrape.
// NOTE(review): the header diff in this PR removes this constructor together
// with the `start_time` gauge — this hunk shows the pre-change code. Confirm
// against the merged tree before relying on it.
ProcessCollector::ProcessCollector()
{
auto info = get_process_metrics();
start_time.Set(info.start_time);
}

// Snapshot process-level metrics (CPU seconds, virtual/resident memory, start
// time, RSS-by-type) and return them as prometheus metric families.
// Called by the prometheus-cpp exposer/gateway on every scrape.
// NOTE(review): this is a diff-view transcription; some lines below are the
// pre-change version fused with the post-change version (flagged inline).
std::vector<prometheus::MetricFamily> ProcessCollector::Collect() const
{
// Fresh readings from the process_metrics library on every scrape.
auto new_info = get_process_metrics();

// Gauge is thread safe, no need to lock.
// NOTE(review): the gauges below (cpu_total/vsize/rss) are removed by this
// PR's header change — these look like pre-change lines kept by the diff view.
auto past_cpu_total = cpu_total.Value();
cpu_total.Increment(new_info.cpu_total - past_cpu_total);
vsize.Set(new_info.vsize);
rss.Set(new_info.rss);

// NOTE(review): "familes" is a long-standing typo for "families"; left as-is
// here because this transcription must stay byte-identical to the diff.
std::vector<prometheus::MetricFamily> familes;
familes.reserve(4);
familes.emplace_back(prometheus::MetricFamily{
CPU_METRIC_NAME,
CPU_METRIC_HELP,
prometheus::MetricType::Gauge,
std::vector<prometheus::ClientMetric>{cpu_total.Collect()}});
familes.emplace_back(prometheus::MetricFamily{
VSIZE_METRIC_NAME,
VSIZE_METRIC_HELP,
prometheus::MetricType::Gauge,
std::vector<prometheus::ClientMetric>{vsize.Collect()}});
familes.emplace_back(prometheus::MetricFamily{
RSS_METRIC_NAME,
RSS_METRIC_HELP,
prometheus::MetricType::Gauge,
std::vector<prometheus::ClientMetric>{rss.Collect()}});

// The following metrics shadow TiFlash proxy metrics, so that we ensure these metrics are available
// in disaggregated mode, where TiFlash proxy may not start at all.
// Note that, even in non-disaggregated mode, duplicates are fine when being collected by Prometheus,
// because TiFlash proxy and TiFlash have different metrics endpoints. However we will see multiple
// endpoints in the Grafana, because both TiFlash proxy and TiFlash use the same metric name.
// To avoid duplicates in Grafana, we will only include proxy metrics when proxy is not enabled.
if (include_proxy_metrics)
{
familes.emplace_back(prometheus::MetricFamily{
"tiflash_proxy_process_cpu_seconds_total",
"Total user and system CPU time spent in seconds.",
prometheus::MetricType::Gauge,
{
prometheus::ClientMetric{.gauge = {static_cast<double>(new_info.cpu_total)}},
}});

familes.emplace_back(prometheus::MetricFamily{
"tiflash_proxy_process_virtual_memory_bytes",
"Virtual memory size in bytes.",
prometheus::MetricType::Gauge,
{
prometheus::ClientMetric{.gauge = {static_cast<double>(new_info.vsize)}},
}});
familes.emplace_back(prometheus::MetricFamily{
"tiflash_proxy_process_resident_memory_bytes",
"Resident memory size in bytes.",
prometheus::MetricType::Gauge,
{
prometheus::ClientMetric{.gauge = {static_cast<double>(new_info.rss)}},
}});
familes.emplace_back(prometheus::MetricFamily{
"tiflash_proxy_process_start_time_seconds",
"Start time of the process since unix epoch in seconds.",
prometheus::MetricType::Gauge,
{
prometheus::ClientMetric{.gauge = {static_cast<double>(new_info.start_time)}},
}});
}

// The following metrics are TiFlash specific process metrics.
familes.emplace_back(prometheus::MetricFamily{
// NOTE(review): the next two lines (START_TIME_*) are the removed version
// and the two after them are the added version — the diff view interleaves
// both. The merged code presumably keeps only the rss_by_type pair.
START_TIME_METRIC_NAME,
START_TIME_METRIC_HELP,
"tiflash_process_rss_by_type_bytes",
"Resident memory size by type in bytes.",
prometheus::MetricType::Gauge,
// NOTE(review): `start_time.Collect()` below is the removed line; the
// brace-initializer list that follows is the added replacement.
std::vector<prometheus::ClientMetric>{start_time.Collect()}});
{
prometheus::ClientMetric{
.label = {{"type", "anon"}},
.gauge = {static_cast<double>(new_info.rss_anon)},
},
prometheus::ClientMetric{
.label = {{"type", "file"}},
.gauge = {static_cast<double>(new_info.rss_file)},
},
prometheus::ClientMetric{
.label = {{"type", "shared"}},
.gauge = {static_cast<double>(new_info.rss_shared)},
},
}});

return familes;
}

Expand Down
23 changes: 5 additions & 18 deletions dbms/src/Common/ProcessCollector.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@

#pragma once

#include <Common/ProcessCollector_fwd.h>
#include <ProcessMetrics/ProcessMetrics.h>
#include <prometheus/counter.h>
#include <prometheus/family.h>
#include <prometheus/metric_family.h>
#include <prometheus/registry.h>

Expand All @@ -27,28 +29,13 @@ namespace DB
// Just like the original tiflash-proxy logic.
// 2. Current implementation of async_metrics interval is 15s; it's too large. And this interval also affects the pushgateway interval.
// So better not to mix cpu/mem metrics with async_metrics.
// The difference between ProcessCollector and prometheus::Registry:
// 1. ProcessCollector will **update** Gauge then collect. prometheus::Registry only collects Gauge.
// Collector for process-level metrics (CPU time, memory, start time),
// registered with both the pull exposer and the push gateway.
// NOTE(review): this diff view shows removed members (metric-name constants,
// the constructor, and the four Gauge fields) alongside the added
// `include_proxy_metrics` flag; the merged class keeps only the latter.
class ProcessCollector : public prometheus::Collectable
{
public:
static constexpr auto CPU_METRIC_NAME = "tiflash_proxy_process_cpu_seconds_total";
static constexpr auto CPU_METRIC_HELP = "Total user and system CPU time spent in seconds.";
static constexpr auto VSIZE_METRIC_NAME = "tiflash_proxy_process_virtual_memory_bytes";
static constexpr auto VSIZE_METRIC_HELP = "Virtual memory size in bytes.";
static constexpr auto RSS_METRIC_NAME = "tiflash_proxy_process_resident_memory_bytes";
static constexpr auto RSS_METRIC_HELP = "Resident memory size in bytes.";
static constexpr auto START_TIME_METRIC_NAME = "tiflash_proxy_process_start_time_seconds";
static constexpr auto START_TIME_METRIC_HELP = "Start time of the process since unix epoch in seconds.";

ProcessCollector();

// Called by prometheus-cpp on every scrape; must be const, hence the
// mutable members below.
std::vector<prometheus::MetricFamily> Collect() const override;

private:
mutable prometheus::Gauge cpu_total;
mutable prometheus::Gauge vsize;
mutable prometheus::Gauge rss;
prometheus::Gauge start_time;
public:
// When true, Collect() also emits tiflash_proxy_process_* families that
// shadow the proxy's own metrics (used when the proxy does not run, e.g.
// disaggregated compute mode with AutoScaler). Atomic: toggled from the
// metrics-setup thread while scrapes may be in flight.
mutable std::atomic<bool> include_proxy_metrics = {true};
};

} // namespace DB
22 changes: 22 additions & 0 deletions dbms/src/Common/ProcessCollector_fwd.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// Copyright 2023 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

// Forward-declaration header for ProcessCollector, so that headers such as
// TiFlashMetrics.h can hold a shared_ptr<ProcessCollector> without pulling in
// prometheus-cpp. Include <Common/ProcessCollector.h> for the full definition.
namespace DB
{

class ProcessCollector;

} // namespace DB
10 changes: 9 additions & 1 deletion dbms/src/Common/TiFlashMetrics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
// limitations under the License.

#include <Common/CurrentMetrics.h>
#include <Common/ProcessCollector.h>
#include <Common/ProfileEvents.h>
#include <Common/TiFlashMetrics.h>
#include <common/defines.h>
Expand All @@ -27,6 +28,8 @@ TiFlashMetrics & TiFlashMetrics::instance()

TiFlashMetrics::TiFlashMetrics()
{
process_collector = std::make_shared<ProcessCollector>();

registered_profile_events.reserve(ProfileEvents::end());
for (ProfileEvents::Event event = 0; event < ProfileEvents::end(); event++)
{
Expand Down Expand Up @@ -202,4 +205,9 @@ void TiFlashMetrics::registerStorageThreadMemory(const std::string & k)
}
}

} // namespace DB
// Toggle whether the process collector also emits the tiflash_proxy_process_*
// metric families (enabled when the TiFlash proxy does not run and cannot
// report them itself).
// `include_proxy_metrics` is a std::atomic<bool>, so an explicit store is
// sufficient — no locking required even while a scrape is in progress.
void TiFlashMetrics::setProvideProxyProcessMetrics(bool v)
{
    process_collector->include_proxy_metrics.store(v);
}

} // namespace DB
9 changes: 4 additions & 5 deletions dbms/src/Common/TiFlashMetrics.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

#include <Common/ComputeLabelHolder.h>
#include <Common/Exception.h>
#include <Common/ProcessCollector.h>
#include <Common/ProcessCollector_fwd.h>
#include <Common/TiFlashBuildInfo.h>
#include <Common/nocopyable.h>
#include <common/types.h>
Expand Down Expand Up @@ -1089,6 +1089,7 @@ struct MetricFamily
return *(resource_group_metrics_map[resource_group_name][idx]);
}


private:
void addMetricsForResourceGroup(const String & resource_group_name)
{
Expand Down Expand Up @@ -1143,6 +1144,7 @@ class TiFlashMetrics
double getStorageThreadMemory(MemoryAllocType type, const std::string & k);
void registerProxyThreadMemory(const std::string & k);
void registerStorageThreadMemory(const std::string & k);
void setProvideProxyProcessMetrics(bool v);

private:
TiFlashMetrics();
Expand All @@ -1157,10 +1159,7 @@ class TiFlashMetrics
static constexpr auto storages_thread_memory_usage = "tiflash_storages_thread_memory_usage";

std::shared_ptr<prometheus::Registry> registry = std::make_shared<prometheus::Registry>();
// Here we add a ProcessCollector to collect cpu/rss/vsize/start_time information.
// Normally, these metrics will be collected by tiflash-proxy,
// but in disaggregated compute mode with AutoScaler, tiflash-proxy will not start, so tiflash will collect these metrics itself.
std::shared_ptr<ProcessCollector> cn_process_collector = std::make_shared<ProcessCollector>();
std::shared_ptr<ProcessCollector> process_collector;

std::vector<prometheus::Gauge *> registered_profile_events;
std::vector<prometheus::Gauge *> registered_current_metrics;
Expand Down
27 changes: 11 additions & 16 deletions dbms/src/Server/MetricsPrometheus.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#include <Common/CurrentMetrics.h>
#include <Common/FunctionTimerTask.h>
#include <Common/ProcessCollector.h>
#include <Common/ProfileEvents.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/TiFlashMetrics.h>
Expand Down Expand Up @@ -207,6 +208,11 @@ MetricsPrometheus::MetricsPrometheus(Context & context, const AsynchronousMetric
auto & tiflash_metrics = TiFlashMetrics::instance();
auto & conf = context.getConfigRef();

bool should_provide_proxy_metrics
= (context.getSharedContextDisagg()->isDisaggregatedComputeMode()
&& context.getSharedContextDisagg()->use_autoscaler);
tiflash_metrics.setProvideProxyProcessMetrics(should_provide_proxy_metrics);

// Interval to collect `ProfileEvents::Event`/`CurrentMetrics::Metric`/`AsynchronousMetrics`
// When push mode is enabled, it also define the interval that Prometheus client push to pushgateway.
metrics_interval = conf.getInt(status_metrics_interval, 15);
Expand Down Expand Up @@ -245,11 +251,7 @@ MetricsPrometheus::MetricsPrometheus(Context & context, const AsynchronousMetric
const auto & labels = prometheus::Gateway::GetInstanceLabel(getInstanceValue(conf));
gateway = std::make_shared<prometheus::Gateway>(host, port, job_name, labels);
gateway->RegisterCollectable(tiflash_metrics.registry);
if (context.getSharedContextDisagg()->isDisaggregatedComputeMode()
&& context.getSharedContextDisagg()->use_autoscaler)
{
gateway->RegisterCollectable(tiflash_metrics.cn_process_collector);
}
gateway->RegisterCollectable(tiflash_metrics.process_collector);

LOG_INFO(log, "Enable prometheus push mode; interval = {}; addr = {}", metrics_interval, metrics_addr);
}
Expand All @@ -268,12 +270,9 @@ MetricsPrometheus::MetricsPrometheus(Context & context, const AsynchronousMetric
addr = listen_host + ":" + metrics_port;
if (context.getSecurityConfig()->hasTlsConfig() && !conf.getBool(status_disable_metrics_tls, false))
{
std::vector<std::weak_ptr<prometheus::Collectable>> collectables{tiflash_metrics.registry};
if (context.getSharedContextDisagg()->isDisaggregatedComputeMode()
&& context.getSharedContextDisagg()->use_autoscaler)
{
collectables.push_back(tiflash_metrics.cn_process_collector);
}
std::vector<std::weak_ptr<prometheus::Collectable>> collectables{
tiflash_metrics.registry,
tiflash_metrics.process_collector};
server = getHTTPServer(context, collectables, addr);
server->start();
LOG_INFO(
Expand All @@ -286,11 +285,7 @@ MetricsPrometheus::MetricsPrometheus(Context & context, const AsynchronousMetric
{
exposer = std::make_shared<prometheus::Exposer>(addr);
exposer->RegisterCollectable(tiflash_metrics.registry);
if (context.getSharedContextDisagg()->isDisaggregatedComputeMode()
&& context.getSharedContextDisagg()->use_autoscaler)
{
exposer->RegisterCollectable(tiflash_metrics.cn_process_collector);
}
exposer->RegisterCollectable(tiflash_metrics.process_collector);
LOG_INFO(
log,
"Enable prometheus pull mode; Listen Host = {}, Metrics Port = {}",
Expand Down
2 changes: 2 additions & 0 deletions libs/libprocess_metrics/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
/target

1 change: 1 addition & 0 deletions libs/libprocess_metrics/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ add_custom_command(OUTPUT ${_PROCESS_METRICS_LIBRARY}
COMMENT "Building process_metrics"
COMMAND cargo build --release --target-dir ${CMAKE_CURRENT_BINARY_DIR}
VERBATIM
USES_TERMINAL
WORKING_DIRECTORY ${_PROCESS_METRICS_SOURCE_DIR}
DEPENDS "${_PROCESS_METRICS_SRCS}"
"${_PROCESS_METRICS_HEADERS}"
Expand Down
Loading