diff --git a/README.md b/README.md index d9721a58..b25a55b7 100644 --- a/README.md +++ b/README.md @@ -609,14 +609,17 @@ def child_exit(server, worker): **4. Metrics tuning (Gauge)**: -When `Gauge` metrics are used, additional tuning needs to be performed. +When `Gauge`s are used in multiprocess applications, +you must decide how to handle the metrics reported by each process. Gauges have several modes they can run in, which can be selected with the `multiprocess_mode` parameter. -- 'all': Default. Return a timeseries per process alive or dead. -- 'liveall': Return a timeseries per process that is still alive. -- 'livesum': Return a single timeseries that is the sum of the values of alive processes. -- 'max': Return a single timeseries that is the maximum of the values of all processes, alive or dead. -- 'min': Return a single timeseries that is the minimum of the values of all processes, alive or dead. +- 'all': Default. Return a timeseries per process (alive or dead), labelled by the process's `pid` (the label is added internally). +- 'min': Return a single timeseries that is the minimum of the values of all processes (alive or dead). +- 'max': Return a single timeseries that is the maximum of the values of all processes (alive or dead). +- 'sum': Return a single timeseries that is the sum of the values of all processes (alive or dead). + +Prepend 'live' to the beginning of the mode to return the same result but only considering living processes +(e.g., 'liveall', 'livesum', 'livemax', 'livemin'). 
```python from prometheus_client import Gauge diff --git a/prometheus_client/metrics.py b/prometheus_client/metrics.py index 41bec517..8878fb86 100644 --- a/prometheus_client/metrics.py +++ b/prometheus_client/metrics.py @@ -346,7 +346,7 @@ def f(): d.set_function(lambda: len(my_dict)) """ _type = 'gauge' - _MULTIPROC_MODES = frozenset(('min', 'max', 'livesum', 'liveall', 'all')) + _MULTIPROC_MODES = frozenset(('all', 'liveall', 'min', 'livemin', 'max', 'livemax', 'sum', 'livesum')) def __init__(self, name: str, diff --git a/prometheus_client/multiprocess.py b/prometheus_client/multiprocess.py index 6c953747..5a23c482 100644 --- a/prometheus_client/multiprocess.py +++ b/prometheus_client/multiprocess.py @@ -4,6 +4,7 @@ import os import warnings +from .metrics import Gauge from .metrics_core import Metric from .mmap_dict import MmapedDict from .samples import Sample @@ -63,8 +64,8 @@ def _parse_key(key): try: file_values = MmapedDict.read_all_values_from_file(f) except FileNotFoundError: - if typ == 'gauge' and parts[1] in ('liveall', 'livesum'): - # Those files can disappear between the glob of collect + if typ == 'gauge' and parts[1].startswith('live'): + # Files for 'live*' gauges can be deleted between the glob of collect # and now (via a mark_process_dead call) so don't fail if # the file is missing continue @@ -96,15 +97,15 @@ def _accumulate_metrics(metrics, accumulate): name, labels, value, timestamp, exemplar = s if metric.type == 'gauge': without_pid_key = (name, tuple(l for l in labels if l[0] != 'pid')) - if metric._multiprocess_mode == 'min': + if metric._multiprocess_mode in ('min', 'livemin'): current = samples_setdefault(without_pid_key, value) if value < current: samples[without_pid_key] = value - elif metric._multiprocess_mode == 'max': + elif metric._multiprocess_mode in ('max', 'livemax'): current = samples_setdefault(without_pid_key, value) if value > current: samples[without_pid_key] = value - elif metric._multiprocess_mode == 'livesum': + elif 
metric._multiprocess_mode in ('sum', 'livesum'): samples[without_pid_key] += value else: # all/liveall samples[(name, labels)] = value @@ -152,11 +153,13 @@ def collect(self): return self.merge(files, accumulate=True) +_LIVE_GAUGE_MULTIPROCESS_MODES = {m for m in Gauge._MULTIPROC_MODES if m.startswith('live')} + + def mark_process_dead(pid, path=None): """Do bookkeeping for when one process dies in a multi-process setup.""" if path is None: path = os.environ.get('PROMETHEUS_MULTIPROC_DIR', os.environ.get('prometheus_multiproc_dir')) - for f in glob.glob(os.path.join(path, f'gauge_livesum_{pid}.db')): - os.remove(f) - for f in glob.glob(os.path.join(path, f'gauge_liveall_{pid}.db')): - os.remove(f) + for mode in _LIVE_GAUGE_MULTIPROCESS_MODES: + for f in glob.glob(os.path.join(path, f'gauge_{mode}_{pid}.db')): + os.remove(f) diff --git a/tests/test_multiprocess.py b/tests/test_multiprocess.py index 9ec0578f..a41903a1 100644 --- a/tests/test_multiprocess.py +++ b/tests/test_multiprocess.py @@ -132,6 +132,17 @@ def test_gauge_min(self): g2.set(2) self.assertEqual(1, self.registry.get_sample_value('g')) + def test_gauge_livemin(self): + g1 = Gauge('g', 'help', registry=None, multiprocess_mode='livemin') + values.ValueClass = MultiProcessValue(lambda: 456) + g2 = Gauge('g', 'help', registry=None, multiprocess_mode='livemin') + self.assertEqual(0, self.registry.get_sample_value('g')) + g1.set(1) + g2.set(2) + self.assertEqual(1, self.registry.get_sample_value('g')) + mark_process_dead(123, os.environ['PROMETHEUS_MULTIPROC_DIR']) + self.assertEqual(2, self.registry.get_sample_value('g')) + def test_gauge_max(self): g1 = Gauge('g', 'help', registry=None, multiprocess_mode='max') values.ValueClass = MultiProcessValue(lambda: 456) @@ -141,6 +152,28 @@ def test_gauge_max(self): g2.set(2) self.assertEqual(2, self.registry.get_sample_value('g')) + def test_gauge_livemax(self): + g1 = Gauge('g', 'help', registry=None, multiprocess_mode='livemax') + values.ValueClass = 
MultiProcessValue(lambda: 456) + g2 = Gauge('g', 'help', registry=None, multiprocess_mode='livemax') + self.assertEqual(0, self.registry.get_sample_value('g')) + g1.set(2) + g2.set(1) + self.assertEqual(2, self.registry.get_sample_value('g')) + mark_process_dead(123, os.environ['PROMETHEUS_MULTIPROC_DIR']) + self.assertEqual(1, self.registry.get_sample_value('g')) + + def test_gauge_sum(self): + g1 = Gauge('g', 'help', registry=None, multiprocess_mode='sum') + values.ValueClass = MultiProcessValue(lambda: 456) + g2 = Gauge('g', 'help', registry=None, multiprocess_mode='sum') + self.assertEqual(0, self.registry.get_sample_value('g')) + g1.set(1) + g2.set(2) + self.assertEqual(3, self.registry.get_sample_value('g')) + mark_process_dead(123, os.environ['PROMETHEUS_MULTIPROC_DIR']) + self.assertEqual(3, self.registry.get_sample_value('g')) + def test_gauge_livesum(self): g1 = Gauge('g', 'help', registry=None, multiprocess_mode='livesum') values.ValueClass = MultiProcessValue(lambda: 456)