From 20783a0f59f2ecac99859931493955e6dd7d0a73 Mon Sep 17 00:00:00 2001 From: Lars Gierth Date: Mon, 21 Sep 2015 19:34:25 +0200 Subject: [PATCH 1/5] prometheus: only collect filesystem and netdev metrics We don't need the others, and they eat up RAM and disk space. License: MIT Signed-off-by: Lars Gierth --- .../roles/node_exporter/templates/upstart_node_exporter.conf.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solarnet/roles/node_exporter/templates/upstart_node_exporter.conf.j2 b/solarnet/roles/node_exporter/templates/upstart_node_exporter.conf.j2 index b8df1fc..34992fe 100644 --- a/solarnet/roles/node_exporter/templates/upstart_node_exporter.conf.j2 +++ b/solarnet/roles/node_exporter/templates/upstart_node_exporter.conf.j2 @@ -6,4 +6,4 @@ stop on starting rc RUNLEVEL=[016] respawn respawn limit unlimited -exec /usr/bin/node_exporter --web.listen-address="127.0.0.1:9100" +exec /usr/bin/node_exporter --web.listen-address="127.0.0.1:9100" -collectors.enabled=filesystem,netdev From 0e531686b5923302741e34d8a7e20dc2540541a6 Mon Sep 17 00:00:00 2001 From: Lars Gierth Date: Mon, 21 Sep 2015 19:37:25 +0200 Subject: [PATCH 2/5] prometheus: clean up target host definitions License: MIT Signed-off-by: Lars Gierth --- solarnet/metrics.yml | 4 +++- .../roles/metrics/templates/prometheus.yml.j2 | 18 ++++++++++++------ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/solarnet/metrics.yml b/solarnet/metrics.yml index 04ad236..f57e246 100644 --- a/solarnet/metrics.yml +++ b/solarnet/metrics.yml @@ -9,7 +9,9 @@ - hosts: metrics vars: - gateway_group: gateway + node_targets: "{{ cjdns_identities.keys() }}" + gateway_targets: "{{ groups.gateway }}" + storage_targets: "{{ groups.storage }}" pre_tasks: - include_vars: secrets_plaintext/secrets.yml handlers: diff --git a/solarnet/roles/metrics/templates/prometheus.yml.j2 b/solarnet/roles/metrics/templates/prometheus.yml.j2 index b18f12a..6089dcd 100644 --- 
a/solarnet/roles/metrics/templates/prometheus.yml.j2 +++ b/solarnet/roles/metrics/templates/prometheus.yml.j2 @@ -6,22 +6,28 @@ global: monitor: '{{ ansible_hostname }}' scrape_configs: - - job_name: 'ipfs' + - job_name: 'gateway' scrape_interval: 5s scrape_timeout: 10s metrics_path: '/debug/metrics/prometheus' target_groups: - targets: -{% for hostname in groups[gateway_group] %} +{% for hostname in gateway_targets %} - '[{{ cjdns_identities[hostname].ipv6 }}]:5001' {% endfor %} - - job_name: 'node' - scrape_interval: 5s - scrape_timeout: 10s + - job_name: 'storage' + metrics_path: '/debug/metrics/prometheus' + target_groups: + - targets: +{% for hostname in storage_targets %} + - '[{{ cjdns_identities[hostname].ipv6 }}]:5001' +{% endfor %} + + - job_name: 'host' metrics_path: '/metrics' target_groups: - targets: -{% for hostname in cjdns_identities.keys() %} +{% for hostname in node_targets %} - '[{{ cjdns_identities[hostname].ipv6 }}]:9100' {% endfor %} From fc872adf1bca9824de58ea8b6709d110c64a14c6 Mon Sep 17 00:00:00 2001 From: Lars Gierth Date: Mon, 21 Sep 2015 19:41:18 +0200 Subject: [PATCH 3/5] prometheus: performance tuning - scrape less often - remove unused monitor label - use less RAM, we were running out of it constantly License: MIT Signed-off-by: Lars Gierth --- solarnet/roles/metrics/files/Dockerfile | 1 + solarnet/roles/metrics/templates/prometheus.yml.j2 | 5 +---- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/solarnet/roles/metrics/files/Dockerfile b/solarnet/roles/metrics/files/Dockerfile index 3e16bdd..618101d 100644 --- a/solarnet/roles/metrics/files/Dockerfile +++ b/solarnet/roles/metrics/files/Dockerfile @@ -24,6 +24,7 @@ WORKDIR /prometheus ENTRYPOINT [ "/bin/prometheus" ] CMD [ "-config.file=/etc/prometheus/prometheus.yml", \ "-storage.local.path=/prometheus", \ + "-storage.local.memory-chunks=512288", \ "-web.path-prefix=/prometheus", \ "-web.console.libraries=/etc/prometheus/console_libraries", \ 
"-web.console.templates=/etc/prometheus/consoles" ] diff --git a/solarnet/roles/metrics/templates/prometheus.yml.j2 b/solarnet/roles/metrics/templates/prometheus.yml.j2 index 6089dcd..1da1203 100644 --- a/solarnet/roles/metrics/templates/prometheus.yml.j2 +++ b/solarnet/roles/metrics/templates/prometheus.yml.j2 @@ -1,14 +1,11 @@ --- global: scrape_interval: 15s + scrape_timeout: 10s evaluation_interval: 15s - labels: - monitor: '{{ ansible_hostname }}' scrape_configs: - job_name: 'gateway' - scrape_interval: 5s - scrape_timeout: 10s metrics_path: '/debug/metrics/prometheus' target_groups: - targets: From bf89890010d2e71efca1db6d2a705f2bd931e2d2 Mon Sep 17 00:00:00 2001 From: Lars Gierth Date: Mon, 21 Sep 2015 19:42:12 +0200 Subject: [PATCH 4/5] docker: fix --log-driver migration License: MIT Signed-off-by: Lars Gierth --- solarnet/roles/docker/tasks/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solarnet/roles/docker/tasks/main.yml b/solarnet/roles/docker/tasks/main.yml index 4c60ab5..7002e24 100644 --- a/solarnet/roles/docker/tasks/main.yml +++ b/solarnet/roles/docker/tasks/main.yml @@ -23,7 +23,7 @@ # XXX migration - lineinfile: dest: /etc/default/docker - line: 'DOCKER_OPTS=--log-driver=none' + line: 'DOCKER_OPTS="--log-driver=none"' state: absent notify: - restart docker From 91754c7dd3ead43f8fe3802b66a6fd7e49efdc7e Mon Sep 17 00:00:00 2001 From: Lars Gierth Date: Mon, 21 Sep 2015 19:42:31 +0200 Subject: [PATCH 5/5] docker: make sure docker is enabled and started License: MIT Signed-off-by: Lars Gierth --- solarnet/roles/docker/tasks/main.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/solarnet/roles/docker/tasks/main.yml b/solarnet/roles/docker/tasks/main.yml index 7002e24..696779e 100644 --- a/solarnet/roles/docker/tasks/main.yml +++ b/solarnet/roles/docker/tasks/main.yml @@ -35,3 +35,9 @@ state: present notify: - restart docker + +- name: docker service + service: + name: docker + enabled: yes + state: started