Skip to content

Commit

Permalink
feat(grafana): add grafana support (#137)
Browse files Browse the repository at this point in the history
### Description

This PR builds on the [previously-added Prometheus
support](#134) to
add Grafana support by:
- adding a `grafana_params` section to the top-level `observability`
parameter section
- deploying a Grafana server
- implementing dashboard provisioning

The `grafana` module from the `ethereum-package` package was used as
inspiration, but modified to simplify devX by removing support for
inline dashboards and improving remote dashboard source support.

Additionally, this PR implements API provisioning using the official
[grizzly](https://grafana.github.io/grizzly/) tool, over the existing
file-based provisioning approach to simplify the process of keeping
Kurtosis Grafana in-sync with hosted Grafana. To this end, two new
repositories
([`grafana-dashboards`](https://github.com/ethereum-optimism/grafana-dashboards),
[`grafana-dashboards-public`](https://github.com/ethereum-optimism/grafana-dashboards-public))
have been created, with the intention of tracking extant public &
private dashboards in hosted Grafana.

This PR has been tested and successfully deploys a Grafana server
including all public dashboards present on our hosted Grafana instance,
organized into the same folder structure:
<img width="1214" alt="image"
src="https://github.com/user-attachments/assets/9a007f0e-fb95-4399-9a96-8be3f33e4eba"
/>

Not all dashboards are yet at full parity, but a fair number of them do
show data:
<img width="1807" alt="image"
src="https://github.com/user-attachments/assets/d1223067-9d31-4b8c-a76a-a9d7b4b4c787"
/>

If you want to try this out locally, add the following snippet to your
params file:
```yaml
optimism_package:
  observability:
    grafana_params:
      dashboard_sources:
        - github.com/ethereum-optimism/grafana-dashboards-public/resources@aa35389fc5dec4043838757e2372368c3efb0a29
```

Remaining work:
- continue converging metrics to enable additional dashboards
- implement promtail/loki support to enable log-based dashboard panels
- deploy any services required for certain dashboards (ie
replica-healthcheck) (?)
- support a subset of existing dashboards using tags/folders
- automatic updates of the
[`grafana-dashboards`](https://github.com/ethereum-optimism/grafana-dashboards)
repository
  • Loading branch information
edobry authored Jan 27, 2025
1 parent 81fa02f commit 3e6f9de
Show file tree
Hide file tree
Showing 9 changed files with 230 additions and 4 deletions.
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,20 @@ optimism_package:
# Prometheus docker image to use
# Defaults to the latest image
image: "prom/prometheus:latest"
# Default grafana configuration
grafana_params:
# A list of locators for grafana dashboards to be loaded by the grafana service
dashboard_sources: []
# Resource management for grafana container
# CPU is in millicores
# RAM is in MB
min_cpu: 10
max_cpu: 1000
min_mem: 128
max_mem: 2048
# Grafana docker image to use
# Defaults to the latest image
image: "grafana/grafana:latest"
# Interop configuration
interop:
# Whether or not to enable interop mode
Expand Down
11 changes: 10 additions & 1 deletion main.star
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ op_supervisor_launcher = import_module(

observability = import_module("./src/observability/observability.star")
prometheus = import_module("./src/observability/prometheus/prometheus_launcher.star")
grafana = import_module("./src/observability/grafana/grafana_launcher.star")

wait_for_sync = import_module("./src/wait/wait_for_sync.star")
input_parser = import_module("./src/package_io/input_parser.star")
Expand Down Expand Up @@ -131,14 +132,22 @@ def run(plan, args):
observability_helper,
)

if observability_helper.enabled:
if observability_helper.enabled and len(observability_helper.metrics_jobs) > 0:
plan.print("Launching prometheus...")
prometheus_private_url = prometheus.launch_prometheus(
plan,
observability_helper,
global_node_selectors,
)

plan.print("Launching grafana...")
grafana.launch_grafana(
plan,
prometheus_private_url,
global_node_selectors,
observability_params.grafana_params,
)


def get_l1_config(all_l1_participants, l1_network_params, l1_network_id):
env_vars = {}
Expand Down
144 changes: 144 additions & 0 deletions src/observability/grafana/grafana_launcher.star
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
util = import_module("../../util.star")

ethereum_package_shared_utils = import_module(
"github.com/ethpandaops/ethereum-package/src/shared_utils/shared_utils.star"
)

# Name under which the Grafana service is added to the plan.
SERVICE_NAME = "grafana"

# Identifier and number of the single HTTP port declared for the service.
HTTP_PORT_ID = "http"
HTTP_PORT_NUMBER_UINT16 = 3000

# Directory (relative to this launcher) containing the config templates.
TEMPLATES_FILEPATH = "./templates"

# UID rendered into the provisioned Prometheus datasource.
# NOTE(review): presumably chosen to match the datasource UID referenced by
# the hosted dashboards being imported -- confirm.
DATASOURCE_UID = "grafanacloud-prom"
# Template for the datasource config, and the path (relative to the rendered
# config artifact) at which the rendered file is placed.
DATASOURCE_CONFIG_TEMPLATE_FILEPATH = TEMPLATES_FILEPATH + "/datasource.yml.tmpl"
DATASOURCE_CONFIG_REL_FILEPATH = "datasources/datasource.yml"

# Path inside the service where the rendered config artifact is mounted; the
# same path is handed to Grafana through GF_PATHS_PROVISIONING in get_config.
CONFIG_DIRPATH_ON_SERVICE = "/config"

# Port specs passed to the service config: one TCP port speaking HTTP.
USED_PORTS = {
HTTP_PORT_ID: ethereum_package_shared_utils.new_port_spec(
HTTP_PORT_NUMBER_UINT16,
ethereum_package_shared_utils.TCP_PROTOCOL,
ethereum_package_shared_utils.HTTP_APPLICATION_PROTOCOL,
)
}


def launch_grafana(
    plan,
    prometheus_private_url,
    global_node_selectors,
    grafana_params,
):
    """Adds the Grafana service to the plan and provisions its dashboards.

    Args:
        plan: Kurtosis plan object the service and tasks are added to.
        prometheus_private_url: URL rendered into the datasource config
            template as the Prometheus datasource address.
        global_node_selectors: Node selectors forwarded to the service config.
        grafana_params: Struct read for `image`, `min_cpu`, `max_cpu`,
            `min_mem`, `max_mem` and `dashboard_sources`.

    Returns:
        The "http://<ip>:<port>" URL of the started Grafana service.
    """
    config_artifact = upload_grafana_config(
        plan,
        read_file(DATASOURCE_CONFIG_TEMPLATE_FILEPATH),
        prometheus_private_url,
    )

    grafana_service = plan.add_service(
        SERVICE_NAME,
        get_config(config_artifact, global_node_selectors, grafana_params),
    )

    grafana_url = "http://{0}:{1}".format(
        grafana_service.ip_address,
        grafana_service.ports[HTTP_PORT_ID].number,
    )

    # Dashboards are pushed through the HTTP API, so this has to happen after
    # the service has been added.
    provision_dashboards(plan, grafana_url, grafana_params.dashboard_sources)

    return grafana_url


def upload_grafana_config(
    plan,
    datasource_config_template,
    prometheus_private_url,
):
    """Renders the Grafana datasource config into a files artifact.

    Args:
        plan: Kurtosis plan object used to render the templates.
        datasource_config_template: Contents of the datasource config template.
        prometheus_private_url: Prometheus URL substituted into the template.

    Returns:
        The value returned by `plan.render_templates` for the artifact named
        "grafana-config".
    """
    rendered_files = {
        DATASOURCE_CONFIG_REL_FILEPATH: ethereum_package_shared_utils.new_template_and_data(
            datasource_config_template,
            new_datasource_config_template_data(prometheus_private_url),
        ),
    }

    return plan.render_templates(rendered_files, name="grafana-config")


def new_datasource_config_template_data(prometheus_url):
    """Builds the values substituted into the datasource config template.

    Args:
        prometheus_url: URL exposed to the template as `PrometheusURL`.

    Returns:
        A dict with the `PrometheusUID` and `PrometheusURL` template fields.
    """
    template_data = {}
    template_data["PrometheusUID"] = DATASOURCE_UID
    template_data["PrometheusURL"] = prometheus_url
    return template_data


def get_config(
    grafana_config_artifact_name,
    node_selectors,
    grafana_params,
):
    """Builds the ServiceConfig for the Grafana container.

    Args:
        grafana_config_artifact_name: Files artifact mounted at
            CONFIG_DIRPATH_ON_SERVICE.
        node_selectors: Node selectors applied to the service.
        grafana_params: Struct read for `image`, `min_cpu`, `max_cpu`,
            `min_mem` and `max_mem`.

    Returns:
        A ServiceConfig describing the Grafana service.
    """
    grafana_env = {
        # Point Grafana's provisioning path at the mounted config artifact.
        "GF_PATHS_PROVISIONING": CONFIG_DIRPATH_ON_SERVICE,
        # Anonymous access is enabled with the Admin role in "Main Org.".
        "GF_AUTH_ANONYMOUS_ENABLED": "true",
        "GF_AUTH_ANONYMOUS_ORG_ROLE": "Admin",
        "GF_AUTH_ANONYMOUS_ORG_NAME": "Main Org.",
        # Disabled in the original:
        # "GF_DASHBOARDS_DEFAULT_HOME_DASHBOARD_PATH": "/dashboards/default.json",
    }
    mounted_files = {CONFIG_DIRPATH_ON_SERVICE: grafana_config_artifact_name}

    return ServiceConfig(
        image=grafana_params.image,
        ports=USED_PORTS,
        env_vars=grafana_env,
        files=mounted_files,
        min_cpu=grafana_params.min_cpu,
        max_cpu=grafana_params.max_cpu,
        min_memory=grafana_params.min_mem,
        max_memory=grafana_params.max_mem,
        node_selectors=node_selectors,
    )


def provision_dashboards(plan, service_url, dashboard_sources):
    """Pushes dashboards from each source to Grafana using grizzly (`grr`).

    Does nothing when `dashboard_sources` is empty. Otherwise every source is
    uploaded as a files artifact named "dashboards-<index>", and a single
    shell task pushes the `folders` and `dashboards` subdirectories of each
    one.

    Args:
        plan: Kurtosis plan object used to upload files and run the task.
        service_url: Grafana URL exported to the task as GRAFANA_URL.
        dashboard_sources: List of locators passed to `plan.upload_files`.
    """
    if len(dashboard_sources) == 0:
        return

    push_template = 'grr push "{0}/{1}" -e --disable-reporting'

    mounts = {}
    commands = ["grr config create-context kurtosis"]

    for position, source in enumerate(dashboard_sources):
        mount_name = "dashboards-{0}".format(position)
        mounts[mount_name] = plan.upload_files(source, name=mount_name)

        # Folders are pushed before the dashboards of the same source.
        for subdir in ["folders", "dashboards"]:
            commands.append(push_template.format(mount_name, subdir))

    plan.run_sh(
        description="upload dashboards",
        image="grafana/grizzly:main-0b88d01",
        env_vars={"GRAFANA_URL": service_url},
        files=mounts,
        run=util.join_cmds(commands),
    )
12 changes: 12 additions & 0 deletions src/observability/grafana/templates/datasource.yml.tmpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
apiVersion: 1

datasources:
- name: Prometheus
type: prometheus
access: proxy
orgId: 1
uid: {{ .PrometheusUID }}
url: {{ .PrometheusURL }}
basicAuth: false
isDefault: true
editable: true
2 changes: 2 additions & 0 deletions src/observability/observability.star
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ def register_service_metrics_job(
):
labels = {
"service": service_name,
"namespace": "kurtosis",
"stack_optimism_io_network": "kurtosis",
}
labels.update(additional_labels)

Expand Down
3 changes: 0 additions & 3 deletions src/observability/prometheus/prometheus_launcher.star
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@ def launch_prometheus(
observability_helper,
global_node_selectors,
):
if len(observability_helper.metrics_jobs) == 0:
return None

prometheus_params = observability_helper.params.prometheus_params

prometheus_url = prometheus.run(
Expand Down
26 changes: 26 additions & 0 deletions src/package_io/input_parser.star
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,16 @@ def input_parser(plan, input_args):
min_mem=results["observability"]["prometheus_params"]["min_mem"],
max_mem=results["observability"]["prometheus_params"]["max_mem"],
),
grafana_params=struct(
image=results["observability"]["grafana_params"]["image"],
dashboard_sources=results["observability"]["grafana_params"][
"dashboard_sources"
],
min_cpu=results["observability"]["grafana_params"]["min_cpu"],
max_cpu=results["observability"]["grafana_params"]["max_cpu"],
min_mem=results["observability"]["grafana_params"]["min_mem"],
max_mem=results["observability"]["grafana_params"]["max_mem"],
),
),
interop=struct(
enabled=results["interop"]["enabled"],
Expand Down Expand Up @@ -225,6 +235,11 @@ def parse_network_params(plan, input_args):
input_args.get("observability", {}).get("prometheus_params", {})
)

results["observability"]["grafana_params"] = default_grafana_params()
results["observability"]["grafana_params"].update(
input_args.get("observability", {}).get("grafana_params", {})
)

# configure interop

results["interop"] = default_interop_params()
Expand Down Expand Up @@ -383,6 +398,17 @@ def default_prometheus_params():
}


def default_grafana_params():
    """Returns a fresh dict of default `grafana_params` values.

    CPU limits are in millicores and memory limits in MB, as described in the
    README section for this parameter block.
    """
    return dict(
        image="grafana/grafana:latest",
        dashboard_sources=[],
        min_cpu=10,
        max_cpu=1000,
        min_mem=128,
        max_mem=2048,
    )


def default_interop_params():
return {
"enabled": False,
Expand Down
18 changes: 18 additions & 0 deletions src/package_io/sanity_check.star
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
OBSERVABILITY_PARAMS = [
"enabled",
"prometheus_params",
"grafana_params",
]

PROMETHEUS_PARAMS = [
Expand All @@ -13,6 +14,15 @@ PROMETHEUS_PARAMS = [
"max_mem",
]

# Keys listed for the `grafana_params` section; sanity_check passes this list
# to validate_params when `grafana_params` is present under `observability`.
GRAFANA_PARAMS = [
"image",
"dashboard_sources",
"min_cpu",
"max_cpu",
"min_mem",
"max_mem",
]

INTEROP_PARAMS = [
"enabled",
"supervisor_params",
Expand Down Expand Up @@ -166,6 +176,14 @@ def sanity_check(plan, optimism_config):
PROMETHEUS_PARAMS,
)

if "grafana_params" in optimism_config["observability"]:
validate_params(
plan,
optimism_config["observability"],
"grafana_params",
GRAFANA_PARAMS,
)

if "interop" in optimism_config:
validate_params(
plan,
Expand Down
4 changes: 4 additions & 0 deletions src/util.star
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,7 @@ def label_from_image(image):
if len(label) > max_length:
label = label[-max_length:]
return label


def join_cmds(commands):
    """Chains shell commands with `&&` into a single command string.

    Args:
        commands: List of command strings.

    Returns:
        The commands joined by " && "; an empty string for an empty list.
    """
    separator = " && "
    return separator.join(commands)

0 comments on commit 3e6f9de

Please sign in to comment.