Skip to content

Commit

Permalink
add vm name option to validator (#1474)
Browse files Browse the repository at this point in the history
* add vm name option to validator

Signed-off-by: Huamin Chen <[email protected]>

* validator: externalize the query

Signed-off-by: Huamin Chen <[email protected]>

---------

Signed-off-by: Huamin Chen <[email protected]>
  • Loading branch information
rootfs authored May 28, 2024
1 parent 244ae8b commit 468ed25
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 27 deletions.
18 changes: 18 additions & 0 deletions e2e/tools/validator/query.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
[
{
"expected_query": "rate(kepler_{level}_package_joules_total{{{query}, mode='dynamic'}}[{interval}])",
"actual_query": "rate(kepler_node_platform_joules_total[{interval}])"
},
{
"expected_query": "rate(kepler_{level}_platform_joules_total{{{query}, mode='dynamic'}}[{interval}])",
"actual_query": "rate(kepler_node_platform_joules_total[{interval}])"
},
{
"expected_query": "rate(kepler_{level}_bpf_cpu_time_ms_total{{{query}}}[{interval}])",
"actual_query": "sum by(__name__, job) (rate(kepler_process_bpf_cpu_time_ms_total[{interval}]))"
},
{
"expected_query": "rate(kepler_{level}_bpf_page_cache_hit_total{{{query}}}[{interval}])",
"actual_query": "sum by(__name__, job) (rate(kepler_process_bpf_page_cache_hit_total[{interval}]))"
}
]
56 changes: 31 additions & 25 deletions e2e/tools/validator/src/validator/cases/__init__.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,28 @@
from typing import NamedTuple, List
from validator import config
import json

def read_json_file(file_path):
    """Load the JSON document stored at ``file_path``.

    This is a best-effort loader: every failure is reported on stdout and
    turned into an empty list instead of being raised.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The decoded JSON content (the caller in this module iterates over it
        and reads the ``expected_query`` / ``actual_query`` keys of each
        entry), or ``[]`` when the file is missing, is not valid JSON, or any
        other error occurs while reading it.
    """
    try:
        # JSON text is UTF-8 by specification; pass the encoding explicitly so
        # decoding does not depend on the platform's locale default.
        with open(file_path, 'r', encoding='utf-8') as file:
            return json.load(file)
    except FileNotFoundError:
        print("The file was not found.")
    except json.JSONDecodeError:
        print("Error decoding JSON. Please check the file format.")
    except Exception as e:
        # Deliberate catch-all: callers rely on always getting a list back.
        print(f"An error occurred: {e}")
    return []

# Special Variable Names:
# vm_pid (virtual machine pid), interval (desired range vector)

# Built-in query pairs. Each entry holds two str.format templates:
# "expected_query" takes {vm_pid} and {interval}; "actual_query" takes
# {interval}. Doubled braces are literal braces of the Prometheus selector.
RAW_PROM_QUERIES = [
    {"expected_query": expected, "actual_query": actual}
    for expected, actual in (
        (
            "rate(kepler_process_package_joules_total{{pid='{vm_pid}', mode='dynamic'}}[{interval}])",
            "rate(kepler_node_platform_joules_total[{interval}])",
        ),
        (
            "rate(kepler_process_platform_joules_total{{pid='{vm_pid}', mode='dynamic'}}[{interval}])",
            "rate(kepler_node_platform_joules_total[{interval}])",
        ),
        (
            "rate(kepler_process_bpf_cpu_time_ms_total{{pid='{vm_pid}'}}[{interval}])",
            "sum by(__name__, job) (rate(kepler_process_bpf_cpu_time_ms_total[{interval}]))",
        ),
        (
            "rate(kepler_process_bpf_page_cache_hit_total{{pid='{vm_pid}'}}[{interval}])",
            "sum by(__name__, job) (rate(kepler_process_bpf_page_cache_hit_total[{interval}]))",
        ),
    )
]
# Raw Prometheus queries: all queries are read from the query file configured via query_path

class TestCaseResult(NamedTuple):
expected_query: str
Expand All @@ -36,18 +35,25 @@ class TestCasesResult(NamedTuple):

class TestCases:

def __init__(self, vm: config.VM, prom: config.Prometheus) -> None:
def __init__(self, vm: config.VM, prom: config.Prometheus, query_path: str) -> None:
self.vm_pid = vm.pid
self.vm_name = vm.name
self.interval = prom.interval
self.raw_prom_queries = RAW_PROM_QUERIES
self.raw_prom_queries = read_json_file(query_path)

if self.vm_pid != 0:
self.query = f"pid='{{vm_pid}}'".format(vm_pid=self.vm_pid)
self.level = "process"
else:
self.query = f"vm_id=~'.*{{vm_name}}'".format(vm_name=self.vm_name)
self.level = "vm"


def load_test_cases(self) -> TestCasesResult:
test_cases = []
for raw_prom_query in self.raw_prom_queries:
test_cases.append(TestCaseResult(
expected_query=raw_prom_query["expected_query"].format(vm_pid=self.vm_pid, interval=self.interval),
actual_query=raw_prom_query["actual_query"].format(vm_pid=self.vm_pid, interval=self.interval)
expected_query=raw_prom_query["expected_query"].format(level=self.level, query=self.query, interval=self.interval),
actual_query=raw_prom_query["actual_query"].format(interval=self.interval)
))
return TestCasesResult(
test_cases=test_cases
Expand Down
2 changes: 1 addition & 1 deletion e2e/tools/validator/src/validator/cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def stress(cfg: Validator, script_path: str):
# mae = mean_absolute_error(expected_data, actual_data)
# mape = mean_absolute_percentage_error(expected_data, actual_data)

test_cases = TestCases(cfg.metal.vm, cfg.prometheus)
test_cases = TestCases(vm = cfg.metal.vm, prom = cfg.prometheus, query_path = cfg.query_path)
metrics_validator = MetricsValidator(cfg.prometheus)
test_case_result = test_cases.load_test_cases()
click.secho("Validation results during stress test:")
Expand Down
9 changes: 8 additions & 1 deletion e2e/tools/validator/src/validator/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ def __repr__(self):

class VM(NamedTuple):
pid: int
name: str

class Metal(NamedTuple):
vm: VM
Expand All @@ -31,6 +32,7 @@ class Validator(NamedTuple):
remote: Remote
metal: Metal
prometheus: Prometheus
query_path: str

def __repr__(self):
return f"<Config {self.remote}@{self.prometheus}>"
Expand Down Expand Up @@ -70,7 +72,9 @@ def load(config_file: str) -> Validator:

metal_config = config['metal']
vm_config = metal_config['vm']
vm = VM( pid=vm_config['pid'],)
pid = vm_config.get('pid', 0)
vm_name = vm_config.get('name', '')
vm = VM(pid=pid, name=vm_name)
metal = Metal(vm=vm)

prometheus_config = config['prometheus']
Expand All @@ -80,8 +84,11 @@ def load(config_file: str) -> Validator:
step=prometheus_config.get('step', '3s')
)

query_path = config.get('query_path', 'query.json' )

return Validator(
remote=remote,
metal=metal,
prometheus=prometheus,
query_path=query_path
)
3 changes: 3 additions & 0 deletions e2e/tools/validator/validator.yaml.sample
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,11 @@ remote:
metal:
vm:
pid: 2093543
name: my-vm

prometheus:
url: http://localhost:9090
interval: 30s
step: 10s

query_path: ./query.json

0 comments on commit 468ed25

Please sign in to comment.