Skip to content

Commit

Permalink
report validator results
Browse files Browse the repository at this point in the history
Signed-off-by: Huamin Chen <[email protected]>
  • Loading branch information
rootfs committed Jun 6, 2024
1 parent 1f5dc6d commit aec3ab5
Show file tree
Hide file tree
Showing 8 changed files with 144 additions and 12 deletions.
3 changes: 3 additions & 0 deletions e2e/tools/validator/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ pip install .

## Usage

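Configure the Prometheus scrape jobs. By default, the job that scrapes the bare-metal metrics is expected to be named `metal`
and the job that scrapes the VM metrics is expected to be named `vm`. To use different job names, set `metal_job_name` and `vm_job_name` under `metal` in the validator.yaml file.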

Generate the validator.yaml file based on [validator.yaml.sample](validator.yaml.sample) and run the following command:
```console

Expand Down
7 changes: 2 additions & 5 deletions e2e/tools/validator/query.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
[
"abs((rate(kepler_{level}_package_joules_total{{{query}, job='metal', mode='dynamic'}}[{interval}]) - on() rate(kepler_node_platform_joules_total{{{vm_query}}}[{interval}])) / on() rate(kepler_{level}_package_joules_total{{{query}, job='metal', mode='dynamic'}}[{interval}]))",
"abs((rate(kepler_{level}_platform_joules_total{{{query}, job='metal', mode='dynamic'}}[{interval}]) - on() rate(kepler_node_platform_joules_total{{{vm_query}}}[{interval}])) / on() rate(kepler_{level}_platform_joules_total{{{query}, job='metal', mode='dynamic'}}[{interval}]))"

"abs((rate(kepler_{level}_package_joules_total{{{query}, job='{metal_job_name}', mode='dynamic'}}[{interval}]) - on() rate(kepler_node_platform_joules_total{{{vm_query}}}[{interval}])) / on() rate(kepler_{level}_package_joules_total{{{query}, job='{metal_job_name}', mode='dynamic'}}[{interval}]))",
"abs((rate(kepler_{level}_platform_joules_total{{{query}, job='{metal_job_name}', mode='dynamic'}}[{interval}]) - on() rate(kepler_node_platform_joules_total{{{vm_query}}}[{interval}])) / on() rate(kepler_{level}_platform_joules_total{{{query}, job='{metal_job_name}', mode='dynamic'}}[{interval}]))"
]


11 changes: 8 additions & 3 deletions e2e/tools/validator/src/validator/cases/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,14 @@ class CasesResult(NamedTuple):

class Cases:

def __init__(self, vm: config.VM, prom: config.Prometheus, query_path: str) -> None:
def __init__(self, metal_job_name: str, vm_job_name: str, vm: config.VM, prom: config.Prometheus, query_path: str) -> None:
self.vm_pid = vm.pid
self.vm_name = vm.name
self.metal_job_name = metal_job_name
self.vm_job_name = vm_job_name
self.interval = prom.interval
self.raw_prom_queries = read_json_file(query_path)
self.vm_query = "job='vm'"
self.vm_query = f"job='{self.vm_job_name}'"
if self.vm_pid != 0:
self.query = f"pid='{{vm_pid}}'".format(vm_pid=self.vm_pid)
self.level = "process"
Expand All @@ -51,7 +53,10 @@ def load_test_cases(self) -> CasesResult:
test_cases = []
for raw_prom_query in self.raw_prom_queries:
test_cases.append(CaseResult(
refined_query=raw_prom_query.format(level=self.level, query=self.query, interval=self.interval, vm_query=self.vm_query)
refined_query=raw_prom_query.format(
metal_job_name = self.metal_job_name, vm_job_name = self.vm_job_name,
level=self.level, query=self.query, interval=self.interval, vm_query=self.vm_query
)
))
return CasesResult(
test_cases=test_cases
Expand Down
56 changes: 53 additions & 3 deletions e2e/tools/validator/src/validator/cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# SPDX-License-Identifier: APACHE-2.0

import click
import subprocess
from validator.__about__ import __version__
from validator.stresser import ( Remote )

Expand All @@ -14,6 +15,10 @@
Validator, load
)

from validator.specs import (
get_host_cpu_spec, get_vm_cpu_spec, get_host_dram_size, get_vm_dram_size
)

pass_config = click.make_pass_decorator(Validator)

@click.group(
Expand All @@ -38,18 +43,55 @@ def validator(ctx: click.Context, config_file: str):
)
@pass_config
def stress(cfg: Validator, script_path: str):
# run git describe command and get the output as the report name
tag = ""
git_describe = subprocess.run(["git", "describe", "--tag"], stdout=subprocess.PIPE)
if git_describe.stdout:
tag = git_describe.stdout.decode().strip()
host_cpu_spec = get_host_cpu_spec()
vm_cpu_spec = get_vm_cpu_spec()
host_dram_size = get_host_dram_size()
vm_dram_size = get_vm_dram_size()
# save all the print result into a markdown file as a table
report = open(f"/tmp/report-{tag}.md", "w")
# create section header for specs
report.write(f"# {tag}\n")
report.write("## Specs\n")
report.write("### Host CPU Specs\n")
report.write("| Model | Cores | Threads | Sockets | Flags |\n")
report.write("|-----------|-----------|-------------|-------------|-----------|\n")
report.write(f"| {host_cpu_spec['cpu']['model']} | {host_cpu_spec['cpu']['cores']} | {host_cpu_spec['cpu']['threads']} | {host_cpu_spec['cpu']['sockets']} | ```{host_cpu_spec['cpu']['flags']}``` |\n")
report.write("### VM CPU Specs\n")
report.write("| Model | Cores | Threads | Sockets | Flags |\n")
report.write("|-----------|-----------|-------------|-------------|-----------|\n")
report.write(f"| {vm_cpu_spec['cpu']['model']} | {vm_cpu_spec['cpu']['cores']} | {vm_cpu_spec['cpu']['threads']} | {vm_cpu_spec['cpu']['sockets']} | ```{vm_cpu_spec['cpu']['flags']}``` |\n")
report.write("### Host DRAM Size\n")
report.write("| Size |\n")
report.write("|------|\n")
report.write(f"| {host_dram_size} |\n")
report.write("### VM DRAM Size\n")
report.write("| Size |\n")
report.write("|------|\n")
report.write(f"| {vm_dram_size} |\n")
report.write("\n")
# create section header for validation results
report.write("## Validation Results\n")
report.flush()
remote = Remote(cfg.remote)
result = remote.run_script(script_path=script_path)
click.echo(f"start_time: {result.start_time}, end_time: {result.end_time}")
test_cases = Cases(vm = cfg.metal.vm, prom = cfg.prometheus, query_path = cfg.query_path)
test_cases = Cases(
vm = cfg.metal.vm, metal_job_name = cfg.metal.metal_job_name, vm_job_name = cfg.metal.vm_job_name,
prom = cfg.prometheus, query_path = cfg.query_path
)
metrics_validator = MetricsValidator(cfg.prometheus)
test_case_result = test_cases.load_test_cases()
click.secho("Validation results during stress test:")
for test_case in test_case_result.test_cases:

query = test_case.refined_query

print(f"start_time: {result.start_time}, end_time: {result.end_time}")
print(f"start_time: {result.start_time}, end_time: {result.end_time} query: {query}")
metrics_res = metrics_validator.compare_metrics(result.start_time,
result.end_time,
query)
Expand All @@ -59,6 +101,14 @@ def stress(cfg: Validator, script_path: str):
click.secho(f"Average Error: {metrics_res.me}", fg='bright_yellow')

click.secho("---------------------------------------------------", fg="cyan")

report.write("#### Query\n")
report.write(f"```{query}```\n")
report.write("#### Average Error\n")
report.write(f"{metrics_res.me}\n")
report.write("#### Error List\n")
report.write(f"{metrics_res.el}\n")
report.write("\n")
report.flush()
report.close()


6 changes: 5 additions & 1 deletion e2e/tools/validator/src/validator/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ class VM(NamedTuple):
name: str

class Metal(NamedTuple):
metal_job_name: str
vm_job_name: str
vm: VM

class Prometheus(NamedTuple):
Expand Down Expand Up @@ -71,11 +73,13 @@ def load(config_file: str) -> Validator:
)

metal_config = config['metal']
metal_job_name = metal_config.get('metal_job_name', 'metal')
vm_job_name = metal_config.get('vm_job_name', 'vm')
vm_config = metal_config['vm']
pid = vm_config.get('pid', 0)
vm_name = vm_config.get('name', '')
vm = VM(pid=pid, name=vm_name)
metal = Metal(vm=vm)
metal = Metal(vm=vm, metal_job_name=metal_job_name, vm_job_name=vm_job_name)

prometheus_config = config['prometheus']
prometheus = Prometheus(
Expand Down
69 changes: 69 additions & 0 deletions e2e/tools/validator/src/validator/specs/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# SPDX-FileCopyrightText: 2024-present Sunil Thaha <[email protected]>
#
# SPDX-License-Identifier: APACHE-2.0

# Helpers that collect the host and VM CPU spec (model, CPU count, threads per core, sockets, flags)
# and DRAM size. CPU specs are returned as nested dicts and DRAM sizes as strings.
import json
import os
import subprocess
import sys
import re

def parse_lscpu_output(output: str):
    """Extract the CPU fields of interest from ``lscpu`` text output.

    Args:
        output: The decoded stdout of an ``lscpu`` invocation.

    Returns:
        A dict of the form ``{"cpu": {"model", "cores", "threads",
        "sockets", "flags"}}``. Every value is a string; a field that does
        not appear in ``output`` is left as the empty string. If a key
        occurs more than once, the last occurrence wins.
    """
    # lscpu key -> name of the field in the returned spec.
    field_by_key = {
        "Model name": "model",
        "CPU(s)": "cores",
        "Thread(s) per core": "threads",
        "Socket(s)": "sockets",
        "Flags": "flags",
    }
    cpu = dict.fromkeys(field_by_key.values(), "")

    for line in output.splitlines():
        # partition() never raises, so lines without a "key: value" shape
        # (blank lines, banners, wrapped text) are skipped instead of
        # aborting the whole parse with a ValueError.
        key, separator, value = line.partition(":")
        if not separator:
            continue
        # Newer lscpu versions indent nested keys (e.g. "  Model name:"),
        # so the key must be stripped before it is looked up.
        field = field_by_key.get(key.strip())
        if field is not None:
            cpu[field] = value.strip()
    return {"cpu": cpu}

def get_host_cpu_spec():
    """Return the local host's CPU spec as parsed from ``lscpu``.

    Runs ``lscpu`` on the current machine and feeds its stdout to
    ``parse_lscpu_output``. An empty dict is returned when the command
    writes nothing to stdout.
    """
    completed = subprocess.run(["lscpu"], stdout=subprocess.PIPE)
    raw_output = completed.stdout
    if not raw_output:
        # Nothing to parse; callers get an empty spec.
        return {}
    return parse_lscpu_output(raw_output.decode())

def get_vm_cpu_spec(login: str = "root", vm_addr: str = "my-vm", key_path: str = "/tmp/vm_ssh_key"):
    """Return the VM's CPU spec by running ``lscpu`` on it over ssh.

    Args:
        login: User name used for the ssh connection.
        vm_addr: Host name or address of the VM.
        key_path: Path of the private key passed to ``ssh -i``.

    Returns:
        The dict produced by ``parse_lscpu_output`` for the remote output,
        or an empty dict when the ssh command writes nothing to stdout.
    """
    destination = "@".join((login, vm_addr))
    command = ["ssh", "-i", key_path, destination, "lscpu"]
    completed = subprocess.run(command, stdout=subprocess.PIPE)
    if completed.stdout:
        return parse_lscpu_output(completed.stdout.decode())
    return {}

def get_host_dram_size():
    """Return the host's total memory as reported by ``/proc/meminfo``.

    Returns:
        The text following the colon on the ``MemTotal`` line with the
        surrounding whitespace removed (the kernel reports it in the form
        ``"<number> kB"``), or the empty string if no such line is found.

    Raises:
        OSError: If ``/proc/meminfo`` cannot be opened.
    """
    # The context manager guarantees the file handle is closed; previously
    # it was opened and never released.
    with open("/proc/meminfo", "r") as meminfo:
        for line in meminfo:
            if "MemTotal" in line:
                # The file has a single MemTotal entry, so stop here.
                return line.split(":")[1].strip()
    return ""

def get_vm_dram_size(login: str = "root", vm_addr: str = "my-vm", key_path: str = "/tmp/vm_ssh_key"):
    """Return the VM's total memory, read from its ``/proc/meminfo`` over ssh.

    Args:
        login: User name used for the ssh connection.
        vm_addr: Host name or address of the VM.
        key_path: Path of the private key passed to ``ssh -i``.

    Returns:
        The stripped text after the colon on the ``MemTotal`` line of the
        remote output, or the empty string when the ssh command writes
        nothing to stdout or no ``MemTotal`` line is present.
    """
    destination = "@".join((login, vm_addr))
    completed = subprocess.run(
        ["ssh", "-i", key_path, destination, "cat /proc/meminfo"],
        stdout=subprocess.PIPE,
    )
    size = ""
    if not completed.stdout:
        return size
    for entry in completed.stdout.decode().split("\n"):
        if "MemTotal" in entry:
            size = entry.split(":")[1].strip()
    return size
2 changes: 2 additions & 0 deletions e2e/tools/validator/validator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ remote:
# pkey: ~/.ssh/id_rsa

metal:
metal_job_name: metal
vm_job_name: vm
vm:
pid: 2093543

Expand Down
2 changes: 2 additions & 0 deletions e2e/tools/validator/validator.yaml.sample
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ remote:
# pkey: ~/.ssh/id_rsa

metal:
metal_job_name: metal
vm_job_name: vm
vm:
pid: 2093543
name: my-vm
Expand Down

0 comments on commit aec3ab5

Please sign in to comment.