Skip to content

Commit

Permalink
Fix MSE and MAPE Single Queries (#1522)
Browse files Browse the repository at this point in the history
Fixed the MSE queries and added MAPE queries.
Included the query outputs in the CLI.

Signed-off-by: Kaiyi <[email protected]>
  • Loading branch information
KaiyiLiu1234 authored Jun 12, 2024
1 parent 3172809 commit 9114e75
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 13 deletions.
24 changes: 16 additions & 8 deletions e2e/tools/validator/query.json
Original file line number Diff line number Diff line change
@@ -1,15 +1,23 @@
{
"mse": [
"avg_over_time((rate(kepler_{level}_core_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}])^2 - rate(kepler_node_core_joules_total{{{vm_query}, mode='dynamic'}}[{interval}])^2)[{interval}:])",
"avg_over_time((rate(kepler_{level}_dram_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}])^2 - rate(kepler_node_dram_joules_total{{{vm_query}, mode='dynamic'}}[{interval}])^2)[{interval}:])",
"avg_over_time((rate(kepler_{level}_package_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}])^2 - rate(kepler_node_package_joules_total{{{vm_query}, mode='dynamic'}}[{interval}])^2)[{interval}:])",
"avg_over_time((rate(kepler_{level}_platform_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}])^2 - rate(kepler_node_platform_joules_total{{{vm_query}, mode='dynamic'}}[{interval}])^2)[{interval}:])",
"avg_over_time((rate(kepler_{level}_core_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}])^2 - rate(kepler_node_core_joules_total{{{vm_query}, mode='idle'}}[{interval}])^2)[{interval}:])",
"avg_over_time((rate(kepler_{level}_dram_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}])^2 - rate(kepler_node_dram_joules_total{{{vm_query}, mode='idle'}}[{interval}])^2)[{interval}:])",
"avg_over_time((rate(kepler_{level}_package_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}])^2 - rate(kepler_node_package_joules_total{{{vm_query}, mode='idle'}}[{interval}])^2)[{interval}:])",
"avg_over_time((rate(kepler_{level}_platform_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}])^2 - rate(kepler_node_platform_joules_total{{{vm_query}, mode='idle'}}[{interval}])^2)[{interval}:])"
"avg_over_time((( rate(kepler_{level}_core_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_core_joules_total{{{vm_query}, mode='dynamic'}}[{interval}]) )^2 )[{interval}:])",
"avg_over_time(((rate(kepler_{level}_dram_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_dram_joules_total{{{vm_query}, mode='dynamic'}}[{interval}]))^2)[{interval}:])",
"avg_over_time(((rate(kepler_{level}_package_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_package_joules_total{{{vm_query}, mode='dynamic'}}[{interval}]))^2 )[{interval}:])",
"avg_over_time(((rate(kepler_{level}_platform_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_platform_joules_total{{{vm_query}, mode='dynamic'}}[{interval}]))^2)[{interval}:])",
"avg_over_time(((rate(kepler_{level}_core_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_core_joules_total{{{vm_query}, mode='idle'}}[{interval}]))^2)[{interval}:])",
"avg_over_time(((rate(kepler_{level}_dram_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_dram_joules_total{{{vm_query}, mode='idle'}}[{interval}]))^2)[{interval}:])",
"avg_over_time(((rate(kepler_{level}_package_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_package_joules_total{{{vm_query}, mode='idle'}}[{interval}]))^2)[{interval}:])",
"avg_over_time(((rate(kepler_{level}_platform_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_platform_joules_total{{{vm_query}, mode='idle'}}[{interval}]))^2) [{interval}:])"
],
"mape": [
"avg_over_time((abs(rate(kepler_{level}_core_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_core_joules_total{{{vm_query}, mode='dynamic'}}[{interval}])) / on() rate(kepler_{level}_core_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}]))[{interval}:])",
"avg_over_time((abs(rate(kepler_{level}_dram_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_dram_joules_total{{{vm_query}, mode='dynamic'}}[{interval}])) / on() rate(kepler_{level}_dram_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}]))[{interval}:])",
"avg_over_time((abs(rate(kepler_{level}_package_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_package_joules_total{{{vm_query}, mode='dynamic'}}[{interval}])) / on() rate(kepler_{level}_package_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}]))[{interval}:])",
"avg_over_time((abs(rate(kepler_{level}_platform_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_platform_joules_total{{{vm_query}, mode='dynamic'}}[{interval}])) / on() rate(kepler_{level}_platform_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}]))[{interval}:])",
"avg_over_time((abs(rate(kepler_{level}_core_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_core_joules_total{{{vm_query}, mode='idle'}}[{interval}])) / on() rate(kepler_{level}_core_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}]))[{interval}:])",
"avg_over_time((abs(rate(kepler_{level}_dram_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_dram_joules_total{{{vm_query}, mode='idle'}}[{interval}])) / on() rate(kepler_{level}_dram_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}]))[{interval}:])",
"avg_over_time((abs(rate(kepler_{level}_package_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_package_joules_total{{{vm_query}, mode='idle'}}[{interval}])) / on() rate(kepler_{level}_package_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}]))[{interval}:])",
"avg_over_time((abs(rate(kepler_{level}_platform_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_platform_joules_total{{{vm_query}, mode='idle'}}[{interval}])) / on() rate(kepler_{level}_platform_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}]))[{interval}:])"
],
"raw": [
"kepler_{level}_package_joules_total{{{query}, job='{metal_job_name}', mode='dynamic'}}",
Expand Down
19 changes: 15 additions & 4 deletions e2e/tools/validator/src/validator/cases/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ class CaseResult(NamedTuple):
refined_query: str

class CasesResult(NamedTuple):
test_cases: List[CaseResult]
mse_test_cases: List[CaseResult]
mape_test_cases: List[CaseResult]
raw_query_results: List[RawCaseResult]


Expand All @@ -53,6 +54,7 @@ def __init__(self, metal_job_name: str, vm_job_name: str, vm: config.VM, prom: c
self.queries = read_json_file(query_path)
self.raw_prom_queries = self.queries["raw"]
self.mse_prom_queries = self.queries["mse"]
self.mape_prom_queries = self.queries["mape"]
# NOTE: the earlier TODO to load the "mape" queries is resolved by the assignment above.
self.vm_query = f"job='{self.vm_job_name}'"
if self.vm_pid != 0:
Expand All @@ -63,14 +65,23 @@ def __init__(self, metal_job_name: str, vm_job_name: str, vm: config.VM, prom: c
self.level = "vm"

def load_test_cases(self) -> CasesResult:
test_cases = []
mse_test_cases = []
for mse_prom_query in self.mse_prom_queries:
test_cases.append(CaseResult(
mse_test_cases.append(CaseResult(
refined_query=mse_prom_query.format(
metal_job_name = self.metal_job_name, vm_job_name = self.vm_job_name,
level=self.level, query=self.query, interval=self.interval, vm_query=self.vm_query
)
))
mape_test_cases = []
for mape_prom_query in self.mape_prom_queries:
mape_test_cases.append(CaseResult(
refined_query=mape_prom_query.format(
metal_job_name = self.metal_job_name, vm_job_name = self.vm_job_name,
level=self.level, query=self.query, interval=self.interval, vm_query=self.vm_query
)
))

raw_test_cases = []
for raw_prom_query in self.raw_prom_queries:
raw_query = raw_prom_query.format(
Expand All @@ -79,4 +90,4 @@ def load_test_cases(self) -> CasesResult:
)
file_name = create_file_name(raw_query)
raw_test_cases.append(RawCaseResult(file_name=file_name, query=raw_query))
return CasesResult(test_cases=test_cases, raw_query_results=raw_test_cases)
return CasesResult(mse_test_cases=mse_test_cases, mape_test_cases=mape_test_cases, raw_query_results=raw_test_cases)
27 changes: 26 additions & 1 deletion e2e/tools/validator/src/validator/cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def stress(cfg: Validator, script_path: str):
metrics_validator = MetricsValidator(cfg.prometheus)
test_case_result = test_cases.load_test_cases()
click.secho("Validation results during stress test:")
mse_test_cases = test_case_result.test_cases
mse_test_cases = test_case_result.mse_test_cases
for test_case in mse_test_cases:

query = test_case.refined_query
Expand All @@ -112,6 +112,31 @@ def stress(cfg: Validator, script_path: str):
report.write("\n")
report.flush()

mape_test_cases = test_case_result.mape_test_cases
for test_case in mape_test_cases:

query = test_case.refined_query

print(f"start_time: {result.start_time}, end_time: {result.end_time} query: {query}")
metrics_res = metrics_validator.compare_metrics(result.start_time,
result.end_time,
query)

click.secho(f"Query Name: {query}", fg='bright_white')
click.secho(f"Error List: {metrics_res.el}", fg='bright_red')
click.secho(f"Average Error: {metrics_res.me}", fg='bright_yellow')

click.secho("---------------------------------------------------", fg="cyan")
report.write("#### Query\n")
report.write(f"```{query}```\n")
report.write("#### Average Error\n")
report.write(f"{metrics_res.me}\n")
report.write("#### Error List\n")
report.write(f"{metrics_res.el}\n")
report.write("\n")
report.flush()


os.makedirs(f"/tmp/validator-{tag}", exist_ok=True)
raw_query_results = test_case_result.raw_query_results
for raw_query_result in raw_query_results:
Expand Down

0 comments on commit 9114e75

Please sign in to comment.