Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix MSE and MAPE Single Queries #1522

Merged
merged 1 commit into from
Jun 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 16 additions & 8 deletions e2e/tools/validator/query.json
Original file line number Diff line number Diff line change
@@ -1,15 +1,23 @@
{
"mse": [
"avg_over_time((rate(kepler_{level}_core_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}])^2 - rate(kepler_node_core_joules_total{{{vm_query}, mode='dynamic'}}[{interval}])^2)[{interval}:])",
"avg_over_time((rate(kepler_{level}_dram_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}])^2 - rate(kepler_node_dram_joules_total{{{vm_query}, mode='dynamic'}}[{interval}])^2)[{interval}:])",
"avg_over_time((rate(kepler_{level}_package_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}])^2 - rate(kepler_node_package_joules_total{{{vm_query}, mode='dynamic'}}[{interval}])^2)[{interval}:])",
"avg_over_time((rate(kepler_{level}_platform_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}])^2 - rate(kepler_node_platform_joules_total{{{vm_query}, mode='dynamic'}}[{interval}])^2)[{interval}:])",
"avg_over_time((rate(kepler_{level}_core_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}])^2 - rate(kepler_node_core_joules_total{{{vm_query}, mode='idle'}}[{interval}])^2)[{interval}:])",
"avg_over_time((rate(kepler_{level}_dram_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}])^2 - rate(kepler_node_dram_joules_total{{{vm_query}, mode='idle'}}[{interval}])^2)[{interval}:])",
"avg_over_time((rate(kepler_{level}_package_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}])^2 - rate(kepler_node_package_joules_total{{{vm_query}, mode='idle'}}[{interval}])^2)[{interval}:])",
"avg_over_time((rate(kepler_{level}_platform_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}])^2 - rate(kepler_node_platform_joules_total{{{vm_query}, mode='idle'}}[{interval}])^2)[{interval}:])"
"avg_over_time((( rate(kepler_{level}_core_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_core_joules_total{{{vm_query}, mode='dynamic'}}[{interval}]) )^2 )[{interval}:])",
"avg_over_time(((rate(kepler_{level}_dram_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_dram_joules_total{{{vm_query}, mode='dynamic'}}[{interval}]))^2)[{interval}:])",
"avg_over_time(((rate(kepler_{level}_package_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_package_joules_total{{{vm_query}, mode='dynamic'}}[{interval}]))^2 )[{interval}:])",
"avg_over_time(((rate(kepler_{level}_platform_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_platform_joules_total{{{vm_query}, mode='dynamic'}}[{interval}]))^2)[{interval}:])",
"avg_over_time(((rate(kepler_{level}_core_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_core_joules_total{{{vm_query}, mode='idle'}}[{interval}]))^2)[{interval}:])",
"avg_over_time(((rate(kepler_{level}_dram_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_dram_joules_total{{{vm_query}, mode='idle'}}[{interval}]))^2)[{interval}:])",
"avg_over_time(((rate(kepler_{level}_package_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_package_joules_total{{{vm_query}, mode='idle'}}[{interval}]))^2)[{interval}:])",
"avg_over_time(((rate(kepler_{level}_platform_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_platform_joules_total{{{vm_query}, mode='idle'}}[{interval}]))^2) [{interval}:])"
],
"mape": [
"avg_over_time((abs(rate(kepler_{level}_core_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_core_joules_total{{{vm_query}, mode='dynamic'}}[{interval}])) / on() rate(kepler_{level}_core_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}]))[{interval}:])",
"avg_over_time((abs(rate(kepler_{level}_dram_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_dram_joules_total{{{vm_query}, mode='dynamic'}}[{interval}])) / on() rate(kepler_{level}_dram_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}]))[{interval}:])",
"avg_over_time((abs(rate(kepler_{level}_package_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_package_joules_total{{{vm_query}, mode='dynamic'}}[{interval}])) / on() rate(kepler_{level}_package_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}]))[{interval}:])",
"avg_over_time((abs(rate(kepler_{level}_platform_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_platform_joules_total{{{vm_query}, mode='dynamic'}}[{interval}])) / on() rate(kepler_{level}_platform_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}]))[{interval}:])",
"avg_over_time((abs(rate(kepler_{level}_core_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_core_joules_total{{{vm_query}, mode='idle'}}[{interval}])) / on() rate(kepler_{level}_core_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}]))[{interval}:])",
"avg_over_time((abs(rate(kepler_{level}_dram_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_dram_joules_total{{{vm_query}, mode='idle'}}[{interval}])) / on() rate(kepler_{level}_dram_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}]))[{interval}:])",
"avg_over_time((abs(rate(kepler_{level}_package_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_package_joules_total{{{vm_query}, mode='idle'}}[{interval}])) / on() rate(kepler_{level}_package_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}]))[{interval}:])",
"avg_over_time((abs(rate(kepler_{level}_platform_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_platform_joules_total{{{vm_query}, mode='idle'}}[{interval}])) / on() rate(kepler_{level}_platform_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}]))[{interval}:])"
],
"raw": [
"kepler_{level}_package_joules_total{{{query}, job='{metal_job_name}', mode='dynamic'}}",
Expand Down
19 changes: 15 additions & 4 deletions e2e/tools/validator/src/validator/cases/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ class CaseResult(NamedTuple):
refined_query: str

class CasesResult(NamedTuple):
test_cases: List[CaseResult]
mse_test_cases: List[CaseResult]
mape_test_cases: List[CaseResult]
raw_query_results: List[RawCaseResult]


Expand All @@ -53,6 +54,7 @@ def __init__(self, metal_job_name: str, vm_job_name: str, vm: config.VM, prom: c
self.queries = read_json_file(query_path)
self.raw_prom_queries = self.queries["raw"]
self.mse_prom_queries = self.queries["mse"]
self.mape_prom_queries = self.queries["mape"]
# NOTE: MAPE queries are loaded from self.queries["mape"]; the earlier TODO for this is resolved.
self.vm_query = f"job='{self.vm_job_name}'"
if self.vm_pid != 0:
Expand All @@ -63,14 +65,23 @@ def __init__(self, metal_job_name: str, vm_job_name: str, vm: config.VM, prom: c
self.level = "vm"

def load_test_cases(self) -> CasesResult:
test_cases = []
mse_test_cases = []
for mse_prom_query in self.mse_prom_queries:
test_cases.append(CaseResult(
mse_test_cases.append(CaseResult(
refined_query=mse_prom_query.format(
metal_job_name = self.metal_job_name, vm_job_name = self.vm_job_name,
level=self.level, query=self.query, interval=self.interval, vm_query=self.vm_query
)
))
mape_test_cases = []
for mape_prom_query in self.mape_prom_queries:
mape_test_cases.append(CaseResult(
refined_query=mape_prom_query.format(
metal_job_name = self.metal_job_name, vm_job_name = self.vm_job_name,
level=self.level, query=self.query, interval=self.interval, vm_query=self.vm_query
)
))

raw_test_cases = []
for raw_prom_query in self.raw_prom_queries:
raw_query = raw_prom_query.format(
Expand All @@ -79,4 +90,4 @@ def load_test_cases(self) -> CasesResult:
)
file_name = create_file_name(raw_query)
raw_test_cases.append(RawCaseResult(file_name=file_name, query=raw_query))
return CasesResult(test_cases=test_cases, raw_query_results=raw_test_cases)
return CasesResult(mse_test_cases=mse_test_cases, mape_test_cases=mape_test_cases, raw_query_results=raw_test_cases)
27 changes: 26 additions & 1 deletion e2e/tools/validator/src/validator/cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def stress(cfg: Validator, script_path: str):
metrics_validator = MetricsValidator(cfg.prometheus)
test_case_result = test_cases.load_test_cases()
click.secho("Validation results during stress test:")
mse_test_cases = test_case_result.test_cases
mse_test_cases = test_case_result.mse_test_cases
for test_case in mse_test_cases:

query = test_case.refined_query
Expand All @@ -112,6 +112,31 @@ def stress(cfg: Validator, script_path: str):
report.write("\n")
report.flush()

mape_test_cases = test_case_result.mape_test_cases
for test_case in mape_test_cases:

query = test_case.refined_query

print(f"start_time: {result.start_time}, end_time: {result.end_time} query: {query}")
metrics_res = metrics_validator.compare_metrics(result.start_time,
result.end_time,
query)

click.secho(f"Query Name: {query}", fg='bright_white')
click.secho(f"Error List: {metrics_res.el}", fg='bright_red')
click.secho(f"Average Error: {metrics_res.me}", fg='bright_yellow')

click.secho("---------------------------------------------------", fg="cyan")
report.write("#### Query\n")
report.write(f"```{query}```\n")
report.write("#### Average Error\n")
report.write(f"{metrics_res.me}\n")
report.write("#### Error List\n")
report.write(f"{metrics_res.el}\n")
report.write("\n")
report.flush()


os.makedirs(f"/tmp/validator-{tag}", exist_ok=True)
raw_query_results = test_case_result.raw_query_results
for raw_query_result in raw_query_results:
Expand Down
Loading