From 6219d21ba5ec19360fa818394f1811c0c74690dd Mon Sep 17 00:00:00 2001 From: Kaiyi Date: Wed, 12 Jun 2024 10:43:17 -0400 Subject: [PATCH] Fix MSE and MAPE Single Queries Fixed MSE Queries and added MAPE Queries. Included Query outputs in cli. Signed-off-by: Kaiyi --- e2e/tools/validator/query.json | 24 +++++++++++------ .../validator/src/validator/cases/__init__.py | 19 ++++++++++--- .../validator/src/validator/cli/__init__.py | 27 ++++++++++++++++++- 3 files changed, 57 insertions(+), 13 deletions(-) diff --git a/e2e/tools/validator/query.json b/e2e/tools/validator/query.json index 19caf667fd..c247aa1beb 100644 --- a/e2e/tools/validator/query.json +++ b/e2e/tools/validator/query.json @@ -1,15 +1,23 @@ { "mse": [ - "avg_over_time((rate(kepler_{level}_core_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}])^2 - rate(kepler_node_core_joules_total{{{vm_query}, mode='dynamic'}}[{interval}])^2)[{interval}:])", - "avg_over_time((rate(kepler_{level}_dram_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}])^2 - rate(kepler_node_dram_joules_total{{{vm_query}, mode='dynamic'}}[{interval}])^2)[{interval}:])", - "avg_over_time((rate(kepler_{level}_package_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}])^2 - rate(kepler_node_package_joules_total{{{vm_query}, mode='dynamic'}}[{interval}])^2)[{interval}:])", - "avg_over_time((rate(kepler_{level}_platform_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}])^2 - rate(kepler_node_platform_joules_total{{{vm_query}, mode='dynamic'}}[{interval}])^2)[{interval}:])", - "avg_over_time((rate(kepler_{level}_core_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}])^2 - rate(kepler_node_core_joules_total{{{vm_query}, mode='idle'}}[{interval}])^2)[{interval}:])", - "avg_over_time((rate(kepler_{level}_dram_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}])^2 - rate(kepler_node_dram_joules_total{{{vm_query}, mode='idle'}}[{interval}])^2)[{interval}:])", - "avg_over_time((rate(kepler_{level}_package_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}])^2 - rate(kepler_node_package_joules_total{{{vm_query}, mode='idle'}}[{interval}])^2)[{interval}:])", - "avg_over_time((rate(kepler_{level}_platform_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}])^2 - rate(kepler_node_platform_joules_total{{{vm_query}, mode='idle'}}[{interval}])^2)[{interval}:])" + "avg_over_time((( rate(kepler_{level}_core_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_core_joules_total{{{vm_query}, mode='dynamic'}}[{interval}]) )^2 )[{interval}:])", + "avg_over_time(((rate(kepler_{level}_dram_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_dram_joules_total{{{vm_query}, mode='dynamic'}}[{interval}]))^2)[{interval}:])", + "avg_over_time(((rate(kepler_{level}_package_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_package_joules_total{{{vm_query}, mode='dynamic'}}[{interval}]))^2 )[{interval}:])", + "avg_over_time(((rate(kepler_{level}_platform_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_platform_joules_total{{{vm_query}, mode='dynamic'}}[{interval}]))^2)[{interval}:])", + "avg_over_time(((rate(kepler_{level}_core_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_core_joules_total{{{vm_query}, mode='idle'}}[{interval}]))^2)[{interval}:])", + "avg_over_time(((rate(kepler_{level}_dram_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_dram_joules_total{{{vm_query}, mode='idle'}}[{interval}]))^2)[{interval}:])", + "avg_over_time(((rate(kepler_{level}_package_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_package_joules_total{{{vm_query}, mode='idle'}}[{interval}]))^2)[{interval}:])", + "avg_over_time(((rate(kepler_{level}_platform_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_platform_joules_total{{{vm_query}, mode='idle'}}[{interval}]))^2) [{interval}:])" ], "mape": [ + "avg_over_time((abs(rate(kepler_{level}_core_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_core_joules_total{{{vm_query}, mode='dynamic'}}[{interval}])) / on() rate(kepler_{level}_core_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}]))[{interval}:])", + "avg_over_time((abs(rate(kepler_{level}_dram_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_dram_joules_total{{{vm_query}, mode='dynamic'}}[{interval}])) / on() rate(kepler_{level}_dram_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}]))[{interval}:])", + "avg_over_time((abs(rate(kepler_{level}_package_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_package_joules_total{{{vm_query}, mode='dynamic'}}[{interval}])) / on() rate(kepler_{level}_package_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}]))[{interval}:])", + "avg_over_time((abs(rate(kepler_{level}_platform_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_platform_joules_total{{{vm_query}, mode='dynamic'}}[{interval}])) / on() rate(kepler_{level}_platform_joules_total{{{query}, mode='dynamic', job='{metal_job_name}'}}[{interval}]))[{interval}:])", + "avg_over_time((abs(rate(kepler_{level}_core_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_core_joules_total{{{vm_query}, mode='idle'}}[{interval}])) / on() rate(kepler_{level}_core_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}]))[{interval}:])", + "avg_over_time((abs(rate(kepler_{level}_dram_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_dram_joules_total{{{vm_query}, mode='idle'}}[{interval}])) / on() rate(kepler_{level}_dram_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}]))[{interval}:])", + "avg_over_time((abs(rate(kepler_{level}_package_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_package_joules_total{{{vm_query}, mode='idle'}}[{interval}])) / on() rate(kepler_{level}_package_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}]))[{interval}:])", + "avg_over_time((abs(rate(kepler_{level}_platform_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}]) - on() rate(kepler_node_platform_joules_total{{{vm_query}, mode='idle'}}[{interval}])) / on() rate(kepler_{level}_platform_joules_total{{{query}, mode='idle', job='{metal_job_name}'}}[{interval}]))[{interval}:])" ], "raw": [ "kepler_{level}_package_joules_total{{{query}, job='{metal_job_name}', mode='dynamic'}}", diff --git a/e2e/tools/validator/src/validator/cases/__init__.py b/e2e/tools/validator/src/validator/cases/__init__.py index 52c54f047d..dcbf7c30fe 100644 --- a/e2e/tools/validator/src/validator/cases/__init__.py +++ b/e2e/tools/validator/src/validator/cases/__init__.py @@ -38,7 +38,8 @@ class CaseResult(NamedTuple): refined_query: str class CasesResult(NamedTuple): - test_cases: List[CaseResult] + mse_test_cases: List[CaseResult] + mape_test_cases: List[CaseResult] raw_query_results: List[RawCaseResult] @@ -53,6 +54,7 @@ def __init__(self, metal_job_name: str, vm_job_name: str, vm: config.VM, prom: c self.queries = read_json_file(query_path) self.raw_prom_queries = self.queries["raw"] self.mse_prom_queries = self.queries["mse"] + self.mape_prom_queries = self.queries["mape"] # TODO self.mape_prom_queries = queries["mape"] self.vm_query = f"job='{self.vm_job_name}'" if self.vm_pid != 0: @@ -63,14 +65,23 @@ def __init__(self, metal_job_name: str, vm_job_name: str, vm: config.VM, prom: c self.level = "vm" def load_test_cases(self) -> CasesResult: - test_cases = [] + mse_test_cases = [] for mse_prom_query in self.mse_prom_queries: - test_cases.append(CaseResult( + mse_test_cases.append(CaseResult( refined_query=mse_prom_query.format( metal_job_name = self.metal_job_name, vm_job_name = self.vm_job_name, level=self.level, query=self.query, interval=self.interval, vm_query=self.vm_query ) )) + mape_test_cases = [] + for mape_prom_query in self.mape_prom_queries: + mape_test_cases.append(CaseResult( + refined_query=mape_prom_query.format( + metal_job_name = self.metal_job_name, vm_job_name = self.vm_job_name, + level=self.level, query=self.query, interval=self.interval, vm_query=self.vm_query + ) + )) + raw_test_cases = [] for raw_prom_query in self.raw_prom_queries: raw_query = raw_prom_query.format( @@ -79,4 +90,4 @@ def load_test_cases(self) -> CasesResult: ) file_name = create_file_name(raw_query) raw_test_cases.append(RawCaseResult(file_name=file_name, query=raw_query)) - return CasesResult(test_cases=test_cases, raw_query_results=raw_test_cases) + return CasesResult(mse_test_cases=mse_test_cases, mape_test_cases=mape_test_cases, raw_query_results=raw_test_cases) diff --git a/e2e/tools/validator/src/validator/cli/__init__.py b/e2e/tools/validator/src/validator/cli/__init__.py index 1200d77ab7..bb203d0b35 100644 --- a/e2e/tools/validator/src/validator/cli/__init__.py +++ b/e2e/tools/validator/src/validator/cli/__init__.py @@ -88,7 +88,7 @@ def stress(cfg: Validator, script_path: str): metrics_validator = MetricsValidator(cfg.prometheus) test_case_result = test_cases.load_test_cases() click.secho("Validation results during stress test:") - mse_test_cases = test_case_result.test_cases + mse_test_cases = test_case_result.mse_test_cases for test_case in mse_test_cases: query = test_case.refined_query @@ -112,6 +112,31 @@ def stress(cfg: Validator, script_path: str): report.write("\n") report.flush() + mape_test_cases = test_case_result.mape_test_cases + for test_case in mape_test_cases: + + query = test_case.refined_query + + print(f"start_time: {result.start_time}, end_time: {result.end_time} query: {query}") + metrics_res = metrics_validator.compare_metrics(result.start_time, + result.end_time, + query) + + click.secho(f"Query Name: {query}", fg='bright_white') + click.secho(f"Error List: {metrics_res.el}", fg='bright_red') + click.secho(f"Average Error: {metrics_res.me}", fg='bright_yellow') + + click.secho("---------------------------------------------------", fg="cyan") + report.write("#### Query\n") + report.write(f"```{query}```\n") + report.write("#### Average Error\n") + report.write(f"{metrics_res.me}\n") + report.write("#### Error List\n") + report.write(f"{metrics_res.el}\n") + report.write("\n") + report.flush() + + os.makedirs(f"/tmp/validator-{tag}", exist_ok=True) raw_query_results = test_case_result.raw_query_results for raw_query_result in raw_query_results: