From 576ab21304d6c7900f538b752f6b023eb0cb2a65 Mon Sep 17 00:00:00 2001 From: Stephanie Labasan Brink Date: Sun, 14 Jan 2024 22:22:05 -0800 Subject: [PATCH] json output for frequency data (#488) - add docs - add variorum_get_frequency_json function interfaces - supports Intel CPUs, AMD CPUs, IBM Power9 CPUs, AMD GPUs, nVidia GPUs --------- Co-authored-by: Kyle Fan --- .../sphinx/api/json_support_functions.rst | 2 + src/examples/CMakeLists.txt | 1 + ...variorum-get-node-frequency-json-example.c | 45 ++++++++++ src/variorum/AMD/config_amd.c | 1 + src/variorum/AMD/epyc.c | 45 ++++++++++ src/variorum/AMD/epyc.h | 4 + src/variorum/AMD_GPU/amd_gpu_power_features.c | 86 ++++++++++++++++++ src/variorum/AMD_GPU/amd_gpu_power_features.h | 6 ++ src/variorum/AMD_GPU/config_amd_gpu.c | 1 + src/variorum/AMD_GPU/instinctGPU.c | 21 +++++ src/variorum/AMD_GPU/instinctGPU.h | 4 + src/variorum/IBM/Power9.c | 61 +++++++++++++ src/variorum/IBM/Power9.h | 4 + src/variorum/IBM/config_ibm.c | 2 + src/variorum/IBM/ibm_power_features.c | 39 ++++++++ src/variorum/IBM/ibm_power_features.h | 6 ++ src/variorum/Intel/Intel_06_2A.c | 14 +++ src/variorum/Intel/Intel_06_2A.h | 4 + src/variorum/Intel/Intel_06_2D.c | 15 ++++ src/variorum/Intel/Intel_06_2D.h | 4 + src/variorum/Intel/Intel_06_3E.c | 14 +++ src/variorum/Intel/Intel_06_3E.h | 4 + src/variorum/Intel/Intel_06_3F.c | 15 ++++ src/variorum/Intel/Intel_06_3F.h | 4 + src/variorum/Intel/Intel_06_4F.c | 14 +++ src/variorum/Intel/Intel_06_4F.h | 4 + src/variorum/Intel/Intel_06_55.c | 14 +++ src/variorum/Intel/Intel_06_55.h | 2 + src/variorum/Intel/Intel_06_9E.c | 14 +++ src/variorum/Intel/Intel_06_9E.h | 2 + src/variorum/Intel/clocks_features.c | 89 +++++++++++++++++++ src/variorum/Intel/clocks_features.h | 15 ++++ src/variorum/Intel/config_intel.c | 12 +++ src/variorum/Nvidia_GPU/Volta.c | 21 +++++ src/variorum/Nvidia_GPU/Volta.h | 2 + src/variorum/Nvidia_GPU/config_nvidia.c | 1 + .../Nvidia_GPU/nvidia_gpu_power_features.c | 37 ++++++++ 
.../Nvidia_GPU/nvidia_gpu_power_features.h | 5 ++ src/variorum/config_architecture.c | 1 + src/variorum/config_architecture.h | 5 ++ src/variorum/variorum.c | 51 +++++++++++ src/variorum/variorum.h | 21 +++++ 42 files changed, 712 insertions(+) create mode 100644 src/examples/variorum-get-node-frequency-json-example.c diff --git a/src/docs/sphinx/api/json_support_functions.rst b/src/docs/sphinx/api/json_support_functions.rst index b2f33ce4b..05c4b6fcd 100644 --- a/src/docs/sphinx/api/json_support_functions.rst +++ b/src/docs/sphinx/api/json_support_functions.rst @@ -17,3 +17,5 @@ Defined in ``variorum/variorum.h``. .. doxygenfunction:: variorum_get_node_power_domain_info_json .. doxygenfunction:: variorum_get_thermals_json + +.. doxygenfunction:: variorum_get_node_frequency_json diff --git a/src/examples/CMakeLists.txt b/src/examples/CMakeLists.txt index 84f896d0c..31d435419 100644 --- a/src/examples/CMakeLists.txt +++ b/src/examples/CMakeLists.txt @@ -16,6 +16,7 @@ set(BASIC_EXAMPLES variorum-cap-socket-power-limit-example variorum-disable-turbo-example variorum-enable-turbo-example + variorum-get-node-frequency-json-example variorum-get-node-power-domain-info-json-example variorum-get-node-power-json-example variorum-get-node-thermal-json-example diff --git a/src/examples/variorum-get-node-frequency-json-example.c b/src/examples/variorum-get-node-frequency-json-example.c new file mode 100644 index 000000000..6a0a455ab --- /dev/null +++ b/src/examples/variorum-get-node-frequency-json-example.c @@ -0,0 +1,45 @@ +// Copyright 2019-2023 Lawrence Livermore National Security, LLC and other +// Variorum Project Developers. See the top-level LICENSE file for details. 
+// +// SPDX-License-Identifier: MIT + +#include +#include +#include + +#include + +int main(int argc, char **argv) +{ + int ret; + + const char *usage = "Usage: %s [-h] [-v]\n"; + int opt; + while ((opt = getopt(argc, argv, "hv")) != -1) + { + switch (opt) + { + case 'h': + printf(usage, argv[0]); + return 0; + case 'v': + printf("%s\n", variorum_get_current_version()); + return 0; + default: + fprintf(stderr, usage, argv[0]); + return -1; + } + } + char *s = NULL; + ret = variorum_get_node_frequency_json(&s); + if (ret != 0) + { + printf("Variorum get frequency json failure!\n"); + free(s); + exit(-1); + } + puts(s); + free(s); + + return ret; +} diff --git a/src/variorum/AMD/config_amd.c b/src/variorum/AMD/config_amd.c index c885c9a15..31a4c7558 100644 --- a/src/variorum/AMD/config_amd.c +++ b/src/variorum/AMD/config_amd.c @@ -77,6 +77,7 @@ int set_amd_func_ptrs(int idx) g_platform[idx].variorum_get_node_power_json = amd_cpu_epyc_get_node_power_json; g_platform[idx].variorum_get_node_power_domain_info_json = amd_cpu_epyc_get_node_power_domain_info_json; + g_platform[idx].variorum_get_frequency_json = amd_cpu_epyc_get_json_boostlimit; break; default: fprintf(stdout, "ESMI not initialized, drivers not found. 
" diff --git a/src/variorum/AMD/epyc.c b/src/variorum/AMD/epyc.c index cbbf095bd..a6601ecc5 100644 --- a/src/variorum/AMD/epyc.c +++ b/src/variorum/AMD/epyc.c @@ -499,6 +499,51 @@ int amd_cpu_epyc_print_boostlimit() return 0; } +int amd_cpu_epyc_get_json_boostlimit(json_t *get_clock_obj_json) +{ + char *val = getenv("VARIORUM_LOG"); + if (val != NULL && atoi(val) == 1) + { + printf("Running %s\n\n", __FUNCTION__); + } + + int socket, core, ret; + uint32_t boostlimit; + + int num_sockets = g_platform[P_AMD_CPU_IDX].num_sockets; + int total_cores = g_platform[P_AMD_CPU_IDX].total_cores; + int cores_per_socket = total_cores / num_sockets; + int current_core = 0; + + for (socket = 0; socket < num_sockets; ++socket) + { + char socket_name[16]; + snprintf(socket_name, 16, "socket_%d", socket); + json_t *socket_obj = json_object_get(get_clock_obj_json, socket_name); + if (socket_obj == NULL) + { + socket_obj = json_object(); + json_object_set_new(get_clock_obj_json, socket_name, socket_obj); + } + + json_t *cpu_obj = json_object(); + json_object_set_new(socket_obj, "CPU", cpu_obj); + + json_t *core_obj = json_object(); + json_object_set_new(cpu_obj, "core", core_obj); + + for (core = 0; core < cores_per_socket; ++core) + { + ret = esmi_core_boostlimit_get(current_core, &boostlimit); + char core_avg_string[24]; + snprintf(core_avg_string, 24, "core_%d_avg_freq_mhz", current_core); + json_object_set_new(core_obj, core_avg_string, json_real(boostlimit)); + current_core++; + } + } + return 0; +} + int amd_cpu_epyc_set_each_core_boostlimit(int boostlimit) { char *val = getenv("VARIORUM_LOG"); diff --git a/src/variorum/AMD/epyc.h b/src/variorum/AMD/epyc.h index 78ebc5f7c..0b61d484b 100644 --- a/src/variorum/AMD/epyc.h +++ b/src/variorum/AMD/epyc.h @@ -54,4 +54,8 @@ int amd_cpu_epyc_get_node_power_domain_info_json( char **get_domain_obj_str ); +int amd_cpu_epyc_get_json_boostlimit( + json_t *get_clock_obj_json +); + #endif diff --git 
a/src/variorum/AMD_GPU/amd_gpu_power_features.c b/src/variorum/AMD_GPU/amd_gpu_power_features.c index 549378fb3..296620f15 100644 --- a/src/variorum/AMD_GPU/amd_gpu_power_features.c +++ b/src/variorum/AMD_GPU/amd_gpu_power_features.c @@ -579,6 +579,92 @@ void get_clocks_data(int chipid, int total_sockets, int verbose, FILE *output) } } +void get_clocks_json(int chipid, int total_sockets, json_t *output) +{ + rsmi_status_t ret; + uint32_t num_devices; + int gpus_per_socket; + char socketID[16]; + + snprintf(socketID, 16, "socket_%d", chipid); + + ret = rsmi_init(0); + if (ret != RSMI_STATUS_SUCCESS) + { + variorum_error_handler("Could not initialize RSMI", + VARIORUM_ERROR_PLATFORM_ENV, + getenv("HOSTNAME"), __FILE__, __FUNCTION__, + __LINE__); + } + + ret = rsmi_num_monitor_devices(&num_devices); + if (ret != RSMI_STATUS_SUCCESS) + { + variorum_error_handler("Could not get number of GPU devices", + VARIORUM_ERROR_PLATFORM_ENV, + getenv("HOSTNAME"), __FILE__, __FUNCTION__, + __LINE__); + } + + gpus_per_socket = num_devices / total_sockets; + + json_t *socket_obj = json_object_get(output, socketID); + if (socket_obj == NULL) + { + socket_obj = json_object(); + json_object_set_new(output, socketID, socket_obj); + } + + json_t *gpu_obj = json_object(); + json_object_set_new(socket_obj, "GPU", gpu_obj); + + for (int i = chipid * gpus_per_socket; + i < (chipid + 1) * gpus_per_socket; i++) + { + rsmi_frequencies_t f_sys, f_mem; + uint32_t f_sys_val, f_mem_val; + + ret = rsmi_dev_gpu_clk_freq_get(i, RSMI_CLK_TYPE_SYS, &f_sys); + if (ret != RSMI_STATUS_SUCCESS) + { + variorum_error_handler("RSMI API was not successful", + VARIORUM_ERROR_PLATFORM_ENV, + getenv("HOSTNAME"), __FILE__, __FUNCTION__, + __LINE__); + } + + ret = rsmi_dev_gpu_clk_freq_get(i, RSMI_CLK_TYPE_MEM, &f_mem); + if (ret != RSMI_STATUS_SUCCESS) + { + variorum_error_handler("RSMI API was not successful", + VARIORUM_ERROR_PLATFORM_ENV, + getenv("HOSTNAME"), __FILE__, __FUNCTION__, + __LINE__); + } + + 
f_sys_val = f_sys.frequency[f_sys.current] / (1000 * 1000); // Convert to MHz + f_mem_val = f_mem.frequency[f_mem.current] / (1000 * 1000); // Convert to MHz + + char gpu_clock_string[32]; + snprintf(gpu_clock_string, 32, "gpu_%d_freq_mhz", i); + + char gpu_mem_clock_string[32]; + snprintf(gpu_mem_clock_string, 32, "gpu_%d_mem_freq_mhz", i); + + json_object_set_new(gpu_obj, gpu_clock_string, json_integer(f_sys_val)); + json_object_set_new(gpu_obj, gpu_mem_clock_string, json_integer(f_mem_val)); + } + + ret = rsmi_shut_down(); + if (ret != RSMI_STATUS_SUCCESS) + { + variorum_error_handler("Could not shutdown RSMI", + VARIORUM_ERROR_PLATFORM_ENV, + getenv("HOSTNAME"), __FILE__, __FUNCTION__, + __LINE__); + } +} + void get_gpu_utilization_data(int chipid, int total_sockets, int verbose, FILE *output) { diff --git a/src/variorum/AMD_GPU/amd_gpu_power_features.h b/src/variorum/AMD_GPU/amd_gpu_power_features.h index fce62f721..5da912910 100644 --- a/src/variorum/AMD_GPU/amd_gpu_power_features.h +++ b/src/variorum/AMD_GPU/amd_gpu_power_features.h @@ -59,4 +59,10 @@ void get_thermals_json( json_t *output ); +void get_clocks_json( + int chipid, + int total_sockets, + json_t *output +); + #endif diff --git a/src/variorum/AMD_GPU/config_amd_gpu.c b/src/variorum/AMD_GPU/config_amd_gpu.c index 8548a784c..d31389398 100644 --- a/src/variorum/AMD_GPU/config_amd_gpu.c +++ b/src/variorum/AMD_GPU/config_amd_gpu.c @@ -34,6 +34,7 @@ int set_amd_gpu_func_ptrs(int idx) g_platform[idx].variorum_print_gpu_utilization = amd_gpu_instinct_get_gpu_utilization; g_platform[idx].variorum_get_thermals_json = amd_gpu_instinct_get_thermals_json; + g_platform[idx].variorum_get_frequency_json = amd_gpu_instinct_get_clocks_json; /* Initialize control interfaces */ g_platform[idx].variorum_cap_each_gpu_power_limit = amd_gpu_instinct_cap_each_gpu_power_limit; diff --git a/src/variorum/AMD_GPU/instinctGPU.c b/src/variorum/AMD_GPU/instinctGPU.c index 403f3fa43..960ebd30f 100644 --- 
a/src/variorum/AMD_GPU/instinctGPU.c +++ b/src/variorum/AMD_GPU/instinctGPU.c @@ -120,6 +120,27 @@ int amd_gpu_instinct_get_clocks(int verbose) return 0; } +int amd_gpu_instinct_get_clocks_json(json_t *get_clock_obj_json) +{ + char *val = getenv("VARIORUM_LOG"); + if (val != NULL && atoi(val) == 1) + { + printf("Running %s\n", __FUNCTION__); + } + + unsigned iter = 0; + unsigned nsockets; + + variorum_get_topology(&nsockets, NULL, NULL, P_AMD_GPU_IDX); + + for (iter = 0; iter < nsockets; iter++) + { + get_clocks_json(iter, nsockets, get_clock_obj_json); + } + + return 0; +} + int amd_gpu_instinct_get_gpu_utilization(int verbose) { char *val = getenv("VARIORUM_LOG"); diff --git a/src/variorum/AMD_GPU/instinctGPU.h b/src/variorum/AMD_GPU/instinctGPU.h index 6769d3305..cfb8e92ff 100644 --- a/src/variorum/AMD_GPU/instinctGPU.h +++ b/src/variorum/AMD_GPU/instinctGPU.h @@ -37,4 +37,8 @@ int amd_gpu_instinct_get_thermals_json( json_t *get_thermal_obj ); +int amd_gpu_instinct_get_clocks_json( + json_t *get_clocks_obj_json +); + #endif diff --git a/src/variorum/IBM/Power9.c b/src/variorum/IBM/Power9.c index a2528f874..3987ac24d 100644 --- a/src/variorum/IBM/Power9.c +++ b/src/variorum/IBM/Power9.c @@ -571,3 +571,64 @@ int ibm_cpu_p9_get_node_power_domain_info_json(char **get_domain_obj_str) return 0; } + +int ibm_cpu_p9_get_node_frequency_json(json_t *get_frequency_obj_json) +{ + char *val = getenv("VARIORUM_LOG"); + if (val != NULL && atoi(val) == 1) + { + printf("Running %s\n", __FUNCTION__); + } + + void *buf; + int fd; + int rc; + int bytes; + unsigned iter = 0; + unsigned nsockets = 0; + +#ifdef VARIORUM_WITH_IBM_CPU + variorum_get_topology(&nsockets, NULL, NULL, P_IBM_CPU_IDX); +#endif + + fd = open("/sys/firmware/opal/exports/occ_inband_sensors", O_RDONLY); + if (fd < 0) + { + printf("Failed to open occ_inband_sensors file\n"); + return -1; + } + + for (iter = 0; iter < nsockets; iter++) + { + lseek(fd, iter * OCC_SENSOR_DATA_BLOCK_SIZE, SEEK_SET); + + buf = 
malloc(OCC_SENSOR_DATA_BLOCK_SIZE); + if (!buf) + { + printf("Failed to allocate\n"); + return -1; + } + + for (rc = bytes = 0; bytes < OCC_SENSOR_DATA_BLOCK_SIZE; bytes += rc) + { + rc = read(fd, buf + bytes, OCC_SENSOR_DATA_BLOCK_SIZE - bytes); + + if (!rc || rc < 0) + { + break; + } + } + + if (bytes != OCC_SENSOR_DATA_BLOCK_SIZE) + { + printf("Failed to read data\n"); + free(buf); + return -1; + } + json_get_frequency_sensors(iter, get_frequency_obj_json, buf); + free(buf); + } + + close(fd); + return 0; +} diff --git a/src/variorum/IBM/Power9.h b/src/variorum/IBM/Power9.h index e8fb6cd6f..effd0e647 100644 --- a/src/variorum/IBM/Power9.h +++ b/src/variorum/IBM/Power9.h @@ -44,4 +44,8 @@ int ibm_cpu_p9_get_node_thermal_json( json_t *get_thermal_obj ); +int ibm_cpu_p9_get_node_frequency_json( + json_t *get_frequency_obj_json +); + #endif diff --git a/src/variorum/IBM/config_ibm.c b/src/variorum/IBM/config_ibm.c index dea1a29b0..a4e499f63 100644 --- a/src/variorum/IBM/config_ibm.c +++ b/src/variorum/IBM/config_ibm.c @@ -37,6 +37,8 @@ int set_ibm_func_ptrs(int idx) g_platform[idx].variorum_get_node_power_domain_info_json = ibm_cpu_p9_get_node_power_domain_info_json; g_platform[idx].variorum_get_thermals_json = ibm_cpu_p9_get_node_thermal_json; + g_platform[idx].variorum_get_frequency_json = + ibm_cpu_p9_get_node_frequency_json; } else { diff --git a/src/variorum/IBM/ibm_power_features.c b/src/variorum/IBM/ibm_power_features.c index 6cef1255d..0ba5e781b 100644 --- a/src/variorum/IBM/ibm_power_features.c +++ b/src/variorum/IBM/ibm_power_features.c @@ -517,3 +517,42 @@ void json_get_thermal_sensors(int chipid, json_t *node_obj, const void *buf) } } } + +void json_get_frequency_sensors(int chipid, json_t *node_obj, const void *buf) +{ + struct occ_sensor_data_header *hb; + struct occ_sensor_name *md; + int i = 0; + + hb = (struct occ_sensor_data_header *)(uint64_t)buf; + md = (struct occ_sensor_name *)((uint64_t)hb + be32toh(hb->names_offset)); + + char socketID[12]; + 
snprintf(socketID, 12, "socket_%d", chipid); + + json_t *socket_obj = json_object_get(node_obj, socketID); + if (socket_obj == NULL) + { + socket_obj = json_object(); + json_object_set_new(node_obj, socketID, socket_obj); + } + + json_t *cpu_obj = json_object(); + json_object_set_new(socket_obj, "CPU", cpu_obj); + + for (i = 0; i < be16toh(hb->nr_sensors); i++) + { + uint32_t offset = be32toh(md[i].reading_offset); + uint64_t sample = 0; + + if (md[i].structure_type == OCC_SENSOR_READING_FULL) + { + sample = read_sensor(hb, offset, SENSOR_SAMPLE); + } + + if (strcmp(md[i].name, "FREQA") == 0) + { + json_object_set_new(cpu_obj, "cpu_avg_freq_mhz", json_integer(sample)); + } + } +} diff --git a/src/variorum/IBM/ibm_power_features.h b/src/variorum/IBM/ibm_power_features.h index 884966a1f..1f414d99e 100644 --- a/src/variorum/IBM/ibm_power_features.h +++ b/src/variorum/IBM/ibm_power_features.h @@ -171,4 +171,10 @@ void json_get_thermal_sensors( const void *buf ); +void json_get_frequency_sensors( + int chipid, + json_t *node_obj, + const void *buf +); + #endif diff --git a/src/variorum/Intel/Intel_06_2A.c b/src/variorum/Intel/Intel_06_2A.c index e060f080a..c455c1d52 100644 --- a/src/variorum/Intel/Intel_06_2A.c +++ b/src/variorum/Intel/Intel_06_2A.c @@ -317,6 +317,20 @@ int intel_cpu_fm_06_2a_get_clocks(int long_ver) return 0; } +int intel_cpu_fm_06_2a_get_clocks_json(json_t *get_clock_obj_json) +{ + char *val = getenv("VARIORUM_LOG"); + if (val != NULL && atoi(val) == 1) + { + printf("Running %s\n", __FUNCTION__); + } + + get_clocks_data_json(get_clock_obj_json, msrs.ia32_aperf, msrs.ia32_mperf, + msrs.ia32_time_stamp_counter, msrs.ia32_perf_status, msrs.msr_platform_info, + CORE); + return 0; +} + int intel_cpu_fm_06_2a_get_power(int long_ver) { char *val = getenv("VARIORUM_LOG"); diff --git a/src/variorum/Intel/Intel_06_2A.h b/src/variorum/Intel/Intel_06_2A.h index 804a57c6d..c162c7730 100644 --- a/src/variorum/Intel/Intel_06_2A.h +++ 
b/src/variorum/Intel/Intel_06_2A.h @@ -146,4 +146,8 @@ int intel_cpu_fm_06_2a_get_thermals_json( json_t *get_thermal_obj ); +int intel_cpu_fm_06_2a_get_clocks_json( + json_t *get_clock_obj_json +); + #endif diff --git a/src/variorum/Intel/Intel_06_2D.c b/src/variorum/Intel/Intel_06_2D.c index 19168e78b..bcbc80e00 100644 --- a/src/variorum/Intel/Intel_06_2D.c +++ b/src/variorum/Intel/Intel_06_2D.c @@ -320,6 +320,21 @@ int intel_cpu_fm_06_2d_get_clocks(int long_ver) return 0; } +int intel_cpu_fm_06_2d_get_clocks_json(json_t *get_clock_obj_json) +{ + char *val = getenv("VARIORUM_LOG"); + if (val != NULL && atoi(val) == 1) + { + printf("Running %s\n", __FUNCTION__); + } + + get_clocks_data_json(get_clock_obj_json, msrs.ia32_aperf, msrs.ia32_mperf, + msrs.ia32_time_stamp_counter, msrs.ia32_perf_status, msrs.msr_platform_info, + CORE); + return 0; + +} + int intel_cpu_fm_06_2d_get_power(int long_ver) { char *val = getenv("VARIORUM_LOG"); diff --git a/src/variorum/Intel/Intel_06_2D.h b/src/variorum/Intel/Intel_06_2D.h index 019bf9455..667c8e838 100644 --- a/src/variorum/Intel/Intel_06_2D.h +++ b/src/variorum/Intel/Intel_06_2D.h @@ -149,4 +149,8 @@ int intel_cpu_fm_06_2d_get_thermals_json( json_t *get_thermal_obj ); +int intel_cpu_fm_06_2d_get_clocks_json( + json_t *get_clock_obj_json +); + #endif diff --git a/src/variorum/Intel/Intel_06_3E.c b/src/variorum/Intel/Intel_06_3E.c index 468a92163..301819307 100644 --- a/src/variorum/Intel/Intel_06_3E.c +++ b/src/variorum/Intel/Intel_06_3E.c @@ -348,6 +348,20 @@ int intel_cpu_fm_06_3e_get_clocks(int long_ver) return 0; } +int intel_cpu_fm_06_3e_get_clocks_json(json_t *get_clock_obj_json) +{ + char *val = getenv("VARIORUM_LOG"); + if (val != NULL && atoi(val) == 1) + { + printf("Running %s\n", __FUNCTION__); + } + + get_clocks_data_json(get_clock_obj_json, msrs.ia32_aperf, msrs.ia32_mperf, + msrs.ia32_time_stamp_counter, msrs.ia32_perf_status, msrs.msr_platform_info, + CORE); + return 0; +} + int intel_cpu_fm_06_3e_get_power(int 
long_ver) { char *val = getenv("VARIORUM_LOG"); diff --git a/src/variorum/Intel/Intel_06_3E.h b/src/variorum/Intel/Intel_06_3E.h index f0af5020a..eeb86df4c 100644 --- a/src/variorum/Intel/Intel_06_3E.h +++ b/src/variorum/Intel/Intel_06_3E.h @@ -149,4 +149,8 @@ int intel_cpu_fm_06_3e_get_thermals_json( json_t *get_thermal_obj ); +int intel_cpu_fm_06_3e_get_clocks_json( + json_t *get_clock_obj_json +); + #endif diff --git a/src/variorum/Intel/Intel_06_3F.c b/src/variorum/Intel/Intel_06_3F.c index acc35b1af..a31be6ced 100644 --- a/src/variorum/Intel/Intel_06_3F.c +++ b/src/variorum/Intel/Intel_06_3F.c @@ -341,6 +341,21 @@ int intel_cpu_fm_06_3f_get_clocks(int long_ver) return 0; } +int intel_cpu_fm_06_3f_get_clocks_json(json_t *get_clock_obj_json) +{ + char *val = getenv("VARIORUM_LOG"); + if (val != NULL && atoi(val) == 1) + { + printf("Running %s\n", __FUNCTION__); + } + + get_clocks_data_json(get_clock_obj_json, msrs.ia32_aperf, msrs.ia32_mperf, + msrs.ia32_time_stamp_counter, msrs.ia32_perf_status, msrs.msr_platform_info, + CORE); + + return 0; +} + int intel_cpu_fm_06_3f_get_power(int long_ver) { char *val = getenv("VARIORUM_LOG"); diff --git a/src/variorum/Intel/Intel_06_3F.h b/src/variorum/Intel/Intel_06_3F.h index 3225a6764..d9320e864 100644 --- a/src/variorum/Intel/Intel_06_3F.h +++ b/src/variorum/Intel/Intel_06_3F.h @@ -151,4 +151,8 @@ int intel_cpu_fm_06_3f_get_thermals_json( json_t *get_thermal_obj ); +int intel_cpu_fm_06_3f_get_clocks_json( + json_t *get_clock_obj_json +); + #endif diff --git a/src/variorum/Intel/Intel_06_4F.c b/src/variorum/Intel/Intel_06_4F.c index 887d5e12d..ea08b2932 100644 --- a/src/variorum/Intel/Intel_06_4F.c +++ b/src/variorum/Intel/Intel_06_4F.c @@ -356,6 +356,20 @@ int intel_cpu_fm_06_4f_get_clocks(int long_ver) return 0; } +int intel_cpu_fm_06_4f_get_clocks_json(json_t *get_clock_obj_json) +{ + char *val = getenv("VARIORUM_LOG"); + if (val != NULL && atoi(val) == 1) + { + printf("Running %s\n", __FUNCTION__); + } + + 
get_clocks_data_json(get_clock_obj_json, msrs.ia32_aperf, msrs.ia32_mperf, + msrs.ia32_time_stamp_counter, msrs.ia32_perf_status, msrs.msr_platform_info, + CORE); + return 0; +} + int intel_cpu_fm_06_4f_get_power(int long_ver) { char *val = getenv("VARIORUM_LOG"); diff --git a/src/variorum/Intel/Intel_06_4F.h b/src/variorum/Intel/Intel_06_4F.h index bab77062c..694dcc219 100644 --- a/src/variorum/Intel/Intel_06_4F.h +++ b/src/variorum/Intel/Intel_06_4F.h @@ -151,4 +151,8 @@ int intel_cpu_fm_06_4f_get_thermals_json( json_t *get_thermal_obj ); +int intel_cpu_fm_06_4f_get_clocks_json( + json_t *get_clock_obj_json +); + #endif diff --git a/src/variorum/Intel/Intel_06_55.c b/src/variorum/Intel/Intel_06_55.c index b9dce0e8d..c0ff48ad1 100644 --- a/src/variorum/Intel/Intel_06_55.c +++ b/src/variorum/Intel/Intel_06_55.c @@ -322,6 +322,20 @@ int intel_cpu_fm_06_55_get_clocks(int long_ver) return 0; } +int intel_cpu_fm_06_55_get_clocks_json(json_t *get_clock_obj_json) +{ + char *val = getenv("VARIORUM_LOG"); + if (val != NULL && atoi(val) == 1) + { + printf("Running %s\n", __FUNCTION__); + } + + get_clocks_data_json(get_clock_obj_json, msrs.ia32_aperf, msrs.ia32_mperf, + msrs.ia32_time_stamp_counter, msrs.ia32_perf_status, msrs.msr_platform_info, + CORE); + return 0; +} + int intel_cpu_fm_06_55_get_power(int long_ver) { char *val = getenv("VARIORUM_LOG"); diff --git a/src/variorum/Intel/Intel_06_55.h b/src/variorum/Intel/Intel_06_55.h index 67f80b340..02b73e1a3 100644 --- a/src/variorum/Intel/Intel_06_55.h +++ b/src/variorum/Intel/Intel_06_55.h @@ -114,4 +114,6 @@ int intel_cpu_fm_06_55_get_frequencies(void); int intel_cpu_fm_06_55_get_thermals_json(json_t *get_thermal_obj); +int intel_cpu_fm_06_55_get_clocks_json(json_t *get_clock_obj_json); + #endif diff --git a/src/variorum/Intel/Intel_06_9E.c b/src/variorum/Intel/Intel_06_9E.c index 5734405a7..2348cb767 100644 --- a/src/variorum/Intel/Intel_06_9E.c +++ b/src/variorum/Intel/Intel_06_9E.c @@ -432,6 +432,20 @@ int 
intel_cpu_fm_06_9e_get_thermals_json(json_t *get_thermal_obj) return 0; } +int intel_cpu_fm_06_9e_get_clocks_json(json_t *get_clock_obj_json) +{ + char *val = getenv("VARIORUM_LOG"); + if (val != NULL && atoi(val) == 1) + { + printf("Running %s\n", __FUNCTION__); + } + + get_clocks_data_json(get_clock_obj_json, msrs.ia32_aperf, msrs.ia32_mperf, + msrs.ia32_time_stamp_counter, msrs.ia32_perf_status, msrs.msr_platform_info, + CORE); + return 0; +} + int intel_cpu_fm_06_9e_cap_best_effort_node_power_limit(int node_limit) { char *val = getenv("VARIORUM_LOG"); diff --git a/src/variorum/Intel/Intel_06_9E.h b/src/variorum/Intel/Intel_06_9E.h index 24bea061a..dcaa810a5 100644 --- a/src/variorum/Intel/Intel_06_9E.h +++ b/src/variorum/Intel/Intel_06_9E.h @@ -112,4 +112,6 @@ int intel_cpu_fm_06_9e_get_frequencies(void); int intel_cpu_fm_06_9e_get_thermals_json(json_t *get_thermal_obj); +int intel_cpu_fm_06_9e_get_clocks_json(json_t *get_clock_obj_json); + #endif diff --git a/src/variorum/Intel/clocks_features.c b/src/variorum/Intel/clocks_features.c index fff09d75f..e487ce059 100644 --- a/src/variorum/Intel/clocks_features.c +++ b/src/variorum/Intel/clocks_features.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -300,6 +301,94 @@ int print_verbose_clocks_data(FILE *writedest, off_t msr_aperf, off_t msr_mperf, return 0; } +json_t *make_socket_obj(json_t *node_obj, int socket_index) +{ + char socket_name[16]; + snprintf(socket_name, 16, "socket_%d", socket_index); + json_t *socket_obj = json_object_get(node_obj, socket_name); + if (socket_obj == NULL) + { + socket_obj = json_object(); + json_object_set_new(node_obj, socket_name, socket_obj); + } + return socket_obj; +} + +int get_clocks_data_json(json_t *output, off_t msr_aperf, off_t msr_mperf, + off_t msr_tsc, off_t msr_perf_status, off_t msr_platform_info, + enum ctl_domains_e control_domains) +{ + static struct clocks_data *cd; + static struct perf_data *pd; + unsigned i, j, k; + int idx; + 
unsigned nsockets, ncores, nthreads; + int max_non_turbo_ratio; + int err; + float socket_average_freq = 0.0; + + err = get_max_non_turbo_ratio(msr_platform_info, &max_non_turbo_ratio); + if (err) + { + variorum_error_handler("Error retrieving max non-turbo ratio", + VARIORUM_ERROR_FUNCTION, getenv("HOSTNAME"), + __FILE__, __FUNCTION__, __LINE__); + return -1; + } + + variorum_get_topology(&nsockets, &ncores, &nthreads, P_INTEL_CPU_IDX); + + clocks_storage(&cd, msr_aperf, msr_mperf, msr_tsc); + perf_storage(&pd, msr_perf_status); + read_batch(CLOCKS_DATA); + read_batch(PERF_DATA); + + //use array to store core frequencies; + double core_frequencies[ncores]; + memset(core_frequencies, 0, ncores * sizeof(double)); + + switch (control_domains) + { + case CORE: + for (i = 0; i < nsockets; i++) + { + socket_average_freq = 0.0; + json_t *socket_obj = make_socket_obj(output, i); + json_t *cpu_obj = json_object(); + json_object_set_new(socket_obj, "CPU", cpu_obj); + json_t *core_obj = json_object(); + json_object_set_new(cpu_obj, "core", core_obj); + + for (j = 0; j < ncores / nsockets; j++) + { + int core_freq_index = i * (ncores / nsockets) + j; + for (k = 0; k < nthreads / ncores; k++) + { + idx = (k * nsockets * (ncores / nsockets)) + (i * (ncores / nsockets)) + j; + core_frequencies[core_freq_index] += (max_non_turbo_ratio * (*cd->aperf[idx] / + (double)(*cd->mperf[idx]))); + } + core_frequencies[core_freq_index] /= (nthreads / ncores); + socket_average_freq += core_frequencies[core_freq_index]; + + char core_avg_string[24]; + snprintf(core_avg_string, 24, "core_%d_avg_freq_mhz", j); + + json_object_set_new(core_obj, core_avg_string, + json_real(core_frequencies[core_freq_index])); + } + socket_average_freq /= (ncores / nsockets); + json_object_set_new(cpu_obj, "cpu_avg_freq_mhz", + json_real(socket_average_freq)); + } + break; + default: + fprintf(stderr, "Not a valid control domain.\n"); + break; + } + return 0; +} + //void print_verbose_clocks_data_socket(FILE *writedest, off_t 
msr_aperf, off_t msr_mperf, off_t msr_tsc, off_t msr_perf_status, off_t msr_platform_info) //{ // static struct clocks_data *cd; diff --git a/src/variorum/Intel/clocks_features.h b/src/variorum/Intel/clocks_features.h index b0267209f..ab5834046 100644 --- a/src/variorum/Intel/clocks_features.h +++ b/src/variorum/Intel/clocks_features.h @@ -150,6 +150,21 @@ void get_available_frequencies_skx( off_t *msr_config_tdp_l2 ); +int get_clocks_data_json( + json_t *output, + off_t msr_aperf, + off_t msr_mperf, + off_t msr_tsc, + off_t msr_perf_status, + off_t msr_platform_info, + enum ctl_domains_e control_domain +); + +json_t *make_socket_obj( + json_t *node_obj, + int socket_index +); + ///// @brief Print current p-state. ///// ///// @param [in] writedest File stream where output will be written to. diff --git a/src/variorum/Intel/config_intel.c b/src/variorum/Intel/config_intel.c index be491b64f..e21a34a1b 100644 --- a/src/variorum/Intel/config_intel.c +++ b/src/variorum/Intel/config_intel.c @@ -97,6 +97,8 @@ int set_intel_func_ptrs(int idx) intel_cpu_fm_06_2a_get_frequencies; g_platform[idx].variorum_get_thermals_json = intel_cpu_fm_06_2a_get_thermals_json; + g_platform[idx].variorum_get_frequency_json = + intel_cpu_fm_06_2a_get_clocks_json; } else if (*g_platform[idx].arch_id == FM_06_2D) { @@ -126,6 +128,8 @@ int set_intel_func_ptrs(int idx) intel_cpu_fm_06_2d_get_frequencies; g_platform[idx].variorum_get_thermals_json = intel_cpu_fm_06_2d_get_thermals_json; + g_platform[idx].variorum_get_frequency_json = + intel_cpu_fm_06_2d_get_clocks_json; } // Ivy Bridge 06_3E else if (*g_platform[idx].arch_id == FM_06_3E) @@ -156,6 +160,8 @@ int set_intel_func_ptrs(int idx) intel_cpu_fm_06_3e_get_frequencies; g_platform[idx].variorum_get_thermals_json = intel_cpu_fm_06_3e_get_thermals_json; + g_platform[idx].variorum_get_frequency_json = + intel_cpu_fm_06_3e_get_clocks_json; } // Haswell 06_3F else if (*g_platform[idx].arch_id == FM_06_3F) @@ -216,6 +222,8 @@ int 
set_intel_func_ptrs(int idx) intel_cpu_fm_06_4f_get_frequencies; g_platform[idx].variorum_get_thermals_json = intel_cpu_fm_06_4f_get_thermals_json; + g_platform[idx].variorum_get_frequency_json = + intel_cpu_fm_06_4f_get_clocks_json; } // Skylake 06_55 else if (*g_platform[idx].arch_id == FM_06_55) @@ -246,6 +254,8 @@ int set_intel_func_ptrs(int idx) intel_cpu_fm_06_55_get_frequencies; g_platform[idx].variorum_get_thermals_json = intel_cpu_fm_06_55_get_thermals_json; + g_platform[idx].variorum_get_frequency_json = + intel_cpu_fm_06_55_get_clocks_json; } // Kaby Lake 06_9E else if (*g_platform[idx].arch_id == FM_06_9E) @@ -274,6 +284,8 @@ int set_intel_func_ptrs(int idx) intel_cpu_fm_06_9e_get_frequencies; g_platform[idx].variorum_get_thermals_json = intel_cpu_fm_06_9e_get_thermals_json; + g_platform[idx].variorum_get_frequency_json = + intel_cpu_fm_06_9e_get_clocks_json; } // Ice Lake 06_6A else if (*g_platform[idx].arch_id == FM_06_6A) diff --git a/src/variorum/Nvidia_GPU/Volta.c b/src/variorum/Nvidia_GPU/Volta.c index dbfa352e8..83f869c54 100644 --- a/src/variorum/Nvidia_GPU/Volta.c +++ b/src/variorum/Nvidia_GPU/Volta.c @@ -91,6 +91,27 @@ int volta_get_clocks(int long_ver) return 0; } +int volta_get_clocks_json(json_t *get_clock_obj_json) +{ + char *val = getenv("VARIORUM_LOG"); + if (val != NULL && atoi(val) == 1) + { + printf("Running %s\n", __FUNCTION__); + } + + unsigned iter = 0; + unsigned nsockets = 0; +#ifdef VARIORUM_WITH_NVIDIA_GPU + variorum_get_topology(&nsockets, NULL, NULL, P_NVIDIA_GPU_IDX); +#endif + + for (iter = 0; iter < nsockets; iter++) + { + nvidia_gpu_get_clocks_json(iter, get_clock_obj_json); + } + return 0; +} + int volta_get_power_limits(int long_ver) { char *val = getenv("VARIORUM_LOG"); diff --git a/src/variorum/Nvidia_GPU/Volta.h b/src/variorum/Nvidia_GPU/Volta.h index 1e4146985..676d80214 100644 --- a/src/variorum/Nvidia_GPU/Volta.h +++ b/src/variorum/Nvidia_GPU/Volta.h @@ -36,4 +36,6 @@ int volta_get_thermals_json( json_t 
*get_thermal_obj ); +int volta_get_clocks_json(json_t *get_clock_obj_json); + #endif diff --git a/src/variorum/Nvidia_GPU/config_nvidia.c b/src/variorum/Nvidia_GPU/config_nvidia.c index 03b13a357..8968bdea4 100644 --- a/src/variorum/Nvidia_GPU/config_nvidia.c +++ b/src/variorum/Nvidia_GPU/config_nvidia.c @@ -31,6 +31,7 @@ int set_nvidia_func_ptrs(int idx) g_platform[idx].variorum_print_power_limit = volta_get_power_limits; g_platform[idx].variorum_print_gpu_utilization = volta_get_gpu_utilization; g_platform[idx].variorum_get_thermals_json = volta_get_thermals_json; + g_platform[idx].variorum_get_frequency_json = volta_get_clocks_json; /* Initialize control interfaces */ g_platform[idx].variorum_cap_each_gpu_power_limit = volta_cap_each_gpu_power_limit; diff --git a/src/variorum/Nvidia_GPU/nvidia_gpu_power_features.c b/src/variorum/Nvidia_GPU/nvidia_gpu_power_features.c index edc72eba9..abf63cacb 100644 --- a/src/variorum/Nvidia_GPU/nvidia_gpu_power_features.c +++ b/src/variorum/Nvidia_GPU/nvidia_gpu_power_features.c @@ -323,6 +323,43 @@ void nvidia_gpu_get_clocks_data(int chipid, int verbose, FILE *output) } } +void nvidia_gpu_get_clocks_json(int chipid, json_t *output) +{ + unsigned int gpu_clock; + unsigned int mem_clock; + int d; + + char socket_id[16]; + snprintf(socket_id, 16, "socket_%d", chipid); + + json_t *socket_obj = json_object_get(output, socket_id); + if (socket_obj == NULL) + { + socket_obj = json_object(); + json_object_set_new(output, socket_id, socket_obj); + } + + json_t *gpu_obj = json_object(); + json_object_set_new(socket_obj, "GPU", gpu_obj); + + for (d = chipid * (int)m_gpus_per_socket; + d < (chipid + 1) * (int)m_gpus_per_socket; ++d) + { + nvmlDeviceGetClock(m_unit_devices_file_desc[d], NVML_CLOCK_SM, + NVML_CLOCK_ID_CURRENT, &gpu_clock); + nvmlDeviceGetClock(m_unit_devices_file_desc[d], NVML_CLOCK_MEM, + NVML_CLOCK_ID_CURRENT, &mem_clock); + char gpu_sm_clock_str[32]; + snprintf(gpu_sm_clock_str, 32, "gpu_%d_freq_mhz", d); + + char 
gpu_mem_clock_str[32]; + snprintf(gpu_mem_clock_str, 32, "gpu_%d_mem_freq_mhz", d); + + json_object_set_new(gpu_obj, gpu_sm_clock_str, json_integer(gpu_clock)); + json_object_set_new(gpu_obj, gpu_mem_clock_str, json_integer(mem_clock)); + } +} + void nvidia_gpu_get_gpu_utilization_data(int chipid, int verbose, FILE *output) { nvmlUtilization_t util; diff --git a/src/variorum/Nvidia_GPU/nvidia_gpu_power_features.h b/src/variorum/Nvidia_GPU/nvidia_gpu_power_features.h index 270fc6693..69f1f9abb 100644 --- a/src/variorum/Nvidia_GPU/nvidia_gpu_power_features.h +++ b/src/variorum/Nvidia_GPU/nvidia_gpu_power_features.h @@ -64,4 +64,9 @@ void nvidia_gpu_get_thermal_json( json_t *output ); +void nvidia_gpu_get_clocks_json( + int chipid, + json_t *output +); + #endif diff --git a/src/variorum/config_architecture.c b/src/variorum/config_architecture.c index 329094dc6..f551d3598 100644 --- a/src/variorum/config_architecture.c +++ b/src/variorum/config_architecture.c @@ -359,6 +359,7 @@ void variorum_init_func_ptrs() g_platform[i].variorum_get_node_power_domain_info_json = NULL; g_platform[i].variorum_print_energy = NULL; g_platform[i].variorum_get_thermals_json = NULL; + g_platform[i].variorum_get_frequency_json = NULL; } } diff --git a/src/variorum/config_architecture.h b/src/variorum/config_architecture.h index d4f61527a..33bada1f6 100644 --- a/src/variorum/config_architecture.h +++ b/src/variorum/config_architecture.h @@ -247,6 +247,11 @@ struct platform /// @return Error code. int (*variorum_get_node_power_domain_info_json)(char **get_domain_obj_str); + /// @brief Function pointer to get JSON object for frequency information + /// + /// @return Error code. + int (*variorum_get_frequency_json)(json_t *get_clock_obj_json); + /// @brief Function pointer to get JSON object for thermal information /// /// @return Error code. 
diff --git a/src/variorum/variorum.c b/src/variorum/variorum.c index 560bce917..053aa1378 100644 --- a/src/variorum/variorum.c +++ b/src/variorum/variorum.c @@ -1202,6 +1202,57 @@ int variorum_get_thermals_json(char **get_thermal_obj_str) return err; } +int variorum_get_node_frequency_json(char **get_frequency_obj_str) +{ + int err = 0; + int i; + char hostname[1024]; + uint64_t ts; + struct timeval tv; + gethostname(hostname, 1024); + gettimeofday(&tv, NULL); + + err = variorum_enter(__FILE__, __FUNCTION__, __LINE__); + if (err) + { + return -1; + } + + json_t *get_frequency_obj = json_object(); + json_t *node_obj = json_object(); + json_object_set_new(get_frequency_obj, hostname, node_obj); + + ts = tv.tv_sec * (uint64_t)1000000 + tv.tv_usec; + json_object_set_new(node_obj, "timestamp", json_integer(ts)); + + for (i = 0; i < P_NUM_PLATFORMS; i++) + { + if (g_platform[i].variorum_get_frequency_json == NULL) + { + variorum_error_handler("Feature not yet implemented or is not supported", + VARIORUM_ERROR_FEATURE_NOT_IMPLEMENTED, + getenv("HOSTNAME"), __FILE__, + __FUNCTION__, __LINE__); + continue; + } + err = g_platform[i].variorum_get_frequency_json(node_obj); + if (err) + { + printf("Error with variorum get frequency json platform %d\n", i); + } + } + + *get_frequency_obj_str = json_dumps(get_frequency_obj, JSON_INDENT(4)); + json_decref(get_frequency_obj); + + err = variorum_exit(__FILE__, __FUNCTION__, __LINE__); + if (err) + { + return -1; + } + return err; +} + int variorum_print_available_frequencies(void) { int err = 0; diff --git a/src/variorum/variorum.h b/src/variorum/variorum.h index ddb5b9eaa..edc9e16bd 100644 --- a/src/variorum/variorum.h +++ b/src/variorum/variorum.h @@ -603,6 +603,27 @@ int variorum_get_node_power_domain_info_json(char **get_domain_obj_str); /// check for NULL strings. 
int variorum_get_thermals_json(char **get_thermal_obj_str);

/// @brief Populate a string in JSON format with node-level frequency
/// information.
///
/// @supparch
/// - Intel Sandy Bridge
/// - Intel Ivy Bridge
/// - Intel Haswell
/// - Intel Broadwell
/// - Intel Skylake
/// - Intel Kaby Lake
/// - IBM Power9
/// - AMD Instinct
/// - Nvidia Volta
///
/// @param [out] get_frequency_obj_str String (passed by reference) that
/// receives the node-level frequency information. The string is allocated
/// by the library (via json_dumps()) and must be freed by the caller.
///
/// @return 0 if successful, otherwise -1.
/// NOTE(review): the implementation in variorum.c reports platforms that
/// lack this feature through the error handler and skips them; confirm
/// whether -1 is the intended result in that case.
int variorum_get_node_frequency_json(char **get_frequency_obj_str);

/// @brief Returns Variorum version as a constant string.
///
/// @supparch