Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

json output for frequency data #488

Merged
merged 9 commits into from
Jan 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/docs/sphinx/api/json_support_functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,5 @@ Defined in ``variorum/variorum.h``.
.. doxygenfunction:: variorum_get_node_power_domain_info_json

.. doxygenfunction:: variorum_get_thermals_json

.. doxygenfunction:: variorum_get_node_frequency_json
1 change: 1 addition & 0 deletions src/examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ set(BASIC_EXAMPLES
variorum-cap-socket-power-limit-example
variorum-disable-turbo-example
variorum-enable-turbo-example
variorum-get-node-frequency-json-example
variorum-get-node-power-domain-info-json-example
variorum-get-node-power-json-example
variorum-get-node-thermal-json-example
Expand Down
45 changes: 45 additions & 0 deletions src/examples/variorum-get-node-frequency-json-example.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// Copyright 2019-2023 Lawrence Livermore National Security, LLC and other
// Variorum Project Developers. See the top-level LICENSE file for details.
//
// SPDX-License-Identifier: MIT

#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>

#include <variorum.h>

/*
 * Example driver: query Variorum for the node frequency data as a JSON
 * string and print it to stdout.
 *
 * Options:
 *   -h  print the usage line and exit with 0
 *   -v  print the Variorum version and exit with 0
 * Any other option prints the usage line to stderr and exits with -1.
 */
int main(int argc, char **argv)
{
    const char *usage_fmt = "Usage: %s [-h] [-v]\n";
    char *json_str = NULL;
    int status;
    int flag;

    /* Every recognized or unrecognized option terminates the program. */
    while ((flag = getopt(argc, argv, "hv")) != -1)
    {
        if (flag == 'h')
        {
            printf(usage_fmt, argv[0]);
            return 0;
        }
        if (flag == 'v')
        {
            printf("%s\n", variorum_get_current_version());
            return 0;
        }
        fprintf(stderr, usage_fmt, argv[0]);
        return -1;
    }

    status = variorum_get_node_frequency_json(&json_str);
    if (status == 0)
    {
        /* Success: emit the JSON text, then release the string with free(). */
        puts(json_str);
        free(json_str);
        return status;
    }

    printf("Variorum get frequency json failure!\n");
    free(json_str);
    exit(-1);
}
1 change: 1 addition & 0 deletions src/variorum/AMD/config_amd.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ int set_amd_func_ptrs(int idx)
g_platform[idx].variorum_get_node_power_json = amd_cpu_epyc_get_node_power_json;
g_platform[idx].variorum_get_node_power_domain_info_json =
amd_cpu_epyc_get_node_power_domain_info_json;
g_platform[idx].variorum_get_frequency_json = amd_cpu_epyc_get_json_boostlimit;
break;
default:
fprintf(stdout, "ESMI not initialized, drivers not found. "
Expand Down
45 changes: 45 additions & 0 deletions src/variorum/AMD/epyc.c
Original file line number Diff line number Diff line change
Expand Up @@ -499,6 +499,51 @@ int amd_cpu_epyc_print_boostlimit()
return 0;
}

/*
 * Populate a JSON object with the boost limit of every core, grouped by
 * socket.
 *
 * For each socket a "socket_<n>" object is looked up in get_clock_obj_json
 * (and created if absent). Under it a fresh "CPU" object containing a "core"
 * object is installed, and each successfully read core is stored there as
 * "core_<id>_avg_freq_mhz" holding the value reported by
 * esmi_core_boostlimit_get().
 *
 * Cores are assigned to sockets in contiguous ranges of
 * total_cores / num_sockets cores each.
 *
 * A core whose boost limit cannot be read is reported on stderr and omitted
 * from the JSON (instead of publishing an undefined value); the remaining
 * cores are still collected.
 *
 * Returns 0 on success, -1 if the platform reports no sockets, or the first
 * non-zero status returned by esmi_core_boostlimit_get().
 */
int amd_cpu_epyc_get_json_boostlimit(json_t *get_clock_obj_json)
{
    char *val = getenv("VARIORUM_LOG");
    if (val != NULL && atoi(val) == 1)
    {
        printf("Running %s\n\n", __FUNCTION__);
    }

    int num_sockets = g_platform[P_AMD_CPU_IDX].num_sockets;
    int total_cores = g_platform[P_AMD_CPU_IDX].total_cores;

    /* Guard the division below. */
    if (num_sockets <= 0)
    {
        fprintf(stderr, "%s: invalid socket count %d\n", __FUNCTION__,
                num_sockets);
        return -1;
    }

    int cores_per_socket = total_cores / num_sockets;
    int current_core = 0;
    int first_err = 0;

    for (int socket = 0; socket < num_sockets; ++socket)
    {
        char socket_name[16];
        snprintf(socket_name, sizeof(socket_name), "socket_%d", socket);

        /* Reuse the socket object if an earlier collector already made it. */
        json_t *socket_obj = json_object_get(get_clock_obj_json, socket_name);
        if (socket_obj == NULL)
        {
            socket_obj = json_object();
            json_object_set_new(get_clock_obj_json, socket_name, socket_obj);
        }

        json_t *cpu_obj = json_object();
        json_object_set_new(socket_obj, "CPU", cpu_obj);

        json_t *core_obj = json_object();
        json_object_set_new(cpu_obj, "core", core_obj);

        for (int core = 0; core < cores_per_socket; ++core, ++current_core)
        {
            uint32_t boostlimit = 0;
            int ret = esmi_core_boostlimit_get(current_core, &boostlimit);
            if (ret != 0)
            {
                fprintf(stderr, "%s: failed to read boost limit of core %d "
                        "(error %d)\n", __FUNCTION__, current_core, ret);
                if (first_err == 0)
                {
                    first_err = ret;
                }
                continue;
            }

            /* Large enough for the key with any int core id. */
            char core_avg_string[32];
            snprintf(core_avg_string, sizeof(core_avg_string),
                     "core_%d_avg_freq_mhz", current_core);
            json_object_set_new(core_obj, core_avg_string,
                                json_real(boostlimit));
        }
    }

    return first_err;
}

int amd_cpu_epyc_set_each_core_boostlimit(int boostlimit)
{
char *val = getenv("VARIORUM_LOG");
Expand Down
4 changes: 4 additions & 0 deletions src/variorum/AMD/epyc.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,8 @@ int amd_cpu_epyc_get_node_power_domain_info_json(
char **get_domain_obj_str
);

/// @brief Add the per-core boost limit of each socket to a JSON object.
///
/// Entries are stored as "socket_<n>" -> "CPU" -> "core" ->
/// "core_<id>_avg_freq_mhz".
///
/// @param [in,out] get_clock_obj_json JSON object that receives the
///        per-socket entries; existing "socket_<n>" objects are reused.
///
/// @return 0 on success.
int amd_cpu_epyc_get_json_boostlimit(
json_t *get_clock_obj_json
);

#endif
86 changes: 86 additions & 0 deletions src/variorum/AMD_GPU/amd_gpu_power_features.c
Original file line number Diff line number Diff line change
Expand Up @@ -579,6 +579,92 @@ void get_clocks_data(int chipid, int total_sockets, int verbose, FILE *output)
}
}

/*
 * Add the current system and memory clock of every GPU attached to one
 * socket to a JSON object.
 *
 * chipid        - index of the socket to report; selects the "socket_<chipid>"
 *                 entry and the GPU index range
 *                 [chipid * gpus_per_socket, (chipid + 1) * gpus_per_socket).
 * total_sockets - number of sockets the monitored GPUs are divided across.
 * output        - JSON object to update. The "socket_<chipid>" object is
 *                 reused when present, otherwise created; a fresh "GPU"
 *                 object is installed under it with "gpu_<i>_freq_mhz" and
 *                 "gpu_<i>_mem_freq_mhz" integer entries.
 *
 * RSMI is initialized and shut down within this call. Failures are reported
 * through variorum_error_handler(). If initialization fails nothing is added
 * to output; if the device count or a clock query fails, the affected GPUs
 * are left out rather than published with undefined values.
 */
void get_clocks_json(int chipid, int total_sockets, json_t *output)
{
    rsmi_status_t ret;
    /* Zero so that a failed device query yields an empty GPU object. */
    uint32_t num_devices = 0;
    int gpus_per_socket;
    char socketID[16];

    snprintf(socketID, sizeof(socketID), "socket_%d", chipid);

    ret = rsmi_init(0);
    if (ret != RSMI_STATUS_SUCCESS)
    {
        variorum_error_handler("Could not initialize RSMI",
                               VARIORUM_ERROR_PLATFORM_ENV,
                               getenv("HOSTNAME"), __FILE__, __FUNCTION__,
                               __LINE__);
        /* No further RSMI call can succeed without initialization. */
        return;
    }

    ret = rsmi_num_monitor_devices(&num_devices);
    if (ret != RSMI_STATUS_SUCCESS)
    {
        variorum_error_handler("Could not get number of GPU devices",
                               VARIORUM_ERROR_PLATFORM_ENV,
                               getenv("HOSTNAME"), __FILE__, __FUNCTION__,
                               __LINE__);
        num_devices = 0;
    }

    gpus_per_socket = num_devices / total_sockets;

    json_t *socket_obj = json_object_get(output, socketID);
    if (socket_obj == NULL)
    {
        socket_obj = json_object();
        json_object_set_new(output, socketID, socket_obj);
    }

    json_t *gpu_obj = json_object();
    json_object_set_new(socket_obj, "GPU", gpu_obj);

    for (int i = chipid * gpus_per_socket;
         i < (chipid + 1) * gpus_per_socket; i++)
    {
        rsmi_frequencies_t f_sys, f_mem;
        uint32_t f_sys_val, f_mem_val;

        ret = rsmi_dev_gpu_clk_freq_get(i, RSMI_CLK_TYPE_SYS, &f_sys);
        if (ret != RSMI_STATUS_SUCCESS)
        {
            variorum_error_handler("RSMI API was not successful",
                                   VARIORUM_ERROR_PLATFORM_ENV,
                                   getenv("HOSTNAME"), __FILE__, __FUNCTION__,
                                   __LINE__);
            /* f_sys is not valid; skip this GPU. */
            continue;
        }

        ret = rsmi_dev_gpu_clk_freq_get(i, RSMI_CLK_TYPE_MEM, &f_mem);
        if (ret != RSMI_STATUS_SUCCESS)
        {
            variorum_error_handler("RSMI API was not successful",
                                   VARIORUM_ERROR_PLATFORM_ENV,
                                   getenv("HOSTNAME"), __FILE__, __FUNCTION__,
                                   __LINE__);
            /* f_mem is not valid; skip this GPU. */
            continue;
        }

        // Convert to MHz
        f_sys_val = (uint32_t)(f_sys.frequency[f_sys.current] / (1000 * 1000));
        f_mem_val = (uint32_t)(f_mem.frequency[f_mem.current] / (1000 * 1000));

        char gpu_clock_string[32];
        snprintf(gpu_clock_string, sizeof(gpu_clock_string),
                 "gpu_%d_freq_mhz", i);

        char gpu_mem_clock_string[32];
        snprintf(gpu_mem_clock_string, sizeof(gpu_mem_clock_string),
                 "gpu_%d_mem_freq_mhz", i);

        json_object_set_new(gpu_obj, gpu_clock_string, json_integer(f_sys_val));
        json_object_set_new(gpu_obj, gpu_mem_clock_string,
                            json_integer(f_mem_val));
    }

    ret = rsmi_shut_down();
    if (ret != RSMI_STATUS_SUCCESS)
    {
        variorum_error_handler("Could not shutdown RSMI",
                               VARIORUM_ERROR_PLATFORM_ENV,
                               getenv("HOSTNAME"), __FILE__, __FUNCTION__,
                               __LINE__);
    }
}

void get_gpu_utilization_data(int chipid, int total_sockets, int verbose,
FILE *output)
{
Expand Down
6 changes: 6 additions & 0 deletions src/variorum/AMD_GPU/amd_gpu_power_features.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,10 @@ void get_thermals_json(
json_t *output
);

/// @brief Add the system and memory clocks (in MHz) of the GPUs belonging to
/// one socket to a JSON object.
///
/// Entries are stored under "socket_<chipid>" -> "GPU" as
/// "gpu_<i>_freq_mhz" and "gpu_<i>_mem_freq_mhz".
///
/// @param [in] chipid Index of the socket whose GPUs are reported.
/// @param [in] total_sockets Number of sockets the GPUs are divided across.
/// @param [in,out] output JSON object that receives the entries.
void get_clocks_json(
int chipid,
int total_sockets,
json_t *output
);

#endif
1 change: 1 addition & 0 deletions src/variorum/AMD_GPU/config_amd_gpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ int set_amd_gpu_func_ptrs(int idx)
g_platform[idx].variorum_print_gpu_utilization =
amd_gpu_instinct_get_gpu_utilization;
g_platform[idx].variorum_get_thermals_json = amd_gpu_instinct_get_thermals_json;
g_platform[idx].variorum_get_frequency_json = amd_gpu_instinct_get_clocks_json;
/* Initialize control interfaces */
g_platform[idx].variorum_cap_each_gpu_power_limit =
amd_gpu_instinct_cap_each_gpu_power_limit;
Expand Down
21 changes: 21 additions & 0 deletions src/variorum/AMD_GPU/instinctGPU.c
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,27 @@ int amd_gpu_instinct_get_clocks(int verbose)
return 0;
}

/*
 * Collect GPU clock data for every socket into get_clock_obj_json.
 *
 * The socket count is obtained from variorum_get_topology() for the AMD GPU
 * platform, and get_clocks_json() is invoked once per socket index.
 * When VARIORUM_LOG is set to 1 the function name is printed first.
 *
 * Always returns 0.
 */
int amd_gpu_instinct_get_clocks_json(json_t *get_clock_obj_json)
{
    unsigned socket_count;
    unsigned socket_idx;
    const char *log_env = getenv("VARIORUM_LOG");

    if (log_env != NULL)
    {
        if (atoi(log_env) == 1)
        {
            printf("Running %s\n", __FUNCTION__);
        }
    }

    variorum_get_topology(&socket_count, NULL, NULL, P_AMD_GPU_IDX);

    socket_idx = 0;
    while (socket_idx < socket_count)
    {
        get_clocks_json(socket_idx, socket_count, get_clock_obj_json);
        socket_idx++;
    }

    return 0;
}

int amd_gpu_instinct_get_gpu_utilization(int verbose)
{
char *val = getenv("VARIORUM_LOG");
Expand Down
4 changes: 4 additions & 0 deletions src/variorum/AMD_GPU/instinctGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,8 @@ int amd_gpu_instinct_get_thermals_json(
json_t *get_thermal_obj
);

/// @brief Add the GPU clock data of every socket to a JSON object.
///
/// @param [in,out] get_clocks_obj_json JSON object that receives the
///        per-socket GPU clock entries.
///
/// @return 0 on success.
int amd_gpu_instinct_get_clocks_json(
json_t *get_clocks_obj_json
);

#endif
61 changes: 61 additions & 0 deletions src/variorum/IBM/Power9.c
Original file line number Diff line number Diff line change
Expand Up @@ -571,3 +571,64 @@ int ibm_cpu_p9_get_node_power_domain_info_json(char **get_domain_obj_str)

return 0;
}

/*
 * Collect frequency sensor data for every socket into a JSON object.
 *
 * For each socket, one block of OCC_SENSOR_DATA_BLOCK_SIZE bytes is read from
 * /sys/firmware/opal/exports/occ_inband_sensors at offset
 * socket * OCC_SENSOR_DATA_BLOCK_SIZE and handed to
 * json_get_frequency_sensors() together with the socket index and
 * get_frequency_obj_json.
 *
 * When VARIORUM_LOG is set to 1 the function name is printed first.
 *
 * Returns 0 on success, -1 if the sensor file cannot be opened, the read
 * buffer cannot be allocated, or a block cannot be positioned to or read
 * completely. The file descriptor and the buffer are released on every path.
 */
int ibm_cpu_p9_get_node_frequency_json(json_t *get_frequency_obj_json)
{
    char *val = getenv("VARIORUM_LOG");
    if (val != NULL && atoi(val) == 1)
    {
        printf("Running %s\n", __FUNCTION__);
    }

    void *buf = NULL;
    int fd;
    int status = 0;
    unsigned iter = 0;
    /* Stays 0 (no sockets processed) when topology support is compiled out. */
    unsigned nsockets = 0;

#ifdef VARIORUM_WITH_IBM_CPU
    variorum_get_topology(&nsockets, NULL, NULL, P_IBM_CPU_IDX);
#endif

    fd = open("/sys/firmware/opal/exports/occ_inband_sensors", O_RDONLY);
    if (fd < 0)
    {
        printf("Failed to open occ_inband_sensors file\n");
        return -1;
    }

    /* One buffer is reused for all sockets; each pass overwrites it fully. */
    buf = malloc(OCC_SENSOR_DATA_BLOCK_SIZE);
    if (!buf)
    {
        printf("Failed to allocate\n");
        close(fd);
        return -1;
    }

    for (iter = 0; iter < nsockets; iter++)
    {
        int bytes = 0;

        if (lseek(fd, (off_t)iter * OCC_SENSOR_DATA_BLOCK_SIZE, SEEK_SET) < 0)
        {
            printf("Failed to seek in occ_inband_sensors file\n");
            status = -1;
            break;
        }

        /* read() may return short counts; loop until the block is complete. */
        while (bytes < OCC_SENSOR_DATA_BLOCK_SIZE)
        {
            int rc = read(fd, (char *)buf + bytes,
                          OCC_SENSOR_DATA_BLOCK_SIZE - bytes);
            if (rc <= 0)
            {
                break;
            }
            bytes += rc;
        }

        if (bytes != OCC_SENSOR_DATA_BLOCK_SIZE)
        {
            printf("Failed to read data\n");
            status = -1;
            break;
        }

        json_get_frequency_sensors(iter, get_frequency_obj_json, buf);
    }

    free(buf);
    close(fd);
    return status;
}
4 changes: 4 additions & 0 deletions src/variorum/IBM/Power9.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,8 @@ int ibm_cpu_p9_get_node_thermal_json(
json_t *get_thermal_obj
);

/// @brief Add the frequency sensor data of every socket to a JSON object.
///
/// The data is read from the OCC in-band sensor export
/// (/sys/firmware/opal/exports/occ_inband_sensors), one block per socket.
///
/// @param [in,out] get_frequency_obj_json JSON object that receives the
///        frequency entries.
///
/// @return 0 on success, -1 on failure (for example when the sensor file
///         cannot be opened or a sensor block cannot be read completely).
int ibm_cpu_p9_get_node_frequency_json(
json_t *get_frequency_obj_json
);

#endif
2 changes: 2 additions & 0 deletions src/variorum/IBM/config_ibm.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ int set_ibm_func_ptrs(int idx)
g_platform[idx].variorum_get_node_power_domain_info_json =
ibm_cpu_p9_get_node_power_domain_info_json;
g_platform[idx].variorum_get_thermals_json = ibm_cpu_p9_get_node_thermal_json;
g_platform[idx].variorum_get_frequency_json =
ibm_cpu_p9_get_node_frequency_json;
}
else
{
Expand Down
Loading
Loading