fix(bpf): Fix issue(s) introduced with bpf refactor #1407

Merged · 2 commits · May 13, 2024

Changes from all commits
42 changes: 22 additions & 20 deletions bpfassets/libbpf/src/kepler.bpf.c
@@ -6,14 +6,14 @@ struct {
 	__uint(type, BPF_MAP_TYPE_HASH);
 	__type(key, u32);
 	__type(value, process_metrics_t);
-	__uint(max_entries, NUM_CPUS);
+	__uint(max_entries, MAP_SIZE);
 } processes SEC(".maps");
 
 struct {
 	__uint(type, BPF_MAP_TYPE_HASH);
 	__type(key, u32);
 	__type(value, u64);
-	__uint(max_entries, NUM_CPUS);
+	__uint(max_entries, MAP_SIZE);
 } pid_time SEC(".maps");
 
 struct {
@@ -95,8 +95,9 @@ int counter_sched_switch = 0;
 
 static inline u64 get_on_cpu_time(u32 cur_pid, u32 prev_pid, u64 cur_ts)
 {
-	u64 cpu_time, *prev_ts;
-	pid_time_t prev_pid_key, new_pid_key;
+	u64 cpu_time = 0;
+	u64 *prev_ts;
+	pid_time_t prev_pid_key, new_pid_key = {};
 
 	prev_pid_key.pid = prev_pid;
 	prev_ts = bpf_map_lookup_elem(&pid_time, &prev_pid_key);
@@ -115,7 +116,7 @@ static inline u64 get_on_cpu_time(u32 cur_pid, u32 prev_pid, u64 cur_ts)
 
 static inline u64 calc_delta(u64 *prev_val, u64 *val)
 {
-	u64 delta;
+	u64 delta = 0;
 
 	if (prev_val) {
 		if (*val > *prev_val)
@@ -127,14 +128,14 @@ static inline u64 calc_delta(u64 *prev_val, u64 *val)
 static inline u64 get_on_cpu_cycles(u32 *cpu_id)
 {
 	u64 delta, val, *prev_val;
-	struct bpf_perf_event_value c;
-	int error;
+	long error;
+	struct bpf_perf_event_value c = {};
 
-	error = bpf_perf_event_read_value(
-		&cpu_cycles_event_reader, *cpu_id, &c, sizeof(c));
-	if (error)
-		return 0;
-
+	// TODO: Fix Verifier errors upon changing this to bpf_perf_event_read_value
+	error = bpf_perf_event_read(&cpu_cycles_event_reader, *cpu_id);
+	if (error < 0) {
+		return delta;
+	}
 	val = c.counter;
 	prev_val = bpf_map_lookup_elem(&cpu_cycles, cpu_id);
 	delta = calc_delta(prev_val, &val);
@@ -146,13 +147,13 @@ static inline u64 get_on_cpu_cycles(u32 *cpu_id)
 static inline u64 get_on_cpu_instr(u32 *cpu_id)
 {
 	u64 delta, val, *prev_val;
-	int error;
-	struct bpf_perf_event_value c;
+	long error;
+	struct bpf_perf_event_value c = {};
 
 	error = bpf_perf_event_read_value(
 		&cpu_instructions_event_reader, *cpu_id, &c, sizeof(c));
 	if (error) {
-		return delta;
+		return 0;
 	}
 	val = c.counter;
 	prev_val = bpf_map_lookup_elem(&cpu_instructions, cpu_id);
@@ -165,13 +166,13 @@ static inline u64 get_on_cpu_instr(u32 *cpu_id)
 static inline u64 get_on_cpu_cache_miss(u32 *cpu_id)
 {
 	u64 delta, val, *prev_val;
-	int error;
-	struct bpf_perf_event_value c;
+	long error;
+	struct bpf_perf_event_value c = {};
 
 	error = bpf_perf_event_read_value(
 		&cache_miss_event_reader, *cpu_id, &c, sizeof(c));
 	if (error) {
-		return delta;
+		return 0;
 	}
 	val = c.counter;
 	prev_val = bpf_map_lookup_elem(&cache_miss, cpu_id);
@@ -203,7 +204,8 @@ int kepler_sched_switch_trace(struct sched_switch_info *ctx)
 	u64 pid_tgid, cgroup_id, cur_ts;
 	pid_t cur_pid;
 
-	struct process_metrics_t *cur_pid_metrics, *prev_pid_metrics, buf;
+	struct process_metrics_t *cur_pid_metrics, *prev_pid_metrics;
+	struct process_metrics_t buf = {};
 
 	if (SAMPLE_RATE > 0) {
Collaborator:

SAMPLE_RATE seems to have changed from 1 to 5, which means we now update the maps once in every 6 task switches; earlier it was every other task switch. Please record this in CHANGELOG.md.
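A minimal sketch of how such a sampling gate typically works in the probe, using SAMPLE_RATE and counter_sched_switch from the diff above (the exact decrement/reset logic is assumed, not copied from this PR):

	// With SAMPLE_RATE == 5, one of every six sched_switch events
	// falls through to the map updates; the rest return early.
	if (SAMPLE_RATE > 0) {
		if (counter_sched_switch > 0) {
			counter_sched_switch--;
			return 0; // skip this event
		}
		counter_sched_switch = SAMPLE_RATE; // re-arm, then fall through
	}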

Collaborator:

Maybe I missed this in an earlier PR, but we were using SEC("tracepoint/sched/sched_switch") some time ago, switched to SEC("kprobe/finish_task_switch") in this commit, and are now back to the tracepoint because its API is stable. We can confirm the reason for using kprobes with @rootfs and capture it in CHANGELOG.md.
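For reference, the two attachment styles under discussion differ only in the section annotation and context type; a sketch (the kprobe handler below is illustrative, not code from this PR):

	// Tracepoint: stable ABI, argument layout fixed by the
	// sched_switch tracepoint format.
	SEC("tracepoint/sched/sched_switch")
	int kepler_sched_switch_trace(struct sched_switch_info *ctx)
	{
		return 0;
	}

	// Kprobe: hooks a kernel symbol, which may be renamed or
	// inlined across kernel versions.
	SEC("kprobe/finish_task_switch")
	int kepler_sched_switch_kprobe(struct pt_regs *ctx)
	{
		return 0;
	}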

dave-tucker (Collaborator, Author) commented on May 9, 2024:

Re: SAMPLE_RATE, it's 5 in the code but can be changed back to 0 if you prefer. Regardless, at program load time it gets patched here to whatever the user has configured, and the default of 0 remains unchanged.
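For context, a common libbpf pattern for this kind of load-time patching is a read-only global that userspace overwrites before the verifier runs; a sketch of the general technique (the skeleton names are illustrative, not necessarily this repo's exact mechanism):

	// BPF side: a patchable constant. It lands in .rodata, so the
	// verifier sees the final value and can prune dead branches.
	volatile const u32 sample_rate = 0;

	/* Userspace side, with a generated skeleton:
	 *
	 *	struct kepler_bpf *skel = kepler_bpf__open();
	 *	skel->rodata->sample_rate = configured_rate;
	 *	kepler_bpf__load(skel);  // value is fixed from here on
	 */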

dave-tucker (Collaborator, Author):

And re: tracepoints, I got confirmation from one of our kernel devs that the tracepoint and kprobe/finish_task_switch fire close enough together that it makes no difference to power measurements. There is nothing I can see in the patch you linked indicating that kprobes would be required for any reason, but we can let @rootfs confirm.

 		if (counter_sched_switch > 0) {
@@ -236,7 +238,7 @@ int kepler_sched_switch_trace(struct sched_switch_info *ctx)
 		prev_pid_metrics->cache_miss += buf.cache_miss;
 	}
 
-	// creat new process metrics
+	// create new process metrics
 	cur_pid_metrics = bpf_map_lookup_elem(&processes, &cur_pid);
 	if (!cur_pid_metrics) {
 		process_metrics_t new_process = {};
4 changes: 4 additions & 0 deletions bpfassets/libbpf/src/kepler.bpf.h
@@ -29,6 +29,10 @@ typedef struct pid_time_t {
 # define NUM_CPUS 128
 #endif
 
+#ifndef MAP_SIZE
+# define MAP_SIZE 32768
+#endif
+
 #include <bpf/bpf_helpers.h>
 
 enum bpf_map_type {
1 change: 1 addition & 0 deletions pkg/bpfassets/attacher/attacher.go
@@ -194,6 +194,7 @@ func attachLibbpfModule() (*bpf.Module, error) {
 		return nil, fmt.Errorf("failed to load module: %v", err)
 	}
 	// resize array entries
+	klog.Infof("%d CPU cores detected. Resizing eBPF Perf Event Arrays", cpuCores)
 	for _, arrayName := range bpfArrays {
 		err = resizeArrayEntries(arrayName, cpuCores)
 		if err != nil {
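For context, resizing per-CPU event arrays to the detected core count has to happen after the object is opened but before it is loaded; a minimal libbpf C sketch of the same idea (map names are illustrative, chosen to match the event readers in the diff above):

	#include <bpf/libbpf.h>

	// Grow each perf-event array map to one slot per online CPU
	// before the object is loaded.
	static int resize_event_arrays(struct bpf_object *obj, __u32 cpu_cores)
	{
		const char *arrays[] = { "cpu_cycles_event_reader",
					 "cpu_instructions_event_reader",
					 "cache_miss_event_reader" };
		for (size_t i = 0; i < sizeof(arrays) / sizeof(arrays[0]); i++) {
			struct bpf_map *m =
				bpf_object__find_map_by_name(obj, arrays[i]);
			if (!m)
				return -1;
			if (bpf_map__set_max_entries(m, cpu_cores))
				return -1; // must precede bpf_object__load()
		}
		return 0;
	}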