Skip to content

Commit

Permalink
[OpenMP][OMPT] Indicate loop schedule for worksharing-loop events (ll…
Browse files Browse the repository at this point in the history
…vm#97429)

Use more specific values from `ompt_work_t` to allow the tool identify
the schedule of a worksharing-loop. With this patch, the runtime will
report the schedule chosen by the runtime rather than necessarily the
schedule literally requested by the clause.
E.g., for guided + just one iteration per thread, the runtime would
choose and report static.

Fixes issue llvm#63904
  • Loading branch information
jprotze authored and lravenclaw committed Jul 3, 2024
1 parent f128746 commit a79a666
Show file tree
Hide file tree
Showing 23 changed files with 199 additions and 215 deletions.
4 changes: 2 additions & 2 deletions openmp/runtime/src/kmp_csupport.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2006,13 +2006,13 @@ void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {

#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_work) {
ompt_work_t ompt_work_type = ompt_work_loop;
ompt_work_t ompt_work_type = ompt_work_loop_static;
ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
// Determine workshare type
if (loc != NULL) {
if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
ompt_work_type = ompt_work_loop;
ompt_work_type = ompt_work_loop_static;
} else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
ompt_work_type = ompt_work_sections;
} else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
Expand Down
9 changes: 5 additions & 4 deletions openmp/runtime/src/kmp_dispatch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1164,8 +1164,9 @@ __kmp_dispatch_init(ident_t *loc, int gtid, enum sched_type schedule, T lb,
ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
ompt_callbacks.ompt_callback(ompt_callback_work)(
ompt_work_loop, ompt_scope_begin, &(team_info->parallel_data),
&(task_info->task_data), pr->u.p.tc, OMPT_LOAD_RETURN_ADDRESS(gtid));
ompt_get_work_schedule(pr->schedule), ompt_scope_begin,
&(team_info->parallel_data), &(task_info->task_data), pr->u.p.tc,
OMPT_LOAD_RETURN_ADDRESS(gtid));
}
#endif
KMP_PUSH_PARTITIONED_TIMER(OMP_loop_dynamic);
Expand Down Expand Up @@ -2121,8 +2122,8 @@ int __kmp_dispatch_next_algorithm(int gtid,
ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); \
ompt_task_info_t *task_info = __ompt_get_task_info_object(0); \
ompt_callbacks.ompt_callback(ompt_callback_work)( \
ompt_work_loop, ompt_scope_end, &(team_info->parallel_data), \
&(task_info->task_data), 0, codeptr); \
ompt_get_work_schedule(pr->schedule), ompt_scope_end, \
&(team_info->parallel_data), &(task_info->task_data), 0, codeptr); \
} \
}
#define OMPT_LOOP_DISPATCH(lb, ub, st, status) \
Expand Down
4 changes: 2 additions & 2 deletions openmp/runtime/src/kmp_sched.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
#if OMPT_SUPPORT && OMPT_OPTIONAL
ompt_team_info_t *team_info = NULL;
ompt_task_info_t *task_info = NULL;
ompt_work_t ompt_work_type = ompt_work_loop;
ompt_work_t ompt_work_type = ompt_work_loop_static;

static kmp_int8 warn = 0;

Expand All @@ -114,7 +114,7 @@ static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
// Determine workshare type
if (loc != NULL) {
if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
ompt_work_type = ompt_work_loop;
ompt_work_type = ompt_work_loop_static;
} else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
ompt_work_type = ompt_work_sections;
} else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
Expand Down
19 changes: 19 additions & 0 deletions openmp/runtime/src/ompt-specific.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,25 @@ inline const char *ompt_get_runtime_version() {
return &__kmp_version_lib_ver[KMP_VERSION_MAGIC_LEN];
}

inline ompt_work_t ompt_get_work_schedule(enum sched_type schedule) {
switch (SCHEDULE_WITHOUT_MODIFIERS(schedule)) {
case kmp_sch_static_chunked:
case kmp_sch_static_balanced:
case kmp_sch_static_greedy:
return ompt_work_loop_static;
case kmp_sch_dynamic_chunked:
case kmp_sch_static_steal:
return ompt_work_loop_dynamic;
case kmp_sch_guided_iterative_chunked:
case kmp_sch_guided_analytical_chunked:
case kmp_sch_guided_chunked:
case kmp_sch_guided_simd:
return ompt_work_loop_guided;
default:
return ompt_work_loop_other;
}
}

class OmptReturnAddressGuard {
private:
bool SetAddress{false};
Expand Down
183 changes: 45 additions & 138 deletions openmp/runtime/test/ompt/callback.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,36 @@ static const char* ompt_cancel_flag_t_values[] = {
"ompt_cancel_discarded_task"
};

static const char *ompt_work_t_values[] = {"undefined",
"ompt_work_loop",
"ompt_work_sections",
"ompt_work_single_executor",
"ompt_work_single_other",
"ompt_work_workshare",
"ompt_work_distribute",
"ompt_work_taskloop",
"ompt_work_scope",
"ompt_work_workdistribute",
"ompt_work_loop_static",
"ompt_work_loop_dynamic",
"ompt_work_loop_guided",
"ompt_work_loop_other"};

static const char *ompt_work_events_t_values[] = {"undefined",
"ompt_event_loop",
"ompt_event_sections",
"ompt_event_single_in_block",
"ompt_event_single_others",
"ompt_event_workshare",
"ompt_event_distribute",
"ompt_event_taskloop",
"ompt_event_scope",
"ompt_event_workdistribute",
"ompt_event_loop_static",
"ompt_event_loop_dynamic",
"ompt_event_loop_guided",
"ompt_event_loop_other"};

static const char *ompt_dependence_type_t_values[36] = {
"ompt_dependence_type_UNDEFINED",
"ompt_dependence_type_in", // 1
Expand Down Expand Up @@ -852,144 +882,21 @@ on_ompt_callback_work(
{
switch(endpoint)
{
case ompt_scope_begin:
switch(wstype)
{
case ompt_work_loop:
case ompt_work_loop_static:
case ompt_work_loop_dynamic:
case ompt_work_loop_guided:
case ompt_work_loop_other:
// TODO: add schedule attribute for the different work_loop types.
// e.g., ", schedule=%s", ..., ompt_schedule_values[wstype]
printf("%" PRIu64 ":" _TOOL_PREFIX
" ompt_event_loop_begin: parallel_id=%" PRIu64
", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
"\n",
ompt_get_thread_data()->value, parallel_data->value,
task_data->value, codeptr_ra, count);
break;
case ompt_work_sections:
printf("%" PRIu64 ":" _TOOL_PREFIX
" ompt_event_sections_begin: parallel_id=%" PRIu64
", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
"\n",
ompt_get_thread_data()->value, parallel_data->value,
task_data->value, codeptr_ra, count);
break;
case ompt_work_single_executor:
printf("%" PRIu64 ":" _TOOL_PREFIX
" ompt_event_single_in_block_begin: parallel_id=%" PRIu64
", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
"\n",
ompt_get_thread_data()->value, parallel_data->value,
task_data->value, codeptr_ra, count);
break;
case ompt_work_single_other:
printf("%" PRIu64 ":" _TOOL_PREFIX
" ompt_event_single_others_begin: parallel_id=%" PRIu64
", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
ompt_get_thread_data()->value, parallel_data->value,
task_data->value, codeptr_ra, count);
break;
case ompt_work_workshare:
//impl
break;
case ompt_work_distribute:
printf("%" PRIu64 ":" _TOOL_PREFIX
" ompt_event_distribute_begin: parallel_id=%" PRIu64
", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
"\n",
ompt_get_thread_data()->value, parallel_data->value,
task_data->value, codeptr_ra, count);
break;
case ompt_work_taskloop:
//impl
printf("%" PRIu64 ":" _TOOL_PREFIX
" ompt_event_taskloop_begin: parallel_id=%" PRIu64
", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
"\n",
ompt_get_thread_data()->value, parallel_data->value,
task_data->value, codeptr_ra, count);
break;
case ompt_work_scope:
printf("%" PRIu64 ":" _TOOL_PREFIX
" ompt_event_scope_begin: parallel_id=%" PRIu64
", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
"\n",
ompt_get_thread_data()->value, parallel_data->value,
task_data->value, codeptr_ra, count);
break;
}
break;
case ompt_scope_end:
switch(wstype)
{
case ompt_work_loop:
case ompt_work_loop_static:
case ompt_work_loop_dynamic:
case ompt_work_loop_guided:
case ompt_work_loop_other:
printf("%" PRIu64 ":" _TOOL_PREFIX
" ompt_event_loop_end: parallel_id=%" PRIu64
", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
ompt_get_thread_data()->value, parallel_data->value,
task_data->value, codeptr_ra, count);
break;
case ompt_work_sections:
printf("%" PRIu64 ":" _TOOL_PREFIX
" ompt_event_sections_end: parallel_id=%" PRIu64
", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
ompt_get_thread_data()->value, parallel_data->value,
task_data->value, codeptr_ra, count);
break;
case ompt_work_single_executor:
printf("%" PRIu64 ":" _TOOL_PREFIX
" ompt_event_single_in_block_end: parallel_id=%" PRIu64
", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
ompt_get_thread_data()->value, parallel_data->value,
task_data->value, codeptr_ra, count);
break;
case ompt_work_single_other:
printf("%" PRIu64 ":" _TOOL_PREFIX
" ompt_event_single_others_end: parallel_id=%" PRIu64
", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
ompt_get_thread_data()->value, parallel_data->value,
task_data->value, codeptr_ra, count);
break;
case ompt_work_workshare:
//impl
break;
case ompt_work_distribute:
printf("%" PRIu64 ":" _TOOL_PREFIX
" ompt_event_distribute_end: parallel_id=%" PRIu64
", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
"\n",
ompt_get_thread_data()->value, parallel_data->value,
task_data->value, codeptr_ra, count);
break;
case ompt_work_taskloop:
//impl
printf("%" PRIu64 ":" _TOOL_PREFIX
" ompt_event_taskloop_end: parallel_id=%" PRIu64
", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
"\n",
ompt_get_thread_data()->value, parallel_data->value,
task_data->value, codeptr_ra, count);
break;
case ompt_work_scope:
printf("%" PRIu64 ":" _TOOL_PREFIX
" ompt_event_scope_end: parallel_id=%" PRIu64
", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
"\n",
ompt_get_thread_data()->value, parallel_data->value,
task_data->value, codeptr_ra, count);
break;
}
break;
case ompt_scope_beginend:
printf("ompt_scope_beginend should never be passed to %s\n", __func__);
exit(-1);
case ompt_scope_begin:
printf("%" PRIu64 ":" _TOOL_PREFIX " %s_begin: parallel_id=%" PRIu64
", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
ompt_get_thread_data()->value, ompt_work_events_t_values[wstype],
parallel_data->value, task_data->value, codeptr_ra, count);
break;
case ompt_scope_end:
printf("%" PRIu64 ":" _TOOL_PREFIX " %s_end: parallel_id=%" PRIu64
", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
ompt_get_thread_data()->value, ompt_work_events_t_values[wstype],
parallel_data->value, task_data->value, codeptr_ra, count);
break;
case ompt_scope_beginend:
printf("ompt_scope_beginend should never be passed to %s\n", __func__);
exit(-1);
}
}

Expand Down
10 changes: 5 additions & 5 deletions openmp/runtime/test/ompt/synchronization/ordered_dependences.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
int main() {
int a[10][10];
#pragma omp parallel num_threads(2)
#pragma omp for ordered(2)
#pragma omp for ordered(2) schedule(static)
for (int i = 0; i < 2; i++)
for (int j = 0; j < 2; j++) {
a[i][j] = i + j + 1;
Expand All @@ -23,8 +23,8 @@ int main() {
}
// CHECK: 0: NULL_POINTER=[[NULL:.*$]]

// CHECK: {{^}}[[MASTER:[0-9]+]]: ompt_event_loop_begin:
// CHECK-SAME: parallel_id={{[0-9]+}}, parent_task_id=[[ITASK:[0-9]+]],
// CHECK: {{^}}[[MASTER:[0-9]+]]: ompt_event_loop_static_begin:
// CHECK-SAME: parallel_id={{[0-9]+}}, task_id=[[ITASK:[0-9]+]],

// CHECK: {{^}}[[MASTER]]: ompt_event_dependences: task_id=[[ITASK]],
// CHECK-SAME: deps=[(0, ompt_dependence_type_source), (0,
Expand All @@ -38,8 +38,8 @@ int main() {
// CHECK-SAME: deps=[(0, ompt_dependence_type_source), (1,
// CHECK-SAME: ompt_dependence_type_source)], ndeps=2

// CHECK: {{^}}[[WORKER:[0-9]+]]: ompt_event_loop_begin:
// CHECK-SAME: parallel_id={{[0-9]+}}, parent_task_id=[[ITASK:[0-9]+]],
// CHECK: {{^}}[[WORKER:[0-9]+]]: ompt_event_loop_static_begin:
// CHECK-SAME: parallel_id={{[0-9]+}}, task_id=[[ITASK:[0-9]+]],

// CHECK: {{^}}[[WORKER]]: ompt_event_dependences: task_id=[[ITASK]],
// CHECK-SAME: deps=[(0, ompt_dependence_type_sink), (0,
Expand Down
4 changes: 2 additions & 2 deletions openmp/runtime/test/ompt/tasks/taskloop.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ int main() {
// CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID1]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskloop_begin:
// CHECK-SAME: parallel_id=[[PARALLEL_ID]]
// CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID1]]
// CHECK-SAME: task_id=[[IMPLICIT_TASK_ID1]]
// CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]], count=2
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create:
// CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID1]]
Expand All @@ -52,7 +52,7 @@ int main() {
// CHECK-NOT: {{^}}[[MASTER_ID]]: ompt_event_task_create:
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskloop_end:
// CHECK-SAME: parallel_id=[[PARALLEL_ID]]
// CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID1]]
// CHECK-SAME: task_id=[[IMPLICIT_TASK_ID1]]
// CHECK-SAME: count=2
// CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_wait_taskgroup_begin:
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_taskgroup_end:
Expand Down
2 changes: 1 addition & 1 deletion openmp/runtime/test/ompt/tasks/taskloop_dispatch.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ int main() {

// CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskloop_begin:
// CHECK-SAME: parallel_id=[[PARALLEL_ID]]
// CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID1]]
// CHECK-SAME: task_id=[[IMPLICIT_TASK_ID1]]
// CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]], count=16

// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create:
Expand Down
8 changes: 4 additions & 4 deletions openmp/runtime/test/ompt/teams/distribute_dispatch.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,28 +24,28 @@ int main() {

// CHECK: {{^}}[[THREAD_ID0:[0-9]+]]: ompt_event_distribute_begin:
// CHECK-SAME: parallel_id=[[PARALLEL_ID0:[0-9]+]]
// CHECK-SAME: parent_task_id=[[TASK_ID0:[0-9]+]]
// CHECK-SAME: task_id=[[TASK_ID0:[0-9]+]]
// CHECK: {{^}}[[THREAD_ID0]]: ompt_event_distribute_chunk_begin:
// CHECK-SAME: parallel_id=[[PARALLEL_ID0]], task_id=[[TASK_ID0]]
// CHECK-SAME: chunk_start={{[0-9]+}}, chunk_iterations=16

// CHECK: {{^}}[[THREAD_ID1:[0-9]+]]: ompt_event_distribute_begin:
// CHECK-SAME: parallel_id=[[PARALLEL_ID1:[0-9]+]]
// CHECK-SAME: parent_task_id=[[TASK_ID1:[0-9]+]]
// CHECK-SAME: task_id=[[TASK_ID1:[0-9]+]]
// CHECK: {{^}}[[THREAD_ID1]]: ompt_event_distribute_chunk_begin:
// CHECK-SAME: parallel_id=[[PARALLEL_ID1]], task_id=[[TASK_ID1]]
// CHECK-SAME: chunk_start={{[0-9]+}}, chunk_iterations=16

// CHECK: {{^}}[[THREAD_ID2:[0-9]+]]: ompt_event_distribute_begin:
// CHECK-SAME: parallel_id=[[PARALLEL_ID2:[0-9]+]]
// CHECK-SAME: parent_task_id=[[TASK_ID2:[0-9]+]]
// CHECK-SAME: task_id=[[TASK_ID2:[0-9]+]]
// CHECK: {{^}}[[THREAD_ID2]]: ompt_event_distribute_chunk_begin:
// CHECK-SAME: parallel_id=[[PARALLEL_ID2]], task_id=[[TASK_ID2]]
// CHECK-SAME: chunk_start={{[0-9]+}}, chunk_iterations=16

// CHECK: {{^}}[[THREAD_ID3:[0-9]+]]: ompt_event_distribute_begin:
// CHECK-SAME: parallel_id=[[PARALLEL_ID3:[0-9]+]]
// CHECK-SAME: parent_task_id=[[TASK_ID3:[0-9]+]]
// CHECK-SAME: task_id=[[TASK_ID3:[0-9]+]]
// CHECK: {{^}}[[THREAD_ID3]]: ompt_event_distribute_chunk_begin:
// CHECK-SAME: parallel_id=[[PARALLEL_ID3]], task_id=[[TASK_ID3]]
// CHECK-SAME: chunk_start={{[0-9]+}}, chunk_iterations=16
3 changes: 3 additions & 0 deletions openmp/runtime/test/ompt/worksharing/for/auto.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,7 @@
// XFAIL: gcc

#define SCHEDULE auto
// The runtime uses guided schedule for auto,
// which is a reason choice
#define SCHED_OUTPUT "guided"
#include "base.h"
3 changes: 3 additions & 0 deletions openmp/runtime/test/ompt/worksharing/for/auto_serialized.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,7 @@
// XFAIL: gcc

#define SCHEDULE auto
// The runtime uses static schedule for serialized loop,
// which is a reason choice
#define SCHED_OUTPUT "static"
#include "base_serialized.h"
3 changes: 3 additions & 0 deletions openmp/runtime/test/ompt/worksharing/for/auto_split.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,7 @@
// XFAIL: gcc

#define SCHEDULE auto
// The runtime uses guided schedule for auto,
// which is a reason choice
#define SCHED_OUTPUT "guided"
#include "base_split.h"
Loading

0 comments on commit a79a666

Please sign in to comment.