Skip to content

Commit

Permalink
[mlir][acc] Added async to data clause operations. (llvm#97307)
Browse files Browse the repository at this point in the history
As long as the data clause operations are not tightly
"associated" with the compute/data operations (e.g.
they can be optimized as SSA producers and made block
arguments), the information about the original async()
clause should be attached to the data clause operations
to make it easier to generate proper runtime actions
for them. This change propagates the async() information
from the OpenACC data/compute constructs to the data clause
operations. This change also adds the CurrentDeviceIdResource
to guarantee proper ordering of the operations that read
and write the current device identifier.
  • Loading branch information
vzakhari authored and lravenclaw committed Jul 3, 2024
1 parent 0c6d00a commit 9f87976
Show file tree
Hide file tree
Showing 10 changed files with 580 additions and 192 deletions.
411 changes: 276 additions & 135 deletions flang/lib/Lower/OpenACC.cpp

Large diffs are not rendered by default.

6 changes: 4 additions & 2 deletions flang/test/Lower/OpenACC/acc-data.f90
Original file line number Diff line number Diff line change
Expand Up @@ -155,11 +155,13 @@ subroutine acc_data
! CHECK: acc.data dataOperands(%{{.*}}) {
! CHECK: } attributes {asyncOnly = [#acc.device_type<none>]}

!$acc data present(a) async(1)
!$acc data copy(a) async(1)
!$acc end data

! CHECK: acc.data async(%{{.*}} : i32) dataOperands(%{{.*}}) {
! CHECK: %[[COPYIN:.*]] = acc.copyin varPtr(%{{.*}} : !fir.ref<!fir.array<10x10xf32>>) bounds(%{{.*}}, %{{.*}}) async(%[[ASYNC:.*]] : i32) -> !fir.ref<!fir.array<10x10xf32>> {dataClause = #acc<data_clause acc_copy>, name = "a"}
! CHECK: acc.data async(%[[ASYNC]] : i32) dataOperands(%[[COPYIN]] : !fir.ref<!fir.array<10x10xf32>>) {
! CHECK: }{{$}}
! CHECK: acc.copyout accPtr(%[[COPYIN]] : !fir.ref<!fir.array<10x10xf32>>) bounds(%{{.*}}, %{{.*}}) async(%[[ASYNC]] : i32) to varPtr(%{{.*}} : !fir.ref<!fir.array<10x10xf32>>) {dataClause = #acc<data_clause acc_copy>, name = "a"}

!$acc data present(a) wait
!$acc end data
Expand Down
12 changes: 6 additions & 6 deletions flang/test/Lower/OpenACC/acc-enter-data.f90
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ subroutine acc_enter_data
!$acc enter data create(a) async
!CHECK: %[[BOUND0:.*]] = acc.bounds lowerbound(%{{.*}} : index) upperbound(%{{.*}} : index) extent(%[[C10]] : index) stride(%c1{{.*}} : index) startIdx(%{{.*}} : index)
!CHECK: %[[BOUND1:.*]] = acc.bounds lowerbound(%{{.*}} : index) upperbound(%{{.*}} : index) extent(%[[EXTENT_C10]] : index) stride(%c1{{.*}} : index) startIdx(%{{.*}} : index)
!CHECK: %[[CREATE_A:.*]] = acc.create varPtr(%[[DECLA]]#0 : !fir.ref<!fir.array<10x10xf32>>) bounds(%[[BOUND0]], %[[BOUND1]]) -> !fir.ref<!fir.array<10x10xf32>> {name = "a", structured = false}
!CHECK: %[[CREATE_A:.*]] = acc.create varPtr(%[[DECLA]]#0 : !fir.ref<!fir.array<10x10xf32>>) bounds(%[[BOUND0]], %[[BOUND1]]) -> !fir.ref<!fir.array<10x10xf32>> {asyncOnly = [#acc.device_type<none>], name = "a", structured = false}
!CHECK: acc.enter_data dataOperands(%[[CREATE_A]] : !fir.ref<!fir.array<10x10xf32>>) attributes {async}

!$acc enter data create(a) wait
Expand All @@ -106,22 +106,22 @@ subroutine acc_enter_data
!$acc enter data create(a) async wait
!CHECK: %[[BOUND0:.*]] = acc.bounds lowerbound(%{{.*}} : index) upperbound(%{{.*}} : index) extent(%[[C10]] : index) stride(%c1{{.*}} : index) startIdx(%{{.*}} : index)
!CHECK: %[[BOUND1:.*]] = acc.bounds lowerbound(%{{.*}} : index) upperbound(%{{.*}} : index) extent(%[[EXTENT_C10]] : index) stride(%c1{{.*}} : index) startIdx(%{{.*}} : index)
!CHECK: %[[CREATE_A:.*]] = acc.create varPtr(%[[DECLA]]#0 : !fir.ref<!fir.array<10x10xf32>>) bounds(%[[BOUND0]], %[[BOUND1]]) -> !fir.ref<!fir.array<10x10xf32>> {name = "a", structured = false}
!CHECK: %[[CREATE_A:.*]] = acc.create varPtr(%[[DECLA]]#0 : !fir.ref<!fir.array<10x10xf32>>) bounds(%[[BOUND0]], %[[BOUND1]]) -> !fir.ref<!fir.array<10x10xf32>> {asyncOnly = [#acc.device_type<none>], name = "a", structured = false}
!CHECK: acc.enter_data dataOperands(%[[CREATE_A]] : !fir.ref<!fir.array<10x10xf32>>) attributes {async, wait}

!$acc enter data create(a) async(1)
!CHECK: %[[ASYNC1:.*]] = arith.constant 1 : i32
!CHECK: %[[BOUND0:.*]] = acc.bounds lowerbound(%{{.*}} : index) upperbound(%{{.*}} : index) extent(%[[C10]] : index) stride(%c1{{.*}} : index) startIdx(%{{.*}} : index)
!CHECK: %[[BOUND1:.*]] = acc.bounds lowerbound(%{{.*}} : index) upperbound(%{{.*}} : index) extent(%[[EXTENT_C10]] : index) stride(%c1{{.*}} : index) startIdx(%{{.*}} : index)
!CHECK: %[[CREATE_A:.*]] = acc.create varPtr(%[[DECLA]]#0 : !fir.ref<!fir.array<10x10xf32>>) bounds(%[[BOUND0]], %[[BOUND1]]) -> !fir.ref<!fir.array<10x10xf32>> {name = "a", structured = false}
!CHECK: %[[ASYNC1:.*]] = arith.constant 1 : i32
!CHECK: %[[CREATE_A:.*]] = acc.create varPtr(%[[DECLA]]#0 : !fir.ref<!fir.array<10x10xf32>>) bounds(%[[BOUND0]], %[[BOUND1]]) async(%[[ASYNC1]] : i32) -> !fir.ref<!fir.array<10x10xf32>> {name = "a", structured = false}
!CHECK: acc.enter_data async(%[[ASYNC1]] : i32) dataOperands(%[[CREATE_A]] : !fir.ref<!fir.array<10x10xf32>>)

!$acc enter data create(a) async(async)
!CHECK: %[[ASYNC2:.*]] = fir.load %{{.*}} : !fir.ref<i32>
!CHECK: %[[BOUND0:.*]] = acc.bounds lowerbound(%{{.*}} : index) upperbound(%{{.*}} : index) extent(%[[C10]] : index) stride(%c1{{.*}} : index) startIdx(%{{.*}} : index)
!CHECK: %[[BOUND1:.*]] = acc.bounds lowerbound(%{{.*}} : index) upperbound(%{{.*}} : index) extent(%[[EXTENT_C10]] : index) stride(%c1{{.*}} : index) startIdx(%{{.*}} : index)

!CHECK: %[[CREATE_A:.*]] = acc.create varPtr(%[[DECLA]]#0 : !fir.ref<!fir.array<10x10xf32>>) bounds(%[[BOUND0]], %[[BOUND1]]) -> !fir.ref<!fir.array<10x10xf32>> {name = "a", structured = false}
!CHECK: %[[ASYNC2:.*]] = fir.load %{{.*}} : !fir.ref<i32>
!CHECK: %[[CREATE_A:.*]] = acc.create varPtr(%[[DECLA]]#0 : !fir.ref<!fir.array<10x10xf32>>) bounds(%[[BOUND0]], %[[BOUND1]]) async(%[[ASYNC2]] : i32) -> !fir.ref<!fir.array<10x10xf32>> {name = "a", structured = false}
!CHECK: acc.enter_data async(%[[ASYNC2]] : i32) dataOperands(%[[CREATE_A]] : !fir.ref<!fir.array<10x10xf32>>)

!$acc enter data create(a) wait(1)
Expand Down
16 changes: 8 additions & 8 deletions flang/test/Lower/OpenACC/acc-exit-data.f90
Original file line number Diff line number Diff line change
Expand Up @@ -56,32 +56,32 @@ subroutine acc_exit_data
!CHECK: acc.detach accPtr(%[[DEVPTR_D]] : !fir.ptr<f32>) {name = "d", structured = false}

!$acc exit data delete(a) async
!CHECK: %[[DEVPTR:.*]] = acc.getdeviceptr varPtr(%[[DECLA]]#0 : !fir.ref<!fir.array<10x10xf32>>) bounds(%{{.*}}, %{{.*}}) -> !fir.ref<!fir.array<10x10xf32>> {dataClause = #acc<data_clause acc_delete>, name = "a", structured = false}
!CHECK: %[[DEVPTR:.*]] = acc.getdeviceptr varPtr(%[[DECLA]]#0 : !fir.ref<!fir.array<10x10xf32>>) bounds(%{{.*}}, %{{.*}}) -> !fir.ref<!fir.array<10x10xf32>> {asyncOnly = [#acc.device_type<none>], dataClause = #acc<data_clause acc_delete>, name = "a", structured = false}
!CHECK: acc.exit_data dataOperands(%[[DEVPTR]] : !fir.ref<!fir.array<10x10xf32>>) attributes {async}
!CHECK: acc.delete accPtr(%[[DEVPTR]] : !fir.ref<!fir.array<10x10xf32>>) bounds(%{{.*}}, %{{.*}}) {name = "a", structured = false}
!CHECK: acc.delete accPtr(%[[DEVPTR]] : !fir.ref<!fir.array<10x10xf32>>) bounds(%{{.*}}, %{{.*}}) {asyncOnly = [#acc.device_type<none>], name = "a", structured = false}

!$acc exit data delete(a) wait
!CHECK: %[[DEVPTR:.*]] = acc.getdeviceptr varPtr(%[[DECLA]]#0 : !fir.ref<!fir.array<10x10xf32>>) bounds(%{{.*}}, %{{.*}}) -> !fir.ref<!fir.array<10x10xf32>> {dataClause = #acc<data_clause acc_delete>, name = "a", structured = false}
!CHECK: acc.exit_data dataOperands(%[[DEVPTR]] : !fir.ref<!fir.array<10x10xf32>>) attributes {wait}
!CHECK: acc.delete accPtr(%[[DEVPTR]] : !fir.ref<!fir.array<10x10xf32>>) bounds(%{{.*}}, %{{.*}}) {name = "a", structured = false}

!$acc exit data delete(a) async wait
!CHECK: %[[DEVPTR:.*]] = acc.getdeviceptr varPtr(%[[DECLA]]#0 : !fir.ref<!fir.array<10x10xf32>>) bounds(%{{.*}}, %{{.*}}) -> !fir.ref<!fir.array<10x10xf32>> {dataClause = #acc<data_clause acc_delete>, name = "a", structured = false}
!CHECK: %[[DEVPTR:.*]] = acc.getdeviceptr varPtr(%[[DECLA]]#0 : !fir.ref<!fir.array<10x10xf32>>) bounds(%{{.*}}, %{{.*}}) -> !fir.ref<!fir.array<10x10xf32>> {asyncOnly = [#acc.device_type<none>], dataClause = #acc<data_clause acc_delete>, name = "a", structured = false}
!CHECK: acc.exit_data dataOperands(%[[DEVPTR]] : !fir.ref<!fir.array<10x10xf32>>) attributes {async, wait}
!CHECK: acc.delete accPtr(%[[DEVPTR]] : !fir.ref<!fir.array<10x10xf32>>) bounds(%{{.*}}, %{{.*}}) {name = "a", structured = false}
!CHECK: acc.delete accPtr(%[[DEVPTR]] : !fir.ref<!fir.array<10x10xf32>>) bounds(%{{.*}}, %{{.*}}) {asyncOnly = [#acc.device_type<none>], name = "a", structured = false}

!$acc exit data delete(a) async(1)
!CHECK: %[[DEVPTR:.*]] = acc.getdeviceptr varPtr(%[[DECLA]]#0 : !fir.ref<!fir.array<10x10xf32>>) bounds(%{{.*}}, %{{.*}}) -> !fir.ref<!fir.array<10x10xf32>> {dataClause = #acc<data_clause acc_delete>, name = "a", structured = false}
!CHECK: %[[ASYNC1:.*]] = arith.constant 1 : i32
!CHECK: %[[DEVPTR:.*]] = acc.getdeviceptr varPtr(%[[DECLA]]#0 : !fir.ref<!fir.array<10x10xf32>>) bounds(%{{.*}}, %{{.*}}) async(%[[ASYNC1]] : i32) -> !fir.ref<!fir.array<10x10xf32>> {dataClause = #acc<data_clause acc_delete>, name = "a", structured = false}
!CHECK: acc.exit_data async(%[[ASYNC1]] : i32) dataOperands(%[[DEVPTR]] : !fir.ref<!fir.array<10x10xf32>>)
!CHECK: acc.delete accPtr(%[[DEVPTR]] : !fir.ref<!fir.array<10x10xf32>>) bounds(%{{.*}}, %{{.*}}) {name = "a", structured = false}
!CHECK: acc.delete accPtr(%[[DEVPTR]] : !fir.ref<!fir.array<10x10xf32>>) bounds(%{{.*}}, %{{.*}}) async(%[[ASYNC1]] : i32) {name = "a", structured = false}


!$acc exit data delete(a) async(async)
!CHECK: %[[DEVPTR:.*]] = acc.getdeviceptr varPtr(%[[DECLA]]#0 : !fir.ref<!fir.array<10x10xf32>>) bounds(%{{.*}}, %{{.*}}) -> !fir.ref<!fir.array<10x10xf32>> {dataClause = #acc<data_clause acc_delete>, name = "a", structured = false}
!CHECK: %[[ASYNC2:.*]] = fir.load %{{.*}} : !fir.ref<i32>
!CHECK: %[[DEVPTR:.*]] = acc.getdeviceptr varPtr(%[[DECLA]]#0 : !fir.ref<!fir.array<10x10xf32>>) bounds(%{{.*}}, %{{.*}}) async(%[[ASYNC2]] : i32) -> !fir.ref<!fir.array<10x10xf32>> {dataClause = #acc<data_clause acc_delete>, name = "a", structured = false}
!CHECK: acc.exit_data async(%[[ASYNC2]] : i32) dataOperands(%[[DEVPTR]] : !fir.ref<!fir.array<10x10xf32>>)
!CHECK: acc.delete accPtr(%[[DEVPTR]] : !fir.ref<!fir.array<10x10xf32>>) bounds(%{{.*}}, %{{.*}}) {name = "a", structured = false}
!CHECK: acc.delete accPtr(%[[DEVPTR]] : !fir.ref<!fir.array<10x10xf32>>) bounds(%{{.*}}, %{{.*}}) async(%[[ASYNC2]] : i32) {name = "a", structured = false}

!$acc exit data delete(a) wait(1)
!CHECK: %[[DEVPTR:.*]] = acc.getdeviceptr varPtr(%[[DECLA]]#0 : !fir.ref<!fir.array<10x10xf32>>) bounds(%{{.*}}, %{{.*}}) -> !fir.ref<!fir.array<10x10xf32>> {dataClause = #acc<data_clause acc_delete>, name = "a", structured = false}
Expand Down
14 changes: 7 additions & 7 deletions flang/test/Lower/OpenACC/acc-parallel.f90
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ subroutine acc_parallel
!$acc parallel async
!$acc end parallel

! CHECK: acc.parallel {
! CHECK: acc.parallel {
! CHECK: acc.yield
! CHECK-NEXT: } attributes {asyncOnly = [#acc.device_type<none>]}

Expand All @@ -76,7 +76,7 @@ subroutine acc_parallel
!$acc end parallel

! CHECK: [[ASYNC2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
! CHECK: acc.parallel async([[ASYNC2]] : i32) {
! CHECK-NEXT: acc.parallel async([[ASYNC2]] : i32) {
! CHECK: acc.yield
! CHECK-NEXT: }{{$}}

Expand Down Expand Up @@ -324,13 +324,13 @@ subroutine acc_parallel
! CHECK: acc.detach accPtr(%[[ATTACH_D]] : !fir.ptr<f32>) {dataClause = #acc<data_clause acc_attach>, name = "d"}
! CHECK: acc.detach accPtr(%[[ATTACH_E]] : !fir.ptr<f32>) {dataClause = #acc<data_clause acc_attach>, name = "e"}

!$acc parallel private(a) firstprivate(b) private(c)
!$acc parallel private(a) firstprivate(b) private(c) async(1)
!$acc end parallel

! CHECK: %[[ACC_PRIVATE_A:.*]] = acc.private varPtr(%[[DECLA]]#0 : !fir.ref<!fir.array<10x10xf32>>) bounds(%{{.*}}, %{{.*}}) -> !fir.ref<!fir.array<10x10xf32>> {name = "a"}
! CHECK: %[[ACC_FPRIVATE_B:.*]] = acc.firstprivate varPtr(%[[DECLB]]#0 : !fir.ref<!fir.array<10x10xf32>>) bounds(%{{.*}}, %{{.*}}) -> !fir.ref<!fir.array<10x10xf32>> {name = "b"}
! CHECK: %[[ACC_PRIVATE_C:.*]] = acc.private varPtr(%[[DECLC]]#0 : !fir.ref<!fir.array<10x10xf32>>) bounds(%{{.*}}, %{{.*}}) -> !fir.ref<!fir.array<10x10xf32>> {name = "c"}
! CHECK: acc.parallel firstprivate(@firstprivatization_section_ext10xext10_ref_10x10xf32 -> %[[ACC_FPRIVATE_B]] : !fir.ref<!fir.array<10x10xf32>>) private(@privatization_ref_10x10xf32 -> %[[ACC_PRIVATE_A]] : !fir.ref<!fir.array<10x10xf32>>, @privatization_ref_10x10xf32 -> %[[ACC_PRIVATE_C]] : !fir.ref<!fir.array<10x10xf32>>) {
! CHECK: %[[ACC_PRIVATE_A:.*]] = acc.private varPtr(%[[DECLA]]#0 : !fir.ref<!fir.array<10x10xf32>>) bounds(%{{.*}}, %{{.*}}) async([[ASYNC3:%.*]]) -> !fir.ref<!fir.array<10x10xf32>> {name = "a"}
! CHECK: %[[ACC_FPRIVATE_B:.*]] = acc.firstprivate varPtr(%[[DECLB]]#0 : !fir.ref<!fir.array<10x10xf32>>) bounds(%{{.*}}, %{{.*}}) async([[ASYNC3]]) -> !fir.ref<!fir.array<10x10xf32>> {name = "b"}
! CHECK: %[[ACC_PRIVATE_C:.*]] = acc.private varPtr(%[[DECLC]]#0 : !fir.ref<!fir.array<10x10xf32>>) bounds(%{{.*}}, %{{.*}}) async([[ASYNC3]]) -> !fir.ref<!fir.array<10x10xf32>> {name = "c"}
! CHECK: acc.parallel async([[ASYNC3]]) firstprivate(@firstprivatization_section_ext10xext10_ref_10x10xf32 -> %[[ACC_FPRIVATE_B]] : !fir.ref<!fir.array<10x10xf32>>) private(@privatization_ref_10x10xf32 -> %[[ACC_PRIVATE_A]] : !fir.ref<!fir.array<10x10xf32>>, @privatization_ref_10x10xf32 -> %[[ACC_PRIVATE_C]] : !fir.ref<!fir.array<10x10xf32>>) {
! CHECK: acc.yield
! CHECK-NEXT: }{{$}}

Expand Down
4 changes: 2 additions & 2 deletions flang/test/Lower/OpenACC/acc-serial.f90
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ subroutine acc_serial
!$acc serial async
!$acc end serial

! CHECK: acc.serial {
! CHECK: acc.serial {
! CHECK: acc.yield
! CHECK-NEXT: } attributes {asyncOnly = [#acc.device_type<none>]}

Expand All @@ -76,7 +76,7 @@ subroutine acc_serial
!$acc end serial

! CHECK: [[ASYNC2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
! CHECK: acc.serial async([[ASYNC2]] : i32) {
! CHECK-NEXT: acc.serial async([[ASYNC2]] : i32) {
! CHECK: acc.yield
! CHECK-NEXT: }{{$}}

Expand Down
Loading

0 comments on commit 9f87976

Please sign in to comment.