Skip to content

Commit

Permalink
AMDGPU: Fix inst-selection of large scratch offsets with sgpr base
Browse files Browse the repository at this point in the history
Use i32 instead of i16 for the offset so that a large offset is not
interpreted as a negative 16-bit offset.
  • Loading branch information
petar-avramovic committed Sep 27, 2024
1 parent 43076c2 commit 2ea25b2
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 9 deletions.
6 changes: 3 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1911,7 +1911,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,
0);
}

Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i16);
Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i32);

return true;
}
Expand Down Expand Up @@ -1966,7 +1966,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
return false;
if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
return false;
Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
return true;
}
}
Expand Down Expand Up @@ -1999,7 +1999,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
return false;
SAddr = SelectSAddrFI(CurDAG, SAddr);
Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
return true;
}

Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AMDGPU/flat-scratch.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4926,7 +4926,7 @@ define amdgpu_gs void @sgpr_base_large_offset(ptr addrspace(1) %out, ptr addrspa
;
; GFX12-LABEL: sgpr_base_large_offset:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: scratch_load_b32 v2, off, s0 offset:-24
; GFX12-NEXT: scratch_load_b32 v2, off, s0 offset:65512
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_store_b32 v[0:1], v2, off
; GFX12-NEXT: s_nop 0
Expand Down Expand Up @@ -4985,7 +4985,7 @@ define amdgpu_gs void @sgpr_base_large_offset(ptr addrspace(1) %out, ptr addrspa
;
; GFX12-PAL-LABEL: sgpr_base_large_offset:
; GFX12-PAL: ; %bb.0: ; %entry
; GFX12-PAL-NEXT: scratch_load_b32 v2, off, s0 offset:-24
; GFX12-PAL-NEXT: scratch_load_b32 v2, off, s0 offset:65512
; GFX12-PAL-NEXT: s_wait_loadcnt 0x0
; GFX12-PAL-NEXT: global_store_b32 v[0:1], v2, off
; GFX12-PAL-NEXT: s_nop 0
Expand Down Expand Up @@ -5038,7 +5038,7 @@ define amdgpu_gs void @sgpr_base_large_offset_split(ptr addrspace(1) %out, ptr a
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: v_mov_b32_e32 v2, 0x1000000
; GFX12-NEXT: s_and_b32 s0, s0, -4
; GFX12-NEXT: scratch_load_b32 v2, v2, s0 offset:-24 scope:SCOPE_SYS
; GFX12-NEXT: scratch_load_b32 v2, v2, s0 offset:65512 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_store_b32 v[0:1], v2, off
; GFX12-NEXT: s_nop 0
Expand Down Expand Up @@ -5103,7 +5103,7 @@ define amdgpu_gs void @sgpr_base_large_offset_split(ptr addrspace(1) %out, ptr a
; GFX12-PAL: ; %bb.0: ; %entry
; GFX12-PAL-NEXT: v_mov_b32_e32 v2, 0x1000000
; GFX12-PAL-NEXT: s_and_b32 s0, s0, -4
; GFX12-PAL-NEXT: scratch_load_b32 v2, v2, s0 offset:-24 scope:SCOPE_SYS
; GFX12-PAL-NEXT: scratch_load_b32 v2, v2, s0 offset:65512 scope:SCOPE_SYS
; GFX12-PAL-NEXT: s_wait_loadcnt 0x0
; GFX12-PAL-NEXT: global_store_b32 v[0:1], v2, off
; GFX12-PAL-NEXT: s_nop 0
Expand Down Expand Up @@ -5159,7 +5159,7 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset(ptr a
; GFX12: ; %bb.0: ; %bb
; GFX12-NEXT: v_mov_b32_e32 v1, 15
; GFX12-NEXT: s_add_co_i32 s0, s0, s1
; GFX12-NEXT: scratch_store_b32 v0, v1, s0 offset:-24 scope:SCOPE_SYS
; GFX12-NEXT: scratch_store_b32 v0, v1, s0 offset:65512 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: s_endpgm
;
Expand Down Expand Up @@ -5221,7 +5221,7 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset(ptr a
; GFX12-PAL: ; %bb.0: ; %bb
; GFX12-PAL-NEXT: v_mov_b32_e32 v1, 15
; GFX12-PAL-NEXT: s_add_co_i32 s0, s0, s1
; GFX12-PAL-NEXT: scratch_store_b32 v0, v1, s0 offset:-24 scope:SCOPE_SYS
; GFX12-PAL-NEXT: scratch_store_b32 v0, v1, s0 offset:65512 scope:SCOPE_SYS
; GFX12-PAL-NEXT: s_wait_storecnt 0x0
; GFX12-PAL-NEXT: s_endpgm
bb:
Expand Down

0 comments on commit 2ea25b2

Please sign in to comment.