Skip to content

Commit c39fba2

Browse files
[AMDGPU] S_SET_GPR_IDX_ON can be passed an immediate index (#125086)
Oversight found by the ISel fuzzing effort. The expansion code assumed the index operand is always a register, but in some cases it can be an immediate. TableGen's operand type for the instruction is SSrc_b32, i.e. either a register or an immediate is fine. Added the repro from the bug reporter as a test case; prior to this patch, LLVM asserts in getReg. Fixes SWDEV-508589.
1 parent 2428b6e commit c39fba2

File tree

2 files changed

+40
-2
lines changed

2 files changed

+40
-2
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2366,11 +2366,11 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
23662366
assert(ST.useVGPRIndexMode());
23672367
Register VecReg = MI.getOperand(0).getReg();
23682368
bool IsUndef = MI.getOperand(1).isUndef();
2369-
Register Idx = MI.getOperand(3).getReg();
2369+
MachineOperand Idx = MI.getOperand(3);
23702370
Register SubReg = MI.getOperand(4).getImm();
23712371

23722372
MachineInstr *SetOn = BuildMI(MBB, MI, DL, get(AMDGPU::S_SET_GPR_IDX_ON))
2373-
.addReg(Idx)
2373+
.add(Idx)
23742374
.addImm(AMDGPU::VGPRIndexMode::DST_ENABLE);
23752375
SetOn->getOperand(3).setIsUndef();
23762376

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs | FileCheck %s
3+
4+
define amdgpu_kernel void @copy_to_reg_frameindex(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c) {
5+
; CHECK-LABEL: copy_to_reg_frameindex:
6+
; CHECK: ; %bb.0: ; %entry
7+
; CHECK-NEXT: ; implicit-def: $vgpr0
8+
; CHECK-NEXT: .LBB0_1: ; %loop
9+
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
10+
; CHECK-NEXT: s_cmp_lt_u32 0, 16
11+
; CHECK-NEXT: s_set_gpr_idx_on 0, gpr_idx(DST)
12+
; CHECK-NEXT: v_mov_b32_e32 v0, 0
13+
; CHECK-NEXT: s_set_gpr_idx_off
14+
; CHECK-NEXT: s_cbranch_scc1 .LBB0_1
15+
; CHECK-NEXT: ; %bb.2: ; %done
16+
; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
17+
; CHECK-NEXT: v_mov_b32_e32 v1, 0
18+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
19+
; CHECK-NEXT: global_store_dword v1, v0, s[0:1]
20+
; CHECK-NEXT: s_endpgm
21+
entry:
22+
%B = srem i32 %c, -1
23+
%alloca = alloca [16 x i32], align 4, addrspace(5)
24+
br label %loop
25+
26+
loop:
27+
%inc = phi i32 [ 0, %entry ], [ %inc.i, %loop ]
28+
%ptr = getelementptr [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %inc
29+
store i32 %inc, ptr addrspace(5) %ptr, align 4
30+
%inc.i = add i32 %inc, %B
31+
%cnd = icmp uge i32 %inc.i, 16
32+
br i1 %cnd, label %done, label %loop
33+
34+
done:
35+
%tmp1 = load i32, ptr addrspace(5) %alloca, align 4
36+
store i32 %tmp1, ptr addrspace(1) %out, align 4
37+
ret void
38+
}

0 commit comments

Comments
 (0)