Skip to content

Commit 7954b84

Browse files
committed
LICM: Avoid looking at use list of constant data
The codegen test changes seem incidental. Either way, sms-grp-order.ll already does not seem to reproduce the original issue.
1 parent d4cc3ae commit 7954b84

File tree

7 files changed

+110
-36
lines changed

7 files changed

+110
-36
lines changed

llvm/lib/Transforms/Scalar/LICM.cpp

+8-4
Original file line numberDiff line numberDiff line change
@@ -2294,10 +2294,14 @@ collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) {
22942294
AliasSetTracker AST(BatchAA);
22952295

22962296
auto IsPotentiallyPromotable = [L](const Instruction *I) {
2297-
if (const auto *SI = dyn_cast<StoreInst>(I))
2298-
return L->isLoopInvariant(SI->getPointerOperand());
2299-
if (const auto *LI = dyn_cast<LoadInst>(I))
2300-
return L->isLoopInvariant(LI->getPointerOperand());
2297+
if (const auto *SI = dyn_cast<StoreInst>(I)) {
2298+
const Value *PtrOp = SI->getPointerOperand();
2299+
return !isa<ConstantData>(PtrOp) && L->isLoopInvariant(PtrOp);
2300+
}
2301+
if (const auto *LI = dyn_cast<LoadInst>(I)) {
2302+
const Value *PtrOp = LI->getPointerOperand();
2303+
return !isa<ConstantData>(PtrOp) && L->isLoopInvariant(PtrOp);
2304+
}
23012305
return false;
23022306
};
23032307

llvm/test/CodeGen/AMDGPU/swdev380865.ll

+5-4
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,16 @@ define amdgpu_kernel void @_Z6kernelILi4000ELi1EEvPd(ptr addrspace(1) %x.coerce)
1616
; CHECK: ; %bb.0: ; %entry
1717
; CHECK-NEXT: s_mov_b64 s[0:1], 0
1818
; CHECK-NEXT: s_load_dword s2, s[0:1], 0x0
19+
; CHECK-NEXT: s_mov_b64 s[0:1], 0x100
1920
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
2021
; CHECK-NEXT: s_mov_b32 s4, 0
2122
; CHECK-NEXT: s_mov_b32 s0, 0
22-
; CHECK-NEXT: s_mov_b32 s5, 0x40280000
2323
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2424
; CHECK-NEXT: s_mov_b32 s1, s2
2525
; CHECK-NEXT: s_mov_b32 s2, 0
2626
; CHECK-NEXT: v_mov_b32_e32 v0, s6
2727
; CHECK-NEXT: s_mov_b32 s3, 0x40260000
28+
; CHECK-NEXT: s_mov_b32 s5, 0x40280000
2829
; CHECK-NEXT: v_mov_b32_e32 v1, s7
2930
; CHECK-NEXT: .LBB0_1: ; %for.cond4.preheader
3031
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -50,7 +51,7 @@ define amdgpu_kernel void @_Z6kernelILi4000ELi1EEvPd(ptr addrspace(1) %x.coerce)
5051
; CHECK-NEXT: v_add_f64 v[0:1], v[0:1], s[4:5]
5152
; CHECK-NEXT: s_cbranch_scc1 .LBB0_1
5253
; CHECK-NEXT: ; %bb.2: ; %for.cond.cleanup.loopexit
53-
; CHECK-NEXT: v_mov_b32_e32 v2, 0
54+
; CHECK-NEXT: v_mov_b32_e32 v2, 0x100
5455
; CHECK-NEXT: v_mov_b32_e32 v3, 0
5556
; CHECK-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
5657
; CHECK-NEXT: s_endpgm
@@ -61,7 +62,7 @@ entry:
6162

6263
for.cond4.preheader: ; preds = %for.cond4.preheader, %entry
6364
%idx.07 = phi i32 [ %add13, %for.cond4.preheader ], [ 0, %entry ]
64-
%arrayidx.promoted = load double, ptr addrspace(1) null, align 8
65+
%arrayidx.promoted = load double, ptr addrspace(1) inttoptr (i64 256 to ptr addrspace(1)), align 8
6566
%add9 = fadd contract double %arrayidx.promoted, 0.000000e+00
6667
%add9.1 = fadd contract double %add9, 5.000000e+00
6768
%add9.2 = fadd contract double %add9.1, 6.000000e+00
@@ -70,7 +71,7 @@ for.cond4.preheader: ; preds = %for.cond4.preheader
7071
%add9.5 = fadd contract double %add9.4, 1.000000e+01
7172
%add9.6 = fadd contract double %add9.5, 1.100000e+01
7273
%add9.7 = fadd contract double %add9.6, 1.200000e+01
73-
store double %add9.7, ptr addrspace(1) null, align 8
74+
store double %add9.7, ptr addrspace(1) inttoptr (i64 256 to ptr addrspace(1)), align 8
7475
%add13 = add i32 %idx.07, %0
7576
%cmp = icmp slt i32 %add13, 2560
7677
br i1 %cmp, label %for.cond4.preheader, label %for.cond.cleanup

llvm/test/CodeGen/PowerPC/pr43527.ll

+13-9
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs \
33
; RUN: -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
44
; We don't want to produce a CTR loop due to the call to lrint in the body.
5-
define dso_local void @test(i64 %arg, i64 %arg1) {
5+
define dso_local void @test(i64 %arg, i64 %arg1, ptr %arg2) {
66
; CHECK-LABEL: test:
77
; CHECK: # %bb.0: # %bb
88
; CHECK-NEXT: bc 4, 4*cr5+lt, .LBB0_5
@@ -12,29 +12,33 @@ define dso_local void @test(i64 %arg, i64 %arg1) {
1212
; CHECK-NEXT: mflr r0
1313
; CHECK-NEXT: .cfi_def_cfa_offset 64
1414
; CHECK-NEXT: .cfi_offset lr, 16
15+
; CHECK-NEXT: .cfi_offset r28, -32
1516
; CHECK-NEXT: .cfi_offset r29, -24
1617
; CHECK-NEXT: .cfi_offset r30, -16
18+
; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill
1719
; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill
1820
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
1921
; CHECK-NEXT: stdu r1, -64(r1)
20-
; CHECK-NEXT: sub r30, r4, r3
21-
; CHECK-NEXT: li r29, -4
22+
; CHECK-NEXT: mr r30, r5
23+
; CHECK-NEXT: sub r29, r4, r3
24+
; CHECK-NEXT: addi r28, r5, -4
2225
; CHECK-NEXT: std r0, 80(r1)
2326
; CHECK-NEXT: .p2align 5
2427
; CHECK-NEXT: .LBB0_3: # %bb5
2528
; CHECK-NEXT: #
26-
; CHECK-NEXT: lfsu f1, 4(r29)
29+
; CHECK-NEXT: lfsu f1, 4(r28)
2730
; CHECK-NEXT: bl lrint
2831
; CHECK-NEXT: nop
29-
; CHECK-NEXT: addi r30, r30, -1
30-
; CHECK-NEXT: cmpldi r30, 0
32+
; CHECK-NEXT: addi r29, r29, -1
33+
; CHECK-NEXT: stb r3, 0(r30)
34+
; CHECK-NEXT: cmpldi r29, 0
3135
; CHECK-NEXT: bc 12, gt, .LBB0_3
3236
; CHECK-NEXT: # %bb.4: # %bb15
33-
; CHECK-NEXT: stb r3, 0(r3)
3437
; CHECK-NEXT: addi r1, r1, 64
3538
; CHECK-NEXT: ld r0, 16(r1)
3639
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
3740
; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
41+
; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
3842
; CHECK-NEXT: mtlr r0
3943
; CHECK-NEXT: blr
4044
; CHECK-NEXT: .LBB0_5: # %bb2
@@ -54,12 +58,12 @@ bb4: ; preds = %bb3
5458

5559
bb5: ; preds = %bb5, %bb4
5660
%tmp6 = phi i64 [ %tmp12, %bb5 ], [ 0, %bb4 ]
57-
%tmp7 = getelementptr inbounds float, ptr null, i64 %tmp6
61+
%tmp7 = getelementptr inbounds float, ptr %arg2, i64 %tmp6
5862
%tmp8 = load float, ptr %tmp7, align 4
5963
%tmp9 = fpext float %tmp8 to double
6064
%tmp10 = tail call i64 @llvm.lrint.i64.f64(double %tmp9) #2
6165
%tmp11 = trunc i64 %tmp10 to i8
62-
store i8 %tmp11, ptr undef, align 1
66+
store i8 %tmp11, ptr %arg2, align 1
6367
%tmp12 = add nuw i64 %tmp6, 1
6468
%tmp13 = icmp eq i64 %tmp12, %tmp
6569
br i1 %tmp13, label %bb15, label %bb5

llvm/test/CodeGen/PowerPC/pr48519.ll

+3-2
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ define void @julia__typed_vcat_20() #0 {
3232
; CHECK-NEXT: # %bb.2: # %bb11
3333
; CHECK-NEXT: bl __truncsfhf2
3434
; CHECK-NEXT: nop
35-
; CHECK-NEXT: sth r3, 0(r3)
35+
; CHECK-NEXT: sth r3, 128(0)
3636
;
3737
; CHECK-P9-LABEL: julia__typed_vcat_20:
3838
; CHECK-P9: # %bb.0: # %bb
@@ -54,6 +54,7 @@ define void @julia__typed_vcat_20() #0 {
5454
; CHECK-P9-NEXT: bdnz .LBB0_1
5555
; CHECK-P9-NEXT: # %bb.2: # %bb11
5656
; CHECK-P9-NEXT: xscvdphp f0, f0
57+
; CHECK-P9-NEXT: li r3, 128
5758
; CHECK-P9-NEXT: stxsihx f0, 0, r3
5859
bb:
5960
%i = load i64, ptr addrspace(11) null, align 8
@@ -67,7 +68,7 @@ bb3: ; preds = %bb3, %bb
6768
%i6 = add nsw i64 %i5, -1
6869
%i7 = add i64 %i6, 0
6970
%i8 = sitofp i64 %i7 to half
70-
store half %i8, ptr addrspace(13) undef, align 2
71+
store half %i8, ptr addrspace(13) inttoptr (i64 128 to ptr addrspace(13)), align 2
7172
%i9 = icmp eq i64 %i4, 0
7273
%i10 = add i64 %i4, 1
7374
br i1 %i9, label %bb11, label %bb3

llvm/test/CodeGen/PowerPC/sms-grp-order.ll

+15-15
Original file line numberDiff line numberDiff line change
@@ -2,32 +2,32 @@
22
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs\
33
; RUN: -mcpu=pwr9 --ppc-enable-pipeliner | FileCheck %s
44

5-
define void @lame_encode_buffer_interleaved() local_unnamed_addr {
5+
define void @lame_encode_buffer_interleaved(ptr %arg0) local_unnamed_addr {
66
; CHECK-LABEL: lame_encode_buffer_interleaved:
77
; CHECK: # %bb.0:
8-
; CHECK-NEXT: lha 3, 0(3)
9-
; CHECK-NEXT: li 5, 1
10-
; CHECK-NEXT: lhz 4, 0(0)
11-
; CHECK-NEXT: rldic 5, 5, 62, 1
12-
; CHECK-NEXT: mtctr 5
13-
; CHECK-NEXT: srawi 3, 3, 1
14-
; CHECK-NEXT: addze 3, 3
8+
; CHECK-NEXT: li 4, 1
9+
; CHECK-NEXT: rldic 4, 4, 62, 1
10+
; CHECK-NEXT: mtctr 4
1511
; CHECK-NEXT: .p2align 4
1612
; CHECK-NEXT: .LBB0_1:
17-
; CHECK-NEXT: extsh 4, 4
13+
; CHECK-NEXT: lha 4, 0(3)
14+
; CHECK-NEXT: lha 5, 0(3)
1815
; CHECK-NEXT: srawi 4, 4, 1
1916
; CHECK-NEXT: addze 4, 4
17+
; CHECK-NEXT: srawi 5, 5, 1
18+
; CHECK-NEXT: addze 5, 5
19+
; CHECK-NEXT: sth 4, 0(3)
20+
; CHECK-NEXT: sth 5, 0(3)
2021
; CHECK-NEXT: bdnz .LBB0_1
2122
; CHECK-NEXT: # %bb.2:
22-
; CHECK-NEXT: sth 4, 0(0)
23-
; CHECK-NEXT: sth 3, 0(3)
2423
; CHECK-NEXT: blr
24+
%undef = freeze ptr poison
2525
br label %1
2626

2727
1: ; preds = %1, %0
2828
%2 = phi i64 [ 0, %0 ], [ %13, %1 ]
29-
%3 = load i16, ptr null, align 2
30-
%4 = load i16, ptr undef, align 2
29+
%3 = load i16, ptr %arg0, align 2
30+
%4 = load i16, ptr %undef, align 2
3131
%5 = sext i16 %3 to i32
3232
%6 = sext i16 %4 to i32
3333
%7 = add nsw i32 0, %5
@@ -36,8 +36,8 @@ define void @lame_encode_buffer_interleaved() local_unnamed_addr {
3636
%10 = sdiv i32 %8, 2
3737
%11 = trunc i32 %9 to i16
3838
%12 = trunc i32 %10 to i16
39-
store i16 %11, ptr null, align 2
40-
store i16 %12, ptr undef, align 2
39+
store i16 %11, ptr %arg0, align 2
40+
store i16 %12, ptr %undef, align 2
4141
%13 = add i64 %2, 4
4242
%14 = icmp eq i64 %13, 0
4343
br i1 %14, label %15, label %1

llvm/test/Transforms/LICM/pr50367.ll

+40-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
; RUN: opt -S -passes='loop-mssa(licm)' < %s | FileCheck %s
33
@e = external dso_local global ptr, align 8
44

5-
define void @main(i1 %arg) {
5+
define void @main(i1 %arg, ptr %arg1) {
66
; CHECK-LABEL: @main(
77
; CHECK-NEXT: entry:
88
; CHECK-NEXT: br label [[LOOP1:%.*]]
@@ -11,8 +11,47 @@ define void @main(i1 %arg) {
1111
; CHECK: loop2:
1212
; CHECK-NEXT: br i1 [[ARG:%.*]], label [[LOOP2_LATCH:%.*]], label [[LOOP_LATCH:%.*]]
1313
; CHECK: loop2.latch:
14+
; CHECK-NEXT: store i32 0, ptr [[ARG1:%.*]], align 4
1415
; CHECK-NEXT: br label [[LOOP2]]
1516
; CHECK: loop.latch:
17+
; CHECK-NEXT: store ptr null, ptr @e, align 8, !tbaa [[TBAA0:![0-9]+]]
18+
; CHECK-NEXT: [[PTR:%.*]] = load ptr, ptr @e, align 8, !tbaa [[TBAA0]]
19+
; CHECK-NEXT: store i32 0, ptr [[PTR]], align 4, !tbaa [[TBAA4:![0-9]+]]
20+
; CHECK-NEXT: br label [[LOOP1]]
21+
;
22+
entry:
23+
br label %loop1
24+
25+
loop1:
26+
br label %loop2
27+
28+
loop2:
29+
br i1 %arg, label %loop2.latch, label %loop.latch
30+
31+
loop2.latch:
32+
store i32 0, ptr %arg1, align 4
33+
br label %loop2
34+
35+
loop.latch:
36+
store ptr null, ptr @e, align 8, !tbaa !0
37+
%ptr = load ptr, ptr @e, align 8, !tbaa !0
38+
store i32 0, ptr %ptr, align 4, !tbaa !4
39+
br label %loop1
40+
}
41+
42+
define void @store_null(i1 %arg) {
43+
; CHECK-LABEL: @store_null(
44+
; CHECK-NEXT: entry:
45+
; CHECK-NEXT: br label [[LOOP1:%.*]]
46+
; CHECK: loop1:
47+
; CHECK-NEXT: br label [[LOOP2:%.*]]
48+
; CHECK: loop2:
49+
; CHECK-NEXT: br i1 [[ARG:%.*]], label [[LOOP2_LATCH:%.*]], label [[LOOP_LATCH:%.*]]
50+
; CHECK: loop2.latch:
51+
; CHECK-NEXT: store i32 0, ptr null, align 4
52+
; CHECK-NEXT: br label [[LOOP2]]
53+
; CHECK: loop.latch:
54+
; CHECK-NEXT: store i32 0, ptr null, align 4, !tbaa [[TBAA4]]
1655
; CHECK-NEXT: br label [[LOOP1]]
1756
;
1857
entry:

llvm/test/Transforms/LICM/pr59324.ll

+26-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,10 @@ define void @test(ptr %a) {
66
; CHECK-NEXT: entry:
77
; CHECK-NEXT: br label [[LOOP:%.*]]
88
; CHECK: loop:
9-
; CHECK-NEXT: [[V:%.*]] = load i32, ptr null, align 4
9+
; CHECK-NEXT: store ptr null, ptr null, align 8
10+
; CHECK-NEXT: [[P:%.*]] = load ptr, ptr null, align 8
11+
; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[P]], align 4
12+
; CHECK-NEXT: store i32 [[V]], ptr [[A:%.*]], align 4
1013
; CHECK-NEXT: br label [[LOOP]]
1114
;
1215
entry:
@@ -19,3 +22,25 @@ loop:
1922
store i32 %v, ptr %a
2023
br label %loop
2124
}
25+
26+
define void @test_inttoptr(ptr %a) {
27+
; CHECK-LABEL: @test_inttoptr(
28+
; CHECK-NEXT: entry:
29+
; CHECK-NEXT: br label [[LOOP:%.*]]
30+
; CHECK: loop:
31+
; CHECK-NEXT: store ptr null, ptr inttoptr (i64 128 to ptr), align 8
32+
; CHECK-NEXT: [[P:%.*]] = load ptr, ptr inttoptr (i64 128 to ptr), align 8
33+
; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[P]], align 4
34+
; CHECK-NEXT: store i32 [[V]], ptr [[A:%.*]], align 4
35+
; CHECK-NEXT: br label [[LOOP]]
36+
;
37+
entry:
38+
br label %loop
39+
40+
loop:
41+
store ptr null, ptr inttoptr (i64 128 to ptr)
42+
%p = load ptr, ptr inttoptr (i64 128 to ptr)
43+
%v = load i32, ptr %p
44+
store i32 %v, ptr %a
45+
br label %loop
46+
}

0 commit comments

Comments
 (0)