Skip to content

LICM: Avoid looking at use list of constant data #134690

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions llvm/lib/Transforms/Scalar/LICM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2294,10 +2294,14 @@ collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) {
AliasSetTracker AST(BatchAA);

auto IsPotentiallyPromotable = [L](const Instruction *I) {
if (const auto *SI = dyn_cast<StoreInst>(I))
return L->isLoopInvariant(SI->getPointerOperand());
if (const auto *LI = dyn_cast<LoadInst>(I))
return L->isLoopInvariant(LI->getPointerOperand());
if (const auto *SI = dyn_cast<StoreInst>(I)) {
const Value *PtrOp = SI->getPointerOperand();
return !isa<ConstantData>(PtrOp) && L->isLoopInvariant(PtrOp);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Surely ConstantData is just constant, so it is guaranteed to be loop-invariant, so this should be:

Suggested change
return !isa<ConstantData>(PtrOp) && L->isLoopInvariant(PtrOp);
return isa<ConstantData>(PtrOp) || L->isLoopInvariant(PtrOp);

And couldn't the ConstantData check be done inside of isLoopInvariant?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's loop invariant -- the point here is to skip promotion for constant data, even if its loop invariant, because promotion works by iterating over the use list. In practice, this basically means that we'll not try to promote loads/stores to null pointers, which is just a bugpoint/reduce-ism.

}
if (const auto *LI = dyn_cast<LoadInst>(I)) {
const Value *PtrOp = LI->getPointerOperand();
return !isa<ConstantData>(PtrOp) && L->isLoopInvariant(PtrOp);
}
return false;
};

Expand Down
9 changes: 5 additions & 4 deletions llvm/test/CodeGen/AMDGPU/swdev380865.ll
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,16 @@ define amdgpu_kernel void @_Z6kernelILi4000ELi1EEvPd(ptr addrspace(1) %x.coerce)
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_mov_b64 s[0:1], 0
; CHECK-NEXT: s_load_dword s2, s[0:1], 0x0
; CHECK-NEXT: s_mov_b64 s[0:1], 0x100
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
; CHECK-NEXT: s_mov_b32 s4, 0
; CHECK-NEXT: s_mov_b32 s0, 0
; CHECK-NEXT: s_mov_b32 s5, 0x40280000
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_mov_b32 s1, s2
; CHECK-NEXT: s_mov_b32 s2, 0
; CHECK-NEXT: v_mov_b32_e32 v0, s6
; CHECK-NEXT: s_mov_b32 s3, 0x40260000
; CHECK-NEXT: s_mov_b32 s5, 0x40280000
; CHECK-NEXT: v_mov_b32_e32 v1, s7
; CHECK-NEXT: .LBB0_1: ; %for.cond4.preheader
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
Expand All @@ -50,7 +51,7 @@ define amdgpu_kernel void @_Z6kernelILi4000ELi1EEvPd(ptr addrspace(1) %x.coerce)
; CHECK-NEXT: v_add_f64 v[0:1], v[0:1], s[4:5]
; CHECK-NEXT: s_cbranch_scc1 .LBB0_1
; CHECK-NEXT: ; %bb.2: ; %for.cond.cleanup.loopexit
; CHECK-NEXT: v_mov_b32_e32 v2, 0
; CHECK-NEXT: v_mov_b32_e32 v2, 0x100
; CHECK-NEXT: v_mov_b32_e32 v3, 0
; CHECK-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
; CHECK-NEXT: s_endpgm
Expand All @@ -61,7 +62,7 @@ entry:

for.cond4.preheader: ; preds = %for.cond4.preheader, %entry
%idx.07 = phi i32 [ %add13, %for.cond4.preheader ], [ 0, %entry ]
%arrayidx.promoted = load double, ptr addrspace(1) null, align 8
%arrayidx.promoted = load double, ptr addrspace(1) inttoptr (i64 256 to ptr addrspace(1)), align 8
%add9 = fadd contract double %arrayidx.promoted, 0.000000e+00
%add9.1 = fadd contract double %add9, 5.000000e+00
%add9.2 = fadd contract double %add9.1, 6.000000e+00
Expand All @@ -70,7 +71,7 @@ for.cond4.preheader: ; preds = %for.cond4.preheader
%add9.5 = fadd contract double %add9.4, 1.000000e+01
%add9.6 = fadd contract double %add9.5, 1.100000e+01
%add9.7 = fadd contract double %add9.6, 1.200000e+01
store double %add9.7, ptr addrspace(1) null, align 8
store double %add9.7, ptr addrspace(1) inttoptr (i64 256 to ptr addrspace(1)), align 8
%add13 = add i32 %idx.07, %0
%cmp = icmp slt i32 %add13, 2560
br i1 %cmp, label %for.cond4.preheader, label %for.cond.cleanup
Expand Down
22 changes: 13 additions & 9 deletions llvm/test/CodeGen/PowerPC/pr43527.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs \
; RUN: -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
; We don't want to produce a CTR loop due to the call to lrint in the body.
define dso_local void @test(i64 %arg, i64 %arg1) {
define dso_local void @test(i64 %arg, i64 %arg1, ptr %arg2) {
; CHECK-LABEL: test:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: bc 4, 4*cr5+lt, .LBB0_5
Expand All @@ -12,29 +12,33 @@ define dso_local void @test(i64 %arg, i64 %arg1) {
; CHECK-NEXT: mflr r0
; CHECK-NEXT: .cfi_def_cfa_offset 64
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: .cfi_offset r28, -32
; CHECK-NEXT: .cfi_offset r29, -24
; CHECK-NEXT: .cfi_offset r30, -16
; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: stdu r1, -64(r1)
; CHECK-NEXT: sub r30, r4, r3
; CHECK-NEXT: li r29, -4
; CHECK-NEXT: mr r30, r5
; CHECK-NEXT: sub r29, r4, r3
; CHECK-NEXT: addi r28, r5, -4
; CHECK-NEXT: std r0, 80(r1)
; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB0_3: # %bb5
; CHECK-NEXT: #
; CHECK-NEXT: lfsu f1, 4(r29)
; CHECK-NEXT: lfsu f1, 4(r28)
; CHECK-NEXT: bl lrint
; CHECK-NEXT: nop
; CHECK-NEXT: addi r30, r30, -1
; CHECK-NEXT: cmpldi r30, 0
; CHECK-NEXT: addi r29, r29, -1
; CHECK-NEXT: stb r3, 0(r30)
; CHECK-NEXT: cmpldi r29, 0
; CHECK-NEXT: bc 12, gt, .LBB0_3
; CHECK-NEXT: # %bb.4: # %bb15
; CHECK-NEXT: stb r3, 0(r3)
; CHECK-NEXT: addi r1, r1, 64
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB0_5: # %bb2
Expand All @@ -54,12 +58,12 @@ bb4: ; preds = %bb3

bb5: ; preds = %bb5, %bb4
%tmp6 = phi i64 [ %tmp12, %bb5 ], [ 0, %bb4 ]
%tmp7 = getelementptr inbounds float, ptr null, i64 %tmp6
%tmp7 = getelementptr inbounds float, ptr %arg2, i64 %tmp6
%tmp8 = load float, ptr %tmp7, align 4
%tmp9 = fpext float %tmp8 to double
%tmp10 = tail call i64 @llvm.lrint.i64.f64(double %tmp9) #2
%tmp11 = trunc i64 %tmp10 to i8
store i8 %tmp11, ptr undef, align 1
store i8 %tmp11, ptr %arg2, align 1
%tmp12 = add nuw i64 %tmp6, 1
%tmp13 = icmp eq i64 %tmp12, %tmp
br i1 %tmp13, label %bb15, label %bb5
Expand Down
5 changes: 3 additions & 2 deletions llvm/test/CodeGen/PowerPC/pr48519.ll
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ define void @julia__typed_vcat_20() #0 {
; CHECK-NEXT: # %bb.2: # %bb11
; CHECK-NEXT: bl __truncsfhf2
; CHECK-NEXT: nop
; CHECK-NEXT: sth r3, 0(r3)
; CHECK-NEXT: sth r3, 128(0)
;
; CHECK-P9-LABEL: julia__typed_vcat_20:
; CHECK-P9: # %bb.0: # %bb
Expand All @@ -54,6 +54,7 @@ define void @julia__typed_vcat_20() #0 {
; CHECK-P9-NEXT: bdnz .LBB0_1
; CHECK-P9-NEXT: # %bb.2: # %bb11
; CHECK-P9-NEXT: xscvdphp f0, f0
; CHECK-P9-NEXT: li r3, 128
; CHECK-P9-NEXT: stxsihx f0, 0, r3
bb:
%i = load i64, ptr addrspace(11) null, align 8
Expand All @@ -67,7 +68,7 @@ bb3: ; preds = %bb3, %bb
%i6 = add nsw i64 %i5, -1
%i7 = add i64 %i6, 0
%i8 = sitofp i64 %i7 to half
store half %i8, ptr addrspace(13) undef, align 2
store half %i8, ptr addrspace(13) inttoptr (i64 128 to ptr addrspace(13)), align 2
%i9 = icmp eq i64 %i4, 0
%i10 = add i64 %i4, 1
br i1 %i9, label %bb11, label %bb3
Expand Down
30 changes: 15 additions & 15 deletions llvm/test/CodeGen/PowerPC/sms-grp-order.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,32 +2,32 @@
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs\
; RUN: -mcpu=pwr9 --ppc-enable-pipeliner | FileCheck %s

define void @lame_encode_buffer_interleaved() local_unnamed_addr {
define void @lame_encode_buffer_interleaved(ptr %arg0) local_unnamed_addr {
; CHECK-LABEL: lame_encode_buffer_interleaved:
; CHECK: # %bb.0:
; CHECK-NEXT: lha 3, 0(3)
; CHECK-NEXT: li 5, 1
; CHECK-NEXT: lhz 4, 0(0)
; CHECK-NEXT: rldic 5, 5, 62, 1
; CHECK-NEXT: mtctr 5
; CHECK-NEXT: srawi 3, 3, 1
; CHECK-NEXT: addze 3, 3
; CHECK-NEXT: li 4, 1
; CHECK-NEXT: rldic 4, 4, 62, 1
; CHECK-NEXT: mtctr 4
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_1:
; CHECK-NEXT: extsh 4, 4
; CHECK-NEXT: lha 4, 0(3)
; CHECK-NEXT: lha 5, 0(3)
; CHECK-NEXT: srawi 4, 4, 1
; CHECK-NEXT: addze 4, 4
; CHECK-NEXT: srawi 5, 5, 1
; CHECK-NEXT: addze 5, 5
; CHECK-NEXT: sth 4, 0(3)
; CHECK-NEXT: sth 5, 0(3)
; CHECK-NEXT: bdnz .LBB0_1
; CHECK-NEXT: # %bb.2:
; CHECK-NEXT: sth 4, 0(0)
; CHECK-NEXT: sth 3, 0(3)
; CHECK-NEXT: blr
%undef = freeze ptr poison
br label %1

1: ; preds = %1, %0
%2 = phi i64 [ 0, %0 ], [ %13, %1 ]
%3 = load i16, ptr null, align 2
%4 = load i16, ptr undef, align 2
%3 = load i16, ptr %arg0, align 2
%4 = load i16, ptr %undef, align 2
%5 = sext i16 %3 to i32
%6 = sext i16 %4 to i32
%7 = add nsw i32 0, %5
Expand All @@ -36,8 +36,8 @@ define void @lame_encode_buffer_interleaved() local_unnamed_addr {
%10 = sdiv i32 %8, 2
%11 = trunc i32 %9 to i16
%12 = trunc i32 %10 to i16
store i16 %11, ptr null, align 2
store i16 %12, ptr undef, align 2
store i16 %11, ptr %arg0, align 2
store i16 %12, ptr %undef, align 2
%13 = add i64 %2, 4
%14 = icmp eq i64 %13, 0
br i1 %14, label %15, label %1
Expand Down
41 changes: 40 additions & 1 deletion llvm/test/Transforms/LICM/pr50367.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
; RUN: opt -S -passes='loop-mssa(licm)' < %s | FileCheck %s
@e = external dso_local global ptr, align 8

define void @main(i1 %arg) {
define void @main(i1 %arg, ptr %arg1) {
; CHECK-LABEL: @main(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP1:%.*]]
Expand All @@ -11,8 +11,47 @@ define void @main(i1 %arg) {
; CHECK: loop2:
; CHECK-NEXT: br i1 [[ARG:%.*]], label [[LOOP2_LATCH:%.*]], label [[LOOP_LATCH:%.*]]
; CHECK: loop2.latch:
; CHECK-NEXT: store i32 0, ptr [[ARG1:%.*]], align 4
; CHECK-NEXT: br label [[LOOP2]]
; CHECK: loop.latch:
; CHECK-NEXT: store ptr null, ptr @e, align 8, !tbaa [[TBAA0:![0-9]+]]
; CHECK-NEXT: [[PTR:%.*]] = load ptr, ptr @e, align 8, !tbaa [[TBAA0]]
; CHECK-NEXT: store i32 0, ptr [[PTR]], align 4, !tbaa [[TBAA4:![0-9]+]]
; CHECK-NEXT: br label [[LOOP1]]
;
entry:
br label %loop1

loop1:
br label %loop2

loop2:
br i1 %arg, label %loop2.latch, label %loop.latch

loop2.latch:
store i32 0, ptr %arg1, align 4
br label %loop2

loop.latch:
store ptr null, ptr @e, align 8, !tbaa !0
%ptr = load ptr, ptr @e, align 8, !tbaa !0
store i32 0, ptr %ptr, align 4, !tbaa !4
br label %loop1
}

define void @store_null(i1 %arg) {
; CHECK-LABEL: @store_null(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP1:%.*]]
; CHECK: loop1:
; CHECK-NEXT: br label [[LOOP2:%.*]]
; CHECK: loop2:
; CHECK-NEXT: br i1 [[ARG:%.*]], label [[LOOP2_LATCH:%.*]], label [[LOOP_LATCH:%.*]]
; CHECK: loop2.latch:
; CHECK-NEXT: store i32 0, ptr null, align 4
; CHECK-NEXT: br label [[LOOP2]]
; CHECK: loop.latch:
; CHECK-NEXT: store i32 0, ptr null, align 4, !tbaa [[TBAA4]]
; CHECK-NEXT: br label [[LOOP1]]
;
entry:
Expand Down
27 changes: 26 additions & 1 deletion llvm/test/Transforms/LICM/pr59324.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@ define void @test(ptr %a) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[V:%.*]] = load i32, ptr null, align 4
; CHECK-NEXT: store ptr null, ptr null, align 8
; CHECK-NEXT: [[P:%.*]] = load ptr, ptr null, align 8
; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[P]], align 4
; CHECK-NEXT: store i32 [[V]], ptr [[A:%.*]], align 4
; CHECK-NEXT: br label [[LOOP]]
;
entry:
Expand All @@ -19,3 +22,25 @@ loop:
store i32 %v, ptr %a
br label %loop
}

define void @test_inttoptr(ptr %a) {
; CHECK-LABEL: @test_inttoptr(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: store ptr null, ptr inttoptr (i64 128 to ptr), align 8
; CHECK-NEXT: [[P:%.*]] = load ptr, ptr inttoptr (i64 128 to ptr), align 8
; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[P]], align 4
; CHECK-NEXT: store i32 [[V]], ptr [[A:%.*]], align 4
; CHECK-NEXT: br label [[LOOP]]
;
entry:
br label %loop

loop:
store ptr null, ptr inttoptr (i64 128 to ptr)
%p = load ptr, ptr inttoptr (i64 128 to ptr)
%v = load i32, ptr %p
store i32 %v, ptr %a
br label %loop
}
Loading