LICM: Avoid looking at use list of constant data #134690
Conversation
Warning This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite.
@llvm/pr-subscribers-backend-powerpc @llvm/pr-subscribers-llvm-transforms

Author: Matt Arsenault (arsenm)

Changes

The codegen test changes seem incidental. Either way, sms-grp-order.ll seems to already not hit the original issue.

Full diff: https://github.com/llvm/llvm-project/pull/134690.diff

7 Files Affected:
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index d872a381050ca..889b43a843bef 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -2294,10 +2294,14 @@ collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) {
AliasSetTracker AST(BatchAA);
auto IsPotentiallyPromotable = [L](const Instruction *I) {
- if (const auto *SI = dyn_cast<StoreInst>(I))
- return L->isLoopInvariant(SI->getPointerOperand());
- if (const auto *LI = dyn_cast<LoadInst>(I))
- return L->isLoopInvariant(LI->getPointerOperand());
+ if (const auto *SI = dyn_cast<StoreInst>(I)) {
+ const Value *PtrOp = SI->getPointerOperand();
+ return !isa<ConstantData>(PtrOp) && L->isLoopInvariant(PtrOp);
+ }
+ if (const auto *LI = dyn_cast<LoadInst>(I)) {
+ const Value *PtrOp = LI->getPointerOperand();
+ return !isa<ConstantData>(PtrOp) && L->isLoopInvariant(PtrOp);
+ }
return false;
};
diff --git a/llvm/test/CodeGen/AMDGPU/swdev380865.ll b/llvm/test/CodeGen/AMDGPU/swdev380865.ll
index 9189cef019cf4..4a5dc8f300af3 100644
--- a/llvm/test/CodeGen/AMDGPU/swdev380865.ll
+++ b/llvm/test/CodeGen/AMDGPU/swdev380865.ll
@@ -16,15 +16,16 @@ define amdgpu_kernel void @_Z6kernelILi4000ELi1EEvPd(ptr addrspace(1) %x.coerce)
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_mov_b64 s[0:1], 0
; CHECK-NEXT: s_load_dword s2, s[0:1], 0x0
+; CHECK-NEXT: s_mov_b64 s[0:1], 0x100
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
; CHECK-NEXT: s_mov_b32 s4, 0
; CHECK-NEXT: s_mov_b32 s0, 0
-; CHECK-NEXT: s_mov_b32 s5, 0x40280000
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_mov_b32 s1, s2
; CHECK-NEXT: s_mov_b32 s2, 0
; CHECK-NEXT: v_mov_b32_e32 v0, s6
; CHECK-NEXT: s_mov_b32 s3, 0x40260000
+; CHECK-NEXT: s_mov_b32 s5, 0x40280000
; CHECK-NEXT: v_mov_b32_e32 v1, s7
; CHECK-NEXT: .LBB0_1: ; %for.cond4.preheader
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -50,7 +51,7 @@ define amdgpu_kernel void @_Z6kernelILi4000ELi1EEvPd(ptr addrspace(1) %x.coerce)
; CHECK-NEXT: v_add_f64 v[0:1], v[0:1], s[4:5]
; CHECK-NEXT: s_cbranch_scc1 .LBB0_1
; CHECK-NEXT: ; %bb.2: ; %for.cond.cleanup.loopexit
-; CHECK-NEXT: v_mov_b32_e32 v2, 0
+; CHECK-NEXT: v_mov_b32_e32 v2, 0x100
; CHECK-NEXT: v_mov_b32_e32 v3, 0
; CHECK-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
; CHECK-NEXT: s_endpgm
@@ -61,7 +62,7 @@ entry:
for.cond4.preheader: ; preds = %for.cond4.preheader, %entry
%idx.07 = phi i32 [ %add13, %for.cond4.preheader ], [ 0, %entry ]
- %arrayidx.promoted = load double, ptr addrspace(1) null, align 8
+ %arrayidx.promoted = load double, ptr addrspace(1) inttoptr (i64 256 to ptr addrspace(1)), align 8
%add9 = fadd contract double %arrayidx.promoted, 0.000000e+00
%add9.1 = fadd contract double %add9, 5.000000e+00
%add9.2 = fadd contract double %add9.1, 6.000000e+00
@@ -70,7 +71,7 @@ for.cond4.preheader: ; preds = %for.cond4.preheader
%add9.5 = fadd contract double %add9.4, 1.000000e+01
%add9.6 = fadd contract double %add9.5, 1.100000e+01
%add9.7 = fadd contract double %add9.6, 1.200000e+01
- store double %add9.7, ptr addrspace(1) null, align 8
+ store double %add9.7, ptr addrspace(1) inttoptr (i64 256 to ptr addrspace(1)), align 8
%add13 = add i32 %idx.07, %0
%cmp = icmp slt i32 %add13, 2560
br i1 %cmp, label %for.cond4.preheader, label %for.cond.cleanup
diff --git a/llvm/test/CodeGen/PowerPC/pr43527.ll b/llvm/test/CodeGen/PowerPC/pr43527.ll
index 379bd6c070c77..adfea51077a0b 100644
--- a/llvm/test/CodeGen/PowerPC/pr43527.ll
+++ b/llvm/test/CodeGen/PowerPC/pr43527.ll
@@ -2,7 +2,7 @@
; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs \
; RUN: -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
; We don't want to produce a CTR loop due to the call to lrint in the body.
-define dso_local void @test(i64 %arg, i64 %arg1) {
+define dso_local void @test(i64 %arg, i64 %arg1, ptr %arg2) {
; CHECK-LABEL: test:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: bc 4, 4*cr5+lt, .LBB0_5
@@ -12,29 +12,33 @@ define dso_local void @test(i64 %arg, i64 %arg1) {
; CHECK-NEXT: mflr r0
; CHECK-NEXT: .cfi_def_cfa_offset 64
; CHECK-NEXT: .cfi_offset lr, 16
+; CHECK-NEXT: .cfi_offset r28, -32
; CHECK-NEXT: .cfi_offset r29, -24
; CHECK-NEXT: .cfi_offset r30, -16
+; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: stdu r1, -64(r1)
-; CHECK-NEXT: sub r30, r4, r3
-; CHECK-NEXT: li r29, -4
+; CHECK-NEXT: mr r30, r5
+; CHECK-NEXT: sub r29, r4, r3
+; CHECK-NEXT: addi r28, r5, -4
; CHECK-NEXT: std r0, 80(r1)
; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB0_3: # %bb5
; CHECK-NEXT: #
-; CHECK-NEXT: lfsu f1, 4(r29)
+; CHECK-NEXT: lfsu f1, 4(r28)
; CHECK-NEXT: bl lrint
; CHECK-NEXT: nop
-; CHECK-NEXT: addi r30, r30, -1
-; CHECK-NEXT: cmpldi r30, 0
+; CHECK-NEXT: addi r29, r29, -1
+; CHECK-NEXT: stb r3, 0(r30)
+; CHECK-NEXT: cmpldi r29, 0
; CHECK-NEXT: bc 12, gt, .LBB0_3
; CHECK-NEXT: # %bb.4: # %bb15
-; CHECK-NEXT: stb r3, 0(r3)
; CHECK-NEXT: addi r1, r1, 64
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB0_5: # %bb2
@@ -54,12 +58,12 @@ bb4: ; preds = %bb3
bb5: ; preds = %bb5, %bb4
%tmp6 = phi i64 [ %tmp12, %bb5 ], [ 0, %bb4 ]
- %tmp7 = getelementptr inbounds float, ptr null, i64 %tmp6
+ %tmp7 = getelementptr inbounds float, ptr %arg2, i64 %tmp6
%tmp8 = load float, ptr %tmp7, align 4
%tmp9 = fpext float %tmp8 to double
%tmp10 = tail call i64 @llvm.lrint.i64.f64(double %tmp9) #2
%tmp11 = trunc i64 %tmp10 to i8
- store i8 %tmp11, ptr undef, align 1
+ store i8 %tmp11, ptr %arg2, align 1
%tmp12 = add nuw i64 %tmp6, 1
%tmp13 = icmp eq i64 %tmp12, %tmp
br i1 %tmp13, label %bb15, label %bb5
diff --git a/llvm/test/CodeGen/PowerPC/pr48519.ll b/llvm/test/CodeGen/PowerPC/pr48519.ll
index fa156454a1313..b610f12159ee2 100644
--- a/llvm/test/CodeGen/PowerPC/pr48519.ll
+++ b/llvm/test/CodeGen/PowerPC/pr48519.ll
@@ -32,7 +32,7 @@ define void @julia__typed_vcat_20() #0 {
; CHECK-NEXT: # %bb.2: # %bb11
; CHECK-NEXT: bl __truncsfhf2
; CHECK-NEXT: nop
-; CHECK-NEXT: sth r3, 0(r3)
+; CHECK-NEXT: sth r3, 128(0)
;
; CHECK-P9-LABEL: julia__typed_vcat_20:
; CHECK-P9: # %bb.0: # %bb
@@ -54,6 +54,7 @@ define void @julia__typed_vcat_20() #0 {
; CHECK-P9-NEXT: bdnz .LBB0_1
; CHECK-P9-NEXT: # %bb.2: # %bb11
; CHECK-P9-NEXT: xscvdphp f0, f0
+; CHECK-P9-NEXT: li r3, 128
; CHECK-P9-NEXT: stxsihx f0, 0, r3
bb:
%i = load i64, ptr addrspace(11) null, align 8
@@ -67,7 +68,7 @@ bb3: ; preds = %bb3, %bb
%i6 = add nsw i64 %i5, -1
%i7 = add i64 %i6, 0
%i8 = sitofp i64 %i7 to half
- store half %i8, ptr addrspace(13) undef, align 2
+ store half %i8, ptr addrspace(13) inttoptr (i64 128 to ptr addrspace(13)), align 2
%i9 = icmp eq i64 %i4, 0
%i10 = add i64 %i4, 1
br i1 %i9, label %bb11, label %bb3
diff --git a/llvm/test/CodeGen/PowerPC/sms-grp-order.ll b/llvm/test/CodeGen/PowerPC/sms-grp-order.ll
index f72598cb4cbcc..eaea47608eb23 100644
--- a/llvm/test/CodeGen/PowerPC/sms-grp-order.ll
+++ b/llvm/test/CodeGen/PowerPC/sms-grp-order.ll
@@ -2,32 +2,32 @@
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs\
; RUN: -mcpu=pwr9 --ppc-enable-pipeliner | FileCheck %s
-define void @lame_encode_buffer_interleaved() local_unnamed_addr {
+define void @lame_encode_buffer_interleaved(ptr %arg0) local_unnamed_addr {
; CHECK-LABEL: lame_encode_buffer_interleaved:
; CHECK: # %bb.0:
-; CHECK-NEXT: lha 3, 0(3)
-; CHECK-NEXT: li 5, 1
-; CHECK-NEXT: lhz 4, 0(0)
-; CHECK-NEXT: rldic 5, 5, 62, 1
-; CHECK-NEXT: mtctr 5
-; CHECK-NEXT: srawi 3, 3, 1
-; CHECK-NEXT: addze 3, 3
+; CHECK-NEXT: li 4, 1
+; CHECK-NEXT: rldic 4, 4, 62, 1
+; CHECK-NEXT: mtctr 4
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_1:
-; CHECK-NEXT: extsh 4, 4
+; CHECK-NEXT: lha 4, 0(3)
+; CHECK-NEXT: lha 5, 0(3)
; CHECK-NEXT: srawi 4, 4, 1
; CHECK-NEXT: addze 4, 4
+; CHECK-NEXT: srawi 5, 5, 1
+; CHECK-NEXT: addze 5, 5
+; CHECK-NEXT: sth 4, 0(3)
+; CHECK-NEXT: sth 5, 0(3)
; CHECK-NEXT: bdnz .LBB0_1
; CHECK-NEXT: # %bb.2:
-; CHECK-NEXT: sth 4, 0(0)
-; CHECK-NEXT: sth 3, 0(3)
; CHECK-NEXT: blr
+ %undef = freeze ptr poison
br label %1
1: ; preds = %1, %0
%2 = phi i64 [ 0, %0 ], [ %13, %1 ]
- %3 = load i16, ptr null, align 2
- %4 = load i16, ptr undef, align 2
+ %3 = load i16, ptr %arg0, align 2
+ %4 = load i16, ptr %undef, align 2
%5 = sext i16 %3 to i32
%6 = sext i16 %4 to i32
%7 = add nsw i32 0, %5
@@ -36,8 +36,8 @@ define void @lame_encode_buffer_interleaved() local_unnamed_addr {
%10 = sdiv i32 %8, 2
%11 = trunc i32 %9 to i16
%12 = trunc i32 %10 to i16
- store i16 %11, ptr null, align 2
- store i16 %12, ptr undef, align 2
+ store i16 %11, ptr %arg0, align 2
+ store i16 %12, ptr %undef, align 2
%13 = add i64 %2, 4
%14 = icmp eq i64 %13, 0
br i1 %14, label %15, label %1
diff --git a/llvm/test/Transforms/LICM/pr50367.ll b/llvm/test/Transforms/LICM/pr50367.ll
index a7cf21deff627..7fd176b6c6bb6 100644
--- a/llvm/test/Transforms/LICM/pr50367.ll
+++ b/llvm/test/Transforms/LICM/pr50367.ll
@@ -2,7 +2,7 @@
; RUN: opt -S -passes='loop-mssa(licm)' < %s | FileCheck %s
@e = external dso_local global ptr, align 8
-define void @main(i1 %arg) {
+define void @main(i1 %arg, ptr %arg1) {
; CHECK-LABEL: @main(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP1:%.*]]
@@ -11,8 +11,47 @@ define void @main(i1 %arg) {
; CHECK: loop2:
; CHECK-NEXT: br i1 [[ARG:%.*]], label [[LOOP2_LATCH:%.*]], label [[LOOP_LATCH:%.*]]
; CHECK: loop2.latch:
+; CHECK-NEXT: store i32 0, ptr [[ARG1:%.*]], align 4
; CHECK-NEXT: br label [[LOOP2]]
; CHECK: loop.latch:
+; CHECK-NEXT: store ptr null, ptr @e, align 8, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT: [[PTR:%.*]] = load ptr, ptr @e, align 8, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i32 0, ptr [[PTR]], align 4, !tbaa [[TBAA4:![0-9]+]]
+; CHECK-NEXT: br label [[LOOP1]]
+;
+entry:
+ br label %loop1
+
+loop1:
+ br label %loop2
+
+loop2:
+ br i1 %arg, label %loop2.latch, label %loop.latch
+
+loop2.latch:
+ store i32 0, ptr %arg1, align 4
+ br label %loop2
+
+loop.latch:
+ store ptr null, ptr @e, align 8, !tbaa !0
+ %ptr = load ptr, ptr @e, align 8, !tbaa !0
+ store i32 0, ptr %ptr, align 4, !tbaa !4
+ br label %loop1
+}
+
+define void @store_null(i1 %arg) {
+; CHECK-LABEL: @store_null(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP1:%.*]]
+; CHECK: loop1:
+; CHECK-NEXT: br label [[LOOP2:%.*]]
+; CHECK: loop2:
+; CHECK-NEXT: br i1 [[ARG:%.*]], label [[LOOP2_LATCH:%.*]], label [[LOOP_LATCH:%.*]]
+; CHECK: loop2.latch:
+; CHECK-NEXT: store i32 0, ptr null, align 4
+; CHECK-NEXT: br label [[LOOP2]]
+; CHECK: loop.latch:
+; CHECK-NEXT: store i32 0, ptr null, align 4, !tbaa [[TBAA4]]
; CHECK-NEXT: br label [[LOOP1]]
;
entry:
diff --git a/llvm/test/Transforms/LICM/pr59324.ll b/llvm/test/Transforms/LICM/pr59324.ll
index b0ad60e650696..ec33a0f8ded0f 100644
--- a/llvm/test/Transforms/LICM/pr59324.ll
+++ b/llvm/test/Transforms/LICM/pr59324.ll
@@ -6,7 +6,10 @@ define void @test(ptr %a) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[V:%.*]] = load i32, ptr null, align 4
+; CHECK-NEXT: store ptr null, ptr null, align 8
+; CHECK-NEXT: [[P:%.*]] = load ptr, ptr null, align 8
+; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT: store i32 [[V]], ptr [[A:%.*]], align 4
; CHECK-NEXT: br label [[LOOP]]
;
entry:
@@ -19,3 +22,25 @@ loop:
store i32 %v, ptr %a
br label %loop
}
+
+define void @test_inttoptr(ptr %a) {
+; CHECK-LABEL: @test_inttoptr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: store ptr null, ptr inttoptr (i64 128 to ptr), align 8
+; CHECK-NEXT: [[P:%.*]] = load ptr, ptr inttoptr (i64 128 to ptr), align 8
+; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT: store i32 [[V]], ptr [[A:%.*]], align 4
+; CHECK-NEXT: br label [[LOOP]]
+;
+entry:
+ br label %loop
+
+loop:
+ store ptr null, ptr inttoptr (i64 128 to ptr)
+ %p = load ptr, ptr inttoptr (i64 128 to ptr)
+ %v = load i32, ptr %p
+ store i32 %v, ptr %a
+ br label %loop
+}
You can test this locally with the following command:

git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 'HEAD~1' HEAD llvm/lib/Transforms/Scalar/LICM.cpp llvm/test/CodeGen/AMDGPU/swdev380865.ll llvm/test/CodeGen/PowerPC/pr43527.ll llvm/test/CodeGen/PowerPC/pr48519.ll llvm/test/CodeGen/PowerPC/sms-grp-order.ll llvm/test/Transforms/LICM/pr50367.ll llvm/test/Transforms/LICM/pr59324.ll

The following files introduce new uses of undef:
Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields undef. You should use poison values for placeholders instead.

In tests, avoid using undef and having tests that trigger undefined behavior. If you need an operand with some unimportant value, you can add a new argument to the function and use that instead.

For example, this is considered a bad practice:

define void @fn() {
  ...
  br i1 undef, ...
}

Please use the following instead:

define void @fn(i1 %cond) {
  ...
  br i1 %cond, ...
}

Please refer to the Undefined Behavior Manual for more information.
LGTM
    return L->isLoopInvariant(LI->getPointerOperand());
  if (const auto *SI = dyn_cast<StoreInst>(I)) {
    const Value *PtrOp = SI->getPointerOperand();
    return !isa<ConstantData>(PtrOp) && L->isLoopInvariant(PtrOp);
Surely ConstantData is just constant, so it is guaranteed to be loop-invariant, so this should be:
-      return !isa<ConstantData>(PtrOp) && L->isLoopInvariant(PtrOp);
+      return isa<ConstantData>(PtrOp) || L->isLoopInvariant(PtrOp);
And couldn't the ConstantData check be done inside of isLoopInvariant?
It's loop invariant -- the point here is to skip promotion for constant data, even if it's loop-invariant, because promotion works by iterating over the use list. In practice, this basically means that we'll not try to promote loads/stores to null pointers, which is just a bugpoint/reduce-ism.
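To illustrate the use-list concern, here is a minimal standalone sketch (a hypothetical example, not code from this patch; it assumes a build linked against the LLVM libraries and uses only the public C++ IR API). Constants such as ConstantPointerNull are uniqued per LLVMContext, so a single null constant accumulates users from every function in the context, and walking that list from inside a loop pass scales with the whole module rather than with the loop being transformed.

// Minimal sketch (hypothetical, not part of this PR): a ConstantPointerNull
// is uniqued per LLVMContext, so its use list spans unrelated functions.
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("m", Ctx);
  PointerType *PtrTy = PointerType::get(Ctx, /*AddressSpace=*/0);
  ConstantPointerNull *Null = ConstantPointerNull::get(PtrTy);

  // Two unrelated functions, each storing through the same uniqued null.
  FunctionType *FnTy = FunctionType::get(Type::getVoidTy(Ctx), /*isVarArg=*/false);
  for (const char *Name : {"f", "g"}) {
    Function *F = Function::Create(FnTy, Function::ExternalLinkage, Name, M);
    IRBuilder<> B(BasicBlock::Create(Ctx, "entry", F));
    B.CreateStore(B.getInt32(0), Null);
    B.CreateRetVoid();
  }

  // The single null constant now has users in both functions; iterating its
  // use list from a loop pass would visit context-wide state.
  errs() << "users of null: " << Null->getNumUses() << "\n"; // prints 2
  for (const User *U : Null->users())
    errs() << "  user in: " << cast<Instruction>(U)->getFunction()->getName()
           << "\n";
  return 0;
}

With the check in this patch, collectPromotionCandidates simply skips loads and stores whose pointer operand is ConstantData, so promotion never has to touch that context-wide list.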