From a0cee0af33f27ac37aff44f8c20f76d0f4f1dbd8 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 7 Apr 2025 20:10:54 +0700 Subject: [PATCH] LICM: Avoid looking at use list of constant data The codegen test changes seem incidental. Either way, sms-grp-order.ll seems to already not hit the original issue. --- llvm/lib/Transforms/Scalar/LICM.cpp | 12 ++++--- llvm/test/CodeGen/AMDGPU/swdev380865.ll | 9 ++--- llvm/test/CodeGen/PowerPC/pr43527.ll | 22 +++++++----- llvm/test/CodeGen/PowerPC/pr48519.ll | 5 +-- llvm/test/CodeGen/PowerPC/sms-grp-order.ll | 30 ++++++++-------- llvm/test/Transforms/LICM/pr50367.ll | 41 +++++++++++++++++++++- llvm/test/Transforms/LICM/pr59324.ll | 27 +++++++++++++- 7 files changed, 110 insertions(+), 36 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index d872a381050ca..889b43a843bef 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -2294,10 +2294,14 @@ collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) { AliasSetTracker AST(BatchAA); auto IsPotentiallyPromotable = [L](const Instruction *I) { - if (const auto *SI = dyn_cast(I)) - return L->isLoopInvariant(SI->getPointerOperand()); - if (const auto *LI = dyn_cast(I)) - return L->isLoopInvariant(LI->getPointerOperand()); + if (const auto *SI = dyn_cast(I)) { + const Value *PtrOp = SI->getPointerOperand(); + return !isa(PtrOp) && L->isLoopInvariant(PtrOp); + } + if (const auto *LI = dyn_cast(I)) { + const Value *PtrOp = LI->getPointerOperand(); + return !isa(PtrOp) && L->isLoopInvariant(PtrOp); + } return false; }; diff --git a/llvm/test/CodeGen/AMDGPU/swdev380865.ll b/llvm/test/CodeGen/AMDGPU/swdev380865.ll index 9189cef019cf4..4a5dc8f300af3 100644 --- a/llvm/test/CodeGen/AMDGPU/swdev380865.ll +++ b/llvm/test/CodeGen/AMDGPU/swdev380865.ll @@ -16,15 +16,16 @@ define amdgpu_kernel void @_Z6kernelILi4000ELi1EEvPd(ptr addrspace(1) %x.coerce) ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_mov_b64 s[0:1], 0 ; CHECK-NEXT: s_load_dword s2, s[0:1], 0x0 +; CHECK-NEXT: s_mov_b64 s[0:1], 0x100 ; CHECK-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0 ; CHECK-NEXT: s_mov_b32 s4, 0 ; CHECK-NEXT: s_mov_b32 s0, 0 -; CHECK-NEXT: s_mov_b32 s5, 0x40280000 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_mov_b32 s1, s2 ; CHECK-NEXT: s_mov_b32 s2, 0 ; CHECK-NEXT: v_mov_b32_e32 v0, s6 ; CHECK-NEXT: s_mov_b32 s3, 0x40260000 +; CHECK-NEXT: s_mov_b32 s5, 0x40280000 ; CHECK-NEXT: v_mov_b32_e32 v1, s7 ; CHECK-NEXT: .LBB0_1: ; %for.cond4.preheader ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -50,7 +51,7 @@ define amdgpu_kernel void @_Z6kernelILi4000ELi1EEvPd(ptr addrspace(1) %x.coerce) ; CHECK-NEXT: v_add_f64 v[0:1], v[0:1], s[4:5] ; CHECK-NEXT: s_cbranch_scc1 .LBB0_1 ; CHECK-NEXT: ; %bb.2: ; %for.cond.cleanup.loopexit -; CHECK-NEXT: v_mov_b32_e32 v2, 0 +; CHECK-NEXT: v_mov_b32_e32 v2, 0x100 ; CHECK-NEXT: v_mov_b32_e32 v3, 0 ; CHECK-NEXT: global_store_dwordx2 v[2:3], v[0:1], off ; CHECK-NEXT: s_endpgm @@ -61,7 +62,7 @@ entry: for.cond4.preheader: ; preds = %for.cond4.preheader, %entry %idx.07 = phi i32 [ %add13, %for.cond4.preheader ], [ 0, %entry ] - %arrayidx.promoted = load double, ptr addrspace(1) null, align 8 + %arrayidx.promoted = load double, ptr addrspace(1) inttoptr (i64 256 to ptr addrspace(1)), align 8 %add9 = fadd contract double %arrayidx.promoted, 0.000000e+00 %add9.1 = fadd contract double %add9, 5.000000e+00 %add9.2 = fadd contract double %add9.1, 6.000000e+00 @@ -70,7 +71,7 @@ for.cond4.preheader: ; preds = %for.cond4.preheader %add9.5 = fadd contract double %add9.4, 1.000000e+01 %add9.6 = fadd contract double %add9.5, 1.100000e+01 %add9.7 = fadd contract double %add9.6, 1.200000e+01 - store double %add9.7, ptr addrspace(1) null, align 8 + store double %add9.7, ptr addrspace(1) inttoptr (i64 256 to ptr addrspace(1)), align 8 %add13 = add i32 %idx.07, %0 %cmp = icmp slt i32 %add13, 2560 br i1 %cmp, label %for.cond4.preheader, label %for.cond.cleanup diff --git a/llvm/test/CodeGen/PowerPC/pr43527.ll b/llvm/test/CodeGen/PowerPC/pr43527.ll index 379bd6c070c77..adfea51077a0b 100644 --- a/llvm/test/CodeGen/PowerPC/pr43527.ll +++ b/llvm/test/CodeGen/PowerPC/pr43527.ll @@ -2,7 +2,7 @@ ; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs \ ; RUN: -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s ; We don't want to produce a CTR loop due to the call to lrint in the body. -define dso_local void @test(i64 %arg, i64 %arg1) { +define dso_local void @test(i64 %arg, i64 %arg1, ptr %arg2) { ; CHECK-LABEL: test: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: bc 4, 4*cr5+lt, .LBB0_5 @@ -12,29 +12,33 @@ define dso_local void @test(i64 %arg, i64 %arg1) { ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: .cfi_def_cfa_offset 64 ; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r28, -32 ; CHECK-NEXT: .cfi_offset r29, -24 ; CHECK-NEXT: .cfi_offset r30, -16 +; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: stdu r1, -64(r1) -; CHECK-NEXT: sub r30, r4, r3 -; CHECK-NEXT: li r29, -4 +; CHECK-NEXT: mr r30, r5 +; CHECK-NEXT: sub r29, r4, r3 +; CHECK-NEXT: addi r28, r5, -4 ; CHECK-NEXT: std r0, 80(r1) ; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB0_3: # %bb5 ; CHECK-NEXT: # -; CHECK-NEXT: lfsu f1, 4(r29) +; CHECK-NEXT: lfsu f1, 4(r28) ; CHECK-NEXT: bl lrint ; CHECK-NEXT: nop -; CHECK-NEXT: addi r30, r30, -1 -; CHECK-NEXT: cmpldi r30, 0 +; CHECK-NEXT: addi r29, r29, -1 +; CHECK-NEXT: stb r3, 0(r30) +; CHECK-NEXT: cmpldi r29, 0 ; CHECK-NEXT: bc 12, gt, .LBB0_3 ; CHECK-NEXT: # %bb.4: # %bb15 -; CHECK-NEXT: stb r3, 0(r3) ; CHECK-NEXT: addi r1, r1, 64 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr ; CHECK-NEXT: .LBB0_5: # %bb2 @@ -54,12 +58,12 @@ bb4: ; preds = %bb3 bb5: ; preds = %bb5, %bb4 %tmp6 = phi i64 [ %tmp12, %bb5 ], [ 0, %bb4 ] - %tmp7 = getelementptr inbounds float, ptr null, i64 %tmp6 + %tmp7 = getelementptr inbounds float, ptr %arg2, i64 %tmp6 %tmp8 = load float, ptr %tmp7, align 4 %tmp9 = fpext float %tmp8 to double %tmp10 = tail call i64 @llvm.lrint.i64.f64(double %tmp9) #2 %tmp11 = trunc i64 %tmp10 to i8 - store i8 %tmp11, ptr undef, align 1 + store i8 %tmp11, ptr %arg2, align 1 %tmp12 = add nuw i64 %tmp6, 1 %tmp13 = icmp eq i64 %tmp12, %tmp br i1 %tmp13, label %bb15, label %bb5 diff --git a/llvm/test/CodeGen/PowerPC/pr48519.ll b/llvm/test/CodeGen/PowerPC/pr48519.ll index fa156454a1313..b610f12159ee2 100644 --- a/llvm/test/CodeGen/PowerPC/pr48519.ll +++ b/llvm/test/CodeGen/PowerPC/pr48519.ll @@ -32,7 +32,7 @@ define void @julia__typed_vcat_20() #0 { ; CHECK-NEXT: # %bb.2: # %bb11 ; CHECK-NEXT: bl __truncsfhf2 ; CHECK-NEXT: nop -; CHECK-NEXT: sth r3, 0(r3) +; CHECK-NEXT: sth r3, 128(0) ; ; CHECK-P9-LABEL: julia__typed_vcat_20: ; CHECK-P9: # %bb.0: # %bb @@ -54,6 +54,7 @@ define void @julia__typed_vcat_20() #0 { ; CHECK-P9-NEXT: bdnz .LBB0_1 ; CHECK-P9-NEXT: # %bb.2: # %bb11 ; CHECK-P9-NEXT: xscvdphp f0, f0 +; CHECK-P9-NEXT: li r3, 128 ; CHECK-P9-NEXT: stxsihx f0, 0, r3 bb: %i = load i64, ptr addrspace(11) null, align 8 @@ -67,7 +68,7 @@ bb3: ; preds = %bb3, %bb %i6 = add nsw i64 %i5, -1 %i7 = add i64 %i6, 0 %i8 = sitofp i64 %i7 to half - store half %i8, ptr addrspace(13) undef, align 2 + store half %i8, ptr addrspace(13) inttoptr (i64 128 to ptr addrspace(13)), align 2 %i9 = icmp eq i64 %i4, 0 %i10 = add i64 %i4, 1 br i1 %i9, label %bb11, label %bb3 diff --git a/llvm/test/CodeGen/PowerPC/sms-grp-order.ll b/llvm/test/CodeGen/PowerPC/sms-grp-order.ll index f72598cb4cbcc..eaea47608eb23 100644 --- a/llvm/test/CodeGen/PowerPC/sms-grp-order.ll +++ b/llvm/test/CodeGen/PowerPC/sms-grp-order.ll @@ -2,32 +2,32 @@ ; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs\ ; RUN: -mcpu=pwr9 --ppc-enable-pipeliner | FileCheck %s -define void @lame_encode_buffer_interleaved() local_unnamed_addr { +define void @lame_encode_buffer_interleaved(ptr %arg0) local_unnamed_addr { ; CHECK-LABEL: lame_encode_buffer_interleaved: ; CHECK: # %bb.0: -; CHECK-NEXT: lha 3, 0(3) -; CHECK-NEXT: li 5, 1 -; CHECK-NEXT: lhz 4, 0(0) -; CHECK-NEXT: rldic 5, 5, 62, 1 -; CHECK-NEXT: mtctr 5 -; CHECK-NEXT: srawi 3, 3, 1 -; CHECK-NEXT: addze 3, 3 +; CHECK-NEXT: li 4, 1 +; CHECK-NEXT: rldic 4, 4, 62, 1 +; CHECK-NEXT: mtctr 4 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_1: -; CHECK-NEXT: extsh 4, 4 +; CHECK-NEXT: lha 4, 0(3) +; CHECK-NEXT: lha 5, 0(3) ; CHECK-NEXT: srawi 4, 4, 1 ; CHECK-NEXT: addze 4, 4 +; CHECK-NEXT: srawi 5, 5, 1 +; CHECK-NEXT: addze 5, 5 +; CHECK-NEXT: sth 4, 0(3) +; CHECK-NEXT: sth 5, 0(3) ; CHECK-NEXT: bdnz .LBB0_1 ; CHECK-NEXT: # %bb.2: -; CHECK-NEXT: sth 4, 0(0) -; CHECK-NEXT: sth 3, 0(3) ; CHECK-NEXT: blr + %undef = freeze ptr poison br label %1 1: ; preds = %1, %0 %2 = phi i64 [ 0, %0 ], [ %13, %1 ] - %3 = load i16, ptr null, align 2 - %4 = load i16, ptr undef, align 2 + %3 = load i16, ptr %arg0, align 2 + %4 = load i16, ptr %undef, align 2 %5 = sext i16 %3 to i32 %6 = sext i16 %4 to i32 %7 = add nsw i32 0, %5 @@ -36,8 +36,8 @@ define void @lame_encode_buffer_interleaved() local_unnamed_addr { %10 = sdiv i32 %8, 2 %11 = trunc i32 %9 to i16 %12 = trunc i32 %10 to i16 - store i16 %11, ptr null, align 2 - store i16 %12, ptr undef, align 2 + store i16 %11, ptr %arg0, align 2 + store i16 %12, ptr %undef, align 2 %13 = add i64 %2, 4 %14 = icmp eq i64 %13, 0 br i1 %14, label %15, label %1 diff --git a/llvm/test/Transforms/LICM/pr50367.ll b/llvm/test/Transforms/LICM/pr50367.ll index a7cf21deff627..7fd176b6c6bb6 100644 --- a/llvm/test/Transforms/LICM/pr50367.ll +++ b/llvm/test/Transforms/LICM/pr50367.ll @@ -2,7 +2,7 @@ ; RUN: opt -S -passes='loop-mssa(licm)' < %s | FileCheck %s @e = external dso_local global ptr, align 8 -define void @main(i1 %arg) { +define void @main(i1 %arg, ptr %arg1) { ; CHECK-LABEL: @main( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP1:%.*]] @@ -11,8 +11,47 @@ define void @main(i1 %arg) { ; CHECK: loop2: ; CHECK-NEXT: br i1 [[ARG:%.*]], label [[LOOP2_LATCH:%.*]], label [[LOOP_LATCH:%.*]] ; CHECK: loop2.latch: +; CHECK-NEXT: store i32 0, ptr [[ARG1:%.*]], align 4 ; CHECK-NEXT: br label [[LOOP2]] ; CHECK: loop.latch: +; CHECK-NEXT: store ptr null, ptr @e, align 8, !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: [[PTR:%.*]] = load ptr, ptr @e, align 8, !tbaa [[TBAA0]] +; CHECK-NEXT: store i32 0, ptr [[PTR]], align 4, !tbaa [[TBAA4:![0-9]+]] +; CHECK-NEXT: br label [[LOOP1]] +; +entry: + br label %loop1 + +loop1: + br label %loop2 + +loop2: + br i1 %arg, label %loop2.latch, label %loop.latch + +loop2.latch: + store i32 0, ptr %arg1, align 4 + br label %loop2 + +loop.latch: + store ptr null, ptr @e, align 8, !tbaa !0 + %ptr = load ptr, ptr @e, align 8, !tbaa !0 + store i32 0, ptr %ptr, align 4, !tbaa !4 + br label %loop1 +} + +define void @store_null(i1 %arg) { +; CHECK-LABEL: @store_null( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP1:%.*]] +; CHECK: loop1: +; CHECK-NEXT: br label [[LOOP2:%.*]] +; CHECK: loop2: +; CHECK-NEXT: br i1 [[ARG:%.*]], label [[LOOP2_LATCH:%.*]], label [[LOOP_LATCH:%.*]] +; CHECK: loop2.latch: +; CHECK-NEXT: store i32 0, ptr null, align 4 +; CHECK-NEXT: br label [[LOOP2]] +; CHECK: loop.latch: +; CHECK-NEXT: store i32 0, ptr null, align 4, !tbaa [[TBAA4]] ; CHECK-NEXT: br label [[LOOP1]] ; entry: diff --git a/llvm/test/Transforms/LICM/pr59324.ll b/llvm/test/Transforms/LICM/pr59324.ll index b0ad60e650696..ec33a0f8ded0f 100644 --- a/llvm/test/Transforms/LICM/pr59324.ll +++ b/llvm/test/Transforms/LICM/pr59324.ll @@ -6,7 +6,10 @@ define void @test(ptr %a) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[V:%.*]] = load i32, ptr null, align 4 +; CHECK-NEXT: store ptr null, ptr null, align 8 +; CHECK-NEXT: [[P:%.*]] = load ptr, ptr null, align 8 +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: store i32 [[V]], ptr [[A:%.*]], align 4 ; CHECK-NEXT: br label [[LOOP]] ; entry: @@ -19,3 +22,25 @@ loop: store i32 %v, ptr %a br label %loop } + +define void @test_inttoptr(ptr %a) { +; CHECK-LABEL: @test_inttoptr( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: store ptr null, ptr inttoptr (i64 128 to ptr), align 8 +; CHECK-NEXT: [[P:%.*]] = load ptr, ptr inttoptr (i64 128 to ptr), align 8 +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: store i32 [[V]], ptr [[A:%.*]], align 4 +; CHECK-NEXT: br label [[LOOP]] +; +entry: + br label %loop + +loop: + store ptr null, ptr inttoptr (i64 128 to ptr) + %p = load ptr, ptr inttoptr (i64 128 to ptr) + %v = load i32, ptr %p + store i32 %v, ptr %a + br label %loop +}