Skip to content

Commit 1e18162

Browse files
committed
[X86] Merge insertsubvector(load(p0),load_subv(p0),hi) -> subvbroadcast(p0) if either load is oneuse
This fold is currently limited to cases where the load_subv(p0) has one use, but it's beneficial whenever either load has one use and will therefore be replaced. Yet another yak shave for llvm#122671
1 parent a98c294 commit 1e18162

File tree

2 files changed

+7
-10
lines changed

2 files changed

+7
-10
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58562,8 +58562,9 @@ static SDValue combineINSERT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
5856258562

5856358563
// If we're splatting the lower half subvector of a full vector load into the
5856458564
// upper half, attempt to create a subvector broadcast.
58565-
if (IdxVal == (OpVT.getVectorNumElements() / 2) && SubVec.hasOneUse() &&
58566-
Vec.getValueSizeInBits() == (2 * SubVec.getValueSizeInBits())) {
58565+
if (IdxVal == (OpVT.getVectorNumElements() / 2) &&
58566+
Vec.getValueSizeInBits() == (2 * SubVec.getValueSizeInBits()) &&
58567+
(Vec.hasOneUse() || SubVec.hasOneUse())) {
5856758568
auto *VecLd = dyn_cast<LoadSDNode>(Vec);
5856858569
auto *SubLd = dyn_cast<LoadSDNode>(SubVec);
5856958570
if (VecLd && SubLd &&

llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-7.ll

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10210,11 +10210,9 @@ define void @store_i8_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
1021010210
; AVX512BW-NEXT: vpshufb %ymm28, %ymm18, %ymm23
1021110211
; AVX512BW-NEXT: vporq %ymm2, %ymm23, %ymm2
1021210212
; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
10213-
; AVX512BW-NEXT: vmovdqa64 (%rdx), %zmm2
10214-
; AVX512BW-NEXT: vinserti64x4 $1, %ymm14, %zmm2, %zmm2
10213+
; AVX512BW-NEXT: vbroadcasti64x4 {{.*#+}} zmm2 = mem[0,1,2,3,0,1,2,3]
1021510214
; AVX512BW-NEXT: vpshufb %zmm20, %zmm2, %zmm2
10216-
; AVX512BW-NEXT: vmovdqa64 (%rcx), %zmm20
10217-
; AVX512BW-NEXT: vinserti64x4 $1, %ymm15, %zmm20, %zmm20
10215+
; AVX512BW-NEXT: vbroadcasti64x4 {{.*#+}} zmm20 = mem[0,1,2,3,0,1,2,3]
1021810216
; AVX512BW-NEXT: vpshufb %zmm22, %zmm20, %zmm20
1021910217
; AVX512BW-NEXT: vporq %zmm2, %zmm20, %zmm2
1022010218
; AVX512BW-NEXT: vpermq {{.*#+}} zmm1 = zmm1[2,3,2,3,6,7,6,7]
@@ -10816,11 +10814,9 @@ define void @store_i8_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
1081610814
; AVX512DQ-BW-NEXT: vpshufb %ymm28, %ymm18, %ymm23
1081710815
; AVX512DQ-BW-NEXT: vporq %ymm2, %ymm23, %ymm2
1081810816
; AVX512DQ-BW-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
10819-
; AVX512DQ-BW-NEXT: vmovdqa64 (%rdx), %zmm2
10820-
; AVX512DQ-BW-NEXT: vinserti64x4 $1, %ymm14, %zmm2, %zmm2
10817+
; AVX512DQ-BW-NEXT: vbroadcasti64x4 {{.*#+}} zmm2 = mem[0,1,2,3,0,1,2,3]
1082110818
; AVX512DQ-BW-NEXT: vpshufb %zmm20, %zmm2, %zmm2
10822-
; AVX512DQ-BW-NEXT: vmovdqa64 (%rcx), %zmm20
10823-
; AVX512DQ-BW-NEXT: vinserti64x4 $1, %ymm15, %zmm20, %zmm20
10819+
; AVX512DQ-BW-NEXT: vbroadcasti64x4 {{.*#+}} zmm20 = mem[0,1,2,3,0,1,2,3]
1082410820
; AVX512DQ-BW-NEXT: vpshufb %zmm22, %zmm20, %zmm20
1082510821
; AVX512DQ-BW-NEXT: vporq %zmm2, %zmm20, %zmm2
1082610822
; AVX512DQ-BW-NEXT: vpermq {{.*#+}} zmm1 = zmm1[2,3,2,3,6,7,6,7]

0 commit comments

Comments
 (0)