Skip to content

[SelectionDAG] Convert to or mask if all insertions are -1 #138213

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from

Conversation

AZero13
Copy link
Contributor

@AZero13 AZero13 commented May 1, 2025

We did this for 0 and and, but we can do this with or and -1.

@llvmbot
Copy link
Member

llvmbot commented May 1, 2025

@llvm/pr-subscribers-backend-aarch64

@llvm/pr-subscribers-backend-x86

Author: AZero13 (AZero13)

Changes

We did this for 0 and and, but we can do this with or and -1.


Full diff: https://github.com/llvm/llvm-project/pull/138213.diff

3 Files Affected:

  • (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+13-1)
  • (modified) llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll (+5-11)
  • (modified) llvm/test/CodeGen/X86/avx-cvt-3.ll (+2-6)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ea1435c3934be..1645acb9d3fd0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -22974,7 +22974,6 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
       }
 
       // If all insertions are zero value, try to convert to AND mask.
-      // TODO: Do this for -1 with OR mask?
       if (!LegalOperations && llvm::isNullConstant(InVal) &&
           all_of(Ops, [InVal](SDValue Op) { return !Op || Op == InVal; }) &&
           count_if(Ops, [InVal](SDValue Op) { return Op == InVal; }) >= 2) {
@@ -22987,6 +22986,19 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
                            DAG.getBuildVector(VT, DL, Mask));
       }
 
+      // If all insertions are -1, try to convert to OR mask.
+      if (!LegalOperations && llvm::isAllOnesConstant(InVal) &&
+          all_of(Ops, [InVal](SDValue Op) { return !Op || Op == InVal; }) &&
+          count_if(Ops, [InVal](SDValue Op) { return Op == InVal; }) >= 2) {
+        SDValue Zero = DAG.getConstant(0, DL, MaxEltVT);
+        SDValue AllOnes = DAG.getAllOnesConstant(DL, MaxEltVT);
+        SmallVector<SDValue, 8> Mask(NumElts);
+        for (unsigned I = 0; I != NumElts; ++I)
+          Mask[I] = Ops[I] ? AllOnes : Zero;
+        return DAG.getNode(ISD::OR, DL, VT, CurVec,
+                           DAG.getBuildVector(VT, DL, Mask));
+      }
+
       // Failed to find a match in the chain - bail.
       break;
     }
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
index 7fa416e0dbcd5..d2f16721e6e47 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
@@ -101,19 +101,13 @@ define i8 @test_v3i8(<3 x i8> %a) nounwind {
 define i8 @test_v9i8(<9 x i8> %a) nounwind {
 ; CHECK-LABEL: test_v9i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov v1.16b, v0.16b
-; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
-; CHECK-NEXT:    mov v1.b[9], w8
-; CHECK-NEXT:    mov v1.b[10], w8
-; CHECK-NEXT:    mov v1.b[11], w8
-; CHECK-NEXT:    mov v1.b[12], w8
-; CHECK-NEXT:    mov v1.b[13], w8
-; CHECK-NEXT:    mov v1.b[14], w8
-; CHECK-NEXT:    mov v1.b[15], w8
+; CHECK-NEXT:    movi v1.2d, #0xffffff00ffffff00
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    orr v1.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
 ; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT:    fmov x8, d0
-; CHECK-NEXT:    and x8, x8, x8, lsr #32
+; CHECK-NEXT:    fmov x9, d0
+; CHECK-NEXT:    and x8, x9, x8, lsr #32
 ; CHECK-NEXT:    and x8, x8, x8, lsr #16
 ; CHECK-NEXT:    lsr x9, x8, #8
 ; CHECK-NEXT:    and w0, w8, w9
diff --git a/llvm/test/CodeGen/X86/avx-cvt-3.ll b/llvm/test/CodeGen/X86/avx-cvt-3.ll
index 87eabd9cb5521..760db4af1f1b4 100644
--- a/llvm/test/CodeGen/X86/avx-cvt-3.ll
+++ b/llvm/test/CodeGen/X86/avx-cvt-3.ll
@@ -48,17 +48,13 @@ define <8 x float> @sitofp_shuffle_zero_v8i32(<8 x i32> %a0) {
 define <8 x float> @sitofp_insert_allbits_v8i32(<8 x i32> %a0) {
 ; X86-LABEL: sitofp_insert_allbits_v8i32:
 ; X86:       # %bb.0:
-; X86-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; X86-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
-; X86-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4,5],ymm0[6,7]
+; X86-NEXT:    vorps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
 ; X86-NEXT:    vcvtdq2ps %ymm0, %ymm0
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: sitofp_insert_allbits_v8i32:
 ; X64:       # %bb.0:
-; X64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; X64-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
-; X64-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4,5],ymm0[6,7]
+; X64-NEXT:    vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; X64-NEXT:    vcvtdq2ps %ymm0, %ymm0
 ; X64-NEXT:    retq
   %1 = insertelement <8 x i32> %a0, i32 -1, i32 0

@AZero13 AZero13 force-pushed the shifts branch 2 times, most recently from 854648d to a4760e9 Compare May 1, 2025 23:53
@RKSimon RKSimon self-requested a review May 2, 2025 12:07
We did this for 0 and and, but we can do this with or and -1.
@AZero13
Copy link
Contributor Author

AZero13 commented May 2, 2025

@RKSimon @arsenm Done!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

5 participants