[LoongArch] Lower [x]vshuf.d to [x]vshuf4i.d if possible. #137918

Open · wants to merge 3 commits into base: main

Conversation

@tangaac (Contributor) commented Apr 30, 2025

No description provided.

@llvmbot (Member) commented Apr 30, 2025

@llvm/pr-subscribers-backend-loongarch

Author: None (tangaac)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/137918.diff

9 Files Affected:

  • (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+30-16)
  • (modified) llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td (+4)
  • (modified) llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td (+6)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll (+20)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll (+4-4)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll (+2-8)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll (+24-4)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll (+3-9)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll (+3-2)
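
For readers skimming the patch: `vshuf4i.d` selects each destination double-word with a 2-bit field of its immediate, where selector values 0-1 pick from the first source register and 2-3 from the second. The snippet below is a minimal standalone sketch of that encoding, mirroring the immediate loop in `lowerVECTOR_SHUFFLE_VSHUF4I` for the 2-element case; the helper name `encodeVShuf4iDImm` and the test driver are hypothetical, not code from the patch.

```cpp
// Minimal sketch (hypothetical, not from the patch): how a 2-element shuffle
// mask is folded into the 8-bit immediate of vshuf4i.d. Selector values 0-1
// pick double-words of the first operand, 2-3 of the second operand.
#include <array>
#include <cassert>
#include <cstdio>

// Returns the immediate for a <2 x i64>/<2 x double> shuffle mask, or -1 if an
// index falls outside the four selectable double-words.
static int encodeVShuf4iDImm(std::array<int, 2> Mask) {
  unsigned Imm = 0;
  for (int i = 1; i >= 0; --i) { // high result lane first, as in the patch
    int Idx = Mask[i];
    if (Idx < -1 || Idx > 3)
      return -1;
    Imm <<= 2;
    if (Idx != -1)               // remaining undefs (-1) encode as 0
      Imm |= Idx & 0x3;
  }
  return static_cast<int>(Imm);
}

int main() {
  // These values match the CHECK lines in the updated tests below.
  assert(encodeVShuf4iDImm({1, 2}) == 9);  // <1,2>: former vbsrl/vbsll/vor
  assert(encodeVShuf4iDImm({0, 3}) == 12); // <0,3>: former constant-pool vshuf.d
  assert(encodeVShuf4iDImm({0, 2}) == 8);  // <0,2>: former vpackev.d
  assert(encodeVShuf4iDImm({1, 3}) == 13); // <1,3>: former vpackod.d
  assert(encodeVShuf4iDImm({3, 0}) == 3);  // <3,0>: former byte-rotate sequence
  std::puts("immediates match the test expectations");
}
```

This encoding is what lets the `vpackev.d`/`vpackod.d`, constant-pool `vshuf.d`, and byte-rotate sequences in the updated tests collapse into a single `vshuf4i.d`.
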
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index d4e1d9c6f3ca6..4e79d1bd39387 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -994,37 +994,39 @@ static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {
 
-  // When the size is less than 4, lower cost instructions may be used.
-  if (Mask.size() < 4)
-    return SDValue();
+  unsigned SubVecSize = 4;
+  if (VT == MVT::v2f64 || VT == MVT::v2i64 || VT == MVT::v4f64 ||
+      VT == MVT::v4i64) {
+    SubVecSize = 2;
+  }
 
   int SubMask[4] = {-1, -1, -1, -1};
-  for (unsigned i = 0; i < 4; ++i) {
-    for (unsigned j = i; j < Mask.size(); j += 4) {
-      int Idx = Mask[j];
+  for (unsigned i = 0; i < SubVecSize; ++i) {
+    for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
+      int M = Mask[j];
 
       // Convert from vector index to 4-element subvector index
       // If an index refers to an element outside of the subvector then give up
-      if (Idx != -1) {
-        Idx -= 4 * (j / 4);
-        if (Idx < 0 || Idx >= 4)
+      if (M != -1) {
+        M -= 4 * (j / SubVecSize);
+        if (M < 0 || M >= 4)
           return SDValue();
       }
 
       // If the mask has an undef, replace it with the current index.
       // Note that it might still be undef if the current index is also undef
       if (SubMask[i] == -1)
-        SubMask[i] = Idx;
+        SubMask[i] = M;
       // Check that non-undef values are the same as in the mask. If they
       // aren't then give up
-      else if (Idx != -1 && Idx != SubMask[i])
+      else if (M != -1 && M != SubMask[i])
         return SDValue();
     }
   }
 
   // Calculate the immediate. Replace any remaining undefs with zero
   APInt Imm(64, 0);
-  for (int i = 3; i >= 0; --i) {
+  for (int i = SubVecSize-1; i >= 0; --i) {
     int Idx = SubMask[i];
 
     if (Idx == -1)
@@ -1034,6 +1036,12 @@ static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
     Imm |= Idx & 0x3;
   }
 
+  // Return vshuf4i.d and xvshuf4i.d
+  if (VT == MVT::v2f64 || VT == MVT::v2i64 || VT == MVT::v4f64 ||
+      VT == MVT::v4i64)
+    return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
+                       V2, DAG.getConstant(Imm, DL, MVT::i64));
+
   return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
                      DAG.getConstant(Imm, DL, MVT::i64));
 }
@@ -1343,6 +1351,11 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
     /* V2 = V1; */
   }
 
+  if (VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) {
+    if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG)))
+      return Result;
+  }
+
   // It is recommended not to change the pattern comparison order for better
   // performance.
   if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
@@ -1413,10 +1426,6 @@ static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL,
 static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
                                             MVT VT, SDValue V1, SDValue V2,
                                             SelectionDAG &DAG) {
-  // When the size is less than or equal to 4, lower cost instructions may be
-  // used.
-  if (Mask.size() <= 4)
-    return SDValue();
   return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG);
 }
 
@@ -1784,6 +1793,11 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
     /* V2 = V1; */
   }
 
+  if (VT.SimpleTy == MVT::v4i64 || VT.SimpleTy == MVT::v4f64) {
+    if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG)))
+      return Result;
+  }
+
   // It is recommended not to change the pattern comparison order for better
   // performance.
   if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index e4268920e0b27..fcc2cac8d0766 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1729,6 +1729,10 @@ def : Pat<(loongarch_vshuf4i v8i32:$xj, immZExt8:$ui8),
         (XVSHUF4I_W v8i32:$xj, immZExt8:$ui8)>;
 def : Pat<(loongarch_vshuf4i v8f32:$xj, immZExt8:$ui8),
         (XVSHUF4I_W v8f32:$xj, immZExt8:$ui8)>;
+def : Pat<(loongarch_vshuf4i_d v4i64:$xj, v4i64:$xk, immZExt8:$ui8),
+        (XVSHUF4I_D v4i64:$xj, v4i64:$xk, immZExt8:$ui8)>;
+def : Pat<(loongarch_vshuf4i_d v4f64:$xj, v4f64:$xk, immZExt8:$ui8),
+        (XVSHUF4I_D v4f64:$xj, v4f64:$xk, immZExt8:$ui8)>;
 
 // XVREPL128VEI_{B/H/W/D}
 def : Pat<(loongarch_vreplvei v32i8:$xj, immZExt4:$ui4),
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 1ffc5f8056b96..241e835721fb2 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -23,6 +23,7 @@ def SDT_LoongArchV2R : SDTypeProfile<1, 2, [SDTCisVec<0>,
                                      SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
 def SDT_LoongArchV1RUimm: SDTypeProfile<1, 2, [SDTCisVec<0>,
                                         SDTCisSameAs<0,1>, SDTCisVT<2, i64>]>;
+def SDT_LoongArchVShuf4i_D : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1,2>, SDTCisVT<3, i64>]>;
 def SDT_LoongArchVreplgr2vr : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<0>, SDTCisInt<1>]>;
 def SDT_LoongArchVFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
 def SDT_LoongArchVFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
@@ -53,6 +54,7 @@ def loongarch_vilvl: SDNode<"LoongArchISD::VILVL", SDT_LoongArchV2R>;
 def loongarch_vilvh: SDNode<"LoongArchISD::VILVH", SDT_LoongArchV2R>;
 
 def loongarch_vshuf4i: SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchV1RUimm>;
+def loongarch_vshuf4i_d: SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchVShuf4i_D>;
 def loongarch_vreplvei: SDNode<"LoongArchISD::VREPLVEI", SDT_LoongArchV1RUimm>;
 def loongarch_vreplgr2vr: SDNode<"LoongArchISD::VREPLGR2VR", SDT_LoongArchVreplgr2vr>;
 
@@ -1914,6 +1916,10 @@ def : Pat<(loongarch_vshuf4i v4i32:$vj, immZExt8:$ui8),
         (VSHUF4I_W v4i32:$vj, immZExt8:$ui8)>;
 def : Pat<(loongarch_vshuf4i v4f32:$vj, immZExt8:$ui8),
         (VSHUF4I_W v4f32:$vj, immZExt8:$ui8)>;
+def : Pat<(loongarch_vshuf4i_d v2i64:$vj, v2i64:$vk, immZExt8:$ui8),
+        (VSHUF4I_D v2i64:$vj, v2i64:$vk, immZExt8:$ui8)>;
+def : Pat<(loongarch_vshuf4i_d v2f64:$vj, v2f64:$vk, immZExt8:$ui8),
+        (VSHUF4I_D v2f64:$vj, v2f64:$vk, immZExt8:$ui8)>;
 
 // VREPLVEI_{B/H/W/D}
 def : Pat<(loongarch_vreplvei v16i8:$vj, immZExt4:$ui4),
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll
index dc4532a7292ab..f3736f669db41 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll
@@ -41,3 +41,23 @@ define <8 x float> @shufflevector_xvshuf4i_v8f32(<8 x float> %a, <8 x float> %b)
     %c = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
     ret <8 x float> %c
 }
+
+;; xvshuf4i.d
+define <4 x i64> @shufflevector_xvshuf4i_v4d64(<4 x i64> %a, <4 x i64> %b) {
+; CHECK-LABEL: shufflevector_xvshuf4i_v4d64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvshuf4i.d $xr0, $xr1, 9
+; CHECK-NEXT:    ret
+    %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
+    ret <4 x i64> %c
+}
+
+;; xvshuf4i.d
+define <4 x double> @shufflevector_xvshuf4i_v4f64(<4 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: shufflevector_xvshuf4i_v4f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvshuf4i.d $xr0, $xr1, 9
+; CHECK-NEXT:    ret
+    %c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
+    ret <4 x double> %c
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll
index 171e68306cd11..5882d43257df8 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll
@@ -35,7 +35,7 @@ define <4 x i32> @shufflevector_pack_ev_v4i32(<4 x i32> %a, <4 x i32> %b) {
 define <2 x i64> @shufflevector_pack_ev_v2i64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: shufflevector_pack_ev_v2i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpackev.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    vshuf4i.d $vr0, $vr1, 8
 ; CHECK-NEXT:    ret
     %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
     ret <2 x i64> %c
@@ -55,7 +55,7 @@ define <4 x float> @shufflevector_pack_ev_v4f32(<4 x float> %a, <4 x float> %b)
 define <2 x double> @shufflevector_pack_ev_v2f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: shufflevector_pack_ev_v2f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpackev.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    vshuf4i.d $vr0, $vr1, 8
 ; CHECK-NEXT:    ret
     %c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
     ret <2 x double> %c
@@ -95,7 +95,7 @@ define <4 x i32> @shufflevector_pack_od_v4i32(<4 x i32> %a, <4 x i32> %b) {
 define <2 x i64> @shufflodector_pack_od_v2i64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: shufflodector_pack_od_v2i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpackod.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    vshuf4i.d $vr0, $vr1, 13
 ; CHECK-NEXT:    ret
     %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
     ret <2 x i64> %c
@@ -115,7 +115,7 @@ define <4 x float> @shufflodector_pack_od_v4f32(<4 x float> %a, <4 x float> %b)
 define <2 x double> @shufflodector_pack_od_v2f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: shufflodector_pack_od_v2f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpackod.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    vshuf4i.d $vr0, $vr1, 13
 ; CHECK-NEXT:    ret
     %c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
     ret <2 x double> %c
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll
index ac78a26ba4367..d1c071b45ddff 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll
@@ -42,10 +42,7 @@ define <4 x i32> @shufflevector_v4i32(<4 x i32> %a, <4 x i32> %b) {
 define <2 x i64> @shufflevector_v2i64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: shufflevector_v2i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI3_0)
-; CHECK-NEXT:    vld $vr2, $a0, %pc_lo12(.LCPI3_0)
-; CHECK-NEXT:    vshuf.d $vr2, $vr1, $vr0
-; CHECK-NEXT:    vori.b $vr0, $vr2, 0
+; CHECK-NEXT:    vshuf4i.d $vr0, $vr1, 12
 ; CHECK-NEXT:    ret
     %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
     ret <2 x i64> %c
@@ -68,10 +65,7 @@ define <4 x float> @shufflevector_v4f32(<4 x float> %a, <4 x float> %b) {
 define <2 x double> @shufflevector_v2f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: shufflevector_v2f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI5_0)
-; CHECK-NEXT:    vld $vr2, $a0, %pc_lo12(.LCPI5_0)
-; CHECK-NEXT:    vshuf.d $vr2, $vr1, $vr0
-; CHECK-NEXT:    vori.b $vr0, $vr2, 0
+; CHECK-NEXT:    vshuf4i.d $vr0, $vr1, 12
 ; CHECK-NEXT:    ret
     %c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
     ret <2 x double> %c
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll
index 660b9581c3d1f..cd80dcb44e433 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
 
-;; vilvh.b
+;; vshuf4i.b
 define <16 x i8> @shufflevector_vshuf4i_v16i8(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: shufflevector_vshuf4i_v16i8:
 ; CHECK:       # %bb.0:
@@ -11,7 +11,7 @@ define <16 x i8> @shufflevector_vshuf4i_v16i8(<16 x i8> %a, <16 x i8> %b) {
     ret <16 x i8> %c
 }
 
-;; vilvh.h
+;; vshuf4i.h
 define <8 x i16> @shufflevector_vshuf4i_v8i4(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: shufflevector_vshuf4i_v8i4:
 ; CHECK:       # %bb.0:
@@ -21,7 +21,7 @@ define <8 x i16> @shufflevector_vshuf4i_v8i4(<8 x i16> %a, <8 x i16> %b) {
     ret <8 x i16> %c
 }
 
-;; vilvh.w
+;; vshuf4i.w
 define <4 x i32> @shufflevector_vshuf4i_v4i32(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: shufflevector_vshuf4i_v4i32:
 ; CHECK:       # %bb.0:
@@ -31,7 +31,7 @@ define <4 x i32> @shufflevector_vshuf4i_v4i32(<4 x i32> %a, <4 x i32> %b) {
     ret <4 x i32> %c
 }
 
-;; vilvh.w
+;; vshuf4i.w
 define <4 x float> @shufflevector_vshuf4i_v4f32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: shufflevector_vshuf4i_v4f32:
 ; CHECK:       # %bb.0:
@@ -40,3 +40,23 @@ define <4 x float> @shufflevector_vshuf4i_v4f32(<4 x float> %a, <4 x float> %b)
     %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
     ret <4 x float> %c
 }
+
+;; vshuf4i.d
+define <2 x i64> @shufflevector_vshuf4i_v2d64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: shufflevector_vshuf4i_v2d64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vshuf4i.d $vr0, $vr1, 9
+; CHECK-NEXT:    ret
+    %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
+    ret <2 x i64> %c
+}
+
+;; vshuf4i.d
+define <2 x double> @shufflevector_vshuf4i_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: shufflevector_vshuf4i_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vshuf4i.d $vr0, $vr1, 9
+; CHECK-NEXT:    ret
+    %c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 2>
+    ret <2 x double> %c
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll
index 7b2bb47424fee..b1e3f74cd1739 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll
@@ -103,9 +103,7 @@ define <4 x i32> @byte_rotate_v4i32_3(<4 x i32> %a) nounwind {
 define <2 x i64> @byte_rotate_v2i64_1(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; CHECK-LABEL: byte_rotate_v2i64_1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vbsrl.v $vr1, $vr1, 8
-; CHECK-NEXT:    vbsll.v $vr0, $vr0, 8
-; CHECK-NEXT:    vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vshuf4i.d $vr0, $vr1, 3
 ; CHECK-NEXT:    ret
     %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
     ret <2 x i64> %shuffle
@@ -114,9 +112,7 @@ define <2 x i64> @byte_rotate_v2i64_1(<2 x i64> %a, <2 x i64> %b) nounwind {
 define <2 x i64> @byte_rotate_v2i64_2(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; CHECK-LABEL: byte_rotate_v2i64_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vbsrl.v $vr0, $vr0, 8
-; CHECK-NEXT:    vbsll.v $vr1, $vr1, 8
-; CHECK-NEXT:    vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vshuf4i.d $vr0, $vr1, 9
 ; CHECK-NEXT:    ret
     %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
     ret <2 x i64> %shuffle
@@ -125,9 +121,7 @@ define <2 x i64> @byte_rotate_v2i64_2(<2 x i64> %a, <2 x i64> %b) nounwind {
 define <2 x i64> @byte_rotate_v2i64_3(<2 x i64> %a) nounwind {
 ; CHECK-LABEL: byte_rotate_v2i64_3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vbsrl.v $vr1, $vr0, 8
-; CHECK-NEXT:    vbsll.v $vr0, $vr0, 8
-; CHECK-NEXT:    vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vshuf4i.d $vr0, $vr0, 1
 ; CHECK-NEXT:    ret
     %shuffle = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
     ret <2 x i64> %shuffle
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll
index 720fe919601e6..8bf030e94d85d 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll
@@ -230,7 +230,7 @@ define <2 x i64> @shuffle_2i64_vbsll_v_8(<2 x i64> %a) nounwind {
 ; CHECK-LABEL: shuffle_2i64_vbsll_v_8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vrepli.b $vr1, 0
-; CHECK-NEXT:    vpackev.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    vshuf4i.d $vr0, $vr1, 2
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
   ret <2 x i64> %shuffle
@@ -464,7 +464,8 @@ define <4 x i32> @shuffle_4i32_vbsrl_v_12(<4 x i32> %a) nounwind {
 define <2 x i64> @shuffle_2i64_vbsrl_v_8(<2 x i64> %a) nounwind {
 ; CHECK-LABEL: shuffle_2i64_vbsrl_v_8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vbsrl.v $vr0, $vr0, 8
+; CHECK-NEXT:    vrepli.b $vr1, 0
+; CHECK-NEXT:    vshuf4i.d $vr0, $vr1, 9
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 2>
   ret <2 x i64> %shuffle
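
For the 256-bit types, the patch walks the 4-entry mask in strides of two and only emits `xvshuf4i.d` when both 128-bit halves request the same 2-element pattern. The sketch below mirrors that collapsing loop; the helper name `encodeXvShuf4iDImm` and the standalone driver are hypothetical, not code from the patch.

```cpp
// Hypothetical sketch mirroring the v4i64/v4f64 path of
// lowerVECTOR_SHUFFLE_VSHUF4I: collapse the 4-entry mask to one 2-element
// sub-mask (both 128-bit halves must agree), then encode it as for vshuf4i.d.
#include <array>
#include <cassert>
#include <optional>

static std::optional<int> encodeXvShuf4iDImm(std::array<int, 4> Mask) {
  int Sub[2] = {-1, -1};
  for (int i = 0; i < 2; ++i) {
    for (int j = i; j < 4; j += 2) {
      int M = Mask[j];
      if (M != -1) {
        M -= 4 * (j / 2);        // rebase indices seen in the second half
        if (M < 0 || M >= 4)
          return std::nullopt;   // index outside the selectable range
      }
      if (Sub[i] == -1)
        Sub[i] = M;              // first (possibly undef) value wins
      else if (M != -1 && M != Sub[i])
        return std::nullopt;     // the two halves disagree: give up
    }
  }
  int Imm = 0;
  for (int i = 1; i >= 0; --i) { // remaining undefs encode as 0
    Imm <<= 2;
    if (Sub[i] != -1)
      Imm |= Sub[i] & 0x3;
  }
  return Imm;
}

int main() {
  // Matches the new LASX test: mask <1, 2, 5, 6> -> xvshuf4i.d $xr0, $xr1, 9.
  assert(encodeXvShuf4iDImm({1, 2, 5, 6}).value_or(-1) == 9);
  // Halves that request different patterns cannot share a single immediate.
  assert(!encodeXvShuf4iDImm({1, 2, 5, 7}).has_value());
}
```
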

github-actions bot commented Apr 30, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

@tangaac (Contributor, Author) commented Apr 30, 2025

The files optimized by this PR can be found at tangaac/loong-opt-cov-ts@27c9a10.
