[LoongArch] Lower [x]vshuf.d to [x]vshuf4i.d if possible. #137918

Open · wants to merge 3 commits into base: main

Conversation

@tangaac (Contributor) commented Apr 30, 2025

No description provided.

@llvmbot (Member) commented Apr 30, 2025

@llvm/pr-subscribers-backend-loongarch

Author: None (tangaac)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/137918.diff

9 Files Affected:

  • (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+30-16)
  • (modified) llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td (+4)
  • (modified) llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td (+6)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll (+20)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll (+4-4)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll (+2-8)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll (+24-4)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll (+3-9)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll (+3-2)
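
For readers skimming the patch: `vshuf4i.d` selects each destination double-word with a 2-bit field of its immediate, where selector values 0-1 pick from the first source register and 2-3 from the second. The snippet below is a minimal standalone sketch of that encoding, mirroring the immediate loop in `lowerVECTOR_SHUFFLE_VSHUF4I` for the 2-element case; the helper name `encodeVShuf4iDImm` and the test driver are hypothetical, not code from the patch.

```cpp
// Minimal sketch (hypothetical, not from the patch): how a 2-element shuffle
// mask is folded into the 8-bit immediate of vshuf4i.d. Selector values 0-1
// pick double-words of the first operand, 2-3 of the second operand.
#include <array>
#include <cassert>
#include <cstdio>

// Returns the immediate for a <2 x i64>/<2 x double> shuffle mask, or -1 if an
// index falls outside the four selectable double-words.
static int encodeVShuf4iDImm(std::array<int, 2> Mask) {
  unsigned Imm = 0;
  for (int i = 1; i >= 0; --i) { // high result lane first, as in the patch
    int Idx = Mask[i];
    if (Idx < -1 || Idx > 3)
      return -1;
    Imm <<= 2;
    if (Idx != -1)               // remaining undefs (-1) encode as 0
      Imm |= Idx & 0x3;
  }
  return static_cast<int>(Imm);
}

int main() {
  // These values match the CHECK lines in the updated tests below.
  assert(encodeVShuf4iDImm({1, 2}) == 9);  // <1,2>: former vbsrl/vbsll/vor
  assert(encodeVShuf4iDImm({0, 3}) == 12); // <0,3>: former constant-pool vshuf.d
  assert(encodeVShuf4iDImm({0, 2}) == 8);  // <0,2>: former vpackev.d
  assert(encodeVShuf4iDImm({1, 3}) == 13); // <1,3>: former vpackod.d
  assert(encodeVShuf4iDImm({3, 0}) == 3);  // <3,0>: former byte-rotate sequence
  std::puts("immediates match the test expectations");
}
```

This encoding is what lets the `vpackev.d`/`vpackod.d`, constant-pool `vshuf.d`, and byte-rotate sequences in the updated tests collapse into a single `vshuf4i.d`.
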
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index d4e1d9c6f3ca6..4e79d1bd39387 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -994,37 +994,39 @@ static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {
 
-  // When the size is less than 4, lower cost instructions may be used.
-  if (Mask.size() < 4)
-    return SDValue();
+  unsigned SubVecSize = 4;
+  if (VT == MVT::v2f64 || VT == MVT::v2i64 || VT == MVT::v4f64 ||
+      VT == MVT::v4i64) {
+    SubVecSize = 2;
+  }
 
   int SubMask[4] = {-1, -1, -1, -1};
-  for (unsigned i = 0; i < 4; ++i) {
-    for (unsigned j = i; j < Mask.size(); j += 4) {
-      int Idx = Mask[j];
+  for (unsigned i = 0; i < SubVecSize; ++i) {
+    for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
+      int M = Mask[j];
 
       // Convert from vector index to 4-element subvector index
       // If an index refers to an element outside of the subvector then give up
-      if (Idx != -1) {
-        Idx -= 4 * (j / 4);
-        if (Idx < 0 || Idx >= 4)
+      if (M != -1) {
+        M -= 4 * (j / SubVecSize);
+        if (M < 0 || M >= 4)
           return SDValue();
       }
 
       // If the mask has an undef, replace it with the current index.
       // Note that it might still be undef if the current index is also undef
       if (SubMask[i] == -1)
-        SubMask[i] = Idx;
+        SubMask[i] = M;
       // Check that non-undef values are the same as in the mask. If they
       // aren't then give up
-      else if (Idx != -1 && Idx != SubMask[i])
+      else if (M != -1 && M != SubMask[i])
         return SDValue();
     }
   }
 
   // Calculate the immediate. Replace any remaining undefs with zero
   APInt Imm(64, 0);
-  for (int i = 3; i >= 0; --i) {
+  for (int i = SubVecSize-1; i >= 0; --i) {
     int Idx = SubMask[i];
 
     if (Idx == -1)
@@ -1034,6 +1036,12 @@ static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
     Imm |= Idx & 0x3;
   }
 
+  // Return vshuf4i.d and xvshuf4i.d
+  if (VT == MVT::v2f64 || VT == MVT::v2i64 || VT == MVT::v4f64 ||
+      VT == MVT::v4i64)
+    return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
+                       V2, DAG.getConstant(Imm, DL, MVT::i64));
+
   return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
                      DAG.getConstant(Imm, DL, MVT::i64));
 }
@@ -1343,6 +1351,11 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
     /* V2 = V1; */
   }
 
+  if (VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) {
+    if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG)))
+      return Result;
+  }
+
   // It is recommended not to change the pattern comparison order for better
   // performance.
   if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
@@ -1413,10 +1426,6 @@ static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL,
 static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
                                             MVT VT, SDValue V1, SDValue V2,
                                             SelectionDAG &DAG) {
-  // When the size is less than or equal to 4, lower cost instructions may be
-  // used.
-  if (Mask.size() <= 4)
-    return SDValue();
   return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG);
 }
 
@@ -1784,6 +1793,11 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
     /* V2 = V1; */
   }
 
+  if (VT.SimpleTy == MVT::v4i64 || VT.SimpleTy == MVT::v4f64) {
+    if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG)))
+      return Result;
+  }
+
   // It is recommended not to change the pattern comparison order for better
   // performance.
   if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index e4268920e0b27..fcc2cac8d0766 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1729,6 +1729,10 @@ def : Pat<(loongarch_vshuf4i v8i32:$xj, immZExt8:$ui8),
         (XVSHUF4I_W v8i32:$xj, immZExt8:$ui8)>;
 def : Pat<(loongarch_vshuf4i v8f32:$xj, immZExt8:$ui8),
         (XVSHUF4I_W v8f32:$xj, immZExt8:$ui8)>;
+def : Pat<(loongarch_vshuf4i_d v4i64:$xj, v4i64:$xk, immZExt8:$ui8),
+        (XVSHUF4I_D v4i64:$xj, v4i64:$xk, immZExt8:$ui8)>;
+def : Pat<(loongarch_vshuf4i_d v4f64:$xj, v4f64:$xk, immZExt8:$ui8),
+        (XVSHUF4I_D v4f64:$xj, v4f64:$xk, immZExt8:$ui8)>;
 
 // XVREPL128VEI_{B/H/W/D}
 def : Pat<(loongarch_vreplvei v32i8:$xj, immZExt4:$ui4),
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 1ffc5f8056b96..241e835721fb2 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -23,6 +23,7 @@ def SDT_LoongArchV2R : SDTypeProfile<1, 2, [SDTCisVec<0>,
                                      SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
 def SDT_LoongArchV1RUimm: SDTypeProfile<1, 2, [SDTCisVec<0>,
                                         SDTCisSameAs<0,1>, SDTCisVT<2, i64>]>;
+def SDT_LoongArchVShuf4i_D : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1,2>, SDTCisVT<3, i64>]>;
 def SDT_LoongArchVreplgr2vr : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<0>, SDTCisInt<1>]>;
 def SDT_LoongArchVFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
 def SDT_LoongArchVFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
@@ -53,6 +54,7 @@ def loongarch_vilvl: SDNode<"LoongArchISD::VILVL", SDT_LoongArchV2R>;
 def loongarch_vilvh: SDNode<"LoongArchISD::VILVH", SDT_LoongArchV2R>;
 
 def loongarch_vshuf4i: SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchV1RUimm>;
+def loongarch_vshuf4i_d: SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchVShuf4i_D>;
 def loongarch_vreplvei: SDNode<"LoongArchISD::VREPLVEI", SDT_LoongArchV1RUimm>;
 def loongarch_vreplgr2vr: SDNode<"LoongArchISD::VREPLGR2VR", SDT_LoongArchVreplgr2vr>;
 
@@ -1914,6 +1916,10 @@ def : Pat<(loongarch_vshuf4i v4i32:$vj, immZExt8:$ui8),
         (VSHUF4I_W v4i32:$vj, immZExt8:$ui8)>;
 def : Pat<(loongarch_vshuf4i v4f32:$vj, immZExt8:$ui8),
         (VSHUF4I_W v4f32:$vj, immZExt8:$ui8)>;
+def : Pat<(loongarch_vshuf4i_d v2i64:$vj, v2i64:$vk, immZExt8:$ui8),
+        (VSHUF4I_D v2i64:$vj, v2i64:$vk, immZExt8:$ui8)>;
+def : Pat<(loongarch_vshuf4i_d v2f64:$vj, v2f64:$vk, immZExt8:$ui8),
+        (VSHUF4I_D v2f64:$vj, v2f64:$vk, immZExt8:$ui8)>;
 
 // VREPLVEI_{B/H/W/D}
 def : Pat<(loongarch_vreplvei v16i8:$vj, immZExt4:$ui4),
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll
index dc4532a7292ab..f3736f669db41 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll
@@ -41,3 +41,23 @@ define <8 x float> @shufflevector_xvshuf4i_v8f32(<8 x float> %a, <8 x float> %b)
     %c = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
     ret <8 x float> %c
 }
+
+;; xvshuf4i.d
+define <4 x i64> @shufflevector_xvshuf4i_v4d64(<4 x i64> %a, <4 x i64> %b) {
+; CHECK-LABEL: shufflevector_xvshuf4i_v4d64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvshuf4i.d $xr0, $xr1, 9
+; CHECK-NEXT:    ret
+    %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
+    ret <4 x i64> %c
+}
+
+;; xvshuf4i.d
+define <4 x double> @shufflevector_xvshuf4i_v4f64(<4 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: shufflevector_xvshuf4i_v4f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvshuf4i.d $xr0, $xr1, 9
+; CHECK-NEXT:    ret
+    %c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
+    ret <4 x double> %c
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll
index 171e68306cd11..5882d43257df8 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll
@@ -35,7 +35,7 @@ define <4 x i32> @shufflevector_pack_ev_v4i32(<4 x i32> %a, <4 x i32> %b) {
 define <2 x i64> @shufflevector_pack_ev_v2i64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: shufflevector_pack_ev_v2i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpackev.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    vshuf4i.d $vr0, $vr1, 8
 ; CHECK-NEXT:    ret
     %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
     ret <2 x i64> %c
@@ -55,7 +55,7 @@ define <4 x float> @shufflevector_pack_ev_v4f32(<4 x float> %a, <4 x float> %b)
 define <2 x double> @shufflevector_pack_ev_v2f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: shufflevector_pack_ev_v2f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpackev.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    vshuf4i.d $vr0, $vr1, 8
 ; CHECK-NEXT:    ret
     %c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
     ret <2 x double> %c
@@ -95,7 +95,7 @@ define <4 x i32> @shufflevector_pack_od_v4i32(<4 x i32> %a, <4 x i32> %b) {
 define <2 x i64> @shufflodector_pack_od_v2i64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: shufflodector_pack_od_v2i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpackod.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    vshuf4i.d $vr0, $vr1, 13
 ; CHECK-NEXT:    ret
     %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
     ret <2 x i64> %c
@@ -115,7 +115,7 @@ define <4 x float> @shufflodector_pack_od_v4f32(<4 x float> %a, <4 x float> %b)
 define <2 x double> @shufflodector_pack_od_v2f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: shufflodector_pack_od_v2f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpackod.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    vshuf4i.d $vr0, $vr1, 13
 ; CHECK-NEXT:    ret
     %c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
     ret <2 x double> %c
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll
index ac78a26ba4367..d1c071b45ddff 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll
@@ -42,10 +42,7 @@ define <4 x i32> @shufflevector_v4i32(<4 x i32> %a, <4 x i32> %b) {
 define <2 x i64> @shufflevector_v2i64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: shufflevector_v2i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI3_0)
-; CHECK-NEXT:    vld $vr2, $a0, %pc_lo12(.LCPI3_0)
-; CHECK-NEXT:    vshuf.d $vr2, $vr1, $vr0
-; CHECK-NEXT:    vori.b $vr0, $vr2, 0
+; CHECK-NEXT:    vshuf4i.d $vr0, $vr1, 12
 ; CHECK-NEXT:    ret
     %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
     ret <2 x i64> %c
@@ -68,10 +65,7 @@ define <4 x float> @shufflevector_v4f32(<4 x float> %a, <4 x float> %b) {
 define <2 x double> @shufflevector_v2f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: shufflevector_v2f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI5_0)
-; CHECK-NEXT:    vld $vr2, $a0, %pc_lo12(.LCPI5_0)
-; CHECK-NEXT:    vshuf.d $vr2, $vr1, $vr0
-; CHECK-NEXT:    vori.b $vr0, $vr2, 0
+; CHECK-NEXT:    vshuf4i.d $vr0, $vr1, 12
 ; CHECK-NEXT:    ret
     %c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
     ret <2 x double> %c
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll
index 660b9581c3d1f..cd80dcb44e433 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
 
-;; vilvh.b
+;; vshuf4i.b
 define <16 x i8> @shufflevector_vshuf4i_v16i8(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: shufflevector_vshuf4i_v16i8:
 ; CHECK:       # %bb.0:
@@ -11,7 +11,7 @@ define <16 x i8> @shufflevector_vshuf4i_v16i8(<16 x i8> %a, <16 x i8> %b) {
     ret <16 x i8> %c
 }
 
-;; vilvh.h
+;; vshuf4i.h
 define <8 x i16> @shufflevector_vshuf4i_v8i4(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: shufflevector_vshuf4i_v8i4:
 ; CHECK:       # %bb.0:
@@ -21,7 +21,7 @@ define <8 x i16> @shufflevector_vshuf4i_v8i4(<8 x i16> %a, <8 x i16> %b) {
     ret <8 x i16> %c
 }
 
-;; vilvh.w
+;; vshuf4i.w
 define <4 x i32> @shufflevector_vshuf4i_v4i32(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: shufflevector_vshuf4i_v4i32:
 ; CHECK:       # %bb.0:
@@ -31,7 +31,7 @@ define <4 x i32> @shufflevector_vshuf4i_v4i32(<4 x i32> %a, <4 x i32> %b) {
     ret <4 x i32> %c
 }
 
-;; vilvh.w
+;; vshuf4i.w
 define <4 x float> @shufflevector_vshuf4i_v4f32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: shufflevector_vshuf4i_v4f32:
 ; CHECK:       # %bb.0:
@@ -40,3 +40,23 @@ define <4 x float> @shufflevector_vshuf4i_v4f32(<4 x float> %a, <4 x float> %b)
     %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
     ret <4 x float> %c
 }
+
+;; vshuf4i.d
+define <2 x i64> @shufflevector_vshuf4i_v2d64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: shufflevector_vshuf4i_v2d64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vshuf4i.d $vr0, $vr1, 9
+; CHECK-NEXT:    ret
+    %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
+    ret <2 x i64> %c
+}
+
+;; vshuf4i.d
+define <2 x double> @shufflevector_vshuf4i_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: shufflevector_vshuf4i_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vshuf4i.d $vr0, $vr1, 9
+; CHECK-NEXT:    ret
+    %c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 2>
+    ret <2 x double> %c
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll
index 7b2bb47424fee..b1e3f74cd1739 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll
@@ -103,9 +103,7 @@ define <4 x i32> @byte_rotate_v4i32_3(<4 x i32> %a) nounwind {
 define <2 x i64> @byte_rotate_v2i64_1(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; CHECK-LABEL: byte_rotate_v2i64_1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vbsrl.v $vr1, $vr1, 8
-; CHECK-NEXT:    vbsll.v $vr0, $vr0, 8
-; CHECK-NEXT:    vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vshuf4i.d $vr0, $vr1, 3
 ; CHECK-NEXT:    ret
     %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
     ret <2 x i64> %shuffle
@@ -114,9 +112,7 @@ define <2 x i64> @byte_rotate_v2i64_1(<2 x i64> %a, <2 x i64> %b) nounwind {
 define <2 x i64> @byte_rotate_v2i64_2(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; CHECK-LABEL: byte_rotate_v2i64_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vbsrl.v $vr0, $vr0, 8
-; CHECK-NEXT:    vbsll.v $vr1, $vr1, 8
-; CHECK-NEXT:    vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vshuf4i.d $vr0, $vr1, 9
 ; CHECK-NEXT:    ret
     %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
     ret <2 x i64> %shuffle
@@ -125,9 +121,7 @@ define <2 x i64> @byte_rotate_v2i64_2(<2 x i64> %a, <2 x i64> %b) nounwind {
 define <2 x i64> @byte_rotate_v2i64_3(<2 x i64> %a) nounwind {
 ; CHECK-LABEL: byte_rotate_v2i64_3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vbsrl.v $vr1, $vr0, 8
-; CHECK-NEXT:    vbsll.v $vr0, $vr0, 8
-; CHECK-NEXT:    vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vshuf4i.d $vr0, $vr0, 1
 ; CHECK-NEXT:    ret
     %shuffle = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
     ret <2 x i64> %shuffle
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll
index 720fe919601e6..8bf030e94d85d 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll
@@ -230,7 +230,7 @@ define <2 x i64> @shuffle_2i64_vbsll_v_8(<2 x i64> %a) nounwind {
 ; CHECK-LABEL: shuffle_2i64_vbsll_v_8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vrepli.b $vr1, 0
-; CHECK-NEXT:    vpackev.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    vshuf4i.d $vr0, $vr1, 2
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
   ret <2 x i64> %shuffle
@@ -464,7 +464,8 @@ define <4 x i32> @shuffle_4i32_vbsrl_v_12(<4 x i32> %a) nounwind {
 define <2 x i64> @shuffle_2i64_vbsrl_v_8(<2 x i64> %a) nounwind {
 ; CHECK-LABEL: shuffle_2i64_vbsrl_v_8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vbsrl.v $vr0, $vr0, 8
+; CHECK-NEXT:    vrepli.b $vr1, 0
+; CHECK-NEXT:    vshuf4i.d $vr0, $vr1, 9
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 2>
   ret <2 x i64> %shuffle
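
For the 256-bit types, the patch walks the 4-entry mask in strides of two and only emits `xvshuf4i.d` when both 128-bit halves request the same 2-element pattern. The sketch below mirrors that collapsing loop; the helper name `encodeXvShuf4iDImm` and the standalone driver are hypothetical, not code from the patch.

```cpp
// Hypothetical sketch mirroring the v4i64/v4f64 path of
// lowerVECTOR_SHUFFLE_VSHUF4I: collapse the 4-entry mask to one 2-element
// sub-mask (both 128-bit halves must agree), then encode it as for vshuf4i.d.
#include <array>
#include <cassert>
#include <optional>

static std::optional<int> encodeXvShuf4iDImm(std::array<int, 4> Mask) {
  int Sub[2] = {-1, -1};
  for (int i = 0; i < 2; ++i) {
    for (int j = i; j < 4; j += 2) {
      int M = Mask[j];
      if (M != -1) {
        M -= 4 * (j / 2);        // rebase indices seen in the second half
        if (M < 0 || M >= 4)
          return std::nullopt;   // index outside the selectable range
      }
      if (Sub[i] == -1)
        Sub[i] = M;              // first (possibly undef) value wins
      else if (M != -1 && M != Sub[i])
        return std::nullopt;     // the two halves disagree: give up
    }
  }
  int Imm = 0;
  for (int i = 1; i >= 0; --i) { // remaining undefs encode as 0
    Imm <<= 2;
    if (Sub[i] != -1)
      Imm |= Sub[i] & 0x3;
  }
  return Imm;
}

int main() {
  // Matches the new LASX test: mask <1, 2, 5, 6> -> xvshuf4i.d $xr0, $xr1, 9.
  assert(encodeXvShuf4iDImm({1, 2, 5, 6}).value_or(-1) == 9);
  // Halves that request different patterns cannot share a single immediate.
  assert(!encodeXvShuf4iDImm({1, 2, 5, 7}).has_value());
}
```
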

github-actions bot commented Apr 30, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

@tangaac (Contributor, Author) commented Apr 30, 2025

The files optimized by this PR can be found at tangaac/loong-opt-cov-ts@27c9a10.
