-
Notifications
You must be signed in to change notification settings - Fork 13.4k
[LoongArch] Lower [x]vshuf.d to [x]vshuf4i.d if possible. #137918
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
tangaac
wants to merge
3
commits into
llvm:main
Choose a base branch
from
tangaac:shuf4i-d
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@llvm/pr-subscribers-backend-loongarch Author: None (tangaac) Changes — Full diff: https://github.com/llvm/llvm-project/pull/137918.diff (9 files affected):
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index d4e1d9c6f3ca6..4e79d1bd39387 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -994,37 +994,39 @@ static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
MVT VT, SDValue V1, SDValue V2,
SelectionDAG &DAG) {
- // When the size is less than 4, lower cost instructions may be used.
- if (Mask.size() < 4)
- return SDValue();
+ unsigned SubVecSize = 4;
+ if (VT == MVT::v2f64 || VT == MVT::v2i64 || VT == MVT::v4f64 ||
+ VT == MVT::v4i64) {
+ SubVecSize = 2;
+ }
int SubMask[4] = {-1, -1, -1, -1};
- for (unsigned i = 0; i < 4; ++i) {
- for (unsigned j = i; j < Mask.size(); j += 4) {
- int Idx = Mask[j];
+ for (unsigned i = 0; i < SubVecSize; ++i) {
+ for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
+ int M = Mask[j];
// Convert from vector index to 4-element subvector index
// If an index refers to an element outside of the subvector then give up
- if (Idx != -1) {
- Idx -= 4 * (j / 4);
- if (Idx < 0 || Idx >= 4)
+ if (M != -1) {
+ M -= 4 * (j / SubVecSize);
+ if (M < 0 || M >= 4)
return SDValue();
}
// If the mask has an undef, replace it with the current index.
// Note that it might still be undef if the current index is also undef
if (SubMask[i] == -1)
- SubMask[i] = Idx;
+ SubMask[i] = M;
// Check that non-undef values are the same as in the mask. If they
// aren't then give up
- else if (Idx != -1 && Idx != SubMask[i])
+ else if (M != -1 && M != SubMask[i])
return SDValue();
}
}
// Calculate the immediate. Replace any remaining undefs with zero
APInt Imm(64, 0);
- for (int i = 3; i >= 0; --i) {
+ for (int i = SubVecSize-1; i >= 0; --i) {
int Idx = SubMask[i];
if (Idx == -1)
@@ -1034,6 +1036,12 @@ static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
Imm |= Idx & 0x3;
}
+ // Return vshuf4i.d and xvshuf4i.d
+ if (VT == MVT::v2f64 || VT == MVT::v2i64 || VT == MVT::v4f64 ||
+ VT == MVT::v4i64)
+ return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
+ V2, DAG.getConstant(Imm, DL, MVT::i64));
+
return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
DAG.getConstant(Imm, DL, MVT::i64));
}
@@ -1343,6 +1351,11 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
/* V2 = V1; */
}
+ if (VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) {
+ if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG)))
+ return Result;
+ }
+
// It is recommended not to change the pattern comparison order for better
// performance.
if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
@@ -1413,10 +1426,6 @@ static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL,
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
MVT VT, SDValue V1, SDValue V2,
SelectionDAG &DAG) {
- // When the size is less than or equal to 4, lower cost instructions may be
- // used.
- if (Mask.size() <= 4)
- return SDValue();
return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG);
}
@@ -1784,6 +1793,11 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
/* V2 = V1; */
}
+ if (VT.SimpleTy == MVT::v4i64 || VT.SimpleTy == MVT::v4f64) {
+ if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG)))
+ return Result;
+ }
+
// It is recommended not to change the pattern comparison order for better
// performance.
if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index e4268920e0b27..fcc2cac8d0766 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1729,6 +1729,10 @@ def : Pat<(loongarch_vshuf4i v8i32:$xj, immZExt8:$ui8),
(XVSHUF4I_W v8i32:$xj, immZExt8:$ui8)>;
def : Pat<(loongarch_vshuf4i v8f32:$xj, immZExt8:$ui8),
(XVSHUF4I_W v8f32:$xj, immZExt8:$ui8)>;
+def : Pat<(loongarch_vshuf4i_d v4i64:$xj, v4i64:$xk, immZExt8:$ui8),
+ (XVSHUF4I_D v4i64:$xj, v4i64:$xk, immZExt8:$ui8)>;
+def : Pat<(loongarch_vshuf4i_d v4f64:$xj, v4f64:$xk, immZExt8:$ui8),
+ (XVSHUF4I_D v4f64:$xj, v4f64:$xk, immZExt8:$ui8)>;
// XVREPL128VEI_{B/H/W/D}
def : Pat<(loongarch_vreplvei v32i8:$xj, immZExt4:$ui4),
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 1ffc5f8056b96..241e835721fb2 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -23,6 +23,7 @@ def SDT_LoongArchV2R : SDTypeProfile<1, 2, [SDTCisVec<0>,
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
def SDT_LoongArchV1RUimm: SDTypeProfile<1, 2, [SDTCisVec<0>,
SDTCisSameAs<0,1>, SDTCisVT<2, i64>]>;
+def SDT_LoongArchVShuf4i_D : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1,2>, SDTCisVT<3, i64>]>;
def SDT_LoongArchVreplgr2vr : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<0>, SDTCisInt<1>]>;
def SDT_LoongArchVFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def SDT_LoongArchVFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
@@ -53,6 +54,7 @@ def loongarch_vilvl: SDNode<"LoongArchISD::VILVL", SDT_LoongArchV2R>;
def loongarch_vilvh: SDNode<"LoongArchISD::VILVH", SDT_LoongArchV2R>;
def loongarch_vshuf4i: SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchV1RUimm>;
+def loongarch_vshuf4i_d: SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchVShuf4i_D>;
def loongarch_vreplvei: SDNode<"LoongArchISD::VREPLVEI", SDT_LoongArchV1RUimm>;
def loongarch_vreplgr2vr: SDNode<"LoongArchISD::VREPLGR2VR", SDT_LoongArchVreplgr2vr>;
@@ -1914,6 +1916,10 @@ def : Pat<(loongarch_vshuf4i v4i32:$vj, immZExt8:$ui8),
(VSHUF4I_W v4i32:$vj, immZExt8:$ui8)>;
def : Pat<(loongarch_vshuf4i v4f32:$vj, immZExt8:$ui8),
(VSHUF4I_W v4f32:$vj, immZExt8:$ui8)>;
+def : Pat<(loongarch_vshuf4i_d v2i64:$vj, v2i64:$vk, immZExt8:$ui8),
+ (VSHUF4I_D v2i64:$vj, v2i64:$vk, immZExt8:$ui8)>;
+def : Pat<(loongarch_vshuf4i_d v2f64:$vj, v2f64:$vk, immZExt8:$ui8),
+ (VSHUF4I_D v2f64:$vj, v2f64:$vk, immZExt8:$ui8)>;
// VREPLVEI_{B/H/W/D}
def : Pat<(loongarch_vreplvei v16i8:$vj, immZExt4:$ui4),
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll
index dc4532a7292ab..f3736f669db41 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll
@@ -41,3 +41,23 @@ define <8 x float> @shufflevector_xvshuf4i_v8f32(<8 x float> %a, <8 x float> %b)
%c = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
ret <8 x float> %c
}
+
+;; xvshuf4i.d
+define <4 x i64> @shufflevector_xvshuf4i_v4d64(<4 x i64> %a, <4 x i64> %b) {
+; CHECK-LABEL: shufflevector_xvshuf4i_v4d64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvshuf4i.d $xr0, $xr1, 9
+; CHECK-NEXT: ret
+ %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
+ ret <4 x i64> %c
+}
+
+;; xvshuf4i.d
+define <4 x double> @shufflevector_xvshuf4i_v4f64(<4 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: shufflevector_xvshuf4i_v4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvshuf4i.d $xr0, $xr1, 9
+; CHECK-NEXT: ret
+ %c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
+ ret <4 x double> %c
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll
index 171e68306cd11..5882d43257df8 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll
@@ -35,7 +35,7 @@ define <4 x i32> @shufflevector_pack_ev_v4i32(<4 x i32> %a, <4 x i32> %b) {
define <2 x i64> @shufflevector_pack_ev_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: shufflevector_pack_ev_v2i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpackev.d $vr0, $vr1, $vr0
+; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 8
; CHECK-NEXT: ret
%c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
ret <2 x i64> %c
@@ -55,7 +55,7 @@ define <4 x float> @shufflevector_pack_ev_v4f32(<4 x float> %a, <4 x float> %b)
define <2 x double> @shufflevector_pack_ev_v2f64(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: shufflevector_pack_ev_v2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpackev.d $vr0, $vr1, $vr0
+; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 8
; CHECK-NEXT: ret
%c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
ret <2 x double> %c
@@ -95,7 +95,7 @@ define <4 x i32> @shufflevector_pack_od_v4i32(<4 x i32> %a, <4 x i32> %b) {
define <2 x i64> @shufflodector_pack_od_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: shufflodector_pack_od_v2i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpackod.d $vr0, $vr1, $vr0
+; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 13
; CHECK-NEXT: ret
%c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
ret <2 x i64> %c
@@ -115,7 +115,7 @@ define <4 x float> @shufflodector_pack_od_v4f32(<4 x float> %a, <4 x float> %b)
define <2 x double> @shufflodector_pack_od_v2f64(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: shufflodector_pack_od_v2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpackod.d $vr0, $vr1, $vr0
+; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 13
; CHECK-NEXT: ret
%c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
ret <2 x double> %c
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll
index ac78a26ba4367..d1c071b45ddff 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll
@@ -42,10 +42,7 @@ define <4 x i32> @shufflevector_v4i32(<4 x i32> %a, <4 x i32> %b) {
define <2 x i64> @shufflevector_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: shufflevector_v2i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
-; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI3_0)
-; CHECK-NEXT: vshuf.d $vr2, $vr1, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr2, 0
+; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 12
; CHECK-NEXT: ret
%c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
ret <2 x i64> %c
@@ -68,10 +65,7 @@ define <4 x float> @shufflevector_v4f32(<4 x float> %a, <4 x float> %b) {
define <2 x double> @shufflevector_v2f64(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: shufflevector_v2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
-; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI5_0)
-; CHECK-NEXT: vshuf.d $vr2, $vr1, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr2, 0
+; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 12
; CHECK-NEXT: ret
%c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
ret <2 x double> %c
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll
index 660b9581c3d1f..cd80dcb44e433 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
-;; vilvh.b
+;; vshuf4i.b
define <16 x i8> @shufflevector_vshuf4i_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: shufflevector_vshuf4i_v16i8:
; CHECK: # %bb.0:
@@ -11,7 +11,7 @@ define <16 x i8> @shufflevector_vshuf4i_v16i8(<16 x i8> %a, <16 x i8> %b) {
ret <16 x i8> %c
}
-;; vilvh.h
+;; vshuf4i.h
define <8 x i16> @shufflevector_vshuf4i_v8i4(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: shufflevector_vshuf4i_v8i4:
; CHECK: # %bb.0:
@@ -21,7 +21,7 @@ define <8 x i16> @shufflevector_vshuf4i_v8i4(<8 x i16> %a, <8 x i16> %b) {
ret <8 x i16> %c
}
-;; vilvh.w
+;; vshuf4i.w
define <4 x i32> @shufflevector_vshuf4i_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: shufflevector_vshuf4i_v4i32:
; CHECK: # %bb.0:
@@ -31,7 +31,7 @@ define <4 x i32> @shufflevector_vshuf4i_v4i32(<4 x i32> %a, <4 x i32> %b) {
ret <4 x i32> %c
}
-;; vilvh.w
+;; vshuf4i.w
define <4 x float> @shufflevector_vshuf4i_v4f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: shufflevector_vshuf4i_v4f32:
; CHECK: # %bb.0:
@@ -40,3 +40,23 @@ define <4 x float> @shufflevector_vshuf4i_v4f32(<4 x float> %a, <4 x float> %b)
%c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x float> %c
}
+
+;; vshuf4i.d
+define <2 x i64> @shufflevector_vshuf4i_v2d64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: shufflevector_vshuf4i_v2d64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 9
+; CHECK-NEXT: ret
+ %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
+ ret <2 x i64> %c
+}
+
+;; vshuf4i.d
+define <2 x double> @shufflevector_vshuf4i_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: shufflevector_vshuf4i_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 9
+; CHECK-NEXT: ret
+ %c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 2>
+ ret <2 x double> %c
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll
index 7b2bb47424fee..b1e3f74cd1739 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll
@@ -103,9 +103,7 @@ define <4 x i32> @byte_rotate_v4i32_3(<4 x i32> %a) nounwind {
define <2 x i64> @byte_rotate_v2i64_1(<2 x i64> %a, <2 x i64> %b) nounwind {
; CHECK-LABEL: byte_rotate_v2i64_1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vbsrl.v $vr1, $vr1, 8
-; CHECK-NEXT: vbsll.v $vr0, $vr0, 8
-; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 3
; CHECK-NEXT: ret
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
ret <2 x i64> %shuffle
@@ -114,9 +112,7 @@ define <2 x i64> @byte_rotate_v2i64_1(<2 x i64> %a, <2 x i64> %b) nounwind {
define <2 x i64> @byte_rotate_v2i64_2(<2 x i64> %a, <2 x i64> %b) nounwind {
; CHECK-LABEL: byte_rotate_v2i64_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vbsrl.v $vr0, $vr0, 8
-; CHECK-NEXT: vbsll.v $vr1, $vr1, 8
-; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 9
; CHECK-NEXT: ret
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
ret <2 x i64> %shuffle
@@ -125,9 +121,7 @@ define <2 x i64> @byte_rotate_v2i64_2(<2 x i64> %a, <2 x i64> %b) nounwind {
define <2 x i64> @byte_rotate_v2i64_3(<2 x i64> %a) nounwind {
; CHECK-LABEL: byte_rotate_v2i64_3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
-; CHECK-NEXT: vbsll.v $vr0, $vr0, 8
-; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vshuf4i.d $vr0, $vr0, 1
; CHECK-NEXT: ret
%shuffle = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
ret <2 x i64> %shuffle
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll
index 720fe919601e6..8bf030e94d85d 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll
@@ -230,7 +230,7 @@ define <2 x i64> @shuffle_2i64_vbsll_v_8(<2 x i64> %a) nounwind {
; CHECK-LABEL: shuffle_2i64_vbsll_v_8:
; CHECK: # %bb.0:
; CHECK-NEXT: vrepli.b $vr1, 0
-; CHECK-NEXT: vpackev.d $vr0, $vr0, $vr1
+; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 2
; CHECK-NEXT: ret
%shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
ret <2 x i64> %shuffle
@@ -464,7 +464,8 @@ define <4 x i32> @shuffle_4i32_vbsrl_v_12(<4 x i32> %a) nounwind {
define <2 x i64> @shuffle_2i64_vbsrl_v_8(<2 x i64> %a) nounwind {
; CHECK-LABEL: shuffle_2i64_vbsrl_v_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vbsrl.v $vr0, $vr0, 8
+; CHECK-NEXT: vrepli.b $vr1, 0
+; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 9
; CHECK-NEXT: ret
%shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 2>
ret <2 x i64> %shuffle
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
Files optimized by this PR can be found |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
No description provided.