Skip to content

Commit 95b21e1

Browse files
committed
Lower [x]vshuf.d to [x]vshuf4i.d if possible
1 parent 30fec12 commit 95b21e1

File tree

9 files changed

+100
-43
lines changed

9 files changed

+100
-43
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

+30-16
Original file line numberDiff line numberDiff line change
@@ -994,37 +994,39 @@ static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
994994
MVT VT, SDValue V1, SDValue V2,
995995
SelectionDAG &DAG) {
996996

997-
// When the size is less than 4, lower cost instructions may be used.
998-
if (Mask.size() < 4)
999-
return SDValue();
997+
unsigned SubVecSize = 4;
998+
if (VT == MVT::v2f64 || VT == MVT::v2i64 || VT == MVT::v4f64 ||
999+
VT == MVT::v4i64) {
1000+
SubVecSize = 2;
1001+
}
10001002

10011003
int SubMask[4] = {-1, -1, -1, -1};
1002-
for (unsigned i = 0; i < 4; ++i) {
1003-
for (unsigned j = i; j < Mask.size(); j += 4) {
1004-
int Idx = Mask[j];
1004+
for (unsigned i = 0; i < SubVecSize; ++i) {
1005+
for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
1006+
int M = Mask[j];
10051007

10061008
// Convert from vector index to 4-element subvector index
10071009
// If an index refers to an element outside of the subvector then give up
1008-
if (Idx != -1) {
1009-
Idx -= 4 * (j / 4);
1010-
if (Idx < 0 || Idx >= 4)
1010+
if (M != -1) {
1011+
M -= 4 * (j / SubVecSize);
1012+
if (M < 0 || M >= 4)
10111013
return SDValue();
10121014
}
10131015

10141016
// If the mask has an undef, replace it with the current index.
10151017
// Note that it might still be undef if the current index is also undef
10161018
if (SubMask[i] == -1)
1017-
SubMask[i] = Idx;
1019+
SubMask[i] = M;
10181020
// Check that non-undef values are the same as in the mask. If they
10191021
// aren't then give up
1020-
else if (Idx != -1 && Idx != SubMask[i])
1022+
else if (M != -1 && M != SubMask[i])
10211023
return SDValue();
10221024
}
10231025
}
10241026

10251027
// Calculate the immediate. Replace any remaining undefs with zero
10261028
APInt Imm(64, 0);
1027-
for (int i = 3; i >= 0; --i) {
1029+
for (int i = SubVecSize - 1; i >= 0; --i) {
10281030
int Idx = SubMask[i];
10291031

10301032
if (Idx == -1)
@@ -1034,6 +1036,12 @@ static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
10341036
Imm |= Idx & 0x3;
10351037
}
10361038

1039+
// For 64-bit element types, build the two-source VSHUF4I node (selected as vshuf4i.d / xvshuf4i.d)
1040+
if (VT == MVT::v2f64 || VT == MVT::v2i64 || VT == MVT::v4f64 ||
1041+
VT == MVT::v4i64)
1042+
return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1, V2,
1043+
DAG.getConstant(Imm, DL, MVT::i64));
1044+
10371045
return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
10381046
DAG.getConstant(Imm, DL, MVT::i64));
10391047
}
@@ -1343,6 +1351,11 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
13431351
/* V2 = V1; */
13441352
}
13451353

1354+
if (VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) {
1355+
if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG)))
1356+
return Result;
1357+
}
1358+
13461359
// It is recommended not to change the pattern comparison order for better
13471360
// performance.
13481361
if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
@@ -1413,10 +1426,6 @@ static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL,
14131426
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
14141427
MVT VT, SDValue V1, SDValue V2,
14151428
SelectionDAG &DAG) {
1416-
// When the size is less than or equal to 4, lower cost instructions may be
1417-
// used.
1418-
if (Mask.size() <= 4)
1419-
return SDValue();
14201429
return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG);
14211430
}
14221431

@@ -1784,6 +1793,11 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
17841793
/* V2 = V1; */
17851794
}
17861795

1796+
if (VT.SimpleTy == MVT::v4i64 || VT.SimpleTy == MVT::v4f64) {
1797+
if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG)))
1798+
return Result;
1799+
}
1800+
17871801
// It is recommended not to change the pattern comparison order for better
17881802
// performance.
17891803
if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))

llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td

+4
Original file line numberDiff line numberDiff line change
@@ -1729,6 +1729,10 @@ def : Pat<(loongarch_vshuf4i v8i32:$xj, immZExt8:$ui8),
17291729
(XVSHUF4I_W v8i32:$xj, immZExt8:$ui8)>;
17301730
def : Pat<(loongarch_vshuf4i v8f32:$xj, immZExt8:$ui8),
17311731
(XVSHUF4I_W v8f32:$xj, immZExt8:$ui8)>;
1732+
def : Pat<(loongarch_vshuf4i_d v4i64:$xj, v4i64:$xk, immZExt8:$ui8),
1733+
(XVSHUF4I_D v4i64:$xj, v4i64:$xk, immZExt8:$ui8)>;
1734+
def : Pat<(loongarch_vshuf4i_d v4f64:$xj, v4f64:$xk, immZExt8:$ui8),
1735+
(XVSHUF4I_D v4f64:$xj, v4f64:$xk, immZExt8:$ui8)>;
17321736

17331737
// XVREPL128VEI_{B/H/W/D}
17341738
def : Pat<(loongarch_vreplvei v32i8:$xj, immZExt4:$ui4),

llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td

+10
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@ def SDT_LoongArchV2R : SDTypeProfile<1, 2, [SDTCisVec<0>,
2323
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
2424
def SDT_LoongArchV1RUimm: SDTypeProfile<1, 2, [SDTCisVec<0>,
2525
SDTCisSameAs<0,1>, SDTCisVT<2, i64>]>;
26+
def SDT_LoongArchVShuf4i_D
27+
: SDTypeProfile<1, 3,
28+
[SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
29+
SDTCisVT<3, i64>]>;
2630
def SDT_LoongArchVreplgr2vr : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<0>, SDTCisInt<1>]>;
2731
def SDT_LoongArchVFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
2832
def SDT_LoongArchVFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
@@ -53,6 +57,8 @@ def loongarch_vilvl: SDNode<"LoongArchISD::VILVL", SDT_LoongArchV2R>;
5357
def loongarch_vilvh: SDNode<"LoongArchISD::VILVH", SDT_LoongArchV2R>;
5458

5559
def loongarch_vshuf4i: SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchV1RUimm>;
60+
def loongarch_vshuf4i_d
61+
: SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchVShuf4i_D>;
5662
def loongarch_vreplvei: SDNode<"LoongArchISD::VREPLVEI", SDT_LoongArchV1RUimm>;
5763
def loongarch_vreplgr2vr: SDNode<"LoongArchISD::VREPLGR2VR", SDT_LoongArchVreplgr2vr>;
5864

@@ -1914,6 +1920,10 @@ def : Pat<(loongarch_vshuf4i v4i32:$vj, immZExt8:$ui8),
19141920
(VSHUF4I_W v4i32:$vj, immZExt8:$ui8)>;
19151921
def : Pat<(loongarch_vshuf4i v4f32:$vj, immZExt8:$ui8),
19161922
(VSHUF4I_W v4f32:$vj, immZExt8:$ui8)>;
1923+
def : Pat<(loongarch_vshuf4i_d v2i64:$vj, v2i64:$vk, immZExt8:$ui8),
1924+
(VSHUF4I_D v2i64:$vj, v2i64:$vk, immZExt8:$ui8)>;
1925+
def : Pat<(loongarch_vshuf4i_d v2f64:$vj, v2f64:$vk, immZExt8:$ui8),
1926+
(VSHUF4I_D v2f64:$vj, v2f64:$vk, immZExt8:$ui8)>;
19171927

19181928
// VREPLVEI_{B/H/W/D}
19191929
def : Pat<(loongarch_vreplvei v16i8:$vj, immZExt4:$ui4),

llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll

+20
Original file line numberDiff line numberDiff line change
@@ -41,3 +41,23 @@ define <8 x float> @shufflevector_xvshuf4i_v8f32(<8 x float> %a, <8 x float> %b)
4141
%c = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
4242
ret <8 x float> %c
4343
}
44+
45+
;; xvshuf4i.d
46+
define <4 x i64> @shufflevector_xvshuf4i_v4d64(<4 x i64> %a, <4 x i64> %b) {
47+
; CHECK-LABEL: shufflevector_xvshuf4i_v4d64:
48+
; CHECK: # %bb.0:
49+
; CHECK-NEXT: xvshuf4i.d $xr0, $xr1, 9
50+
; CHECK-NEXT: ret
51+
%c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
52+
ret <4 x i64> %c
53+
}
54+
55+
;; xvshuf4i.d
56+
define <4 x double> @shufflevector_xvshuf4i_v4f64(<4 x double> %a, <4 x double> %b) {
57+
; CHECK-LABEL: shufflevector_xvshuf4i_v4f64:
58+
; CHECK: # %bb.0:
59+
; CHECK-NEXT: xvshuf4i.d $xr0, $xr1, 9
60+
; CHECK-NEXT: ret
61+
%c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
62+
ret <4 x double> %c
63+
}

llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll

+4-4
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ define <4 x i32> @shufflevector_pack_ev_v4i32(<4 x i32> %a, <4 x i32> %b) {
3535
define <2 x i64> @shufflevector_pack_ev_v2i64(<2 x i64> %a, <2 x i64> %b) {
3636
; CHECK-LABEL: shufflevector_pack_ev_v2i64:
3737
; CHECK: # %bb.0:
38-
; CHECK-NEXT: vpackev.d $vr0, $vr1, $vr0
38+
; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 8
3939
; CHECK-NEXT: ret
4040
%c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
4141
ret <2 x i64> %c
@@ -55,7 +55,7 @@ define <4 x float> @shufflevector_pack_ev_v4f32(<4 x float> %a, <4 x float> %b)
5555
define <2 x double> @shufflevector_pack_ev_v2f64(<2 x double> %a, <2 x double> %b) {
5656
; CHECK-LABEL: shufflevector_pack_ev_v2f64:
5757
; CHECK: # %bb.0:
58-
; CHECK-NEXT: vpackev.d $vr0, $vr1, $vr0
58+
; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 8
5959
; CHECK-NEXT: ret
6060
%c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
6161
ret <2 x double> %c
@@ -95,7 +95,7 @@ define <4 x i32> @shufflevector_pack_od_v4i32(<4 x i32> %a, <4 x i32> %b) {
9595
define <2 x i64> @shufflodector_pack_od_v2i64(<2 x i64> %a, <2 x i64> %b) {
9696
; CHECK-LABEL: shufflodector_pack_od_v2i64:
9797
; CHECK: # %bb.0:
98-
; CHECK-NEXT: vpackod.d $vr0, $vr1, $vr0
98+
; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 13
9999
; CHECK-NEXT: ret
100100
%c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
101101
ret <2 x i64> %c
@@ -115,7 +115,7 @@ define <4 x float> @shufflodector_pack_od_v4f32(<4 x float> %a, <4 x float> %b)
115115
define <2 x double> @shufflodector_pack_od_v2f64(<2 x double> %a, <2 x double> %b) {
116116
; CHECK-LABEL: shufflodector_pack_od_v2f64:
117117
; CHECK: # %bb.0:
118-
; CHECK-NEXT: vpackod.d $vr0, $vr1, $vr0
118+
; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 13
119119
; CHECK-NEXT: ret
120120
%c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
121121
ret <2 x double> %c

llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll

+2-8
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,7 @@ define <4 x i32> @shufflevector_v4i32(<4 x i32> %a, <4 x i32> %b) {
4242
define <2 x i64> @shufflevector_v2i64(<2 x i64> %a, <2 x i64> %b) {
4343
; CHECK-LABEL: shufflevector_v2i64:
4444
; CHECK: # %bb.0:
45-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
46-
; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI3_0)
47-
; CHECK-NEXT: vshuf.d $vr2, $vr1, $vr0
48-
; CHECK-NEXT: vori.b $vr0, $vr2, 0
45+
; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 12
4946
; CHECK-NEXT: ret
5047
%c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
5148
ret <2 x i64> %c
@@ -68,10 +65,7 @@ define <4 x float> @shufflevector_v4f32(<4 x float> %a, <4 x float> %b) {
6865
define <2 x double> @shufflevector_v2f64(<2 x double> %a, <2 x double> %b) {
6966
; CHECK-LABEL: shufflevector_v2f64:
7067
; CHECK: # %bb.0:
71-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
72-
; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI5_0)
73-
; CHECK-NEXT: vshuf.d $vr2, $vr1, $vr0
74-
; CHECK-NEXT: vori.b $vr0, $vr2, 0
68+
; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 12
7569
; CHECK-NEXT: ret
7670
%c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
7771
ret <2 x double> %c

llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll

+24-4
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
22
; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
33

4-
;; vilvh.b
4+
;; vshuf4i.b
55
define <16 x i8> @shufflevector_vshuf4i_v16i8(<16 x i8> %a, <16 x i8> %b) {
66
; CHECK-LABEL: shufflevector_vshuf4i_v16i8:
77
; CHECK: # %bb.0:
@@ -11,7 +11,7 @@ define <16 x i8> @shufflevector_vshuf4i_v16i8(<16 x i8> %a, <16 x i8> %b) {
1111
ret <16 x i8> %c
1212
}
1313

14-
;; vilvh.h
14+
;; vshuf4i.h
1515
define <8 x i16> @shufflevector_vshuf4i_v8i4(<8 x i16> %a, <8 x i16> %b) {
1616
; CHECK-LABEL: shufflevector_vshuf4i_v8i4:
1717
; CHECK: # %bb.0:
@@ -21,7 +21,7 @@ define <8 x i16> @shufflevector_vshuf4i_v8i4(<8 x i16> %a, <8 x i16> %b) {
2121
ret <8 x i16> %c
2222
}
2323

24-
;; vilvh.w
24+
;; vshuf4i.w
2525
define <4 x i32> @shufflevector_vshuf4i_v4i32(<4 x i32> %a, <4 x i32> %b) {
2626
; CHECK-LABEL: shufflevector_vshuf4i_v4i32:
2727
; CHECK: # %bb.0:
@@ -31,7 +31,7 @@ define <4 x i32> @shufflevector_vshuf4i_v4i32(<4 x i32> %a, <4 x i32> %b) {
3131
ret <4 x i32> %c
3232
}
3333

34-
;; vilvh.w
34+
;; vshuf4i.w
3535
define <4 x float> @shufflevector_vshuf4i_v4f32(<4 x float> %a, <4 x float> %b) {
3636
; CHECK-LABEL: shufflevector_vshuf4i_v4f32:
3737
; CHECK: # %bb.0:
@@ -40,3 +40,23 @@ define <4 x float> @shufflevector_vshuf4i_v4f32(<4 x float> %a, <4 x float> %b)
4040
%c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
4141
ret <4 x float> %c
4242
}
43+
44+
;; vshuf4i.d
45+
define <2 x i64> @shufflevector_vshuf4i_v2d64(<2 x i64> %a, <2 x i64> %b) {
46+
; CHECK-LABEL: shufflevector_vshuf4i_v2d64:
47+
; CHECK: # %bb.0:
48+
; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 9
49+
; CHECK-NEXT: ret
50+
%c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
51+
ret <2 x i64> %c
52+
}
53+
54+
;; vshuf4i.d
55+
define <2 x double> @shufflevector_vshuf4i_v2f64(<2 x double> %a, <2 x double> %b) {
56+
; CHECK-LABEL: shufflevector_vshuf4i_v2f64:
57+
; CHECK: # %bb.0:
58+
; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 9
59+
; CHECK-NEXT: ret
60+
%c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 2>
61+
ret <2 x double> %c
62+
}

llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll

+3-9
Original file line numberDiff line numberDiff line change
@@ -103,9 +103,7 @@ define <4 x i32> @byte_rotate_v4i32_3(<4 x i32> %a) nounwind {
103103
define <2 x i64> @byte_rotate_v2i64_1(<2 x i64> %a, <2 x i64> %b) nounwind {
104104
; CHECK-LABEL: byte_rotate_v2i64_1:
105105
; CHECK: # %bb.0:
106-
; CHECK-NEXT: vbsrl.v $vr1, $vr1, 8
107-
; CHECK-NEXT: vbsll.v $vr0, $vr0, 8
108-
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
106+
; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 3
109107
; CHECK-NEXT: ret
110108
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
111109
ret <2 x i64> %shuffle
@@ -114,9 +112,7 @@ define <2 x i64> @byte_rotate_v2i64_1(<2 x i64> %a, <2 x i64> %b) nounwind {
114112
define <2 x i64> @byte_rotate_v2i64_2(<2 x i64> %a, <2 x i64> %b) nounwind {
115113
; CHECK-LABEL: byte_rotate_v2i64_2:
116114
; CHECK: # %bb.0:
117-
; CHECK-NEXT: vbsrl.v $vr0, $vr0, 8
118-
; CHECK-NEXT: vbsll.v $vr1, $vr1, 8
119-
; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
115+
; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 9
120116
; CHECK-NEXT: ret
121117
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
122118
ret <2 x i64> %shuffle
@@ -125,9 +121,7 @@ define <2 x i64> @byte_rotate_v2i64_2(<2 x i64> %a, <2 x i64> %b) nounwind {
125121
define <2 x i64> @byte_rotate_v2i64_3(<2 x i64> %a) nounwind {
126122
; CHECK-LABEL: byte_rotate_v2i64_3:
127123
; CHECK: # %bb.0:
128-
; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
129-
; CHECK-NEXT: vbsll.v $vr0, $vr0, 8
130-
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
124+
; CHECK-NEXT: vshuf4i.d $vr0, $vr0, 1
131125
; CHECK-NEXT: ret
132126
%shuffle = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
133127
ret <2 x i64> %shuffle

llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll

+3-2
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ define <2 x i64> @shuffle_2i64_vbsll_v_8(<2 x i64> %a) nounwind {
230230
; CHECK-LABEL: shuffle_2i64_vbsll_v_8:
231231
; CHECK: # %bb.0:
232232
; CHECK-NEXT: vrepli.b $vr1, 0
233-
; CHECK-NEXT: vpackev.d $vr0, $vr0, $vr1
233+
; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 2
234234
; CHECK-NEXT: ret
235235
%shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
236236
ret <2 x i64> %shuffle
@@ -464,7 +464,8 @@ define <4 x i32> @shuffle_4i32_vbsrl_v_12(<4 x i32> %a) nounwind {
464464
define <2 x i64> @shuffle_2i64_vbsrl_v_8(<2 x i64> %a) nounwind {
465465
; CHECK-LABEL: shuffle_2i64_vbsrl_v_8:
466466
; CHECK: # %bb.0:
467-
; CHECK-NEXT: vbsrl.v $vr0, $vr0, 8
467+
; CHECK-NEXT: vrepli.b $vr1, 0
468+
; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 9
468469
; CHECK-NEXT: ret
469470
%shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 2>
470471
ret <2 x i64> %shuffle

0 commit comments

Comments
 (0)