@@ -994,37 +994,39 @@ static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
994
994
MVT VT, SDValue V1, SDValue V2,
995
995
SelectionDAG &DAG) {
996
996
997
- // When the size is less than 4, lower cost instructions may be used.
998
- if (Mask.size () < 4 )
999
- return SDValue ();
997
+ unsigned SubVecSize = 4 ;
998
+ if (VT == MVT::v2f64 || VT == MVT::v2i64 || VT == MVT::v4f64 ||
999
+ VT == MVT::v4i64) {
1000
+ SubVecSize = 2 ;
1001
+ }
1000
1002
1001
1003
int SubMask[4 ] = {-1 , -1 , -1 , -1 };
1002
- for (unsigned i = 0 ; i < 4 ; ++i) {
1003
- for (unsigned j = i; j < Mask.size (); j += 4 ) {
1004
- int Idx = Mask[j];
1004
+ for (unsigned i = 0 ; i < SubVecSize ; ++i) {
1005
+ for (unsigned j = i; j < Mask.size (); j += SubVecSize ) {
1006
+ int M = Mask[j];
1005
1007
1006
1008
// Convert from vector index to 4-element subvector index
1007
1009
// If an index refers to an element outside of the subvector then give up
1008
- if (Idx != -1 ) {
1009
- Idx -= 4 * (j / 4 );
1010
- if (Idx < 0 || Idx >= 4 )
1010
+ if (M != -1 ) {
1011
+ M -= 4 * (j / SubVecSize );
1012
+ if (M < 0 || M >= 4 )
1011
1013
return SDValue ();
1012
1014
}
1013
1015
1014
1016
// If the mask has an undef, replace it with the current index.
1015
1017
// Note that it might still be undef if the current index is also undef
1016
1018
if (SubMask[i] == -1 )
1017
- SubMask[i] = Idx ;
1019
+ SubMask[i] = M ;
1018
1020
// Check that non-undef values are the same as in the mask. If they
1019
1021
// aren't then give up
1020
- else if (Idx != -1 && Idx != SubMask[i])
1022
+ else if (M != -1 && M != SubMask[i])
1021
1023
return SDValue ();
1022
1024
}
1023
1025
}
1024
1026
1025
1027
// Calculate the immediate. Replace any remaining undefs with zero
1026
1028
APInt Imm (64 , 0 );
1027
- for (int i = 3 ; i >= 0 ; --i) {
1029
+ for (int i = SubVecSize - 1 ; i >= 0 ; --i) {
1028
1030
int Idx = SubMask[i];
1029
1031
1030
1032
if (Idx == -1 )
@@ -1034,6 +1036,12 @@ static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
1034
1036
Imm |= Idx & 0x3 ;
1035
1037
}
1036
1038
1039
+ // Return vshuf4i.d and xvshuf4i.d
1040
+ if (VT == MVT::v2f64 || VT == MVT::v2i64 || VT == MVT::v4f64 ||
1041
+ VT == MVT::v4i64)
1042
+ return DAG.getNode (LoongArchISD::VSHUF4I, DL, VT, V1, V2,
1043
+ DAG.getConstant (Imm, DL, MVT::i64));
1044
+
1037
1045
return DAG.getNode (LoongArchISD::VSHUF4I, DL, VT, V1,
1038
1046
DAG.getConstant (Imm, DL, MVT::i64));
1039
1047
}
@@ -1343,6 +1351,11 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1343
1351
/* V2 = V1; */
1344
1352
}
1345
1353
1354
+ if (VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) {
1355
+ if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I (DL, Mask, VT, V1, V2, DAG)))
1356
+ return Result;
1357
+ }
1358
+
1346
1359
// It is recommended not to change the pattern comparison order for better
1347
1360
// performance.
1348
1361
if ((Result = lowerVECTOR_SHUFFLE_VPACKEV (DL, Mask, VT, V1, V2, DAG)))
@@ -1413,10 +1426,6 @@ static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL,
1413
1426
// Thin wrapper around lowerVECTOR_SHUFFLE_VSHUF4I: every argument (DL, Mask,
// VT, V1, V2, DAG) is forwarded unchanged and the callee's result is returned
// as-is. No mask-size filtering is performed here; any rejection of a mask is
// left entirely to the callee.
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I (const SDLoc &DL, ArrayRef<int > Mask,
1414
1427
MVT VT, SDValue V1, SDValue V2,
1415
1428
SelectionDAG &DAG) {
1416
- // When the size is less than or equal to 4, lower cost instructions may be
1417
- // used.
1418
- if (Mask.size () <= 4 )
1419
- return SDValue ();
1420
1429
// Delegate directly to the shared VSHUF4I lowering.
return lowerVECTOR_SHUFFLE_VSHUF4I (DL, Mask, VT, V1, V2, DAG);
1421
1430
}
1422
1431
@@ -1784,6 +1793,11 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1784
1793
/* V2 = V1; */
1785
1794
}
1786
1795
1796
+ if (VT.SimpleTy == MVT::v4i64 || VT.SimpleTy == MVT::v4f64) {
1797
+ if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I (DL, NewMask, VT, V1, V2, DAG)))
1798
+ return Result;
1799
+ }
1800
+
1787
1801
// It is recommended not to change the pattern comparison order for better
1788
1802
// performance.
1789
1803
if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV (DL, NewMask, VT, V1, V2, DAG)))
0 commit comments