Commit dddfff4

[LLVM][InstCombine] Enable constant folding for SVE asr, lsl and lsr intrinsics.
The SVE intrinsics support shift amounts greater than or equal to the element type's bit length, essentially saturating the shift amount to the bit length. However, the IR shift instructions treat this as undefined behaviour that results in poison. To account for this we now ignore the result of simplifications that yield poison. This allows existing code to be used to simplify the shifts, but it does mean:

1) We don't simplify cases like "svlsl_s32(x, splat(32)) => 0".
2) We no longer constant fold cases like "svadd(poison, X) => poison".

For (1) we'd need dedicated target-specific combines anyway, and (2) is not an expected use case, so I believe the benefit of reusing the existing simplification functions for the shifts outweighs the loss of poison propagation.
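To illustrate the semantic gap, here is a minimal sketch (not part of the commit; the function names are invented) contrasting the two behaviours for a left shift whose amount equals the element bit length:

declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)

; Fully defined: active lanes become 0 because the intrinsic saturates the
; shift amount; inactive lanes keep the corresponding lanes of %x.
define <vscale x 4 x i32> @intrinsic_shift_by_32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x, <vscale x 4 x i32> splat (i32 32))
  ret <vscale x 4 x i32> %r
}

; Poison: shl by an amount >= the element bit length yields poison in IR.
define <vscale x 4 x i32> @ir_shift_by_32(<vscale x 4 x i32> %x) {
  %r = shl <vscale x 4 x i32> %x, splat (i32 32)
  ret <vscale x 4 x i32> %r
}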
1 parent 63d5e64 commit dddfff4

File tree

2 files changed: +38 -25 lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

+20 -4
@@ -1327,11 +1327,14 @@ static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) {
   case Intrinsic::aarch64_sve_umulh:
     return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_umulh_u);
   case Intrinsic::aarch64_sve_asr:
-    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_asr_u);
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_asr_u)
+        .setMatchingIROpcode(Instruction::AShr);
   case Intrinsic::aarch64_sve_lsl:
-    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_lsl_u);
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_lsl_u)
+        .setMatchingIROpcode(Instruction::Shl);
   case Intrinsic::aarch64_sve_lsr:
-    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_lsr_u);
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_lsr_u)
+        .setMatchingIROpcode(Instruction::LShr);
   case Intrinsic::aarch64_sve_and:
     return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_and_u)
         .setMatchingIROpcode(Instruction::And);
@@ -1354,6 +1357,9 @@ static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) {
   case Intrinsic::aarch64_sve_and_u:
     return SVEIntrinsicInfo::defaultUndefOp().setMatchingIROpcode(
         Instruction::And);
+  case Intrinsic::aarch64_sve_asr_u:
+    return SVEIntrinsicInfo::defaultUndefOp().setMatchingIROpcode(
+        Instruction::AShr);
   case Intrinsic::aarch64_sve_eor_u:
     return SVEIntrinsicInfo::defaultUndefOp().setMatchingIROpcode(
         Instruction::Xor);
@@ -1369,6 +1375,12 @@ static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) {
   case Intrinsic::aarch64_sve_fsub_u:
     return SVEIntrinsicInfo::defaultUndefOp().setMatchingIROpcode(
         Instruction::FSub);
+  case Intrinsic::aarch64_sve_lsl_u:
+    return SVEIntrinsicInfo::defaultUndefOp().setMatchingIROpcode(
+        Instruction::Shl);
+  case Intrinsic::aarch64_sve_lsr_u:
+    return SVEIntrinsicInfo::defaultUndefOp().setMatchingIROpcode(
+        Instruction::LShr);
   case Intrinsic::aarch64_sve_mul_u:
     return SVEIntrinsicInfo::defaultUndefOp().setMatchingIROpcode(
         Instruction::Mul);
@@ -1571,7 +1583,11 @@ simplifySVEIntrinsicBinOp(InstCombiner &IC, IntrinsicInst &II,
   else
     SimpleII = simplifyBinOp(Opc, Op1, Op2, DL);
 
-  if (!SimpleII)
+  // An SVE intrinsic's result is always defined. However, this is not the case
+  // for its equivalent IR instruction (e.g. when shifting by an amount more
+  // than the data's bitwidth). Simplifications to an undefined result must be
+  // ignored to preserve the intrinsic's expected behaviour.
+  if (!SimpleII || isa<UndefValue>(SimpleII))
     return std::nullopt;
 
   if (IInfo.inactiveLanesAreNotDefined())
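The guard relies on PoisonValue being a subclass of UndefValue, so a single isa<UndefValue> check rejects both undef and poison results. One consequence, per point (2) of the commit message, is that poison is no longer propagated through the intrinsics. A minimal sketch of that case (not from the patch; the function name is invented):

declare <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)

define <vscale x 4 x i32> @svadd_poison_operand(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
  ; simplifyBinOp folds the equivalent "add poison, %x" to poison, but the
  ; new isa<UndefValue> check discards that result, so the call is kept.
  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> poison, <vscale x 4 x i32> %x)
  ret <vscale x 4 x i32> %r
}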

llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-shift.ll

+18 -21
@@ -6,8 +6,7 @@ target triple = "aarch64-unknown-linux-gnu"
 define <vscale x 16 x i8> @constant_asr_i8_shift_by_0(<vscale x 16 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 16 x i8> @constant_asr_i8_shift_by_0(
 ; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 7), <vscale x 16 x i8> zeroinitializer)
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[R]]
+; CHECK-NEXT:    ret <vscale x 16 x i8> splat (i8 7)
 ;
   %r = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 7), <vscale x 16 x i8> splat (i8 0))
   ret <vscale x 16 x i8> %r
@@ -16,7 +15,7 @@ define <vscale x 16 x i8> @constant_asr_i8_shift_by_0(<vscale x 16 x i1> %pg) #0 {
 define <vscale x 16 x i8> @constant_asr_i8_shift_by_1(<vscale x 16 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 16 x i8> @constant_asr_i8_shift_by_1(
 ; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 -63), <vscale x 16 x i8> splat (i8 1))
+; CHECK-NEXT:    [[R:%.*]] = select <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 -32), <vscale x 16 x i8> splat (i8 -63)
 ; CHECK-NEXT:    ret <vscale x 16 x i8> [[R]]
 ;
   %r = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 193), <vscale x 16 x i8> splat (i8 1))
@@ -27,7 +26,7 @@ define <vscale x 16 x i8> @constant_asr_i8_shift_by_1(<vscale x 16 x i1> %pg) #0 {
 define <vscale x 16 x i8> @constant_asr_i8_shift_by_7(<vscale x 16 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 16 x i8> @constant_asr_i8_shift_by_7(
 ; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 -128), <vscale x 16 x i8> splat (i8 7))
+; CHECK-NEXT:    [[R:%.*]] = select <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 -1), <vscale x 16 x i8> splat (i8 -128)
 ; CHECK-NEXT:    ret <vscale x 16 x i8> [[R]]
 ;
   %r = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 128), <vscale x 16 x i8> splat (i8 7))
@@ -50,7 +49,7 @@ define <vscale x 16 x i8> @constant_asr_i8_shift_by_8(<vscale x 16 x i1> %pg) #0 {
 define <vscale x 8 x i16> @constant_asr_i16_shift_by_15(<vscale x 8 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 8 x i16> @constant_asr_i16_shift_by_15(
 ; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> splat (i16 -32768), <vscale x 8 x i16> splat (i16 15))
+; CHECK-NEXT:    [[R:%.*]] = select <vscale x 8 x i1> [[PG]], <vscale x 8 x i16> splat (i16 -1), <vscale x 8 x i16> splat (i16 -32768)
 ; CHECK-NEXT:    ret <vscale x 8 x i16> [[R]]
 ;
   %r = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat (i16 32768), <vscale x 8 x i16> splat (i16 15))
@@ -73,7 +72,7 @@ define <vscale x 8 x i16> @constant_asr_i16_shift_by_16(<vscale x 8 x i1> %pg) #0 {
 define <vscale x 4 x i32> @constant_asr_i32_shift_by_31(<vscale x 4 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 4 x i32> @constant_asr_i32_shift_by_31(
 ; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 -2147483648), <vscale x 4 x i32> splat (i32 31))
+; CHECK-NEXT:    [[R:%.*]] = select <vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 -1), <vscale x 4 x i32> splat (i32 -2147483648)
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
 ;
   %r = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 2147483648), <vscale x 4 x i32> splat (i32 31))
@@ -96,7 +95,7 @@ define <vscale x 4 x i32> @constant_asr_i32_shift_by_32(<vscale x 4 x i1> %pg) #0 {
 define <vscale x 2 x i64> @constant_asr_i64_shift_by_63(<vscale x 2 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 2 x i64> @constant_asr_i64_shift_by_63(
 ; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 -9223372036854775808), <vscale x 2 x i64> splat (i64 63))
+; CHECK-NEXT:    [[R:%.*]] = select <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 -1), <vscale x 2 x i64> splat (i64 -9223372036854775808)
 ; CHECK-NEXT:    ret <vscale x 2 x i64> [[R]]
 ;
   %r = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat (i64 9223372036854775808), <vscale x 2 x i64> splat (i64 63))
@@ -118,8 +117,7 @@ define <vscale x 2 x i64> @constant_asr_i64_shift_by_64(<vscale x 2 x i1> %pg) #0 {
 define <vscale x 16 x i8> @constant_lsl_i8_shift_by_0(<vscale x 16 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 16 x i8> @constant_lsl_i8_shift_by_0(
 ; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 7), <vscale x 16 x i8> zeroinitializer)
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[R]]
+; CHECK-NEXT:    ret <vscale x 16 x i8> splat (i8 7)
 ;
   %r = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 7), <vscale x 16 x i8> splat (i8 0))
   ret <vscale x 16 x i8> %r
@@ -128,7 +126,7 @@ define <vscale x 16 x i8> @constant_lsl_i8_shift_by_0(<vscale x 16 x i1> %pg) #0 {
 define <vscale x 16 x i8> @constant_lsl_i8_shift_by_1(<vscale x 16 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 16 x i8> @constant_lsl_i8_shift_by_1(
 ; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 -63), <vscale x 16 x i8> splat (i8 1))
+; CHECK-NEXT:    [[R:%.*]] = select <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 -126), <vscale x 16 x i8> splat (i8 -63)
 ; CHECK-NEXT:    ret <vscale x 16 x i8> [[R]]
 ;
   %r = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 193), <vscale x 16 x i8> splat (i8 1))
@@ -139,7 +137,7 @@ define <vscale x 16 x i8> @constant_lsl_i8_shift_by_1(<vscale x 16 x i1> %pg) #0 {
 define <vscale x 16 x i8> @constant_lsl_i8_shift_by_7(<vscale x 16 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 16 x i8> @constant_lsl_i8_shift_by_7(
 ; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 1), <vscale x 16 x i8> splat (i8 7))
+; CHECK-NEXT:    [[R:%.*]] = select <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 -128), <vscale x 16 x i8> splat (i8 1)
 ; CHECK-NEXT:    ret <vscale x 16 x i8> [[R]]
 ;
   %r = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 1), <vscale x 16 x i8> splat (i8 7))
@@ -161,7 +159,7 @@ define <vscale x 16 x i8> @constant_lsl_i8_shift_by_8(<vscale x 16 x i1> %pg) #0 {
 define <vscale x 8 x i16> @constant_lsl_i16_shift_by_15(<vscale x 8 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 8 x i16> @constant_lsl_i16_shift_by_15(
 ; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> splat (i16 1), <vscale x 8 x i16> splat (i16 15))
+; CHECK-NEXT:    [[R:%.*]] = select <vscale x 8 x i1> [[PG]], <vscale x 8 x i16> splat (i16 -32768), <vscale x 8 x i16> splat (i16 1)
 ; CHECK-NEXT:    ret <vscale x 8 x i16> [[R]]
 ;
   %r = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat (i16 1), <vscale x 8 x i16> splat (i16 15))
@@ -183,7 +181,7 @@ define <vscale x 8 x i16> @constant_lsl_i16_shift_by_16(<vscale x 8 x i1> %pg) #0 {
 define <vscale x 4 x i32> @constant_lsl_i32_shift_by_31(<vscale x 4 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 4 x i32> @constant_lsl_i32_shift_by_31(
 ; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 1), <vscale x 4 x i32> splat (i32 31))
+; CHECK-NEXT:    [[R:%.*]] = select <vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 -2147483648), <vscale x 4 x i32> splat (i32 1)
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
 ;
   %r = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 1), <vscale x 4 x i32> splat (i32 31))
@@ -205,7 +203,7 @@ define <vscale x 4 x i32> @constant_lsl_i32_shift_by_32(<vscale x 4 x i1> %pg) #0 {
 define <vscale x 2 x i64> @constant_lsl_i64_shift_by_63(<vscale x 2 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 2 x i64> @constant_lsl_i64_shift_by_63(
 ; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 1), <vscale x 2 x i64> splat (i64 63))
+; CHECK-NEXT:    [[R:%.*]] = select <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 -9223372036854775808), <vscale x 2 x i64> splat (i64 1)
 ; CHECK-NEXT:    ret <vscale x 2 x i64> [[R]]
 ;
   %r = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat (i64 1), <vscale x 2 x i64> splat (i64 63))
@@ -226,8 +224,7 @@ define <vscale x 2 x i64> @constant_lsl_i64_shift_by_64(<vscale x 2 x i1> %pg) #0 {
 define <vscale x 16 x i8> @constant_lsr_i8_shift_by_0(<vscale x 16 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 16 x i8> @constant_lsr_i8_shift_by_0(
 ; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 7), <vscale x 16 x i8> zeroinitializer)
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[R]]
+; CHECK-NEXT:    ret <vscale x 16 x i8> splat (i8 7)
 ;
   %r = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 7), <vscale x 16 x i8> splat (i8 0))
   ret <vscale x 16 x i8> %r
@@ -236,7 +233,7 @@ define <vscale x 16 x i8> @constant_lsr_i8_shift_by_0(<vscale x 16 x i1> %pg) #0 {
 define <vscale x 16 x i8> @constant_lsr_i8_shift_by_1(<vscale x 16 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 16 x i8> @constant_lsr_i8_shift_by_1(
 ; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 -63), <vscale x 16 x i8> splat (i8 1))
+; CHECK-NEXT:    [[R:%.*]] = select <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 96), <vscale x 16 x i8> splat (i8 -63)
 ; CHECK-NEXT:    ret <vscale x 16 x i8> [[R]]
 ;
   %r = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 193), <vscale x 16 x i8> splat (i8 1))
@@ -247,7 +244,7 @@ define <vscale x 16 x i8> @constant_lsr_i8_shift_by_1(<vscale x 16 x i1> %pg) #0 {
 define <vscale x 16 x i8> @constant_lsr_i8_shift_by_7(<vscale x 16 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 16 x i8> @constant_lsr_i8_shift_by_7(
 ; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 -128), <vscale x 16 x i8> splat (i8 7))
+; CHECK-NEXT:    [[R:%.*]] = select <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 1), <vscale x 16 x i8> splat (i8 -128)
 ; CHECK-NEXT:    ret <vscale x 16 x i8> [[R]]
 ;
   %r = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 128), <vscale x 16 x i8> splat (i8 7))
@@ -270,7 +267,7 @@ define <vscale x 16 x i8> @constant_lsr_i8_shift_by_8(<vscale x 16 x i1> %pg) #0 {
 define <vscale x 8 x i16> @constant_lsr_i16_shift_by_15(<vscale x 8 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 8 x i16> @constant_lsr_i16_shift_by_15(
 ; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> splat (i16 -32768), <vscale x 8 x i16> splat (i16 15))
+; CHECK-NEXT:    [[R:%.*]] = select <vscale x 8 x i1> [[PG]], <vscale x 8 x i16> splat (i16 1), <vscale x 8 x i16> splat (i16 -32768)
 ; CHECK-NEXT:    ret <vscale x 8 x i16> [[R]]
 ;
   %r = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat (i16 32768), <vscale x 8 x i16> splat (i16 15))
@@ -293,7 +290,7 @@ define <vscale x 8 x i16> @constant_lsr_i16_shift_by_16(<vscale x 8 x i1> %pg) #0 {
 define <vscale x 4 x i32> @constant_lsr_i32_shift_by_31(<vscale x 4 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 4 x i32> @constant_lsr_i32_shift_by_31(
 ; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 -2147483648), <vscale x 4 x i32> splat (i32 31))
+; CHECK-NEXT:    [[R:%.*]] = select <vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 1), <vscale x 4 x i32> splat (i32 -2147483648)
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
 ;
   %r = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 2147483648), <vscale x 4 x i32> splat (i32 31))
@@ -316,7 +313,7 @@ define <vscale x 4 x i32> @constant_lsr_i32_shift_by_32(<vscale x 4 x i1> %pg) #0 {
 define <vscale x 2 x i64> @constant_lsr_i64_shift_by_63(<vscale x 2 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 2 x i64> @constant_lsr_i64_shift_by_63(
 ; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 -9223372036854775808), <vscale x 2 x i64> splat (i64 63))
+; CHECK-NEXT:    [[R:%.*]] = select <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 1), <vscale x 2 x i64> splat (i64 -9223372036854775808)
 ; CHECK-NEXT:    ret <vscale x 2 x i64> [[R]]
 ;
   %r = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat (i64 9223372036854775808), <vscale x 2 x i64> splat (i64 63))
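
Note that the shift-by-bit-length tests (shift_by_8, shift_by_16, shift_by_32 and shift_by_64) appear above only as hunk context: they are deliberately left unsimplified, matching point (1) of the commit message. A minimal sketch of that case (not from the diff; the function name is invented):

declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)

define <vscale x 16 x i8> @asr_by_element_width(<vscale x 16 x i1> %pg) {
  ; The IR equivalent "ashr ..., splat (i8 8)" simplifies to poison, so the
  ; fold is rejected and the intrinsic call survives InstCombine unchanged.
  %r = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 7), <vscale x 16 x i8> splat (i8 8))
  ret <vscale x 16 x i8> %r
}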
