@@ -207,6 +207,22 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @vpload_factor3(ptr %ptr) {
ret {<4 x i32>, <4 x i32>, <4 x i32>} %res2
}

+ ; We only extract some of the fields.
+ define {<4 x i32>, <4 x i32>} @vpload_factor3_partial(ptr %ptr) {
+ ; CHECK-LABEL: vpload_factor3_partial:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+ ; CHECK-NEXT: vlseg3e32.v v7, (a0)
+ ; CHECK-NEXT: vmv1r.v v8, v7
+ ; CHECK-NEXT: ret
+ %interleaved.vec = tail call <12 x i32> @llvm.vp.load.v12i32.p0(ptr %ptr, <12 x i1> splat (i1 true), i32 12)
+ %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+ %v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
+ %res0 = insertvalue {<4 x i32>, <4 x i32>} poison, <4 x i32> %v0, 0
+ %res1 = insertvalue {<4 x i32>, <4 x i32>} %res0, <4 x i32> %v2, 1
+ ret {<4 x i32>, <4 x i32>} %res1
+ }
+

; Load a larger vector but only deinterleave a subset of the elements.
define {<4 x i32>, <4 x i32>, <4 x i32>} @vpload_factor3_v16i32(ptr %ptr) {
; CHECK-LABEL: vpload_factor3_v16i32:
@@ -224,6 +240,7 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @vpload_factor3_v16i32(ptr %ptr) {
ret {<4 x i32>, <4 x i32>, <4 x i32>} %res2
}

+ ; Make sure the mask is propagated.
define {<4 x i32>, <4 x i32>, <4 x i32>} @vpload_factor3_mask(ptr %ptr) {
; CHECK-LABEL: vpload_factor3_mask:
; CHECK: # %bb.0:
@@ -241,6 +258,24 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @vpload_factor3_mask(ptr %ptr) {
ret {<4 x i32>, <4 x i32>, <4 x i32>} %res2
}

+ ; Poison/undef in the shuffle mask shouldn't affect anything.
+ define {<4 x i32>, <4 x i32>, <4 x i32>} @vpload_factor3_poison_shufflemask(ptr %ptr) {
+ ; CHECK-LABEL: vpload_factor3_poison_shufflemask:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+ ; CHECK-NEXT: vmv.v.i v0, 10
+ ; CHECK-NEXT: vlseg3e32.v v8, (a0), v0.t
+ ; CHECK-NEXT: ret
+ %interleaved.vec = tail call <12 x i32> @llvm.vp.load.v12i32.p0(ptr %ptr, <12 x i1> <i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1>, i32 12)
+ %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+ %v1 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 poison, i32 10>
+ %v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
+ %res0 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
+ %res1 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
+ %res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2
+ ret {<4 x i32>, <4 x i32>, <4 x i32>} %res2
+ }
+

define {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} @vpload_factor4(ptr %ptr) {
; CHECK-LABEL: vpload_factor4:
; CHECK: # %bb.0:
@@ -367,8 +402,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: li a2, 32
; RV32-NEXT: lui a3, 12
; RV32-NEXT: lui a6, 12291
- ; RV32-NEXT: lui a7, %hi(.LCPI17_0)
- ; RV32-NEXT: addi a7, a7, %lo(.LCPI17_0)
+ ; RV32-NEXT: lui a7, %hi(.LCPI19_0)
+ ; RV32-NEXT: addi a7, a7, %lo(.LCPI19_0)
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vle32.v v24, (a5)
; RV32-NEXT: vmv.s.x v0, a3
@@ -453,12 +488,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
; RV32-NEXT: lui a7, 49164
- ; RV32-NEXT: lui a1, %hi(.LCPI17_1)
- ; RV32-NEXT: addi a1, a1, %lo(.LCPI17_1)
+ ; RV32-NEXT: lui a1, %hi(.LCPI19_1)
+ ; RV32-NEXT: addi a1, a1, %lo(.LCPI19_1)
; RV32-NEXT: lui t2, 3
; RV32-NEXT: lui t1, 196656
- ; RV32-NEXT: lui a4, %hi(.LCPI17_3)
- ; RV32-NEXT: addi a4, a4, %lo(.LCPI17_3)
+ ; RV32-NEXT: lui a4, %hi(.LCPI19_3)
+ ; RV32-NEXT: addi a4, a4, %lo(.LCPI19_3)
; RV32-NEXT: lui t0, 786624
; RV32-NEXT: li a5, 48
; RV32-NEXT: lui a6, 768
@@ -637,8 +672,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vrgatherei16.vv v24, v8, v2
- ; RV32-NEXT: lui a1, %hi(.LCPI17_2)
- ; RV32-NEXT: addi a1, a1, %lo(.LCPI17_2)
+ ; RV32-NEXT: lui a1, %hi(.LCPI19_2)
+ ; RV32-NEXT: addi a1, a1, %lo(.LCPI19_2)
; RV32-NEXT: lui a3, 3073
; RV32-NEXT: addi a3, a3, -1024
; RV32-NEXT: vmv.s.x v0, a3
@@ -702,16 +737,16 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vrgatherei16.vv v28, v8, v3
; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
; RV32-NEXT: vmv.v.v v28, v24
- ; RV32-NEXT: lui a1, %hi(.LCPI17_4)
- ; RV32-NEXT: addi a1, a1, %lo(.LCPI17_4)
- ; RV32-NEXT: lui a2, %hi(.LCPI17_5)
- ; RV32-NEXT: addi a2, a2, %lo(.LCPI17_5)
+ ; RV32-NEXT: lui a1, %hi(.LCPI19_4)
+ ; RV32-NEXT: addi a1, a1, %lo(.LCPI19_4)
+ ; RV32-NEXT: lui a2, %hi(.LCPI19_5)
+ ; RV32-NEXT: addi a2, a2, %lo(.LCPI19_5)
; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV32-NEXT: vle16.v v24, (a2)
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vle16.v v8, (a1)
- ; RV32-NEXT: lui a1, %hi(.LCPI17_7)
- ; RV32-NEXT: addi a1, a1, %lo(.LCPI17_7)
+ ; RV32-NEXT: lui a1, %hi(.LCPI19_7)
+ ; RV32-NEXT: addi a1, a1, %lo(.LCPI19_7)
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vle16.v v10, (a1)
; RV32-NEXT: csrr a1, vlenb
@@ -739,14 +774,14 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vl8r.v v0, (a1) # vscale x 64-byte Folded Reload
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vrgatherei16.vv v16, v0, v10
- ; RV32-NEXT: lui a1, %hi(.LCPI17_6)
- ; RV32-NEXT: addi a1, a1, %lo(.LCPI17_6)
- ; RV32-NEXT: lui a2, %hi(.LCPI17_8)
- ; RV32-NEXT: addi a2, a2, %lo(.LCPI17_8)
+ ; RV32-NEXT: lui a1, %hi(.LCPI19_6)
+ ; RV32-NEXT: addi a1, a1, %lo(.LCPI19_6)
+ ; RV32-NEXT: lui a2, %hi(.LCPI19_8)
+ ; RV32-NEXT: addi a2, a2, %lo(.LCPI19_8)
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vle16.v v4, (a1)
- ; RV32-NEXT: lui a1, %hi(.LCPI17_9)
- ; RV32-NEXT: addi a1, a1, %lo(.LCPI17_9)
+ ; RV32-NEXT: lui a1, %hi(.LCPI19_9)
+ ; RV32-NEXT: addi a1, a1, %lo(.LCPI19_9)
; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV32-NEXT: vle16.v v6, (a1)
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
@@ -833,8 +868,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: li a4, 128
; RV64-NEXT: lui a1, 1
; RV64-NEXT: vle64.v v8, (a3)
- ; RV64-NEXT: lui a3, %hi(.LCPI17_0)
- ; RV64-NEXT: addi a3, a3, %lo(.LCPI17_0)
+ ; RV64-NEXT: lui a3, %hi(.LCPI19_0)
+ ; RV64-NEXT: addi a3, a3, %lo(.LCPI19_0)
; RV64-NEXT: vmv.s.x v0, a4
; RV64-NEXT: csrr a4, vlenb
; RV64-NEXT: li a5, 61
@@ -1022,8 +1057,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT: vslideup.vi v12, v16, 1, v0.t
- ; RV64-NEXT: lui a2, %hi(.LCPI17_1)
- ; RV64-NEXT: addi a2, a2, %lo(.LCPI17_1)
+ ; RV64-NEXT: lui a2, %hi(.LCPI19_1)
+ ; RV64-NEXT: addi a2, a2, %lo(.LCPI19_1)
; RV64-NEXT: li a3, 192
; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV64-NEXT: vle16.v v6, (a2)
@@ -1057,8 +1092,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: vrgatherei16.vv v24, v16, v6
; RV64-NEXT: addi a2, sp, 16
; RV64-NEXT: vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
- ; RV64-NEXT: lui a2, %hi(.LCPI17_2)
- ; RV64-NEXT: addi a2, a2, %lo(.LCPI17_2)
+ ; RV64-NEXT: lui a2, %hi(.LCPI19_2)
+ ; RV64-NEXT: addi a2, a2, %lo(.LCPI19_2)
; RV64-NEXT: li a3, 1040
; RV64-NEXT: vmv.s.x v0, a3
; RV64-NEXT: addi a1, a1, -2016
@@ -1142,12 +1177,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
- ; RV64-NEXT: lui a1, %hi(.LCPI17_3)
- ; RV64-NEXT: addi a1, a1, %lo(.LCPI17_3)
+ ; RV64-NEXT: lui a1, %hi(.LCPI19_3)
+ ; RV64-NEXT: addi a1, a1, %lo(.LCPI19_3)
; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV64-NEXT: vle16.v v20, (a1)
- ; RV64-NEXT: lui a1, %hi(.LCPI17_4)
- ; RV64-NEXT: addi a1, a1, %lo(.LCPI17_4)
+ ; RV64-NEXT: lui a1, %hi(.LCPI19_4)
+ ; RV64-NEXT: addi a1, a1, %lo(.LCPI19_4)
; RV64-NEXT: vle16.v v8, (a1)
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a2, 77
@@ -1198,8 +1233,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: vl2r.v v8, (a1) # vscale x 16-byte Folded Reload
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vrgatherei16.vv v0, v16, v8
- ; RV64-NEXT: lui a1, %hi(.LCPI17_5)
- ; RV64-NEXT: addi a1, a1, %lo(.LCPI17_5)
+ ; RV64-NEXT: lui a1, %hi(.LCPI19_5)
+ ; RV64-NEXT: addi a1, a1, %lo(.LCPI19_5)
; RV64-NEXT: vle16.v v20, (a1)
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a2, 61
@@ -1643,8 +1678,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_mask(ptr %ptr) {
; RV32-NEXT: vle32.v v12, (a0), v0.t
; RV32-NEXT: li a0, 36
; RV32-NEXT: vmv.s.x v20, a1
- ; RV32-NEXT: lui a1, %hi(.LCPI42_0)
- ; RV32-NEXT: addi a1, a1, %lo(.LCPI42_0)
+ ; RV32-NEXT: lui a1, %hi(.LCPI44_0)
+ ; RV32-NEXT: addi a1, a1, %lo(.LCPI44_0)
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vle16.v v21, (a1)
; RV32-NEXT: vcompress.vm v8, v12, v11
@@ -1719,8 +1754,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_evl(ptr %ptr) {
; RV32-NEXT: vmv.s.x v10, a0
; RV32-NEXT: li a0, 146
; RV32-NEXT: vmv.s.x v11, a0
- ; RV32-NEXT: lui a0, %hi(.LCPI43_0)
- ; RV32-NEXT: addi a0, a0, %lo(.LCPI43_0)
+ ; RV32-NEXT: lui a0, %hi(.LCPI45_0)
+ ; RV32-NEXT: addi a0, a0, %lo(.LCPI45_0)
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vle16.v v20, (a0)
; RV32-NEXT: li a0, 36