@@ -294,6 +294,31 @@ define {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} @vpload_factor4(ptr %ptr) {
   ret {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res3
 }
 
+; TODO: Add more tests for vp.load/store + (de)interleave intrinsics with fixed vectors.
+define {<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>} @vpload_factor4_intrinsics(ptr %ptr) {
+; CHECK-LABEL: vpload_factor4_intrinsics:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vlseg4e32.v v8, (a0)
+; CHECK-NEXT:    ret
+  %wide.masked.load = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 8)
+  %d0 = call { <4 x i32>, <4 x i32> } @llvm.vector.deinterleave2.v8i32(<8 x i32> %wide.masked.load)
+  %d0.0 = extractvalue { <4 x i32>, <4 x i32> } %d0, 0
+  %d0.1 = extractvalue { <4 x i32>, <4 x i32> } %d0, 1
+  %d1 = call { <2 x i32>, <2 x i32> } @llvm.vector.deinterleave2.v4i32(<4 x i32> %d0.0)
+  %t0 = extractvalue { <2 x i32>, <2 x i32> } %d1, 0
+  %t2 = extractvalue { <2 x i32>, <2 x i32> } %d1, 1
+  %d2 = call { <2 x i32>, <2 x i32> } @llvm.vector.deinterleave2.v4i32(<4 x i32> %d0.1)
+  %t1 = extractvalue { <2 x i32>, <2 x i32> } %d2, 0
+  %t3 = extractvalue { <2 x i32>, <2 x i32> } %d2, 1
+
+  %res0 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } poison, <2 x i32> %t0, 0
+  %res1 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res0, <2 x i32> %t1, 1
+  %res2 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res1, <2 x i32> %t2, 2
+  %res3 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res2, <2 x i32> %t3, 3
+  ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res3
+}
+
 define {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} @vpload_factor5(ptr %ptr) {
 ; CHECK-LABEL: vpload_factor5:
 ; CHECK:       # %bb.0:
@@ -402,8 +427,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    li a2, 32
 ; RV32-NEXT:    lui a3, 12
 ; RV32-NEXT:    lui a6, 12291
-; RV32-NEXT:    lui a7, %hi(.LCPI19_0)
-; RV32-NEXT:    addi a7, a7, %lo(.LCPI19_0)
+; RV32-NEXT:    lui a7, %hi(.LCPI20_0)
+; RV32-NEXT:    addi a7, a7, %lo(.LCPI20_0)
 ; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
 ; RV32-NEXT:    vle32.v v24, (a5)
 ; RV32-NEXT:    vmv.s.x v0, a3
@@ -488,12 +513,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
 ; RV32-NEXT:    lui a7, 49164
-; RV32-NEXT:    lui a1, %hi(.LCPI19_1)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI19_1)
+; RV32-NEXT:    lui a1, %hi(.LCPI20_1)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI20_1)
 ; RV32-NEXT:    lui t2, 3
 ; RV32-NEXT:    lui t1, 196656
-; RV32-NEXT:    lui a4, %hi(.LCPI19_3)
-; RV32-NEXT:    addi a4, a4, %lo(.LCPI19_3)
+; RV32-NEXT:    lui a4, %hi(.LCPI20_3)
+; RV32-NEXT:    addi a4, a4, %lo(.LCPI20_3)
 ; RV32-NEXT:    lui t0, 786624
 ; RV32-NEXT:    li a5, 48
 ; RV32-NEXT:    lui a6, 768
@@ -672,8 +697,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vrgatherei16.vv v24, v8, v2
-; RV32-NEXT:    lui a1, %hi(.LCPI19_2)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI19_2)
+; RV32-NEXT:    lui a1, %hi(.LCPI20_2)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI20_2)
 ; RV32-NEXT:    lui a3, 3073
 ; RV32-NEXT:    addi a3, a3, -1024
 ; RV32-NEXT:    vmv.s.x v0, a3
@@ -737,16 +762,16 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    vrgatherei16.vv v28, v8, v3
 ; RV32-NEXT:    vsetivli zero, 10, e32, m4, tu, ma
 ; RV32-NEXT:    vmv.v.v v28, v24
-; RV32-NEXT:    lui a1, %hi(.LCPI19_4)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI19_4)
-; RV32-NEXT:    lui a2, %hi(.LCPI19_5)
-; RV32-NEXT:    addi a2, a2, %lo(.LCPI19_5)
+; RV32-NEXT:    lui a1, %hi(.LCPI20_4)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI20_4)
+; RV32-NEXT:    lui a2, %hi(.LCPI20_5)
+; RV32-NEXT:    addi a2, a2, %lo(.LCPI20_5)
 ; RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; RV32-NEXT:    vle16.v v24, (a2)
 ; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; RV32-NEXT:    vle16.v v8, (a1)
-; RV32-NEXT:    lui a1, %hi(.LCPI19_7)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI19_7)
+; RV32-NEXT:    lui a1, %hi(.LCPI20_7)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI20_7)
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT:    vle16.v v10, (a1)
 ; RV32-NEXT:    csrr a1, vlenb
@@ -774,14 +799,14 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    vl8r.v v0, (a1) # vscale x 64-byte Folded Reload
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT:    vrgatherei16.vv v16, v0, v10
-; RV32-NEXT:    lui a1, %hi(.LCPI19_6)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI19_6)
-; RV32-NEXT:    lui a2, %hi(.LCPI19_8)
-; RV32-NEXT:    addi a2, a2, %lo(.LCPI19_8)
+; RV32-NEXT:    lui a1, %hi(.LCPI20_6)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI20_6)
+; RV32-NEXT:    lui a2, %hi(.LCPI20_8)
+; RV32-NEXT:    addi a2, a2, %lo(.LCPI20_8)
 ; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; RV32-NEXT:    vle16.v v4, (a1)
-; RV32-NEXT:    lui a1, %hi(.LCPI19_9)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI19_9)
+; RV32-NEXT:    lui a1, %hi(.LCPI20_9)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI20_9)
 ; RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; RV32-NEXT:    vle16.v v6, (a1)
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
@@ -868,8 +893,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT:    li a4, 128
 ; RV64-NEXT:    lui a1, 1
 ; RV64-NEXT:    vle64.v v8, (a3)
-; RV64-NEXT:    lui a3, %hi(.LCPI19_0)
-; RV64-NEXT:    addi a3, a3, %lo(.LCPI19_0)
+; RV64-NEXT:    lui a3, %hi(.LCPI20_0)
+; RV64-NEXT:    addi a3, a3, %lo(.LCPI20_0)
 ; RV64-NEXT:    vmv.s.x v0, a4
 ; RV64-NEXT:    csrr a4, vlenb
 ; RV64-NEXT:    li a5, 61
@@ -1057,8 +1082,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV64-NEXT:    vslideup.vi v12, v16, 1, v0.t
-; RV64-NEXT:    lui a2, %hi(.LCPI19_1)
-; RV64-NEXT:    addi a2, a2, %lo(.LCPI19_1)
+; RV64-NEXT:    lui a2, %hi(.LCPI20_1)
+; RV64-NEXT:    addi a2, a2, %lo(.LCPI20_1)
 ; RV64-NEXT:    li a3, 192
 ; RV64-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; RV64-NEXT:    vle16.v v6, (a2)
@@ -1092,8 +1117,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT:    vrgatherei16.vv v24, v16, v6
 ; RV64-NEXT:    addi a2, sp, 16
 ; RV64-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    lui a2, %hi(.LCPI19_2)
-; RV64-NEXT:    addi a2, a2, %lo(.LCPI19_2)
+; RV64-NEXT:    lui a2, %hi(.LCPI20_2)
+; RV64-NEXT:    addi a2, a2, %lo(.LCPI20_2)
 ; RV64-NEXT:    li a3, 1040
 ; RV64-NEXT:    vmv.s.x v0, a3
 ; RV64-NEXT:    addi a1, a1, -2016
@@ -1177,12 +1202,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
 ; RV64-NEXT:    vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui a1, %hi(.LCPI19_3)
-; RV64-NEXT:    addi a1, a1, %lo(.LCPI19_3)
+; RV64-NEXT:    lui a1, %hi(.LCPI20_3)
+; RV64-NEXT:    addi a1, a1, %lo(.LCPI20_3)
 ; RV64-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; RV64-NEXT:    vle16.v v20, (a1)
-; RV64-NEXT:    lui a1, %hi(.LCPI19_4)
-; RV64-NEXT:    addi a1, a1, %lo(.LCPI19_4)
+; RV64-NEXT:    lui a1, %hi(.LCPI20_4)
+; RV64-NEXT:    addi a1, a1, %lo(.LCPI20_4)
 ; RV64-NEXT:    vle16.v v8, (a1)
 ; RV64-NEXT:    csrr a1, vlenb
 ; RV64-NEXT:    li a2, 77
@@ -1233,8 +1258,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT:    vl2r.v v8, (a1) # vscale x 16-byte Folded Reload
 ; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; RV64-NEXT:    vrgatherei16.vv v0, v16, v8
-; RV64-NEXT:    lui a1, %hi(.LCPI19_5)
-; RV64-NEXT:    addi a1, a1, %lo(.LCPI19_5)
+; RV64-NEXT:    lui a1, %hi(.LCPI20_5)
+; RV64-NEXT:    addi a1, a1, %lo(.LCPI20_5)
 ; RV64-NEXT:    vle16.v v20, (a1)
 ; RV64-NEXT:    csrr a1, vlenb
 ; RV64-NEXT:    li a2, 61
@@ -1678,8 +1703,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_mask(ptr %ptr) {
 ; RV32-NEXT:    vle32.v v12, (a0), v0.t
 ; RV32-NEXT:    li a0, 36
 ; RV32-NEXT:    vmv.s.x v20, a1
-; RV32-NEXT:    lui a1, %hi(.LCPI44_0)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI44_0)
+; RV32-NEXT:    lui a1, %hi(.LCPI45_0)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI45_0)
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vle16.v v21, (a1)
 ; RV32-NEXT:    vcompress.vm v8, v12, v11
@@ -1754,8 +1779,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_evl(ptr %ptr) {
 ; RV32-NEXT:    vmv.s.x v10, a0
 ; RV32-NEXT:    li a0, 146
 ; RV32-NEXT:    vmv.s.x v11, a0
-; RV32-NEXT:    lui a0, %hi(.LCPI45_0)
-; RV32-NEXT:    addi a0, a0, %lo(.LCPI45_0)
+; RV32-NEXT:    lui a0, %hi(.LCPI46_0)
+; RV32-NEXT:    addi a0, a0, %lo(.LCPI46_0)
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vle16.v v20, (a0)
 ; RV32-NEXT:    li a0, 36