Skip to content

Commit 9fd8830

Browse files
committed
fixup! Minor fixes and add more tests
1 parent 66118fd commit 9fd8830

File tree

2 files changed

+71
-37
lines changed

2 files changed

+71
-37
lines changed

llvm/lib/CodeGen/InterleavedAccessPass.cpp

-1
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@
4545
//===----------------------------------------------------------------------===//
4646

4747
#include "llvm/ADT/ArrayRef.h"
48-
#include "llvm/ADT/BitVector.h"
4948
#include "llvm/ADT/DenseMap.h"
5049
#include "llvm/ADT/SetVector.h"
5150
#include "llvm/ADT/SmallVector.h"

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll

+71 -36
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,22 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @vpload_factor3(ptr %ptr) {
207207
ret {<4 x i32>, <4 x i32>, <4 x i32>} %res2
208208
}
209209

210+
; We only extract some of the fields.
211+
define {<4 x i32>, <4 x i32>} @vpload_factor3_partial(ptr %ptr) {
212+
; CHECK-LABEL: vpload_factor3_partial:
213+
; CHECK: # %bb.0:
214+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
215+
; CHECK-NEXT: vlseg3e32.v v7, (a0)
216+
; CHECK-NEXT: vmv1r.v v8, v7
217+
; CHECK-NEXT: ret
218+
%interleaved.vec = tail call <12 x i32> @llvm.vp.load.v12i32.p0(ptr %ptr, <12 x i1> splat (i1 true), i32 12)
219+
%v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
220+
%v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
221+
%res0 = insertvalue {<4 x i32>, <4 x i32>} poison, <4 x i32> %v0, 0
222+
%res1 = insertvalue {<4 x i32>, <4 x i32>} %res0, <4 x i32> %v2, 1
223+
ret {<4 x i32>, <4 x i32>} %res1
224+
}
225+
210226
; Load a larger vector but only deinterleave a subset of the elements.
211227
define {<4 x i32>, <4 x i32>, <4 x i32>} @vpload_factor3_v16i32(ptr %ptr) {
212228
; CHECK-LABEL: vpload_factor3_v16i32:
@@ -224,6 +240,7 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @vpload_factor3_v16i32(ptr %ptr) {
224240
ret {<4 x i32>, <4 x i32>, <4 x i32>} %res2
225241
}
226242

243+
; Make sure the mask is propagated.
227244
define {<4 x i32>, <4 x i32>, <4 x i32>} @vpload_factor3_mask(ptr %ptr) {
228245
; CHECK-LABEL: vpload_factor3_mask:
229246
; CHECK: # %bb.0:
@@ -241,6 +258,24 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @vpload_factor3_mask(ptr %ptr) {
241258
ret {<4 x i32>, <4 x i32>, <4 x i32>} %res2
242259
}
243260

261+
; Poison/undef elements in the shuffle mask shouldn't prevent the segmented load from being generated.
262+
define {<4 x i32>, <4 x i32>, <4 x i32>} @vpload_factor3_poison_shufflemask(ptr %ptr) {
263+
; CHECK-LABEL: vpload_factor3_poison_shufflemask:
264+
; CHECK: # %bb.0:
265+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
266+
; CHECK-NEXT: vmv.v.i v0, 10
267+
; CHECK-NEXT: vlseg3e32.v v8, (a0), v0.t
268+
; CHECK-NEXT: ret
269+
%interleaved.vec = tail call <12 x i32> @llvm.vp.load.v12i32.p0(ptr %ptr, <12 x i1> <i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1>, i32 12)
270+
%v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
271+
%v1 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 poison, i32 10>
272+
%v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
273+
%res0 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
274+
%res1 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
275+
%res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2
276+
ret {<4 x i32>, <4 x i32>, <4 x i32>} %res2
277+
}
278+
244279
define {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} @vpload_factor4(ptr %ptr) {
245280
; CHECK-LABEL: vpload_factor4:
246281
; CHECK: # %bb.0:
@@ -367,8 +402,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
367402
; RV32-NEXT: li a2, 32
368403
; RV32-NEXT: lui a3, 12
369404
; RV32-NEXT: lui a6, 12291
370-
; RV32-NEXT: lui a7, %hi(.LCPI17_0)
371-
; RV32-NEXT: addi a7, a7, %lo(.LCPI17_0)
405+
; RV32-NEXT: lui a7, %hi(.LCPI19_0)
406+
; RV32-NEXT: addi a7, a7, %lo(.LCPI19_0)
372407
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
373408
; RV32-NEXT: vle32.v v24, (a5)
374409
; RV32-NEXT: vmv.s.x v0, a3
@@ -453,12 +488,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
453488
; RV32-NEXT: addi a1, a1, 16
454489
; RV32-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
455490
; RV32-NEXT: lui a7, 49164
456-
; RV32-NEXT: lui a1, %hi(.LCPI17_1)
457-
; RV32-NEXT: addi a1, a1, %lo(.LCPI17_1)
491+
; RV32-NEXT: lui a1, %hi(.LCPI19_1)
492+
; RV32-NEXT: addi a1, a1, %lo(.LCPI19_1)
458493
; RV32-NEXT: lui t2, 3
459494
; RV32-NEXT: lui t1, 196656
460-
; RV32-NEXT: lui a4, %hi(.LCPI17_3)
461-
; RV32-NEXT: addi a4, a4, %lo(.LCPI17_3)
495+
; RV32-NEXT: lui a4, %hi(.LCPI19_3)
496+
; RV32-NEXT: addi a4, a4, %lo(.LCPI19_3)
462497
; RV32-NEXT: lui t0, 786624
463498
; RV32-NEXT: li a5, 48
464499
; RV32-NEXT: lui a6, 768
@@ -637,8 +672,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
637672
; RV32-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
638673
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
639674
; RV32-NEXT: vrgatherei16.vv v24, v8, v2
640-
; RV32-NEXT: lui a1, %hi(.LCPI17_2)
641-
; RV32-NEXT: addi a1, a1, %lo(.LCPI17_2)
675+
; RV32-NEXT: lui a1, %hi(.LCPI19_2)
676+
; RV32-NEXT: addi a1, a1, %lo(.LCPI19_2)
642677
; RV32-NEXT: lui a3, 3073
643678
; RV32-NEXT: addi a3, a3, -1024
644679
; RV32-NEXT: vmv.s.x v0, a3
@@ -702,16 +737,16 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
702737
; RV32-NEXT: vrgatherei16.vv v28, v8, v3
703738
; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
704739
; RV32-NEXT: vmv.v.v v28, v24
705-
; RV32-NEXT: lui a1, %hi(.LCPI17_4)
706-
; RV32-NEXT: addi a1, a1, %lo(.LCPI17_4)
707-
; RV32-NEXT: lui a2, %hi(.LCPI17_5)
708-
; RV32-NEXT: addi a2, a2, %lo(.LCPI17_5)
740+
; RV32-NEXT: lui a1, %hi(.LCPI19_4)
741+
; RV32-NEXT: addi a1, a1, %lo(.LCPI19_4)
742+
; RV32-NEXT: lui a2, %hi(.LCPI19_5)
743+
; RV32-NEXT: addi a2, a2, %lo(.LCPI19_5)
709744
; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
710745
; RV32-NEXT: vle16.v v24, (a2)
711746
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
712747
; RV32-NEXT: vle16.v v8, (a1)
713-
; RV32-NEXT: lui a1, %hi(.LCPI17_7)
714-
; RV32-NEXT: addi a1, a1, %lo(.LCPI17_7)
748+
; RV32-NEXT: lui a1, %hi(.LCPI19_7)
749+
; RV32-NEXT: addi a1, a1, %lo(.LCPI19_7)
715750
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
716751
; RV32-NEXT: vle16.v v10, (a1)
717752
; RV32-NEXT: csrr a1, vlenb
@@ -739,14 +774,14 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
739774
; RV32-NEXT: vl8r.v v0, (a1) # vscale x 64-byte Folded Reload
740775
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
741776
; RV32-NEXT: vrgatherei16.vv v16, v0, v10
742-
; RV32-NEXT: lui a1, %hi(.LCPI17_6)
743-
; RV32-NEXT: addi a1, a1, %lo(.LCPI17_6)
744-
; RV32-NEXT: lui a2, %hi(.LCPI17_8)
745-
; RV32-NEXT: addi a2, a2, %lo(.LCPI17_8)
777+
; RV32-NEXT: lui a1, %hi(.LCPI19_6)
778+
; RV32-NEXT: addi a1, a1, %lo(.LCPI19_6)
779+
; RV32-NEXT: lui a2, %hi(.LCPI19_8)
780+
; RV32-NEXT: addi a2, a2, %lo(.LCPI19_8)
746781
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
747782
; RV32-NEXT: vle16.v v4, (a1)
748-
; RV32-NEXT: lui a1, %hi(.LCPI17_9)
749-
; RV32-NEXT: addi a1, a1, %lo(.LCPI17_9)
783+
; RV32-NEXT: lui a1, %hi(.LCPI19_9)
784+
; RV32-NEXT: addi a1, a1, %lo(.LCPI19_9)
750785
; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
751786
; RV32-NEXT: vle16.v v6, (a1)
752787
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
@@ -833,8 +868,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
833868
; RV64-NEXT: li a4, 128
834869
; RV64-NEXT: lui a1, 1
835870
; RV64-NEXT: vle64.v v8, (a3)
836-
; RV64-NEXT: lui a3, %hi(.LCPI17_0)
837-
; RV64-NEXT: addi a3, a3, %lo(.LCPI17_0)
871+
; RV64-NEXT: lui a3, %hi(.LCPI19_0)
872+
; RV64-NEXT: addi a3, a3, %lo(.LCPI19_0)
838873
; RV64-NEXT: vmv.s.x v0, a4
839874
; RV64-NEXT: csrr a4, vlenb
840875
; RV64-NEXT: li a5, 61
@@ -1022,8 +1057,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
10221057
; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
10231058
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
10241059
; RV64-NEXT: vslideup.vi v12, v16, 1, v0.t
1025-
; RV64-NEXT: lui a2, %hi(.LCPI17_1)
1026-
; RV64-NEXT: addi a2, a2, %lo(.LCPI17_1)
1060+
; RV64-NEXT: lui a2, %hi(.LCPI19_1)
1061+
; RV64-NEXT: addi a2, a2, %lo(.LCPI19_1)
10271062
; RV64-NEXT: li a3, 192
10281063
; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
10291064
; RV64-NEXT: vle16.v v6, (a2)
@@ -1057,8 +1092,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
10571092
; RV64-NEXT: vrgatherei16.vv v24, v16, v6
10581093
; RV64-NEXT: addi a2, sp, 16
10591094
; RV64-NEXT: vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
1060-
; RV64-NEXT: lui a2, %hi(.LCPI17_2)
1061-
; RV64-NEXT: addi a2, a2, %lo(.LCPI17_2)
1095+
; RV64-NEXT: lui a2, %hi(.LCPI19_2)
1096+
; RV64-NEXT: addi a2, a2, %lo(.LCPI19_2)
10621097
; RV64-NEXT: li a3, 1040
10631098
; RV64-NEXT: vmv.s.x v0, a3
10641099
; RV64-NEXT: addi a1, a1, -2016
@@ -1142,12 +1177,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
11421177
; RV64-NEXT: add a1, sp, a1
11431178
; RV64-NEXT: addi a1, a1, 16
11441179
; RV64-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
1145-
; RV64-NEXT: lui a1, %hi(.LCPI17_3)
1146-
; RV64-NEXT: addi a1, a1, %lo(.LCPI17_3)
1180+
; RV64-NEXT: lui a1, %hi(.LCPI19_3)
1181+
; RV64-NEXT: addi a1, a1, %lo(.LCPI19_3)
11471182
; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
11481183
; RV64-NEXT: vle16.v v20, (a1)
1149-
; RV64-NEXT: lui a1, %hi(.LCPI17_4)
1150-
; RV64-NEXT: addi a1, a1, %lo(.LCPI17_4)
1184+
; RV64-NEXT: lui a1, %hi(.LCPI19_4)
1185+
; RV64-NEXT: addi a1, a1, %lo(.LCPI19_4)
11511186
; RV64-NEXT: vle16.v v8, (a1)
11521187
; RV64-NEXT: csrr a1, vlenb
11531188
; RV64-NEXT: li a2, 77
@@ -1198,8 +1233,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
11981233
; RV64-NEXT: vl2r.v v8, (a1) # vscale x 16-byte Folded Reload
11991234
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
12001235
; RV64-NEXT: vrgatherei16.vv v0, v16, v8
1201-
; RV64-NEXT: lui a1, %hi(.LCPI17_5)
1202-
; RV64-NEXT: addi a1, a1, %lo(.LCPI17_5)
1236+
; RV64-NEXT: lui a1, %hi(.LCPI19_5)
1237+
; RV64-NEXT: addi a1, a1, %lo(.LCPI19_5)
12031238
; RV64-NEXT: vle16.v v20, (a1)
12041239
; RV64-NEXT: csrr a1, vlenb
12051240
; RV64-NEXT: li a2, 61
@@ -1643,8 +1678,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_mask(ptr %ptr) {
16431678
; RV32-NEXT: vle32.v v12, (a0), v0.t
16441679
; RV32-NEXT: li a0, 36
16451680
; RV32-NEXT: vmv.s.x v20, a1
1646-
; RV32-NEXT: lui a1, %hi(.LCPI42_0)
1647-
; RV32-NEXT: addi a1, a1, %lo(.LCPI42_0)
1681+
; RV32-NEXT: lui a1, %hi(.LCPI44_0)
1682+
; RV32-NEXT: addi a1, a1, %lo(.LCPI44_0)
16481683
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
16491684
; RV32-NEXT: vle16.v v21, (a1)
16501685
; RV32-NEXT: vcompress.vm v8, v12, v11
@@ -1719,8 +1754,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_evl(ptr %ptr) {
17191754
; RV32-NEXT: vmv.s.x v10, a0
17201755
; RV32-NEXT: li a0, 146
17211756
; RV32-NEXT: vmv.s.x v11, a0
1722-
; RV32-NEXT: lui a0, %hi(.LCPI43_0)
1723-
; RV32-NEXT: addi a0, a0, %lo(.LCPI43_0)
1757+
; RV32-NEXT: lui a0, %hi(.LCPI45_0)
1758+
; RV32-NEXT: addi a0, a0, %lo(.LCPI45_0)
17241759
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
17251760
; RV32-NEXT: vle16.v v20, (a0)
17261761
; RV32-NEXT: li a0, 36

0 commit comments

Comments
 (0)