Skip to content

Commit c87ef14

Browse files
authored
[InstCombine][NFC] Precommit a test for folding a binary op of reductions. (#121568)
1 parent e2449f1 commit c87ef14

File tree

1 file changed

+215
-0
lines changed

1 file changed

+215
-0
lines changed
Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -passes=instcombine -S | FileCheck %s

; Candidate fold: add(reduce.add(v0), reduce.add(v1)) -> reduce.add(v0 + v1).
; Precommit: the CHECK lines show the current (unfolded) output.
define i32 @add_of_reduce_add(<16 x i32> %v0, <16 x i32> %v1) {
; CHECK-LABEL: define i32 @add_of_reduce_add(
; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V0]])
; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V1]])
; CHECK-NEXT: [[RES:%.*]] = add i32 [[V0_RED]], [[V1_RED]]
; CHECK-NEXT: ret i32 [[RES]]
;
  %v0_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v0)
  %v1_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v1)
  %res = add i32 %v0_red, %v1_red
  ret i32 %res
}
17+
18+
; sub of two add-reductions: the CHECK lines show this already folds to
; reduce.add(v0 - v1) with the existing instcombine.
define i32 @sub_of_reduce_add(<16 x i32> %v0, <16 x i32> %v1) {
; CHECK-LABEL: define i32 @sub_of_reduce_add(
; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = sub <16 x i32> [[V0]], [[V1]]
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP1]])
; CHECK-NEXT: ret i32 [[RES]]
;
  %v0_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v0)
  %v1_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v1)
  %res = sub i32 %v0_red, %v1_red
  ret i32 %res
}
30+
31+
; Candidate fold: mul(reduce.mul(v0), reduce.mul(v1)) -> reduce.mul(v0 * v1).
; Precommit: currently not folded.
define i32 @mul_of_reduce_mul(<16 x i32> %v0, <16 x i32> %v1) {
; CHECK-LABEL: define i32 @mul_of_reduce_mul(
; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[V0]])
; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[V1]])
; CHECK-NEXT: [[RES:%.*]] = mul i32 [[V0_RED]], [[V1_RED]]
; CHECK-NEXT: ret i32 [[RES]]
;
  %v0_red = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> %v0)
  %v1_red = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> %v1)
  %res = mul i32 %v0_red, %v1_red
  ret i32 %res
}
44+
45+
; Candidate fold: and(reduce.and(v0), reduce.and(v1)) -> reduce.and(v0 & v1).
; Precommit: currently not folded.
define i32 @and_of_reduce_and(<16 x i32> %v0, <16 x i32> %v1) {
; CHECK-LABEL: define i32 @and_of_reduce_and(
; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> [[V0]])
; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> [[V1]])
; CHECK-NEXT: [[RES:%.*]] = and i32 [[V0_RED]], [[V1_RED]]
; CHECK-NEXT: ret i32 [[RES]]
;
  %v0_red = tail call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %v0)
  %v1_red = tail call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %v1)
  %res = and i32 %v0_red, %v1_red
  ret i32 %res
}
58+
59+
; Candidate fold: or(reduce.or(v0), reduce.or(v1)) -> reduce.or(v0 | v1).
; Precommit: currently not folded.
define i32 @or_of_reduce_or(<16 x i32> %v0, <16 x i32> %v1) {
; CHECK-LABEL: define i32 @or_of_reduce_or(
; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[V0]])
; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[V1]])
; CHECK-NEXT: [[RES:%.*]] = or i32 [[V0_RED]], [[V1_RED]]
; CHECK-NEXT: ret i32 [[RES]]
;
  %v0_red = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %v0)
  %v1_red = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %v1)
  %res = or i32 %v0_red, %v1_red
  ret i32 %res
}
72+
73+
; Candidate fold: xor(reduce.xor(v0), reduce.xor(v1)) -> reduce.xor(v0 ^ v1).
; Precommit: currently not folded.
define i32 @xor_of_reduce_xor(<16 x i32> %v0, <16 x i32> %v1) {
; CHECK-LABEL: define i32 @xor_of_reduce_xor(
; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> [[V0]])
; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> [[V1]])
; CHECK-NEXT: [[RES:%.*]] = xor i32 [[V0_RED]], [[V1_RED]]
; CHECK-NEXT: ret i32 [[RES]]
;
  %v0_red = tail call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> %v0)
  %v1_red = tail call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> %v1)
  %res = xor i32 %v0_red, %v1_red
  ret i32 %res
}
86+
87+
; Negative test: the scalar binop (add) does not match the reduction kind (mul),
; so the fold must not apply.
define i32 @reduction_does_not_match_binop(<16 x i32> %v0, <16 x i32> %v1) {
; CHECK-LABEL: define i32 @reduction_does_not_match_binop(
; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[V0]])
; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[V1]])
; CHECK-NEXT: [[RES:%.*]] = add i32 [[V0_RED]], [[V1_RED]]
; CHECK-NEXT: ret i32 [[RES]]
;
  %v0_red = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> %v0)
  %v1_red = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> %v1)
  %res = add i32 %v0_red, %v1_red
  ret i32 %res
}
100+
101+
; Negative test: the two operands come from different reduction intrinsics
; (reduce.add vs reduce.mul), so the fold must not apply.
define i32 @intrinsics_do_not_match(<16 x i32> %v0, <16 x i32> %v1) {
; CHECK-LABEL: define i32 @intrinsics_do_not_match(
; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V0]])
; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[V1]])
; CHECK-NEXT: [[RES:%.*]] = add i32 [[V0_RED]], [[V1_RED]]
; CHECK-NEXT: ret i32 [[RES]]
;
  %v0_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v0)
  %v1_red = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> %v1)
  %res = add i32 %v0_red, %v1_red
  ret i32 %res
}
114+
115+
; Negative test: the reduced vectors have different element counts
; (<16 x i32> vs <8 x i32>), so no single vector binop can combine them.
define i32 @element_counts_do_not_match(<16 x i32> %v0, <8 x i32> %v1) {
; CHECK-LABEL: define i32 @element_counts_do_not_match(
; CHECK-SAME: <16 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]]) {
; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V0]])
; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[V1]])
; CHECK-NEXT: [[RES:%.*]] = add i32 [[V0_RED]], [[V1_RED]]
; CHECK-NEXT: ret i32 [[RES]]
;
  %v0_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v0)
  %v1_red = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %v1)
  %res = add i32 %v0_red, %v1_red
  ret i32 %res
}
128+
129+
; Multi-use test: %v0_red has a second use (the store), so folding would not
; remove the original reduction call.
define i32 @multiple_use_of_reduction_0(<16 x i32> %v0, <16 x i32> %v1, ptr %p) {
; CHECK-LABEL: define i32 @multiple_use_of_reduction_0(
; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]], ptr [[P:%.*]]) {
; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V0]])
; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V1]])
; CHECK-NEXT: [[RES:%.*]] = add i32 [[V0_RED]], [[V1_RED]]
; CHECK-NEXT: store i32 [[V0_RED]], ptr [[P]], align 4
; CHECK-NEXT: ret i32 [[RES]]
;
  %v0_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v0)
  %v1_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v1)
  %res = add i32 %v0_red, %v1_red
  store i32 %v0_red, ptr %p
  ret i32 %res
}
144+
145+
; Multi-use test: like multiple_use_of_reduction_0, but the extra use is of
; %v1_red (the second reduction) instead of the first.
define i32 @multiple_use_of_reduction_1(<16 x i32> %v0, <16 x i32> %v1, ptr %p) {
; CHECK-LABEL: define i32 @multiple_use_of_reduction_1(
; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]], ptr [[P:%.*]]) {
; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V0]])
; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V1]])
; CHECK-NEXT: [[RES:%.*]] = add i32 [[V0_RED]], [[V1_RED]]
; CHECK-NEXT: store i32 [[V1_RED]], ptr [[P]], align 4
; CHECK-NEXT: ret i32 [[RES]]
;
  %v0_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v0)
  %v1_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v1)
  %res = add i32 %v0_red, %v1_red
  store i32 %v1_red, ptr %p
  ret i32 %res
}
160+
161+
; Flag test: the scalar add carries nuw/nsw, which would not be valid on the
; vector add produced by the fold, so the flags must be dropped (not preserved).
define i32 @do_not_preserve_overflow_flags(<16 x i32> %v0, <16 x i32> %v1) {
; CHECK-LABEL: define i32 @do_not_preserve_overflow_flags(
; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V0]])
; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V1]])
; CHECK-NEXT: [[RES:%.*]] = add nuw nsw i32 [[V0_RED]], [[V1_RED]]
; CHECK-NEXT: ret i32 [[RES]]
;
  %v0_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v0)
  %v1_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v1)
  %res = add nsw nuw i32 %v0_red, %v1_red
  ret i32 %res
}
174+
175+
; Flag test: the scalar or carries the disjoint flag; the test documents how
; that flag interacts with the candidate fold.
define i32 @preserve_disjoint_flags(<16 x i32> %v0, <16 x i32> %v1) {
; CHECK-LABEL: define i32 @preserve_disjoint_flags(
; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[V0]])
; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[V1]])
; CHECK-NEXT: [[RES:%.*]] = or disjoint i32 [[V0_RED]], [[V1_RED]]
; CHECK-NEXT: ret i32 [[RES]]
;
  %v0_red = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %v0)
  %v1_red = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %v1)
  %res = or disjoint i32 %v0_red, %v1_red
  ret i32 %res
}
188+
189+
; Scalable-vector variant of add_of_reduce_add: the fold should also apply to
; <vscale x N x i32> operands of matching element count.
define i32 @add_of_reduce_add_vscale(<vscale x 16 x i32> %v0, <vscale x 16 x i32> %v1) {
; CHECK-LABEL: define i32 @add_of_reduce_add_vscale(
; CHECK-SAME: <vscale x 16 x i32> [[V0:%.*]], <vscale x 16 x i32> [[V1:%.*]]) {
; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> [[V0]])
; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> [[V1]])
; CHECK-NEXT: [[RES:%.*]] = add i32 [[V0_RED]], [[V1_RED]]
; CHECK-NEXT: ret i32 [[RES]]
;
  %v0_red = tail call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> %v0)
  %v1_red = tail call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> %v1)
  %res = add i32 %v0_red, %v1_red
  ret i32 %res
}
202+
203+
; Negative test (scalable vectors): the element counts differ (nxv16i32 vs
; nxv8i32), so the fold must not apply.
define i32 @element_counts_do_not_match_vscale(<vscale x 16 x i32> %v0, <vscale x 8 x i32> %v1) {
; CHECK-LABEL: define i32 @element_counts_do_not_match_vscale(
; CHECK-SAME: <vscale x 16 x i32> [[V0:%.*]], <vscale x 8 x i32> [[V1:%.*]]) {
; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> [[V0]])
; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> [[V1]])
; CHECK-NEXT: [[RES:%.*]] = add i32 [[V0_RED]], [[V1_RED]]
; CHECK-NEXT: ret i32 [[RES]]
;
  %v0_red = tail call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> %v0)
  ; Fix: the intrinsic's overload mangling must match its operand type. %v1 is
  ; <vscale x 8 x i32>, so the callee is @llvm.vector.reduce.add.nxv8i32 (the
  ; original wrongly named nxv16i32 here, which the IR verifier rejects and
  ; which contradicts the autogenerated CHECK line above).
  %v1_red = tail call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> %v1)
  %res = add i32 %v0_red, %v1_red
  ret i32 %res
}

0 commit comments

Comments
 (0)