Skip to content

Commit c41809e

Browse files
committed
Do not use strictfp call attribute as indicator of FPE awareness
1 parent 36dc54b commit c41809e

14 files changed

+498
-294
lines changed

clang/test/CodeGen/cx-complex-range.c

+10-10
Original file line numberDiff line numberDiff line change
@@ -1575,8 +1575,8 @@ _Complex float mulf(_Complex float a, _Complex float b) {
15751575
// X86WINPRMTD_STRICT-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8
15761576
// X86WINPRMTD_STRICT-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[B]], i32 0, i32 1
15771577
// X86WINPRMTD_STRICT-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8
1578-
// X86WINPRMTD_STRICT-NEXT: [[TMP0:%.*]] = call double @llvm.fabs.f64(double [[B_REAL]]) #[[ATTR3]]
1579-
// X86WINPRMTD_STRICT-NEXT: [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[B_IMAG]]) #[[ATTR3]]
1578+
// X86WINPRMTD_STRICT-NEXT: [[TMP0:%.*]] = call double @llvm.fabs.f64(double [[B_REAL]]) #[[ATTR4:[0-9]+]]
1579+
// X86WINPRMTD_STRICT-NEXT: [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[B_IMAG]]) #[[ATTR4]]
15801580
// X86WINPRMTD_STRICT-NEXT: [[ABS_CMP:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP0]], double [[TMP1]], metadata !"ugt", metadata !"fpexcept.strict") #[[ATTR3]]
15811581
// X86WINPRMTD_STRICT-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]]
15821582
// X86WINPRMTD_STRICT: abs_rhsr_greater_or_equal_abs_rhsi:
@@ -2658,8 +2658,8 @@ _Complex double muld(_Complex double a, _Complex double b) {
26582658
// X86WINPRMTD_STRICT-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8
26592659
// X86WINPRMTD_STRICT-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[B]], i32 0, i32 1
26602660
// X86WINPRMTD_STRICT-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8
2661-
// X86WINPRMTD_STRICT-NEXT: [[TMP0:%.*]] = call double @llvm.fabs.f64(double [[B_REAL]]) #[[ATTR3]]
2662-
// X86WINPRMTD_STRICT-NEXT: [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[B_IMAG]]) #[[ATTR3]]
2661+
// X86WINPRMTD_STRICT-NEXT: [[TMP0:%.*]] = call double @llvm.fabs.f64(double [[B_REAL]]) #[[ATTR4]]
2662+
// X86WINPRMTD_STRICT-NEXT: [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[B_IMAG]]) #[[ATTR4]]
26632663
// X86WINPRMTD_STRICT-NEXT: [[ABS_CMP:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP0]], double [[TMP1]], metadata !"ugt", metadata !"fpexcept.strict") #[[ATTR3]]
26642664
// X86WINPRMTD_STRICT-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]]
26652665
// X86WINPRMTD_STRICT: abs_rhsr_greater_or_equal_abs_rhsi:
@@ -2713,8 +2713,8 @@ _Complex double muld(_Complex double a, _Complex double b) {
27132713
// PRMTD_STRICT-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16
27142714
// PRMTD_STRICT-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds nuw { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1
27152715
// PRMTD_STRICT-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16
2716-
// PRMTD_STRICT-NEXT: [[TMP0:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[B_REAL]]) #[[ATTR4]]
2717-
// PRMTD_STRICT-NEXT: [[TMP1:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[B_IMAG]]) #[[ATTR4]]
2716+
// PRMTD_STRICT-NEXT: [[TMP0:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[B_REAL]]) #[[ATTR5:[0-9]+]]
2717+
// PRMTD_STRICT-NEXT: [[TMP1:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[B_IMAG]]) #[[ATTR5]]
27182718
// PRMTD_STRICT-NEXT: [[ABS_CMP:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f80(x86_fp80 [[TMP0]], x86_fp80 [[TMP1]], metadata !"ugt", metadata !"fpexcept.strict") #[[ATTR4]]
27192719
// PRMTD_STRICT-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]]
27202720
// PRMTD_STRICT: abs_rhsr_greater_or_equal_abs_rhsi:
@@ -3961,8 +3961,8 @@ _Complex long double mulld(_Complex long double a, _Complex long double b) {
39613961
// X86WINPRMTD_STRICT-NEXT: [[C_IMAG:%.*]] = load float, ptr [[C_IMAGP]], align 4
39623962
// X86WINPRMTD_STRICT-NEXT: [[CONV:%.*]] = call double @llvm.experimental.constrained.fpext.f64.f32(float [[C_REAL]], metadata !"fpexcept.strict") #[[ATTR3]]
39633963
// X86WINPRMTD_STRICT-NEXT: [[CONV1:%.*]] = call double @llvm.experimental.constrained.fpext.f64.f32(float [[C_IMAG]], metadata !"fpexcept.strict") #[[ATTR3]]
3964-
// X86WINPRMTD_STRICT-NEXT: [[TMP0:%.*]] = call double @llvm.fabs.f64(double [[CONV]]) #[[ATTR3]]
3965-
// X86WINPRMTD_STRICT-NEXT: [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[CONV1]]) #[[ATTR3]]
3964+
// X86WINPRMTD_STRICT-NEXT: [[TMP0:%.*]] = call double @llvm.fabs.f64(double [[CONV]]) #[[ATTR4]]
3965+
// X86WINPRMTD_STRICT-NEXT: [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[CONV1]]) #[[ATTR4]]
39663966
// X86WINPRMTD_STRICT-NEXT: [[ABS_CMP:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP0]], double [[TMP1]], metadata !"ugt", metadata !"fpexcept.strict") #[[ATTR3]]
39673967
// X86WINPRMTD_STRICT-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]]
39683968
// X86WINPRMTD_STRICT: abs_rhsr_greater_or_equal_abs_rhsi:
@@ -4038,8 +4038,8 @@ _Complex long double mulld(_Complex long double a, _Complex long double b) {
40384038
// PRMTD_STRICT-NEXT: [[C_IMAG:%.*]] = load float, ptr [[C_IMAGP]], align 4
40394039
// PRMTD_STRICT-NEXT: [[CONV:%.*]] = call x86_fp80 @llvm.experimental.constrained.fpext.f80.f32(float [[C_REAL]], metadata !"fpexcept.strict") #[[ATTR4]]
40404040
// PRMTD_STRICT-NEXT: [[CONV1:%.*]] = call x86_fp80 @llvm.experimental.constrained.fpext.f80.f32(float [[C_IMAG]], metadata !"fpexcept.strict") #[[ATTR4]]
4041-
// PRMTD_STRICT-NEXT: [[TMP0:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[CONV]]) #[[ATTR4]]
4042-
// PRMTD_STRICT-NEXT: [[TMP1:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[CONV1]]) #[[ATTR4]]
4041+
// PRMTD_STRICT-NEXT: [[TMP0:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[CONV]]) #[[ATTR5]]
4042+
// PRMTD_STRICT-NEXT: [[TMP1:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[CONV1]]) #[[ATTR5]]
40434043
// PRMTD_STRICT-NEXT: [[ABS_CMP:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f80(x86_fp80 [[TMP0]], x86_fp80 [[TMP1]], metadata !"ugt", metadata !"fpexcept.strict") #[[ATTR4]]
40444044
// PRMTD_STRICT-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]]
40454045
// PRMTD_STRICT: abs_rhsr_greater_or_equal_abs_rhsi:

clang/test/CodeGen/strictfp-elementwise-bulitins.cpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ float4 strict_fadd(float4 a, float4 b) {
2020
// CHECK-LABEL: define dso_local noundef <4 x float> @_Z22strict_elementwise_absDv4_f
2121
// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
2222
// CHECK-NEXT: entry:
23-
// CHECK-NEXT: [[ELT_ABS:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A]]) #[[ATTR4]]
23+
// CHECK-NEXT: [[ELT_ABS:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A]]) #[[ATTR5:[0-9]+]]
2424
// CHECK-NEXT: ret <4 x float> [[ELT_ABS]]
2525
//
2626
float4 strict_elementwise_abs(float4 a) {
@@ -300,7 +300,7 @@ float4 strict_elementwise_trunc(float4 a) {
300300
// CHECK-LABEL: define dso_local noundef <4 x float> @_Z31strict_elementwise_canonicalizeDv4_f
301301
// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
302302
// CHECK-NEXT: entry:
303-
// CHECK-NEXT: [[ELT_CANONICALIZE:%.*]] = tail call <4 x float> @llvm.canonicalize.v4f32(<4 x float> [[A]]) #[[ATTR4]]
303+
// CHECK-NEXT: [[ELT_CANONICALIZE:%.*]] = tail call <4 x float> @llvm.canonicalize.v4f32(<4 x float> [[A]]) #[[ATTR5]]
304304
// CHECK-NEXT: ret <4 x float> [[ELT_CANONICALIZE]]
305305
//
306306
float4 strict_elementwise_canonicalize(float4 a) {
@@ -310,7 +310,7 @@ float4 strict_elementwise_canonicalize(float4 a) {
310310
// CHECK-LABEL: define dso_local noundef <4 x float> @_Z27strict_elementwise_copysignDv4_fS_
311311
// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
312312
// CHECK-NEXT: entry:
313-
// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.copysign.v4f32(<4 x float> [[A]], <4 x float> [[B]]) #[[ATTR4]]
313+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.copysign.v4f32(<4 x float> [[A]], <4 x float> [[B]]) #[[ATTR5]]
314314
// CHECK-NEXT: ret <4 x float> [[TMP0]]
315315
//
316316
float4 strict_elementwise_copysign(float4 a, float4 b) {

clang/test/CodeGen/strictfp_builtins.c

+10-10
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ void test_fpclassify(double d) {
6060
// CHECK-NEXT: [[H_ADDR:%.*]] = alloca half, align 2
6161
// CHECK-NEXT: store half [[H:%.*]], ptr [[H_ADDR]], align 2
6262
// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[H_ADDR]], align 2
63-
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[TMP0]], i32 516) #[[ATTR5]]
63+
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[TMP0]], i32 516) #[[ATTR4]]
6464
// CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
6565
// CHECK-NEXT: call void @p(ptr noundef @.str.2, i32 noundef [[TMP2]]) #[[ATTR4]]
6666
// CHECK-NEXT: ret void
@@ -76,7 +76,7 @@ void test_fp16_isinf(_Float16 h) {
7676
// CHECK-NEXT: [[F_ADDR:%.*]] = alloca float, align 4
7777
// CHECK-NEXT: store float [[F:%.*]], ptr [[F_ADDR]], align 4
7878
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
79-
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f32(float [[TMP0]], i32 516) #[[ATTR5]]
79+
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f32(float [[TMP0]], i32 516) #[[ATTR4]]
8080
// CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
8181
// CHECK-NEXT: call void @p(ptr noundef @.str.3, i32 noundef [[TMP2]]) #[[ATTR4]]
8282
// CHECK-NEXT: ret void
@@ -92,7 +92,7 @@ void test_float_isinf(float f) {
9292
// CHECK-NEXT: [[D_ADDR:%.*]] = alloca double, align 8
9393
// CHECK-NEXT: store double [[D:%.*]], ptr [[D_ADDR]], align 8
9494
// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[D_ADDR]], align 8
95-
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP0]], i32 516) #[[ATTR5]]
95+
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP0]], i32 516) #[[ATTR4]]
9696
// CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
9797
// CHECK-NEXT: call void @p(ptr noundef @.str.4, i32 noundef [[TMP2]]) #[[ATTR4]]
9898
// CHECK-NEXT: ret void
@@ -108,7 +108,7 @@ void test_double_isinf(double d) {
108108
// CHECK-NEXT: [[H_ADDR:%.*]] = alloca half, align 2
109109
// CHECK-NEXT: store half [[H:%.*]], ptr [[H_ADDR]], align 2
110110
// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[H_ADDR]], align 2
111-
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[TMP0]], i32 504) #[[ATTR5]]
111+
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[TMP0]], i32 504) #[[ATTR4]]
112112
// CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
113113
// CHECK-NEXT: call void @p(ptr noundef @.str.5, i32 noundef [[TMP2]]) #[[ATTR4]]
114114
// CHECK-NEXT: ret void
@@ -124,7 +124,7 @@ void test_fp16_isfinite(_Float16 h) {
124124
// CHECK-NEXT: [[F_ADDR:%.*]] = alloca float, align 4
125125
// CHECK-NEXT: store float [[F:%.*]], ptr [[F_ADDR]], align 4
126126
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
127-
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f32(float [[TMP0]], i32 504) #[[ATTR5]]
127+
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f32(float [[TMP0]], i32 504) #[[ATTR4]]
128128
// CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
129129
// CHECK-NEXT: call void @p(ptr noundef @.str.6, i32 noundef [[TMP2]]) #[[ATTR4]]
130130
// CHECK-NEXT: ret void
@@ -140,7 +140,7 @@ void test_float_isfinite(float f) {
140140
// CHECK-NEXT: [[D_ADDR:%.*]] = alloca double, align 8
141141
// CHECK-NEXT: store double [[D:%.*]], ptr [[D_ADDR]], align 8
142142
// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[D_ADDR]], align 8
143-
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP0]], i32 504) #[[ATTR5]]
143+
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP0]], i32 504) #[[ATTR4]]
144144
// CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
145145
// CHECK-NEXT: call void @p(ptr noundef @.str.7, i32 noundef [[TMP2]]) #[[ATTR4]]
146146
// CHECK-NEXT: ret void
@@ -176,7 +176,7 @@ void test_isinf_sign(double d) {
176176
// CHECK-NEXT: [[H_ADDR:%.*]] = alloca half, align 2
177177
// CHECK-NEXT: store half [[H:%.*]], ptr [[H_ADDR]], align 2
178178
// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[H_ADDR]], align 2
179-
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[TMP0]], i32 3) #[[ATTR5]]
179+
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[TMP0]], i32 3) #[[ATTR4]]
180180
// CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
181181
// CHECK-NEXT: call void @p(ptr noundef @.str.9, i32 noundef [[TMP2]]) #[[ATTR4]]
182182
// CHECK-NEXT: ret void
@@ -192,7 +192,7 @@ void test_fp16_isnan(_Float16 h) {
192192
// CHECK-NEXT: [[F_ADDR:%.*]] = alloca float, align 4
193193
// CHECK-NEXT: store float [[F:%.*]], ptr [[F_ADDR]], align 4
194194
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
195-
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f32(float [[TMP0]], i32 3) #[[ATTR5]]
195+
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f32(float [[TMP0]], i32 3) #[[ATTR4]]
196196
// CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
197197
// CHECK-NEXT: call void @p(ptr noundef @.str.10, i32 noundef [[TMP2]]) #[[ATTR4]]
198198
// CHECK-NEXT: ret void
@@ -208,7 +208,7 @@ void test_float_isnan(float f) {
208208
// CHECK-NEXT: [[D_ADDR:%.*]] = alloca double, align 8
209209
// CHECK-NEXT: store double [[D:%.*]], ptr [[D_ADDR]], align 8
210210
// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[D_ADDR]], align 8
211-
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP0]], i32 3) #[[ATTR5]]
211+
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP0]], i32 3) #[[ATTR4]]
212212
// CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
213213
// CHECK-NEXT: call void @p(ptr noundef @.str.11, i32 noundef [[TMP2]]) #[[ATTR4]]
214214
// CHECK-NEXT: ret void
@@ -224,7 +224,7 @@ void test_double_isnan(double d) {
224224
// CHECK-NEXT: [[D_ADDR:%.*]] = alloca double, align 8
225225
// CHECK-NEXT: store double [[D:%.*]], ptr [[D_ADDR]], align 8
226226
// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[D_ADDR]], align 8
227-
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP0]], i32 264) #[[ATTR5]]
227+
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP0]], i32 264) #[[ATTR4]]
228228
// CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
229229
// CHECK-NEXT: call void @p(ptr noundef @.str.12, i32 noundef [[TMP2]]) #[[ATTR4]]
230230
// CHECK-NEXT: ret void

clang/test/CodeGenOpenCL/cl20-device-side-enqueue-attributes.cl

+3-2
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ kernel void device_side_enqueue(global float *a, global float *b, int i) {
144144
// STRICTFP-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[BLOCK_CAPTURE_ADDR1]], align 4
145145
// STRICTFP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[TMP0]], i32 [[TMP1]]
146146
// STRICTFP-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(1) [[ARRAYIDX]], align 4
147-
// STRICTFP-NEXT: [[TMP3:%.*]] = call float @llvm.experimental.constrained.fmuladd.f32(float 4.000000e+00, float [[TMP2]], float 1.000000e+00, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR5]] [ "fpe.control"(metadata !"rte"), "fpe.except"(metadata !"strict") ]
147+
// STRICTFP-NEXT: [[TMP3:%.*]] = call float @llvm.experimental.constrained.fmuladd.f32(float 4.000000e+00, float [[TMP2]], float 1.000000e+00, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR6:[0-9]+]] [ "fpe.control"(metadata !"rte"), "fpe.except"(metadata !"strict") ]
148148
// STRICTFP-NEXT: [[BLOCK_CAPTURE_ADDR2:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr addrspace(4), ptr addrspace(1), i32, ptr addrspace(1) }>, ptr addrspace(4) [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
149149
// STRICTFP-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[BLOCK_CAPTURE_ADDR2]], align 4
150150
// STRICTFP-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr addrspace(4), ptr addrspace(1), i32, ptr addrspace(1) }>, ptr addrspace(4) [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
@@ -173,7 +173,8 @@ kernel void device_side_enqueue(global float *a, global float *b, int i) {
173173
// STRICTFP: attributes #[[ATTR2]] = { convergent noinline nounwind optnone strictfp "stack-protector-buffer-size"="8" }
174174
// STRICTFP: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind strictfp willreturn memory(inaccessiblemem: readwrite) }
175175
// STRICTFP: attributes #[[ATTR4]] = { convergent nounwind "stack-protector-buffer-size"="8" }
176-
// STRICTFP: attributes #[[ATTR5]] = { strictfp memory(inaccessiblemem: readwrite) }
176+
// STRICTFP: attributes #[[ATTR5]] = { strictfp }
177+
// STRICTFP: attributes #[[ATTR6]] = { strictfp memory(inaccessiblemem: readwrite) }
177178
//.
178179
// SPIR32: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
179180
// SPIR32: [[META1:![0-9]+]] = !{i32 2, i32 0}

llvm/include/llvm/IR/IRBuilder.h

-5
Original file line numberDiff line numberDiff line change
@@ -379,11 +379,6 @@ class IRBuilderBase {
379379

380380
void setConstrainedFPCallAttr(CallBase *I) {
381381
I->addFnAttr(Attribute::StrictFP);
382-
MemoryEffects ME = MemoryEffects::inaccessibleMemOnly();
383-
if (I->getAttributes().hasFnAttr(Attribute::Memory))
384-
ME |= I->getAttributes().getMemoryEffects();
385-
auto A = Attribute::getWithMemoryEffects(getContext(), ME);
386-
I->addFnAttr(A);
387382
}
388383

389384
void setDefaultOperandBundles(ArrayRef<OperandBundleDef> OpBundles) {

llvm/include/llvm/IR/InstrTypes.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -1158,7 +1158,7 @@ class CallBase : public Instruction {
11581158
/// number of extra operands.
11591159
unsigned getNumSubclassExtraOperandsDynamic() const;
11601160

1161-
MemoryEffects getMemoryEffectsForBundles() const;
1161+
MemoryEffects getFloatingPointMemoryEffects() const;
11621162

11631163
public:
11641164
using Instruction::getContext;

llvm/include/llvm/Support/ModRef.h

+4
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,10 @@ template <typename LocationEnum> class MemoryEffectsBase {
224224
return getWithoutLoc(Location::InaccessibleMem).doesNotAccessMemory();
225225
}
226226

227+
bool doesAccessInaccessibleMem() const {
228+
return isModOrRefSet(getModRef(Location::InaccessibleMem));
229+
}
230+
227231
/// Whether this function only (at most) accesses errno memory.
228232
bool onlyAccessesErrnoMem() const {
229233
return getWithoutLoc(Location::ErrnoMem).doesNotAccessMemory();

llvm/lib/IR/IRBuilder.cpp

+11-1
Original file line numberDiff line numberDiff line change
@@ -93,11 +93,14 @@ CallInst *IRBuilderBase::CreateCall(FunctionType *FTy, Value *Callee,
9393
ArrayRef<OperandBundleDef> ActualBundlesRef = OpBundles;
9494
SmallVector<OperandBundleDef, 2> ActualBundles;
9595

96+
bool doesCallAccessFPEnv = false;
9697
if (IsFPConstrained) {
9798
if (const auto *Func = dyn_cast<Function>(Callee)) {
9899
if (Intrinsic::ID ID = Func->getIntrinsicID()) {
99-
if (IntrinsicInst::canAccessFPEnvironment(ID)) {
100+
if (IntrinsicInst::canAccessFPEnvironment(ID) ||
101+
Intrinsic::isConstrainedFPIntrinsic(ID)) {
100102
bool NeedRound = true, NeedExcept = true;
103+
doesCallAccessFPEnv = true;
101104
for (const auto &Item : OpBundles) {
102105
if (NeedRound && Item.getTag() == "fpe.control")
103106
NeedRound = false;
@@ -116,6 +119,13 @@ CallInst *IRBuilderBase::CreateCall(FunctionType *FTy, Value *Callee,
116119
}
117120

118121
CallInst *CI = CallInst::Create(FTy, Callee, Args, ActualBundlesRef);
122+
if (doesCallAccessFPEnv) {
123+
MemoryEffects ME = MemoryEffects::inaccessibleMemOnly();
124+
if (CI->getAttributes().hasFnAttr(Attribute::Memory))
125+
ME |= CI->getAttributes().getMemoryEffects();
126+
auto A = Attribute::getWithMemoryEffects(getContext(), ME);
127+
CI->addFnAttr(A);
128+
}
119129
if (IsFPConstrained)
120130
setConstrainedFPCallAttr(CI);
121131
if (isa<FPMathOperator>(CI))

0 commit comments

Comments
 (0)