Skip to content

Implement operand bundles for floating-point operations #109798

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
6 changes: 3 additions & 3 deletions clang/test/CodeGen/X86/strictfp_builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ void p(char *str, int x) {
// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca x86_fp80, align 16
// CHECK-NEXT: store x86_fp80 [[LD:%.*]], ptr [[LD_ADDR]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load x86_fp80, ptr [[LD_ADDR]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f80(x86_fp80 [[TMP0]], i32 516) #[[ATTR3]]
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f80(x86_fp80 [[TMP0]], i32 516) #[[ATTR4:[0-9]+]]
// CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
// CHECK-NEXT: call void @p(ptr noundef @.str.1, i32 noundef [[TMP2]]) #[[ATTR3]]
// CHECK-NEXT: ret void
Expand All @@ -43,7 +43,7 @@ void test_long_double_isinf(long double ld) {
// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca x86_fp80, align 16
// CHECK-NEXT: store x86_fp80 [[LD:%.*]], ptr [[LD_ADDR]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load x86_fp80, ptr [[LD_ADDR]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f80(x86_fp80 [[TMP0]], i32 504) #[[ATTR3]]
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f80(x86_fp80 [[TMP0]], i32 504) #[[ATTR4]]
// CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
// CHECK-NEXT: call void @p(ptr noundef @.str.2, i32 noundef [[TMP2]]) #[[ATTR3]]
// CHECK-NEXT: ret void
Expand All @@ -59,7 +59,7 @@ void test_long_double_isfinite(long double ld) {
// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca x86_fp80, align 16
// CHECK-NEXT: store x86_fp80 [[LD:%.*]], ptr [[LD_ADDR]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load x86_fp80, ptr [[LD_ADDR]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f80(x86_fp80 [[TMP0]], i32 3) #[[ATTR3]]
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f80(x86_fp80 [[TMP0]], i32 3) #[[ATTR4]]
// CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
// CHECK-NEXT: call void @p(ptr noundef @.str.3, i32 noundef [[TMP2]]) #[[ATTR3]]
// CHECK-NEXT: ret void
Expand Down
20 changes: 10 additions & 10 deletions clang/test/CodeGen/cx-complex-range.c
Original file line number Diff line number Diff line change
Expand Up @@ -1575,8 +1575,8 @@ _Complex float mulf(_Complex float a, _Complex float b) {
// X86WINPRMTD_STRICT-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8
// X86WINPRMTD_STRICT-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[B]], i32 0, i32 1
// X86WINPRMTD_STRICT-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8
// X86WINPRMTD_STRICT-NEXT: [[TMP0:%.*]] = call double @llvm.fabs.f64(double [[B_REAL]]) #[[ATTR3]]
// X86WINPRMTD_STRICT-NEXT: [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[B_IMAG]]) #[[ATTR3]]
// X86WINPRMTD_STRICT-NEXT: [[TMP0:%.*]] = call double @llvm.fabs.f64(double [[B_REAL]]) #[[ATTR4:[0-9]+]]
// X86WINPRMTD_STRICT-NEXT: [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[B_IMAG]]) #[[ATTR4]]
// X86WINPRMTD_STRICT-NEXT: [[ABS_CMP:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP0]], double [[TMP1]], metadata !"ugt", metadata !"fpexcept.strict") #[[ATTR3]]
// X86WINPRMTD_STRICT-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]]
// X86WINPRMTD_STRICT: abs_rhsr_greater_or_equal_abs_rhsi:
Expand Down Expand Up @@ -2658,8 +2658,8 @@ _Complex double muld(_Complex double a, _Complex double b) {
// X86WINPRMTD_STRICT-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8
// X86WINPRMTD_STRICT-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[B]], i32 0, i32 1
// X86WINPRMTD_STRICT-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8
// X86WINPRMTD_STRICT-NEXT: [[TMP0:%.*]] = call double @llvm.fabs.f64(double [[B_REAL]]) #[[ATTR3]]
// X86WINPRMTD_STRICT-NEXT: [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[B_IMAG]]) #[[ATTR3]]
// X86WINPRMTD_STRICT-NEXT: [[TMP0:%.*]] = call double @llvm.fabs.f64(double [[B_REAL]]) #[[ATTR4]]
// X86WINPRMTD_STRICT-NEXT: [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[B_IMAG]]) #[[ATTR4]]
// X86WINPRMTD_STRICT-NEXT: [[ABS_CMP:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP0]], double [[TMP1]], metadata !"ugt", metadata !"fpexcept.strict") #[[ATTR3]]
// X86WINPRMTD_STRICT-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]]
// X86WINPRMTD_STRICT: abs_rhsr_greater_or_equal_abs_rhsi:
Expand Down Expand Up @@ -2713,8 +2713,8 @@ _Complex double muld(_Complex double a, _Complex double b) {
// PRMTD_STRICT-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16
// PRMTD_STRICT-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds nuw { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1
// PRMTD_STRICT-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16
// PRMTD_STRICT-NEXT: [[TMP0:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[B_REAL]]) #[[ATTR4]]
// PRMTD_STRICT-NEXT: [[TMP1:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[B_IMAG]]) #[[ATTR4]]
// PRMTD_STRICT-NEXT: [[TMP0:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[B_REAL]]) #[[ATTR5:[0-9]+]]
// PRMTD_STRICT-NEXT: [[TMP1:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[B_IMAG]]) #[[ATTR5]]
// PRMTD_STRICT-NEXT: [[ABS_CMP:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f80(x86_fp80 [[TMP0]], x86_fp80 [[TMP1]], metadata !"ugt", metadata !"fpexcept.strict") #[[ATTR4]]
// PRMTD_STRICT-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]]
// PRMTD_STRICT: abs_rhsr_greater_or_equal_abs_rhsi:
Expand Down Expand Up @@ -3961,8 +3961,8 @@ _Complex long double mulld(_Complex long double a, _Complex long double b) {
// X86WINPRMTD_STRICT-NEXT: [[C_IMAG:%.*]] = load float, ptr [[C_IMAGP]], align 4
// X86WINPRMTD_STRICT-NEXT: [[CONV:%.*]] = call double @llvm.experimental.constrained.fpext.f64.f32(float [[C_REAL]], metadata !"fpexcept.strict") #[[ATTR3]]
// X86WINPRMTD_STRICT-NEXT: [[CONV1:%.*]] = call double @llvm.experimental.constrained.fpext.f64.f32(float [[C_IMAG]], metadata !"fpexcept.strict") #[[ATTR3]]
// X86WINPRMTD_STRICT-NEXT: [[TMP0:%.*]] = call double @llvm.fabs.f64(double [[CONV]]) #[[ATTR3]]
// X86WINPRMTD_STRICT-NEXT: [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[CONV1]]) #[[ATTR3]]
// X86WINPRMTD_STRICT-NEXT: [[TMP0:%.*]] = call double @llvm.fabs.f64(double [[CONV]]) #[[ATTR4]]
// X86WINPRMTD_STRICT-NEXT: [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[CONV1]]) #[[ATTR4]]
// X86WINPRMTD_STRICT-NEXT: [[ABS_CMP:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP0]], double [[TMP1]], metadata !"ugt", metadata !"fpexcept.strict") #[[ATTR3]]
// X86WINPRMTD_STRICT-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]]
// X86WINPRMTD_STRICT: abs_rhsr_greater_or_equal_abs_rhsi:
Expand Down Expand Up @@ -4038,8 +4038,8 @@ _Complex long double mulld(_Complex long double a, _Complex long double b) {
// PRMTD_STRICT-NEXT: [[C_IMAG:%.*]] = load float, ptr [[C_IMAGP]], align 4
// PRMTD_STRICT-NEXT: [[CONV:%.*]] = call x86_fp80 @llvm.experimental.constrained.fpext.f80.f32(float [[C_REAL]], metadata !"fpexcept.strict") #[[ATTR4]]
// PRMTD_STRICT-NEXT: [[CONV1:%.*]] = call x86_fp80 @llvm.experimental.constrained.fpext.f80.f32(float [[C_IMAG]], metadata !"fpexcept.strict") #[[ATTR4]]
// PRMTD_STRICT-NEXT: [[TMP0:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[CONV]]) #[[ATTR4]]
// PRMTD_STRICT-NEXT: [[TMP1:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[CONV1]]) #[[ATTR4]]
// PRMTD_STRICT-NEXT: [[TMP0:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[CONV]]) #[[ATTR5]]
// PRMTD_STRICT-NEXT: [[TMP1:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[CONV1]]) #[[ATTR5]]
// PRMTD_STRICT-NEXT: [[ABS_CMP:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f80(x86_fp80 [[TMP0]], x86_fp80 [[TMP1]], metadata !"ugt", metadata !"fpexcept.strict") #[[ATTR4]]
// PRMTD_STRICT-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]]
// PRMTD_STRICT: abs_rhsr_greater_or_equal_abs_rhsi:
Expand Down
6 changes: 3 additions & 3 deletions clang/test/CodeGen/strictfp-elementwise-bulitins.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ float4 strict_fadd(float4 a, float4 b) {
// CHECK-LABEL: define dso_local noundef <4 x float> @_Z22strict_elementwise_absDv4_f
// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[ELT_ABS:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A]]) #[[ATTR4]]
// CHECK-NEXT: [[ELT_ABS:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A]]) #[[ATTR5:[0-9]+]]
// CHECK-NEXT: ret <4 x float> [[ELT_ABS]]
//
float4 strict_elementwise_abs(float4 a) {
Expand Down Expand Up @@ -300,7 +300,7 @@ float4 strict_elementwise_trunc(float4 a) {
// CHECK-LABEL: define dso_local noundef <4 x float> @_Z31strict_elementwise_canonicalizeDv4_f
// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[ELT_CANONICALIZE:%.*]] = tail call <4 x float> @llvm.canonicalize.v4f32(<4 x float> [[A]]) #[[ATTR4]]
// CHECK-NEXT: [[ELT_CANONICALIZE:%.*]] = tail call <4 x float> @llvm.canonicalize.v4f32(<4 x float> [[A]]) #[[ATTR5]]
// CHECK-NEXT: ret <4 x float> [[ELT_CANONICALIZE]]
//
float4 strict_elementwise_canonicalize(float4 a) {
Expand All @@ -310,7 +310,7 @@ float4 strict_elementwise_canonicalize(float4 a) {
// CHECK-LABEL: define dso_local noundef <4 x float> @_Z27strict_elementwise_copysignDv4_fS_
// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.copysign.v4f32(<4 x float> [[A]], <4 x float> [[B]]) #[[ATTR4]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.copysign.v4f32(<4 x float> [[A]], <4 x float> [[B]]) #[[ATTR5]]
// CHECK-NEXT: ret <4 x float> [[TMP0]]
//
float4 strict_elementwise_copysign(float4 a, float4 b) {
Expand Down
14 changes: 7 additions & 7 deletions clang/test/CodeGen/strictfp_builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,21 +31,21 @@ void p(char *str, int x) {
// CHECK-NEXT: [[D_ADDR:%.*]] = alloca double, align 8
// CHECK-NEXT: store double [[D:%.*]], ptr [[D_ADDR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[D_ADDR]], align 8
// CHECK-NEXT: [[ISZERO:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP0]], double 0.000000e+00, metadata !"oeq", metadata !"fpexcept.strict") #[[ATTR4]]
// CHECK-NEXT: [[ISZERO:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP0]], double 0.000000e+00, metadata !"oeq", metadata !"fpexcept.strict") #[[ATTR5:[0-9]+]] [ "fp.except"(metadata !"strict") ]
// CHECK-NEXT: br i1 [[ISZERO]], label [[FPCLASSIFY_END:%.*]], label [[FPCLASSIFY_NOT_ZERO:%.*]]
// CHECK: fpclassify_end:
// CHECK-NEXT: [[FPCLASSIFY_RESULT:%.*]] = phi i32 [ 4, [[ENTRY:%.*]] ], [ 0, [[FPCLASSIFY_NOT_ZERO]] ], [ 1, [[FPCLASSIFY_NOT_NAN:%.*]] ], [ [[TMP2:%.*]], [[FPCLASSIFY_NOT_INF:%.*]] ]
// CHECK-NEXT: call void @p(ptr noundef @.str.1, i32 noundef [[FPCLASSIFY_RESULT]]) #[[ATTR4]]
// CHECK-NEXT: ret void
// CHECK: fpclassify_not_zero:
// CHECK-NEXT: [[CMP:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP0]], double [[TMP0]], metadata !"uno", metadata !"fpexcept.strict") #[[ATTR4]]
// CHECK-NEXT: [[CMP:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP0]], double [[TMP0]], metadata !"uno", metadata !"fpexcept.strict") #[[ATTR5]] [ "fp.except"(metadata !"strict") ]
// CHECK-NEXT: br i1 [[CMP]], label [[FPCLASSIFY_END]], label [[FPCLASSIFY_NOT_NAN]]
// CHECK: fpclassify_not_nan:
// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[TMP0]]) #[[ATTR5:[0-9]+]]
// CHECK-NEXT: [[ISINF:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP1]], double 0x7FF0000000000000, metadata !"oeq", metadata !"fpexcept.strict") #[[ATTR4]]
// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[TMP0]]) #[[ATTR6:[0-9]+]]
// CHECK-NEXT: [[ISINF:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP1]], double 0x7FF0000000000000, metadata !"oeq", metadata !"fpexcept.strict") #[[ATTR5]] [ "fp.except"(metadata !"strict") ]
// CHECK-NEXT: br i1 [[ISINF]], label [[FPCLASSIFY_END]], label [[FPCLASSIFY_NOT_INF]]
// CHECK: fpclassify_not_inf:
// CHECK-NEXT: [[ISNORMAL:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP1]], double 0x10000000000000, metadata !"uge", metadata !"fpexcept.strict") #[[ATTR4]]
// CHECK-NEXT: [[ISNORMAL:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP1]], double 0x10000000000000, metadata !"uge", metadata !"fpexcept.strict") #[[ATTR5]] [ "fp.except"(metadata !"strict") ]
// CHECK-NEXT: [[TMP2]] = select i1 [[ISNORMAL]], i32 2, i32 3
// CHECK-NEXT: br label [[FPCLASSIFY_END]]
//
Expand Down Expand Up @@ -156,8 +156,8 @@ void test_double_isfinite(double d) {
// CHECK-NEXT: [[D_ADDR:%.*]] = alloca double, align 8
// CHECK-NEXT: store double [[D:%.*]], ptr [[D_ADDR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[D_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[TMP0]]) #[[ATTR5]]
// CHECK-NEXT: [[ISINF:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP1]], double 0x7FF0000000000000, metadata !"oeq", metadata !"fpexcept.strict") #[[ATTR4]]
// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[TMP0]]) #[[ATTR6]]
// CHECK-NEXT: [[ISINF:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP1]], double 0x7FF0000000000000, metadata !"oeq", metadata !"fpexcept.strict") #[[ATTR5]] [ "fp.except"(metadata !"strict") ]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast double [[TMP0]] to i64
// CHECK-NEXT: [[TMP3:%.*]] = icmp slt i64 [[TMP2]], 0
// CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 -1, i32 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ kernel void device_side_enqueue(global float *a, global float *b, int i) {
// STRICTFP-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[BLOCK_CAPTURE_ADDR1]], align 4
// STRICTFP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[TMP0]], i32 [[TMP1]]
// STRICTFP-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(1) [[ARRAYIDX]], align 4
// STRICTFP-NEXT: [[TMP3:%.*]] = call float @llvm.experimental.constrained.fmuladd.f32(float 4.000000e+00, float [[TMP2]], float 1.000000e+00, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR5]]
// STRICTFP-NEXT: [[TMP3:%.*]] = call float @llvm.experimental.constrained.fmuladd.f32(float 4.000000e+00, float [[TMP2]], float 1.000000e+00, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR6:[0-9]+]] [ "fp.control"(metadata !"rte"), "fp.except"(metadata !"strict") ]
// STRICTFP-NEXT: [[BLOCK_CAPTURE_ADDR2:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr addrspace(4), ptr addrspace(1), i32, ptr addrspace(1) }>, ptr addrspace(4) [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
// STRICTFP-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[BLOCK_CAPTURE_ADDR2]], align 4
// STRICTFP-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr addrspace(4), ptr addrspace(1), i32, ptr addrspace(1) }>, ptr addrspace(4) [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
Expand Down Expand Up @@ -174,6 +174,7 @@ kernel void device_side_enqueue(global float *a, global float *b, int i) {
// STRICTFP: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind strictfp willreturn memory(inaccessiblemem: readwrite) }
// STRICTFP: attributes #[[ATTR4]] = { convergent nounwind "stack-protector-buffer-size"="8" }
// STRICTFP: attributes #[[ATTR5]] = { strictfp }
// STRICTFP: attributes #[[ATTR6]] = { strictfp memory(inaccessiblemem: readwrite) }
//.
// SPIR32: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
// SPIR32: [[META1:![0-9]+]] = !{i32 2, i32 0}
Expand Down
58 changes: 51 additions & 7 deletions llvm/docs/LangRef.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3057,6 +3057,49 @@ A "convergencectrl" operand bundle is only valid on a ``convergent`` operation.
When present, the operand bundle must contain exactly one value of token type.
See the :doc:`ConvergentOperations` document for details.

.. _ob_fp:

Floating-point Operand Bundles
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

These operand bundles are used for calls that involve floating-point
operations and interact with :ref:`floating-point environment <floatenv>` or
depend on floating-point options, such as rounding mode, denormal modes, etc.
There are two kinds of such operand bundles, which represent the value of
floating-point control modes and the treatment of status bits respectively.

An operand bundle tagged with "fp.control" contains information about the
control modes used for the operation execution. Currently, only rounding mode is
supported. It is represented by a metadata string value, which specifies the
rounding mode to be used for the operation evaluation. Possible values are:

::

"rtz" - toward zero
"rte" - to nearest, ties to even
"rtp" - toward positive infinity
"rtn" - toward negative infinity
"rmm" - to nearest, ties away from zero
"dyn" - rounding mode is taken from control register

If "fp.control" is absent, the default rounding rounding mode is taken from the
control register (dynamic rounding). In the particular case of
:ref:`default floating-point environment <floatenv>`, it must be rounding to
nearest, ties to even.

An operand bundle tagged with "fp.except" may be associated with operations
that can read or write floating-point exception flags. It contains a single
metadata string value, which can have one of the following values:

::

"ignore"
"strict"
"maytrap"

It has the same meaning as the corresponding argument in
:ref:`constrained intrinsics <constrainedfp>`.

.. _moduleasm:

Module-Level Inline Assembly
Expand Down Expand Up @@ -3751,9 +3794,9 @@ round-to-nearest rounding mode, and subnormals are assumed to be preserved.
Running LLVM code in an environment where these assumptions are not met
typically leads to undefined behavior. The ``strictfp`` and ``denormal-fp-math``
attributes as well as :ref:`Constrained Floating-Point Intrinsics
<constrainedfp>` can be used to weaken LLVM's assumptions and ensure defined
behavior in non-default floating-point environments; see their respective
documentation for details.
<constrainedfp>` or :ref:`floating-point operand bundles<ob_fp>` can be used to
weaken LLVM's assumptions and ensure defined behavior in non-default
floating-point environments; see their respective documentation for details.

.. _floatnan:

Expand Down Expand Up @@ -3805,7 +3848,8 @@ Floating-point math operations are allowed to treat all NaNs as if they were
quiet NaNs. For example, "pow(1.0, SNaN)" may be simplified to 1.0.

Code that requires different behavior than this should use the
:ref:`Constrained Floating-Point Intrinsics <constrainedfp>`.
:ref:`Constrained Floating-Point Intrinsics <constrainedfp>` or
:ref:`floating-point operand bundles<ob_fp>`.
In particular, constrained intrinsics rule out the "Unchanged NaN propagation"
case; they are guaranteed to return a QNaN.

Expand Down Expand Up @@ -17252,7 +17296,7 @@ would, and handles error conditions in the same way. Since LLVM assumes the
:ref:`default floating-point environment <floatenv>`, the rounding mode is
assumed to be set to "nearest", so halfway cases are rounded to the even
integer. Use :ref:`Constrained Floating-Point Intrinsics <constrainedfp>`
to avoid that assumption.
or :ref:`floating-point operand bundles<ob_fp>` to avoid that assumption.

.. _int_nearbyint:

Expand Down Expand Up @@ -17293,8 +17337,8 @@ This function returns the same values as the libm ``nearbyint``
functions would, and handles error conditions in the same way. Since LLVM
assumes the :ref:`default floating-point environment <floatenv>`, the rounding
mode is assumed to be set to "nearest", so halfway cases are rounded to the even
integer. Use :ref:`Constrained Floating-Point Intrinsics <constrainedfp>` to
avoid that assumption.
integer. Use :ref:`Constrained Floating-Point Intrinsics <constrainedfp>` or
:ref:`floating-point operand bundles<ob_fp>` to avoid that assumption.

.. _int_round:

Expand Down
1 change: 1 addition & 0 deletions llvm/docs/ReleaseNotes.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ Changes to the LLVM IR
removed:

* `mul`
* Floating-point operand bundles have been added.

Changes to LLVM infrastructure
------------------------------
Expand Down
Loading