llvm · spavloff · Aug 22, 2024 · Sep 25, 2024 · Oct 27, 2024 · Oct 28, 2024
diff --git a/clang/test/CodeGen/X86/strictfp_builtins.c b/clang/test/CodeGen/X86/strictfp_builtins.c
@@ -27,7 +27,7 @@ void p(char *str, int x) {
 // CHECK-NEXT:    [[LD_ADDR:%.*]] = alloca x86_fp80, align 16
 // CHECK-NEXT:    store x86_fp80 [[LD:%.*]], ptr [[LD_ADDR]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load x86_fp80, ptr [[LD_ADDR]], align 16
-// CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f80(x86_fp80 [[TMP0]], i32 516) #[[ATTR3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f80(x86_fp80 [[TMP0]], i32 516) #[[ATTR4:[0-9]+]]
 // CHECK-NEXT:    [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
 // CHECK-NEXT:    call void @p(ptr noundef @.str.1, i32 noundef [[TMP2]]) #[[ATTR3]]
 // CHECK-NEXT:    ret void
@@ -43,7 +43,7 @@ void test_long_double_isinf(long double ld) {
 // CHECK-NEXT:    [[LD_ADDR:%.*]] = alloca x86_fp80, align 16
 // CHECK-NEXT:    store x86_fp80 [[LD:%.*]], ptr [[LD_ADDR]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load x86_fp80, ptr [[LD_ADDR]], align 16
-// CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f80(x86_fp80 [[TMP0]], i32 504) #[[ATTR3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f80(x86_fp80 [[TMP0]], i32 504) #[[ATTR4]]
 // CHECK-NEXT:    [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
 // CHECK-NEXT:    call void @p(ptr noundef @.str.2, i32 noundef [[TMP2]]) #[[ATTR3]]
 // CHECK-NEXT:    ret void
@@ -59,7 +59,7 @@ void test_long_double_isfinite(long double ld) {
 // CHECK-NEXT:    [[LD_ADDR:%.*]] = alloca x86_fp80, align 16
 // CHECK-NEXT:    store x86_fp80 [[LD:%.*]], ptr [[LD_ADDR]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load x86_fp80, ptr [[LD_ADDR]], align 16
-// CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f80(x86_fp80 [[TMP0]], i32 3) #[[ATTR3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f80(x86_fp80 [[TMP0]], i32 3) #[[ATTR4]]
 // CHECK-NEXT:    [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
 // CHECK-NEXT:    call void @p(ptr noundef @.str.3, i32 noundef [[TMP2]]) #[[ATTR3]]
 // CHECK-NEXT:    ret void

diff --git a/clang/test/CodeGen/cx-complex-range.c b/clang/test/CodeGen/cx-complex-range.c
@@ -1575,8 +1575,8 @@ _Complex float mulf(_Complex float a, _Complex float b) {
 // X86WINPRMTD_STRICT-NEXT:    [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8
 // X86WINPRMTD_STRICT-NEXT:    [[B_IMAGP:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[B]], i32 0, i32 1
 // X86WINPRMTD_STRICT-NEXT:    [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8
-// X86WINPRMTD_STRICT-NEXT:    [[TMP0:%.*]] = call double @llvm.fabs.f64(double [[B_REAL]]) #[[ATTR3]]
-// X86WINPRMTD_STRICT-NEXT:    [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[B_IMAG]]) #[[ATTR3]]
+// X86WINPRMTD_STRICT-NEXT:    [[TMP0:%.*]] = call double @llvm.fabs.f64(double [[B_REAL]]) #[[ATTR4:[0-9]+]]
+// X86WINPRMTD_STRICT-NEXT:    [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[B_IMAG]]) #[[ATTR4]]
 // X86WINPRMTD_STRICT-NEXT:    [[ABS_CMP:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP0]], double [[TMP1]], metadata !"ugt", metadata !"fpexcept.strict") #[[ATTR3]]
 // X86WINPRMTD_STRICT-NEXT:    br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]]
 // X86WINPRMTD_STRICT:       abs_rhsr_greater_or_equal_abs_rhsi:
@@ -2658,8 +2658,8 @@ _Complex double muld(_Complex double a, _Complex double b) {
 // X86WINPRMTD_STRICT-NEXT:    [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8
 // X86WINPRMTD_STRICT-NEXT:    [[B_IMAGP:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[B]], i32 0, i32 1
 // X86WINPRMTD_STRICT-NEXT:    [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8
-// X86WINPRMTD_STRICT-NEXT:    [[TMP0:%.*]] = call double @llvm.fabs.f64(double [[B_REAL]]) #[[ATTR3]]
-// X86WINPRMTD_STRICT-NEXT:    [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[B_IMAG]]) #[[ATTR3]]
+// X86WINPRMTD_STRICT-NEXT:    [[TMP0:%.*]] = call double @llvm.fabs.f64(double [[B_REAL]]) #[[ATTR4]]
+// X86WINPRMTD_STRICT-NEXT:    [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[B_IMAG]]) #[[ATTR4]]
 // X86WINPRMTD_STRICT-NEXT:    [[ABS_CMP:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP0]], double [[TMP1]], metadata !"ugt", metadata !"fpexcept.strict") #[[ATTR3]]
 // X86WINPRMTD_STRICT-NEXT:    br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]]
 // X86WINPRMTD_STRICT:       abs_rhsr_greater_or_equal_abs_rhsi:
@@ -2713,8 +2713,8 @@ _Complex double muld(_Complex double a, _Complex double b) {
 // PRMTD_STRICT-NEXT:    [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16
 // PRMTD_STRICT-NEXT:    [[B_IMAGP:%.*]] = getelementptr inbounds nuw { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1
 // PRMTD_STRICT-NEXT:    [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16
-// PRMTD_STRICT-NEXT:    [[TMP0:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[B_REAL]]) #[[ATTR4]]
-// PRMTD_STRICT-NEXT:    [[TMP1:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[B_IMAG]]) #[[ATTR4]]
+// PRMTD_STRICT-NEXT:    [[TMP0:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[B_REAL]]) #[[ATTR5:[0-9]+]]
+// PRMTD_STRICT-NEXT:    [[TMP1:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[B_IMAG]]) #[[ATTR5]]
 // PRMTD_STRICT-NEXT:    [[ABS_CMP:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f80(x86_fp80 [[TMP0]], x86_fp80 [[TMP1]], metadata !"ugt", metadata !"fpexcept.strict") #[[ATTR4]]
 // PRMTD_STRICT-NEXT:    br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]]
 // PRMTD_STRICT:       abs_rhsr_greater_or_equal_abs_rhsi:
@@ -3961,8 +3961,8 @@ _Complex long double mulld(_Complex long double a, _Complex long double b) {
 // X86WINPRMTD_STRICT-NEXT:    [[C_IMAG:%.*]] = load float, ptr [[C_IMAGP]], align 4
 // X86WINPRMTD_STRICT-NEXT:    [[CONV:%.*]] = call double @llvm.experimental.constrained.fpext.f64.f32(float [[C_REAL]], metadata !"fpexcept.strict") #[[ATTR3]]
 // X86WINPRMTD_STRICT-NEXT:    [[CONV1:%.*]] = call double @llvm.experimental.constrained.fpext.f64.f32(float [[C_IMAG]], metadata !"fpexcept.strict") #[[ATTR3]]
-// X86WINPRMTD_STRICT-NEXT:    [[TMP0:%.*]] = call double @llvm.fabs.f64(double [[CONV]]) #[[ATTR3]]
-// X86WINPRMTD_STRICT-NEXT:    [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[CONV1]]) #[[ATTR3]]
+// X86WINPRMTD_STRICT-NEXT:    [[TMP0:%.*]] = call double @llvm.fabs.f64(double [[CONV]]) #[[ATTR4]]
+// X86WINPRMTD_STRICT-NEXT:    [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[CONV1]]) #[[ATTR4]]
 // X86WINPRMTD_STRICT-NEXT:    [[ABS_CMP:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP0]], double [[TMP1]], metadata !"ugt", metadata !"fpexcept.strict") #[[ATTR3]]
 // X86WINPRMTD_STRICT-NEXT:    br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]]
 // X86WINPRMTD_STRICT:       abs_rhsr_greater_or_equal_abs_rhsi:
@@ -4038,8 +4038,8 @@ _Complex long double mulld(_Complex long double a, _Complex long double b) {
 // PRMTD_STRICT-NEXT:    [[C_IMAG:%.*]] = load float, ptr [[C_IMAGP]], align 4
 // PRMTD_STRICT-NEXT:    [[CONV:%.*]] = call x86_fp80 @llvm.experimental.constrained.fpext.f80.f32(float [[C_REAL]], metadata !"fpexcept.strict") #[[ATTR4]]
 // PRMTD_STRICT-NEXT:    [[CONV1:%.*]] = call x86_fp80 @llvm.experimental.constrained.fpext.f80.f32(float [[C_IMAG]], metadata !"fpexcept.strict") #[[ATTR4]]
-// PRMTD_STRICT-NEXT:    [[TMP0:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[CONV]]) #[[ATTR4]]
-// PRMTD_STRICT-NEXT:    [[TMP1:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[CONV1]]) #[[ATTR4]]
+// PRMTD_STRICT-NEXT:    [[TMP0:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[CONV]]) #[[ATTR5]]
+// PRMTD_STRICT-NEXT:    [[TMP1:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[CONV1]]) #[[ATTR5]]
 // PRMTD_STRICT-NEXT:    [[ABS_CMP:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f80(x86_fp80 [[TMP0]], x86_fp80 [[TMP1]], metadata !"ugt", metadata !"fpexcept.strict") #[[ATTR4]]
 // PRMTD_STRICT-NEXT:    br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]]
 // PRMTD_STRICT:       abs_rhsr_greater_or_equal_abs_rhsi:

diff --git a/clang/test/CodeGen/strictfp-elementwise-bulitins.cpp b/clang/test/CodeGen/strictfp-elementwise-bulitins.cpp
@@ -20,7 +20,7 @@ float4 strict_fadd(float4 a, float4 b) {
 // CHECK-LABEL: define dso_local noundef <4 x float> @_Z22strict_elementwise_absDv4_f
 // CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[ELT_ABS:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A]]) #[[ATTR4]]
+// CHECK-NEXT:    [[ELT_ABS:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A]]) #[[ATTR5:[0-9]+]]
 // CHECK-NEXT:    ret <4 x float> [[ELT_ABS]]
 //
 float4 strict_elementwise_abs(float4 a) {
@@ -300,7 +300,7 @@ float4 strict_elementwise_trunc(float4 a) {
 // CHECK-LABEL: define dso_local noundef <4 x float> @_Z31strict_elementwise_canonicalizeDv4_f
 // CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[ELT_CANONICALIZE:%.*]] = tail call <4 x float> @llvm.canonicalize.v4f32(<4 x float> [[A]]) #[[ATTR4]]
+// CHECK-NEXT:    [[ELT_CANONICALIZE:%.*]] = tail call <4 x float> @llvm.canonicalize.v4f32(<4 x float> [[A]]) #[[ATTR5]]
 // CHECK-NEXT:    ret <4 x float> [[ELT_CANONICALIZE]]
 //
 float4 strict_elementwise_canonicalize(float4 a) {
@@ -310,7 +310,7 @@ float4 strict_elementwise_canonicalize(float4 a) {
 // CHECK-LABEL: define dso_local noundef <4 x float> @_Z27strict_elementwise_copysignDv4_fS_
 // CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x float> @llvm.copysign.v4f32(<4 x float> [[A]], <4 x float> [[B]]) #[[ATTR4]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x float> @llvm.copysign.v4f32(<4 x float> [[A]], <4 x float> [[B]]) #[[ATTR5]]
 // CHECK-NEXT:    ret <4 x float> [[TMP0]]
 //
 float4 strict_elementwise_copysign(float4 a, float4 b) {

diff --git a/clang/test/CodeGen/strictfp_builtins.c b/clang/test/CodeGen/strictfp_builtins.c
@@ -31,21 +31,21 @@ void p(char *str, int x) {
 // CHECK-NEXT:    [[D_ADDR:%.*]] = alloca double, align 8
 // CHECK-NEXT:    store double [[D:%.*]], ptr [[D_ADDR]], align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[D_ADDR]], align 8
-// CHECK-NEXT:    [[ISZERO:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP0]], double 0.000000e+00, metadata !"oeq", metadata !"fpexcept.strict") #[[ATTR4]]
+// CHECK-NEXT:    [[ISZERO:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP0]], double 0.000000e+00, metadata !"oeq", metadata !"fpexcept.strict") #[[ATTR5:[0-9]+]] [ "fp.except"(metadata !"strict") ]
 // CHECK-NEXT:    br i1 [[ISZERO]], label [[FPCLASSIFY_END:%.*]], label [[FPCLASSIFY_NOT_ZERO:%.*]]
 // CHECK:       fpclassify_end:
 // CHECK-NEXT:    [[FPCLASSIFY_RESULT:%.*]] = phi i32 [ 4, [[ENTRY:%.*]] ], [ 0, [[FPCLASSIFY_NOT_ZERO]] ], [ 1, [[FPCLASSIFY_NOT_NAN:%.*]] ], [ [[TMP2:%.*]], [[FPCLASSIFY_NOT_INF:%.*]] ]
 // CHECK-NEXT:    call void @p(ptr noundef @.str.1, i32 noundef [[FPCLASSIFY_RESULT]]) #[[ATTR4]]
 // CHECK-NEXT:    ret void
 // CHECK:       fpclassify_not_zero:
-// CHECK-NEXT:    [[CMP:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP0]], double [[TMP0]], metadata !"uno", metadata !"fpexcept.strict") #[[ATTR4]]
+// CHECK-NEXT:    [[CMP:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP0]], double [[TMP0]], metadata !"uno", metadata !"fpexcept.strict") #[[ATTR5]] [ "fp.except"(metadata !"strict") ]
 // CHECK-NEXT:    br i1 [[CMP]], label [[FPCLASSIFY_END]], label [[FPCLASSIFY_NOT_NAN]]
 // CHECK:       fpclassify_not_nan:
-// CHECK-NEXT:    [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[TMP0]]) #[[ATTR5:[0-9]+]]
-// CHECK-NEXT:    [[ISINF:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP1]], double 0x7FF0000000000000, metadata !"oeq", metadata !"fpexcept.strict") #[[ATTR4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[TMP0]]) #[[ATTR6:[0-9]+]]
+// CHECK-NEXT:    [[ISINF:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP1]], double 0x7FF0000000000000, metadata !"oeq", metadata !"fpexcept.strict") #[[ATTR5]] [ "fp.except"(metadata !"strict") ]
 // CHECK-NEXT:    br i1 [[ISINF]], label [[FPCLASSIFY_END]], label [[FPCLASSIFY_NOT_INF]]
 // CHECK:       fpclassify_not_inf:
-// CHECK-NEXT:    [[ISNORMAL:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP1]], double 0x10000000000000, metadata !"uge", metadata !"fpexcept.strict") #[[ATTR4]]
+// CHECK-NEXT:    [[ISNORMAL:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP1]], double 0x10000000000000, metadata !"uge", metadata !"fpexcept.strict") #[[ATTR5]] [ "fp.except"(metadata !"strict") ]
 // CHECK-NEXT:    [[TMP2]] = select i1 [[ISNORMAL]], i32 2, i32 3
 // CHECK-NEXT:    br label [[FPCLASSIFY_END]]
 //
@@ -156,8 +156,8 @@ void test_double_isfinite(double d) {
 // CHECK-NEXT:    [[D_ADDR:%.*]] = alloca double, align 8
 // CHECK-NEXT:    store double [[D:%.*]], ptr [[D_ADDR]], align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[D_ADDR]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[TMP0]]) #[[ATTR5]]
-// CHECK-NEXT:    [[ISINF:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP1]], double 0x7FF0000000000000, metadata !"oeq", metadata !"fpexcept.strict") #[[ATTR4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[TMP0]]) #[[ATTR6]]
+// CHECK-NEXT:    [[ISINF:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP1]], double 0x7FF0000000000000, metadata !"oeq", metadata !"fpexcept.strict") #[[ATTR5]] [ "fp.except"(metadata !"strict") ]
 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast double [[TMP0]] to i64
 // CHECK-NEXT:    [[TMP3:%.*]] = icmp slt i64 [[TMP2]], 0
 // CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 -1, i32 1

diff --git a/clang/test/CodeGenOpenCL/cl20-device-side-enqueue-attributes.cl b/clang/test/CodeGenOpenCL/cl20-device-side-enqueue-attributes.cl
@@ -144,7 +144,7 @@ kernel void device_side_enqueue(global float *a, global float *b, int i) {
 // STRICTFP-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[BLOCK_CAPTURE_ADDR1]], align 4
 // STRICTFP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[TMP0]], i32 [[TMP1]]
 // STRICTFP-NEXT:    [[TMP2:%.*]] = load float, ptr addrspace(1) [[ARRAYIDX]], align 4
-// STRICTFP-NEXT:    [[TMP3:%.*]] = call float @llvm.experimental.constrained.fmuladd.f32(float 4.000000e+00, float [[TMP2]], float 1.000000e+00, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR5]]
+// STRICTFP-NEXT:    [[TMP3:%.*]] = call float @llvm.experimental.constrained.fmuladd.f32(float 4.000000e+00, float [[TMP2]], float 1.000000e+00, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR6:[0-9]+]] [ "fp.control"(metadata !"rte"), "fp.except"(metadata !"strict") ]
 // STRICTFP-NEXT:    [[BLOCK_CAPTURE_ADDR2:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr addrspace(4), ptr addrspace(1), i32, ptr addrspace(1) }>, ptr addrspace(4) [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
 // STRICTFP-NEXT:    [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[BLOCK_CAPTURE_ADDR2]], align 4
 // STRICTFP-NEXT:    [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr addrspace(4), ptr addrspace(1), i32, ptr addrspace(1) }>, ptr addrspace(4) [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
@@ -174,6 +174,7 @@ kernel void device_side_enqueue(global float *a, global float *b, int i) {
 // STRICTFP: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind strictfp willreturn memory(inaccessiblemem: readwrite) }
 // STRICTFP: attributes #[[ATTR4]] = { convergent nounwind "stack-protector-buffer-size"="8" }
 // STRICTFP: attributes #[[ATTR5]] = { strictfp }
+// STRICTFP: attributes #[[ATTR6]] = { strictfp memory(inaccessiblemem: readwrite) }
 //.
 // SPIR32: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
 // SPIR32: [[META1:![0-9]+]] = !{i32 2, i32 0}

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
@@ -3057,6 +3057,49 @@ A "convergencectrl" operand bundle is only valid on a ``convergent`` operation.
 When present, the operand bundle must contain exactly one value of token type.
 See the :doc:`ConvergentOperations` document for details.
 
+.. _ob_fp:
+
+Floating-point Operand Bundles
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+These operand bundles are used for calls that involve floating-point
+operations and interact with :ref:`floating-point environment <floatenv>` or
+depend on floating-point options, such as rounding mode, denormal modes, etc.
+There are two kinds of such operand bundles, which represent the value of
+floating-point control modes and the treatment of status bits respectively.
+
+An operand bundle tagged with "fp.control" contains information about the
+control modes used for the operation execution. Currently, only rounding mode is
+supported. It is represented by a metadata string value, which specifies the
+rounding mode to be used for the operation evaluation. Possible values are:
+
+::
+
+    "rtz"  - toward zero
+    "rte"  - to nearest, ties to even
+    "rtp"  - toward positive infinity
+    "rtn"  - toward negative infinity
+    "rmm"  - to nearest, ties away from zero
+    "dyn"  - rounding mode is taken from control register
+
+If "fp.control" is absent, the default rounding rounding mode is taken from the
+control register (dynamic rounding). In the particular case of
+:ref:`default floating-point environment <floatenv>`, it must be rounding to
+nearest, ties to even.
+
+An operand bundle tagged with "fp.except" may be associated with operations
+that can read or write floating-point exception flags. It contains a single
+metadata string value, which can have one of the following values:
+
+::
+
+    "ignore"
+    "strict"
+    "maytrap"
+
+It has the same meaning as the corresponding argument in
+:ref:`constrained intrinsics <constrainedfp>`.
+
 .. _moduleasm:
 
 Module-Level Inline Assembly
@@ -3751,9 +3794,9 @@ round-to-nearest rounding mode, and subnormals are assumed to be preserved.
 Running LLVM code in an environment where these assumptions are not met
 typically leads to undefined behavior. The ``strictfp`` and ``denormal-fp-math``
 attributes as well as :ref:`Constrained Floating-Point Intrinsics
-<constrainedfp>` can be used to weaken LLVM's assumptions and ensure defined
-behavior in non-default floating-point environments; see their respective
-documentation for details.
+<constrainedfp>` or :ref:`floating-point operand bundles<ob_fp>` can be used to
+weaken LLVM's assumptions and ensure defined behavior in non-default
+floating-point environments; see their respective documentation for details.
 
 .. _floatnan:
 
@@ -3805,7 +3848,8 @@ Floating-point math operations are allowed to treat all NaNs as if they were
 quiet NaNs. For example, "pow(1.0, SNaN)" may be simplified to 1.0.
 
 Code that requires different behavior than this should use the
-:ref:`Constrained Floating-Point Intrinsics <constrainedfp>`.
+:ref:`Constrained Floating-Point Intrinsics <constrainedfp>` or
+:ref:`floating-point operand bundles<ob_fp>`.
 In particular, constrained intrinsics rule out the "Unchanged NaN propagation"
 case; they are guaranteed to return a QNaN.
 
@@ -17252,7 +17296,7 @@ would, and handles error conditions in the same way. Since LLVM assumes the
 :ref:`default floating-point environment <floatenv>`, the rounding mode is
 assumed to be set to "nearest", so halfway cases are rounded to the even
 integer. Use :ref:`Constrained Floating-Point Intrinsics <constrainedfp>`
-to avoid that assumption.
+or :ref:`floating-point operand bundles<ob_fp>` to avoid that assumption.
 
 .. _int_nearbyint:
 
@@ -17293,8 +17337,8 @@ This function returns the same values as the libm ``nearbyint``
 functions would, and handles error conditions in the same way. Since LLVM
 assumes the :ref:`default floating-point environment <floatenv>`, the rounding
 mode is assumed to be set to "nearest", so halfway cases are rounded to the even
-integer. Use :ref:`Constrained Floating-Point Intrinsics <constrainedfp>` to
-avoid that assumption.
+integer. Use :ref:`Constrained Floating-Point Intrinsics <constrainedfp>` or
+:ref:`floating-point operand bundles<ob_fp>` to avoid that assumption.
 
 .. _int_round:
 

diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
@@ -61,6 +61,7 @@ Changes to the LLVM IR
   removed:
 
   * `mul`
+* Floating-point operand bundles have been added.
 
 Changes to LLVM infrastructure
 ------------------------------
-Original file line number
+Diff line change
@@ Expand Up / @@ -61,6 +61,7 @@ Changes to the LLVM IR @@
       removed:
       * `mul`
+    * Floating-point operand bundles have been added.
     Changes to LLVM infrastructure
     ------------------------------
@@ Expand Down @@