[X86] Do not apply fast-math to the logic intrinsics #118603

phoebewang · 2024-12-04T08:28:40Z

Fixes: #118152

Fixes: llvm#118152

llvmbot · 2024-12-04T08:29:15Z

@llvm/pr-subscribers-backend-x86

@llvm/pr-subscribers-clang

Author: Phoebe Wang (phoebewang)

Changes

Fixes: #118152

Full diff: https://github.com/llvm/llvm-project/pull/118603.diff

3 Files Affected:

(modified) clang/lib/Headers/avx512dqintrin.h (+3)
(modified) clang/lib/Headers/avxintrin.h (+3)
(modified) clang/lib/Headers/xmmintrin.h (+3)

diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h
index 88b48e3a32070b..d3ae91e29f9afb 100644
--- a/clang/lib/Headers/avx512dqintrin.h
+++ b/clang/lib/Headers/avx512dqintrin.h
@@ -167,6 +167,8 @@ _mm512_maskz_mullo_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
                                              (__v8di)_mm512_setzero_si512());
 }
 
+#pragma float_control(push)
+#pragma float_control(precise, on)
 static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_xor_pd(__m512d __A, __m512d __B) {
   return (__m512d)((__v8du)__A ^ (__v8du)__B);
@@ -318,6 +320,7 @@ _mm512_maskz_andnot_ps(__mmask16 __U, __m512 __A, __m512 __B) {
                                              (__v16sf)_mm512_andnot_ps(__A, __B),
                                              (__v16sf)_mm512_setzero_ps());
 }
+#pragma float_control(pop)
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_cvtpd_epi64 (__m512d __A) {
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 8e497a98234994..9fd3dbb5519e51 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -542,6 +542,8 @@ _mm256_rcp_ps(__m256 __a)
 /// \returns A 256-bit vector of [8 x float] containing the rounded down values.
 #define _mm256_floor_ps(V) _mm256_round_ps((V), _MM_FROUND_FLOOR)
 
+#pragma float_control(push)
+#pragma float_control(precise, on)
 /* Logical */
 /// Performs a bitwise AND of two 256-bit vectors of [4 x double].
 ///
@@ -692,6 +694,7 @@ _mm256_xor_ps(__m256 __a, __m256 __b)
 {
   return (__m256)((__v8su)__a ^ (__v8su)__b);
 }
+#pragma float_control(pop)
 
 /* Horizontal arithmetic */
 /// Horizontally adds the adjacent pairs of values contained in two
diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h
index 20e66d190113a3..dbc3f0d8df68fe 100644
--- a/clang/lib/Headers/xmmintrin.h
+++ b/clang/lib/Headers/xmmintrin.h
@@ -425,6 +425,8 @@ _mm_max_ps(__m128 __a, __m128 __b)
   return __builtin_ia32_maxps((__v4sf)__a, (__v4sf)__b);
 }
 
+#pragma float_control(push)
+#pragma float_control(precise, on)
 /// Performs a bitwise AND of two 128-bit vectors of [4 x float].
 ///
 /// \headerfile <x86intrin.h>
@@ -497,6 +499,7 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm_xor_ps(__m128 __a, __m128 __b) {
   return (__m128)((__v4su)__a ^ (__v4su)__b);
 }
+#pragma float_control(pop)
 
 /// Compares two 32-bit float values in the low-order bits of both
 ///    operands for equality.

arsenm · 2024-12-04T13:17:17Z

clang/lib/Headers/avx512dqintrin.h

@@ -167,6 +167,8 @@ _mm512_maskz_mullo_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
                                             (__v8di)_mm512_setzero_si512());
 }

+#pragma float_control(push)


Needs tests

Thanks, this is just an experiment. Unfortunately, it doesn't help with #118152, so I have turned it into a draft. I will come back to it if I find useful scenarios.

[X86] Do not apply fast-math to the logic intrinsics

6ec0b5b

Fixes: llvm#118152

phoebewang requested review from arsenm, andykaylor and jcranmer-intel December 4, 2024 08:28

llvmbot added the labels clang (Clang issues not falling into any other category), backend:X86, and clang:headers (Headers provided by Clang, e.g. for intrinsics) on Dec 4, 2024

phoebewang mentioned this pull request Dec 4, 2024

[clang] -ffast-math in 19.1.0 prevents function from returning intended __m128 bitmask #118152

Open

phoebewang marked this pull request as draft December 4, 2024 09:46

arsenm reviewed Dec 4, 2024

View reviewed changes

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[X86] Do not apply fast-math to the logic intrinsics #118603

[X86] Do not apply fast-math to the logic intrinsics #118603

phoebewang commented Dec 4, 2024

llvmbot commented Dec 4, 2024 •

edited

Loading

arsenm Dec 4, 2024

phoebewang Dec 4, 2024

[X86] Do not apply fast-math to the logic intrinsics #118603

Are you sure you want to change the base?

[X86] Do not apply fast-math to the logic intrinsics #118603

Conversation

phoebewang commented Dec 4, 2024

llvmbot commented Dec 4, 2024 • edited Loading

arsenm Dec 4, 2024

Choose a reason for hiding this comment

phoebewang Dec 4, 2024

Choose a reason for hiding this comment

llvmbot commented Dec 4, 2024 •

edited

Loading