flang-compiler · kiranchandramohan · Jun 1, 2022 · Jan 16, 2022 · bryanpkc · May 17, 2022
diff --git a/runtime/libpgmath/lib/common/ceil.c b/runtime/libpgmath/lib/common/ceil.c
@@ -6,8 +6,40 @@
  */
 
 #include "mthdecls.h"
-#if     defined(__SSE4_1__) || defined(__AVX__)
-#include    <immintrin.h>
+
+#if defined(TARGET_X8664)
+/*
+ * For X8664, implement both SSE and AVX versions of __mth_i_ceil using ISA
+ * instruction extensions.
+ *
+ * Using inline assembly allows both the SSE and AVX versions of the routine
+ * to be compiled in a single unit.
+ *
+ * Each of the following asm statements is equivalent to:
+ *      return _mm_cvtss_f32(_mm_ceil_ss(_mm_set1_ps(x), _mm_set1_ps(x)));
+ * but without the need for separate compilations for the SSE4.1 and AVX ISA
+ * extensions.
+ */
+
+float
+__mth_i_ceil_sse(float x) /* returns x rounded up, using legacy-encoded ROUNDSS */
+{
+  __asm__(
+    "roundss $0x2,%0,%0" /* imm8 0x2: round toward +infinity; src and dst are the same register */
+    :"+x"(x)             /* "+x": x is both read and overwritten, held in an SSE register */
+    );
+  return x;
+}
+
+float
+__mth_i_ceil_avx(float x) /* returns x rounded up, using VEX-encoded VROUNDSS */
+{
+  __asm__(
+    "vroundss $0x2,%0,%0,%0" /* imm8 0x2: round toward +infinity; all three operands are the same register */
+    :"+x"(x)                 /* "+x": x is both read and overwritten, held in an SSE register */
+    );
+  return x;
+}
 #endif
 
 float

diff --git a/runtime/libpgmath/lib/x86_64/math_tables/mth_ceildefs.h b/runtime/libpgmath/lib/x86_64/math_tables/mth_ceildefs.h
@@ -5,7 +5,13 @@
  *
  */
 
-MTHINTRIN(ceil  , ss   , any        ,  __mth_i_ceil         , __mth_i_ceil          , __mth_i_ceil          ,__math_dispatch_error)
+MTHINTRIN(ceil  , ss   , em64t      ,  __mth_i_ceil         , __mth_i_ceil          , __mth_i_ceil          ,__math_dispatch_error)
+MTHINTRIN(ceil  , ss   , sse4       ,  __mth_i_ceil_sse     , __mth_i_ceil_sse      , __mth_i_ceil_sse      ,__math_dispatch_error)
+MTHINTRIN(ceil  , ss   , avx        ,  __mth_i_ceil_avx     , __mth_i_ceil_avx      , __mth_i_ceil_avx      ,__math_dispatch_error)
+MTHINTRIN(ceil  , ss   , avxfma4    ,  __mth_i_ceil_avx     , __mth_i_ceil_avx      , __mth_i_ceil_avx      ,__math_dispatch_error)
+MTHINTRIN(ceil  , ss   , avx2       ,  __mth_i_ceil_avx     , __mth_i_ceil_avx      , __mth_i_ceil_avx      ,__math_dispatch_error)
+MTHINTRIN(ceil  , ss   , avx512knl  ,  __mth_i_ceil_avx     , __mth_i_ceil_avx      , __mth_i_ceil_avx      ,__math_dispatch_error)
+MTHINTRIN(ceil  , ss   , avx512     ,  __mth_i_ceil_avx     , __mth_i_ceil_avx      , __mth_i_ceil_avx      ,__math_dispatch_error)
 MTHINTRIN(ceil  , ds   , em64t      ,  __mth_i_dceil        , __mth_i_dceil         , __mth_i_dceil         ,__math_dispatch_error)
 MTHINTRIN(ceil  , ds   , sse4       ,  __mth_i_dceil_sse    , __mth_i_dceil_sse     , __mth_i_dceil_sse     ,__math_dispatch_error)
 MTHINTRIN(ceil  , ds   , avx        ,  __mth_i_dceil_avx    , __mth_i_dceil_avx     , __mth_i_dceil_avx     ,__math_dispatch_error)