From 0bfa20f7a2deaf61e444404f7418f4571a9d6de4 Mon Sep 17 00:00:00 2001
From: John Platts
Date: Wed, 5 Mar 2025 13:11:15 -0600
Subject: [PATCH 1/3] Fixed vec_pack_to_short_fp32 in Clang altivec.h

---
 clang/lib/Headers/altivec.h | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
index 8da65055012f1..45d557238e3d9 100644
--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@@ -7529,13 +7529,9 @@ vec_pack(vector double __a, vector double __b) {
 #ifdef __POWER9_VECTOR__
 static __inline__ vector unsigned short __ATTRS_o_ai
 vec_pack_to_short_fp32(vector float __a, vector float __b) {
-  vector float __resa = __builtin_vsx_xvcvsphp(__a);
-  vector float __resb = __builtin_vsx_xvcvsphp(__b);
-#ifdef __LITTLE_ENDIAN__
-  return (vector unsigned short)vec_mergee(__resa, __resb);
-#else
-  return (vector unsigned short)vec_mergeo(__resa, __resb);
-#endif
+  vector unsigned int __resa = (vector unsigned int)__builtin_vsx_xvcvsphp(__a);
+  vector unsigned int __resb = (vector unsigned int)__builtin_vsx_xvcvsphp(__b);
+  return vec_pack(__resa, __resb);
 }
 #endif
 

From 51c05790f7cd38e6db324dcd55b7491118b1c3e2 Mon Sep 17 00:00:00 2001
From: Lei Huang
Date: Fri, 7 Mar 2025 13:44:52 -0500
Subject: [PATCH 2/3] add additional checks to test for vec_pack_to_short_fp32

---
 clang/test/CodeGen/PowerPC/builtins-ppc-p9vector.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-p9vector.c b/clang/test/CodeGen/PowerPC/builtins-ppc-p9vector.c
index b55a522ed2608..68d32ee14c8fa 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-p9vector.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-p9vector.c
@@ -853,10 +853,16 @@ vector unsigned char test73(void) {
 vector unsigned short test74(void) {
 // CHECK-BE: @llvm.ppc.vsx.xvcvsphp(<4 x float>
 // CHECK-BE: @llvm.ppc.vsx.xvcvsphp(<4 x float>
-// CHECK-BE: @llvm.ppc.altivec.vperm
+// CHECK-BE: [[REG0:%[0-9]+]] = call <4 x i32> @llvm.ppc.altivec.vperm
+// CHECK-BE-NEXT: [[REG1:%[0-9]+]] = bitcast <4 x i32> [[REG0]] to <4 x float>
+// CHECK-BE-NEXT: [[REG2:%[0-9]+]] = bitcast <4 x float> [[REG1]] to <8 x i16>
+// CHECK-BE-NEXT: ret <8 x i16> [[REG2]]
 // CHECK: @llvm.ppc.vsx.xvcvsphp(<4 x float>
 // CHECK: @llvm.ppc.vsx.xvcvsphp(<4 x float>
-// CHECK: @llvm.ppc.altivec.vperm
+// CHECK: [[REG0:%[0-9]+]] = call <4 x i32> @llvm.ppc.altivec.vperm
+// CHECK-NEXT: [[REG1:%[0-9]+]] = bitcast <4 x i32> [[REG0]] to <4 x float>
+// CHECK-NEXT: [[REG2:%[0-9]+]] = bitcast <4 x float> [[REG1]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[REG2]]
   return vec_pack_to_short_fp32(vfa, vfb);
 }
 vector unsigned int test75(void) {

From 9aed4ea26d7dbe5fa786638eeabcbf7a8f0f51c0 Mon Sep 17 00:00:00 2001
From: John Platts
Date: Fri, 7 Mar 2025 15:03:35 -0600
Subject: [PATCH 3/3] Updated test to reflect changes made to vec_pack_to_short_fp32

---
 clang/test/CodeGen/PowerPC/builtins-ppc-p9vector.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-p9vector.c b/clang/test/CodeGen/PowerPC/builtins-ppc-p9vector.c
index 68d32ee14c8fa..824267b98564e 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-p9vector.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-p9vector.c
@@ -854,15 +854,13 @@ vector unsigned short test74(void) {
 // CHECK-BE: @llvm.ppc.vsx.xvcvsphp(<4 x float>
 // CHECK-BE: @llvm.ppc.vsx.xvcvsphp(<4 x float>
 // CHECK-BE: [[REG0:%[0-9]+]] = call <4 x i32> @llvm.ppc.altivec.vperm
-// CHECK-BE-NEXT: [[REG1:%[0-9]+]] = bitcast <4 x i32> [[REG0]] to <4 x float>
-// CHECK-BE-NEXT: [[REG2:%[0-9]+]] = bitcast <4 x float> [[REG1]] to <8 x i16>
-// CHECK-BE-NEXT: ret <8 x i16> [[REG2]]
+// CHECK-BE-NEXT: [[REG1:%[0-9]+]] = bitcast <4 x i32> [[REG0]] to <8 x i16>
+// CHECK-BE-NEXT: ret <8 x i16> [[REG1]]
 // CHECK: @llvm.ppc.vsx.xvcvsphp(<4 x float>
 // CHECK: @llvm.ppc.vsx.xvcvsphp(<4 x float>
 // CHECK: [[REG0:%[0-9]+]] = call <4 x i32> @llvm.ppc.altivec.vperm
-// CHECK-NEXT: [[REG1:%[0-9]+]] = bitcast <4 x i32> [[REG0]] to <4 x float>
-// CHECK-NEXT: [[REG2:%[0-9]+]] = bitcast <4 x float> [[REG1]] to <8 x i16>
-// CHECK-NEXT: ret <8 x i16> [[REG2]]
+// CHECK-NEXT: [[REG1:%[0-9]+]] = bitcast <4 x i32> [[REG0]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[REG1]]
   return vec_pack_to_short_fp32(vfa, vfb);
 }
 vector unsigned int test75(void) {