-
Notifications
You must be signed in to change notification settings - Fork 13.4k
[AMDGPU][NFC] Added Pre-commit tests for PR#137137 #137150
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[AMDGPU][NFC] Added Pre-commit tests for PR#137137 #137150
Conversation
This adds an llc LIT test for vector fp16 operations such as log, exp, etc. It acts as the pre-commit test for GitHub PR #137137.
@llvm/pr-subscribers-backend-amdgpu Author: Vikash Gupta (vg0204) Changes: This adds an llc LIT test for vector fp16 operations such as log, exp, etc. It acts as the pre-commit test for GitHub PR #137137. Patch is 107.69 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/137150.diff 1 Files Affected:
diff --git a/llvm/test/CodeGen/AMDGPU/vector-fp16.ll b/llvm/test/CodeGen/AMDGPU/vector-fp16.ll
new file mode 100644
index 0000000000000..274e50aae0230
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/vector-fp16.ll
@@ -0,0 +1,2538 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX8 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX906 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX908 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
+
+declare <1 x half> @llvm.sin.v1f16(<1 x half>)
+declare <1 x half> @llvm.cos.v1f16(<1 x half>)
+declare <1 x half> @llvm.log.v1f16(<1 x half>)
+declare <1 x half> @llvm.log2.v1f16(<1 x half>)
+declare <1 x half> @llvm.log10.v1f16(<1 x half>)
+declare <1 x half> @llvm.exp.v1f16(<1 x half>)
+declare <1 x half> @llvm.exp2.v1f16(<1 x half>)
+declare <1 x half> @llvm.exp10.v1f16(<1 x half>)
+declare <1 x half> @llvm.sqrt.v1f16(<1 x half>)
+
+declare <2 x half> @llvm.sin.v2f16(<2 x half>)
+declare <2 x half> @llvm.cos.v2f16(<2 x half>)
+declare <2 x half> @llvm.log.v2f16(<2 x half>)
+declare <2 x half> @llvm.log2.v2f16(<2 x half>)
+declare <2 x half> @llvm.log10.v2f16(<2 x half>)
+declare <2 x half> @llvm.exp.v2f16(<2 x half>)
+declare <2 x half> @llvm.exp2.v2f16(<2 x half>)
+declare <2 x half> @llvm.exp10.v2f16(<2 x half>)
+declare <2 x half> @llvm.sqrt.v2f16(<2 x half>)
+
+declare <4 x half> @llvm.sin.v4f16(<4 x half>)
+declare <4 x half> @llvm.cos.v4f16(<4 x half>)
+declare <4 x half> @llvm.log.v4f16(<4 x half>)
+declare <4 x half> @llvm.log2.v4f16(<4 x half>)
+declare <4 x half> @llvm.log10.v4f16(<4 x half>)
+declare <4 x half> @llvm.exp.v4f16(<4 x half>)
+declare <4 x half> @llvm.exp2.v4f16(<4 x half>)
+declare <4 x half> @llvm.exp10.v4f16(<4 x half>)
+declare <4 x half> @llvm.sqrt.v4f16(<4 x half>)
+
+declare <5 x half> @llvm.sin.v5f16(<5 x half>)
+declare <5 x half> @llvm.cos.v5f16(<5 x half>)
+declare <5 x half> @llvm.log.v5f16(<5 x half>)
+declare <5 x half> @llvm.log2.v5f16(<5 x half>)
+declare <5 x half> @llvm.log10.v5f16(<5 x half>)
+declare <5 x half> @llvm.exp.v5f16(<5 x half>)
+declare <5 x half> @llvm.exp2.v5f16(<5 x half>)
+declare <5 x half> @llvm.exp10.v5f16(<5 x half>)
+declare <5 x half> @llvm.sqrt.v5f16(<5 x half>)
+
+
+define <1 x half> @sin_v1f16(<1 x half> %a) {
+; GFX8-LABEL: sin_v1f16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_mul_f16_e32 v0, 0.15915494, v0
+; GFX8-NEXT: v_fract_f16_e32 v0, v0
+; GFX8-NEXT: v_sin_f16_e32 v0, v0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: sin_v1f16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mul_f16_e32 v0, 0.15915494, v0
+; GFX9-NEXT: v_sin_f16_e32 v0, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: sin_v1f16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_mul_f16_e32 v0, 0.15915494, v0
+; GFX10-NEXT: v_sin_f16_e32 v0, v0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: sin_v1f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_mul_f16_e32 v0, 0.15915494, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_sin_f16_e32 v0, v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %res = call <1 x half> @llvm.sin.v1f16(<1 x half> %a)
+ ret <1 x half> %res
+}
+
+define <1 x half> @cos_v1f16(<1 x half> %a) {
+; GFX8-LABEL: cos_v1f16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_mul_f16_e32 v0, 0.15915494, v0
+; GFX8-NEXT: v_fract_f16_e32 v0, v0
+; GFX8-NEXT: v_cos_f16_e32 v0, v0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: cos_v1f16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mul_f16_e32 v0, 0.15915494, v0
+; GFX9-NEXT: v_cos_f16_e32 v0, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: cos_v1f16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_mul_f16_e32 v0, 0.15915494, v0
+; GFX10-NEXT: v_cos_f16_e32 v0, v0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: cos_v1f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_mul_f16_e32 v0, 0.15915494, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_cos_f16_e32 v0, v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %res = call <1 x half> @llvm.cos.v1f16(<1 x half> %a)
+ ret <1 x half> %res
+}
+
+define <1 x half> @log_v1f16(<1 x half> %a) {
+; GFX8-LABEL: log_v1f16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_log_f16_e32 v0, v0
+; GFX8-NEXT: v_mul_f16_e32 v0, 0x398c, v0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-LABEL: log_v1f16:
+; GFX906: ; %bb.0:
+; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-NEXT: v_log_f16_e32 v0, v0
+; GFX906-NEXT: v_mul_f16_e32 v0, 0x398c, v0
+; GFX906-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-LABEL: log_v1f16:
+; GFX908: ; %bb.0:
+; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-NEXT: v_log_f16_e32 v0, v0
+; GFX908-NEXT: v_mul_f16_e32 v0, 0x398c, v0
+; GFX908-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: log_v1f16:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_log_f16_e32 v0, v0
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mul_f16_e32 v0, 0x398c, v0
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: log_v1f16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_log_f16_e32 v0, v0
+; GFX10-NEXT: v_mul_f16_e32 v0, 0x398c, v0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: log_v1f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_log_f16_e32 v0, v0
+; GFX11-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-NEXT: v_mul_f16_e32 v0, 0x398c, v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %res = call <1 x half> @llvm.log.v1f16(<1 x half> %a)
+ ret <1 x half> %res
+}
+
+define <1 x half> @log2_v1f16(<1 x half> %a) {
+; GFX8-LABEL: log2_v1f16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_log_f16_e32 v0, v0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: log2_v1f16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_log_f16_e32 v0, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: log2_v1f16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_log_f16_e32 v0, v0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: log2_v1f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_log_f16_e32 v0, v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %res = call <1 x half> @llvm.log2.v1f16(<1 x half> %a)
+ ret <1 x half> %res
+}
+
+define <1 x half> @log10_v1f16(<1 x half> %a) {
+; GFX8-LABEL: log10_v1f16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_log_f16_e32 v0, v0
+; GFX8-NEXT: v_mul_f16_e32 v0, 0x34d1, v0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-LABEL: log10_v1f16:
+; GFX906: ; %bb.0:
+; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-NEXT: v_log_f16_e32 v0, v0
+; GFX906-NEXT: v_mul_f16_e32 v0, 0x34d1, v0
+; GFX906-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-LABEL: log10_v1f16:
+; GFX908: ; %bb.0:
+; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-NEXT: v_log_f16_e32 v0, v0
+; GFX908-NEXT: v_mul_f16_e32 v0, 0x34d1, v0
+; GFX908-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: log10_v1f16:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_log_f16_e32 v0, v0
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mul_f16_e32 v0, 0x34d1, v0
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: log10_v1f16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_log_f16_e32 v0, v0
+; GFX10-NEXT: v_mul_f16_e32 v0, 0x34d1, v0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: log10_v1f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_log_f16_e32 v0, v0
+; GFX11-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-NEXT: v_mul_f16_e32 v0, 0x34d1, v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %res = call <1 x half> @llvm.log10.v1f16(<1 x half> %a)
+ ret <1 x half> %res
+}
+
+define <1 x half> @exp_v1f16(<1 x half> %a) {
+; GFX8-LABEL: exp_v1f16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX8-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GFX8-NEXT: v_exp_f32_e32 v0, v0
+; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-LABEL: exp_v1f16:
+; GFX906: ; %bb.0:
+; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX906-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GFX906-NEXT: v_exp_f32_e32 v0, v0
+; GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX906-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-LABEL: exp_v1f16:
+; GFX908: ; %bb.0:
+; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX908-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GFX908-NEXT: v_exp_f32_e32 v0, v0
+; GFX908-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX908-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: exp_v1f16:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX942-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GFX942-NEXT: v_exp_f32_e32 v0, v0
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: exp_v1f16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX10-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GFX10-NEXT: v_exp_f32_e32 v0, v0
+; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: exp_v1f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GFX11-NEXT: v_exp_f32_e32 v0, v0
+; GFX11-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %res = call <1 x half> @llvm.exp.v1f16(<1 x half> %a)
+ ret <1 x half> %res
+}
+
+define <1 x half> @exp2_v1f16(<1 x half> %a) {
+; GFX8-LABEL: exp2_v1f16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_exp_f16_e32 v0, v0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: exp2_v1f16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_exp_f16_e32 v0, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: exp2_v1f16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_exp_f16_e32 v0, v0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: exp2_v1f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_exp_f16_e32 v0, v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %res = call <1 x half> @llvm.exp2.v1f16(<1 x half> %a)
+ ret <1 x half> %res
+}
+
+define <1 x half> @exp10_v1f16(<1 x half> %a) {
+; GFX8-LABEL: exp10_v1f16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX8-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GFX8-NEXT: v_exp_f32_e32 v0, v0
+; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-LABEL: exp10_v1f16:
+; GFX906: ; %bb.0:
+; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX906-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GFX906-NEXT: v_exp_f32_e32 v0, v0
+; GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX906-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-LABEL: exp10_v1f16:
+; GFX908: ; %bb.0:
+; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX908-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GFX908-NEXT: v_exp_f32_e32 v0, v0
+; GFX908-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX908-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: exp10_v1f16:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX942-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GFX942-NEXT: v_exp_f32_e32 v0, v0
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: exp10_v1f16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX10-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GFX10-NEXT: v_exp_f32_e32 v0, v0
+; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: exp10_v1f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GFX11-NEXT: v_exp_f32_e32 v0, v0
+; GFX11-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %res = call <1 x half> @llvm.exp10.v1f16(<1 x half> %a)
+ ret <1 x half> %res
+}
+
+define <1 x half> @sqrt_v1f16(<1 x half> %a) {
+; GFX8-LABEL: sqrt_v1f16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_sqrt_f16_e32 v0, v0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: sqrt_v1f16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_sqrt_f16_e32 v0, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: sqrt_v1f16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_sqrt_f16_e32 v0, v0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: sqrt_v1f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_sqrt_f16_e32 v0, v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %res = call <1 x half> @llvm.sqrt.v1f16(<1 x half> %a)
+ ret <1 x half> %res
+}
+
+define <2 x half> @sin_v2f16(<2 x half> %a) {
+; GFX8-LABEL: sin_v2f16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_mov_b32_e32 v1, 0x3118
+; GFX8-NEXT: v_mul_f16_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_mul_f16_e32 v0, 0.15915494, v0
+; GFX8-NEXT: v_fract_f16_e32 v1, v1
+; GFX8-NEXT: v_fract_f16_e32 v0, v0
+; GFX8-NEXT: v_sin_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; GFX8-NEXT: v_sin_f16_e32 v0, v0
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-LABEL: sin_v2f16:
+; GFX906: ; %bb.0:
+; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-NEXT: v_mov_b32_e32 v2, 0x3118
+; GFX906-NEXT: v_mul_f16_e32 v1, 0.15915494, v0
+; GFX906-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX906-NEXT: v_sin_f16_e32 v1, v1
+; GFX906-NEXT: v_sin_f16_e32 v0, v0
+; GFX906-NEXT: v_pack_b32_f16 v0, v1, v0
+; GFX906-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-LABEL: sin_v2f16:
+; GFX908: ; %bb.0:
+; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-NEXT: v_mov_b32_e32 v2, 0x3118
+; GFX908-NEXT: v_mul_f16_e32 v1, 0.15915494, v0
+; GFX908-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX908-NEXT: v_sin_f16_e32 v1, v1
+; GFX908-NEXT: v_sin_f16_e32 v0, v0
+; GFX908-NEXT: v_pack_b32_f16 v0, v1, v0
+; GFX908-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: sin_v2f16:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v2, 0x3118
+; GFX942-NEXT: v_mul_f16_e32 v1, 0.15915494, v0
+; GFX942-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT: v_sin_f16_e32 v1, v1
+; GFX942-NEXT: v_sin_f16_e32 v0, v0
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_pack_b32_f16 v0, v1, v0
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: sin_v2f16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_mov_b32_e32 v1, 0x3118
+; GFX10-NEXT: v_mul_f16_e32 v2, 0.15915494, v0
+; GFX10-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX10-NEXT: v_sin_f16_e32 v1, v2
+; GFX10-NEXT: v_sin_f16_e32 v0, v0
+; GFX10-NEXT: v_pack_b32_f16 v0, v1, v0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: sin_v2f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-NEXT: v_mul_f16_e32 v0, 0.15915494, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mul_f16_e32 v1, 0.15915494, v1
+; GFX11-NEXT: v_sin_f16_e32 v0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_sin_f16_e32 v1, v1
+; GFX11-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %res = call <2 x half> @llvm.sin.v2f16(<2 x half> %a)
+ ret <2 x half> %res
+}
+
+define <2 x half> @cos_v2f16(<2 x half> %a) {
+; GFX8-LABEL: cos_v2f16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_mov_b32_e32 v1, 0x3118
+; GFX8-NEXT: v_mul_f16_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_mul_f16_e32 v0, 0.15915494, v0
+; GFX8-NEXT: v_fract_f16_e32 v1, v1
+; GFX8-NEXT: v_fract_f16_e32 v0, v0
+; GFX8-NEXT: v_cos_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; GFX8-NEXT: v_cos_f16_e32 v0, v0
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-LABEL: cos_v2f16:
+; GFX906: ; %bb.0:
+; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-NEXT: v_mov_b32_e32 v2, 0x3118
+; GFX906-NEXT: v_mul_f16_e32 v1, 0.15915494, v0
+; GFX906-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX906-NEXT: v_cos_f16_e32 v1, v1
+; GFX906-NEXT: v_cos_f16_e32 v0, v0
+; GFX906-NEXT: v_pack_b32_f16 v0, v1, v0
+; GFX906-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX908-LABEL: cos_v2f16:
+; GFX908: ; %bb.0:
+; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX908-NEXT: v_mov_b32_e32 v2, 0x3118
+; GFX908-NEXT: v_mul_f16_e32 v1, 0.15915494, v0
+; GFX908-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX908-NEXT: v_cos_f16_e32 v1, v1
+; GFX908-NEXT: v_cos_f16_e32 v0, v0
+; GFX908-NEXT: v_pack_b32_f16 v0, v1, v...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't understand what this is adding. This is just basic type coverage for all these intrinsics (which should already be covered in the existing intrinsic-specific tests).
%res = call <1 x half> @llvm.exp2.v1f16(<1 x half> %a) | ||
ret <1 x half> %res |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fix these to use 2 space indent
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't know; in my editor it's showing only a 2-space indent!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks like you've got tabs?
; RUN: llc -mtriple=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX8 %s | ||
; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX906 %s | ||
; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX908 %s | ||
; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX942 %s | ||
; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s | ||
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Don't need -verify-machineinstrs
Agreed. I made it to see the effects of #137137 in a single test file. As you said, they can be seen across the intrinsic-specific tests as well. So, should we go ahead with this PR? |
No; if there are missing type cases in the existing tests, they can be added there. As it is, this looks wholly redundant. |
Here, the main difference is that the tests use fp16 vectors with different element counts, in order to see how packing is handled in different scenarios. As for missing cases, we could compare the changes that #137137 causes in the existing tests to identify the ones that are missing, and keep those here, I guess! |
All of the base intrinsic tests should cover all the interesting number of vector elements. This isn't covering any different source type packing situations, these are all just direct intrinsic uses. |
This adds an llc LIT test for vector fp16 operations such as log, exp, etc. It acts as the pre-commit test for GitHub PR #137137.