diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index d09e1da457249..d3076b1ecf29a 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -1306,6 +1306,48 @@ let TargetPrefix = "nvvm" in {
       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
         [IntrNoMem, IntrSpeculatable, Commutative]>;
 
+  // Mixed-precision add: (half | bfloat, float) -> float, per rnd/sat suffix.
+  foreach rnd = ["rn", "rz", "rm", "rp"] in {
+    foreach sat = ["", "_sat"] in {
+      // half first operand; float second operand and result.
+      def int_nvvm_add_#rnd#sat#_h_f
+          : ClangBuiltin<"__nvvm_add_"#rnd#sat#"_h_f">,
+            DefaultAttrsIntrinsic<[llvm_float_ty],
+                                  [llvm_half_ty, llvm_float_ty],
+                                  [IntrNoMem, IntrSpeculatable]>;
+
+      // bfloat first operand; float second operand and result.
+      def int_nvvm_add_#rnd#sat#_bf_f
+          : ClangBuiltin<"__nvvm_add_"#rnd#sat#"_bf_f">,
+            DefaultAttrsIntrinsic<[llvm_float_ty],
+                                  [llvm_bfloat_ty, llvm_float_ty],
+                                  [IntrNoMem, IntrSpeculatable]>;
+    }
+  }
+
+  //
+  // Sub
+  //
+
+  // Mixed-precision sub: (half | bfloat, float) -> float, per rnd/sat suffix.
+  foreach rnd = ["rn", "rz", "rm", "rp"] in {
+    foreach sat = ["", "_sat"] in {
+      // half first operand; float second operand and result.
+      def int_nvvm_sub_#rnd#sat#_h_f
+          : ClangBuiltin<"__nvvm_sub_"#rnd#sat#"_h_f">,
+            DefaultAttrsIntrinsic<[llvm_float_ty],
+                                  [llvm_half_ty, llvm_float_ty],
+                                  [IntrNoMem, IntrSpeculatable]>;
+
+      // bfloat first operand; float second operand and result.
+      def int_nvvm_sub_#rnd#sat#_bf_f
+          : ClangBuiltin<"__nvvm_sub_"#rnd#sat#"_bf_f">,
+            DefaultAttrsIntrinsic<[llvm_float_ty],
+                                  [llvm_bfloat_ty, llvm_float_ty],
+                                  [IntrNoMem, IntrSpeculatable]>;
+    }
+  }
+
   //
   // Dot Product
   //
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 4ba3e6f06bb5f..abb94e2ec0b70 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1656,6 +1656,51 @@ def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
 def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
 
+// Mixed-precision add: add.<rnd>[.sat].f32.{f16,bf16}; hasPTX<86>, hasSM<100>.
+foreach rnd = ["rn", "rz", "rm", "rp"] in {
+  foreach sat = ["", "_sat"] in {
+    // half source in Int16Regs; "_sat" in the def name becomes ".sat" in asm.
+    def INT_NVVM_ADD_#!toupper(rnd#sat)#_H_F
+        : F_MATH_2<"add."#rnd#!subst("_", ".",
+                                      sat)#".f32.f16 \t$dst, $src0, $src1;",
+                   Float32Regs, Int16Regs, Float32Regs,
+                   !cast<Intrinsic>("int_nvvm_add_"#rnd#sat#"_h_f"),
+                   [hasPTX<86>, hasSM<100>]>;
+
+    // bfloat source in Int16Regs; "_sat" in the def name becomes ".sat" in asm.
+    def INT_NVVM_ADD_#!toupper(rnd#sat)#_BF_F
+        : F_MATH_2<"add."#rnd#!subst("_", ".",
+                                      sat)#".f32.bf16 \t$dst, $src0, $src1;",
+                   Float32Regs, Int16Regs, Float32Regs,
+                   !cast<Intrinsic>("int_nvvm_add_"#rnd#sat#"_bf_f"),
+                   [hasPTX<86>, hasSM<100>]>;
+  }
+}
+
+//
+// Sub
+//
+// Mixed-precision sub: sub.<rnd>[.sat].f32.{f16,bf16}; hasPTX<86>, hasSM<100>.
+foreach rnd = ["rn", "rz", "rm", "rp"] in {
+  foreach sat = ["", "_sat"] in {
+    // half source in Int16Regs; "_sat" in the def name becomes ".sat" in asm.
+    def INT_NVVM_SUB_#!toupper(rnd#sat)#_H_F
+        : F_MATH_2<"sub."#rnd#!subst("_", ".",
+                                      sat)#".f32.f16 \t$dst, $src0, $src1;",
+                   Float32Regs, Int16Regs, Float32Regs,
+                   !cast<Intrinsic>("int_nvvm_sub_"#rnd#sat#"_h_f"),
+                   [hasPTX<86>, hasSM<100>]>;
+
+    // bfloat source in Int16Regs; "_sat" in the def name becomes ".sat" in asm.
+    def INT_NVVM_SUB_#!toupper(rnd#sat)#_BF_F
+        : F_MATH_2<"sub."#rnd#!subst("_", ".",
+                                      sat)#".f32.bf16 \t$dst, $src0, $src1;",
+                   Float32Regs, Int16Regs, Float32Regs,
+                   !cast<Intrinsic>("int_nvvm_sub_"#rnd#sat#"_bf_f"),
+                   [hasPTX<86>, hasSM<100>]>;
+  }
+}
+
 //
 // BFIND
 //
diff --git a/llvm/test/CodeGen/NVPTX/arith-mix-precision.ll b/llvm/test/CodeGen/NVPTX/arith-mix-precision.ll
new file mode 100644
index 0000000000000..444300dd40ec3
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/arith-mix-precision.ll
@@ -0,0 +1,515 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100 -mattr=+ptx86 | FileCheck %s
+
+define float @test_add_rn_h_f(half %a, float %b) {
+; CHECK-LABEL: test_add_rn_h_f(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [test_add_rn_h_f_param_0];
+; CHECK-NEXT:    ld.param.f32 %f1, [test_add_rn_h_f_param_1];
+; CHECK-NEXT:    add.rn.f32.f16 %f2, %rs1, %f1;
+; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
+; CHECK-NEXT:    ret;
+  %res = call float @llvm.nvvm.add.rn.h.f(half %a, float %b)
+  ret float %res
+}
+
+define float @test_add_rn_sat_h_f(half %a, float %b) {
+; CHECK-LABEL: test_add_rn_sat_h_f(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [test_add_rn_sat_h_f_param_0];
+; CHECK-NEXT:    ld.param.f32 %f1, [test_add_rn_sat_h_f_param_1];
+; CHECK-NEXT:    add.rn.sat.f32.f16 %f2, %rs1, %f1;
+; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
+; CHECK-NEXT:    ret;
+  %res = call float @llvm.nvvm.add.rn.sat.h.f(half %a, float %b)
+  ret float %res
+}
+
+define float @test_add_rz_h_f(half %a, float %b) {
+; CHECK-LABEL: test_add_rz_h_f(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [test_add_rz_h_f_param_0];
+; CHECK-NEXT:    ld.param.f32 %f1, [test_add_rz_h_f_param_1];
+; CHECK-NEXT:    add.rz.f32.f16 %f2, %rs1, %f1;
+; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
+; CHECK-NEXT:    ret;
+  %res = call float @llvm.nvvm.add.rz.h.f(half %a, float %b)
+  ret float %res
+}
+
+define float @test_add_rz_sat_h_f(half %a, float %b) {
+; CHECK-LABEL: test_add_rz_sat_h_f(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [test_add_rz_sat_h_f_param_0];
+; CHECK-NEXT:    ld.param.f32 %f1, [test_add_rz_sat_h_f_param_1];
+; CHECK-NEXT:    add.rz.sat.f32.f16 %f2, %rs1, %f1;
+; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
+; CHECK-NEXT:    ret;
+  %res = call float @llvm.nvvm.add.rz.sat.h.f(half %a, float %b)
+  ret float %res
+}
+
+define float @test_add_rm_h_f(half %a, float %b) {
+; CHECK-LABEL: test_add_rm_h_f(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [test_add_rm_h_f_param_0];
+; CHECK-NEXT:    ld.param.f32 %f1, [test_add_rm_h_f_param_1];
+; CHECK-NEXT:    add.rm.f32.f16 %f2, %rs1, %f1;
+; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
+; CHECK-NEXT:    ret;
+  %res = call float @llvm.nvvm.add.rm.h.f(half %a, float %b)
+  ret float %res
+}
+
+define float @test_add_rm_sat_h_f(half %a, float %b) {
+; CHECK-LABEL: test_add_rm_sat_h_f(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [test_add_rm_sat_h_f_param_0];
+; CHECK-NEXT:    ld.param.f32 %f1, [test_add_rm_sat_h_f_param_1];
+; CHECK-NEXT:    add.rm.sat.f32.f16 %f2, %rs1, %f1;
+; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
+; CHECK-NEXT:    ret;
+  %res = call float @llvm.nvvm.add.rm.sat.h.f(half %a, float %b)
+  ret float %res
+}
+
+define float @test_add_rp_h_f(half %a, float %b) {
+; CHECK-LABEL: test_add_rp_h_f(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [test_add_rp_h_f_param_0];
+; CHECK-NEXT:    ld.param.f32 %f1, [test_add_rp_h_f_param_1];
+; CHECK-NEXT:    add.rp.f32.f16 %f2, %rs1, %f1;
+; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
+; CHECK-NEXT:    ret;
+  %res = call float @llvm.nvvm.add.rp.h.f(half %a, float %b)
+  ret float %res
+}
+
+define float @test_add_rp_sat_h_f(half %a, float %b) {
+; CHECK-LABEL: test_add_rp_sat_h_f(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [test_add_rp_sat_h_f_param_0];
+; CHECK-NEXT:    ld.param.f32 %f1, [test_add_rp_sat_h_f_param_1];
+; CHECK-NEXT:    add.rp.sat.f32.f16 %f2, %rs1, %f1;
+; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
+; CHECK-NEXT:    ret;
+  %res = call float @llvm.nvvm.add.rp.sat.h.f(half %a, float %b)
+  ret float %res
+}
+
+define float @test_add_rn_bf_f(bfloat %a, float %b) {
+; CHECK-LABEL: test_add_rn_bf_f(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [test_add_rn_bf_f_param_0];
+; CHECK-NEXT:    ld.param.f32 %f1, [test_add_rn_bf_f_param_1];
+; CHECK-NEXT:    add.rn.f32.bf16 %f2, %rs1, %f1;
+; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
+; CHECK-NEXT:    ret;
+  %res = call float @llvm.nvvm.add.rn.bf.f(bfloat %a, float %b)
+  ret float %res
+}
+
+define float @test_add_rn_sat_bf_f(bfloat %a, float %b) {
+; CHECK-LABEL: test_add_rn_sat_bf_f(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [test_add_rn_sat_bf_f_param_0];
+; CHECK-NEXT:    ld.param.f32 %f1, [test_add_rn_sat_bf_f_param_1];
+; CHECK-NEXT:    add.rn.sat.f32.bf16 %f2, %rs1, %f1;
+; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
+; CHECK-NEXT:    ret;
+  %res = call float @llvm.nvvm.add.rn.sat.bf.f(bfloat %a, float %b)
+  ret float %res
+}
+
+define float @test_add_rz_bf_f(bfloat %a, float %b) {
+; CHECK-LABEL: test_add_rz_bf_f(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [test_add_rz_bf_f_param_0];
+; CHECK-NEXT:    ld.param.f32 %f1, [test_add_rz_bf_f_param_1];
+; CHECK-NEXT:    add.rz.f32.bf16 %f2, %rs1, %f1;
+; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
+; CHECK-NEXT:    ret;
+  %res = call float @llvm.nvvm.add.rz.bf.f(bfloat %a, float %b)
+  ret float %res
+}
+
+define float @test_add_rz_sat_bf_f(bfloat %a, float %b) {
+; CHECK-LABEL: test_add_rz_sat_bf_f(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [test_add_rz_sat_bf_f_param_0];
+; CHECK-NEXT:    ld.param.f32 %f1, [test_add_rz_sat_bf_f_param_1];
+; CHECK-NEXT:    add.rz.sat.f32.bf16 %f2, %rs1, %f1;
+; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
+; CHECK-NEXT:    ret;
+  %res = call float @llvm.nvvm.add.rz.sat.bf.f(bfloat %a, float %b)
+  ret float %res
+}
+
+define float @test_add_rm_bf_f(bfloat %a, float %b) {
+; CHECK-LABEL: test_add_rm_bf_f(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [test_add_rm_bf_f_param_0];
+; CHECK-NEXT:    ld.param.f32 %f1, [test_add_rm_bf_f_param_1];
+; CHECK-NEXT:    add.rm.f32.bf16 %f2, %rs1, %f1;
+; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
+; CHECK-NEXT:    ret;
+  %res = call float @llvm.nvvm.add.rm.bf.f(bfloat %a, float %b)
+  ret float %res
+}
+
+define float @test_add_rm_sat_bf_f(bfloat %a, float %b) {
+; CHECK-LABEL: test_add_rm_sat_bf_f(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [test_add_rm_sat_bf_f_param_0];
+; CHECK-NEXT:    ld.param.f32 %f1, [test_add_rm_sat_bf_f_param_1];
+; CHECK-NEXT:    add.rm.sat.f32.bf16 %f2, %rs1, %f1;
+; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
+; CHECK-NEXT:    ret;
+  %res = call float @llvm.nvvm.add.rm.sat.bf.f(bfloat %a, float %b)
+  ret float %res
+}
+
+define float @test_add_rp_bf_f(bfloat %a, float %b) {
+; CHECK-LABEL: test_add_rp_bf_f(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [test_add_rp_bf_f_param_0];
+; CHECK-NEXT:    ld.param.f32 %f1, [test_add_rp_bf_f_param_1];
+; CHECK-NEXT:    add.rp.f32.bf16 %f2, %rs1, %f1;
+; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
+; CHECK-NEXT:    ret;
+  %res = call float @llvm.nvvm.add.rp.bf.f(bfloat %a, float %b)
+  ret float %res
+}
+
+define float @test_add_rp_sat_bf_f(bfloat %a, float %b) {
+; CHECK-LABEL: test_add_rp_sat_bf_f(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [test_add_rp_sat_bf_f_param_0];
+; CHECK-NEXT:    ld.param.f32 %f1, [test_add_rp_sat_bf_f_param_1];
+; CHECK-NEXT:    add.rp.sat.f32.bf16 %f2, %rs1, %f1;
+; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
+; CHECK-NEXT:    ret;
+  %res = call float @llvm.nvvm.add.rp.sat.bf.f(bfloat %a, float %b)
+  ret float %res
+}
+
+; Sub
+define float @test_sub_rn_h_f(half %a, float %b) {
+; CHECK-LABEL: test_sub_rn_h_f(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [test_sub_rn_h_f_param_0];
+; CHECK-NEXT:    ld.param.f32 %f1, [test_sub_rn_h_f_param_1];
+; CHECK-NEXT:    sub.rn.f32.f16 %f2, %rs1, %f1;
+; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
+; CHECK-NEXT:    ret;
+  %res = call float @llvm.nvvm.sub.rn.h.f(half %a, float %b)
+  ret float %res
+}
+
+define float @test_sub_rn_sat_h_f(half %a, float %b) {
+; CHECK-LABEL: test_sub_rn_sat_h_f(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [test_sub_rn_sat_h_f_param_0];
+; CHECK-NEXT:    ld.param.f32 %f1, [test_sub_rn_sat_h_f_param_1];
+; CHECK-NEXT:    sub.rn.sat.f32.f16 %f2, %rs1, %f1;
+; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
+; CHECK-NEXT:    ret;
+  %res = call float @llvm.nvvm.sub.rn.sat.h.f(half %a, float %b)
+  ret float %res
+}
+
+define float @test_sub_rz_h_f(half %a, float %b) {
+; CHECK-LABEL: test_sub_rz_h_f(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [test_sub_rz_h_f_param_0];
+; CHECK-NEXT:    ld.param.f32 %f1, [test_sub_rz_h_f_param_1];
+; CHECK-NEXT:    sub.rz.f32.f16 %f2, %rs1, %f1;
+; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
+; CHECK-NEXT:    ret;
+  %res = call float @llvm.nvvm.sub.rz.h.f(half %a, float %b)
+  ret float %res
+}
+
+define float @test_sub_rz_sat_h_f(half %a, float %b) {
+; CHECK-LABEL: test_sub_rz_sat_h_f(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [test_sub_rz_sat_h_f_param_0];
+; CHECK-NEXT:    ld.param.f32 %f1, [test_sub_rz_sat_h_f_param_1];
+; CHECK-NEXT:    sub.rz.sat.f32.f16 %f2, %rs1, %f1;
+; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
+; CHECK-NEXT:    ret;
+  %res = call float @llvm.nvvm.sub.rz.sat.h.f(half %a, float %b)
+  ret float %res
+}
+
+define float @test_sub_rm_h_f(half %a, float %b) {
+; CHECK-LABEL: test_sub_rm_h_f(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [test_sub_rm_h_f_param_0];
+; CHECK-NEXT:    ld.param.f32 %f1, [test_sub_rm_h_f_param_1];
+; CHECK-NEXT:    sub.rm.f32.f16 %f2, %rs1, %f1;
+; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
+; CHECK-NEXT:    ret;
+  %res = call float @llvm.nvvm.sub.rm.h.f(half %a, float %b)
+  ret float %res
+}
+
+define float @test_sub_rm_sat_h_f(half %a, float %b) {
+; CHECK-LABEL: test_sub_rm_sat_h_f(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [test_sub_rm_sat_h_f_param_0];
+; CHECK-NEXT:    ld.param.f32 %f1, [test_sub_rm_sat_h_f_param_1];
+; CHECK-NEXT:    sub.rm.sat.f32.f16 %f2, %rs1, %f1;
+; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
+; CHECK-NEXT:    ret;
+  %res = call float @llvm.nvvm.sub.rm.sat.h.f(half %a, float %b)
+  ret float %res
+}
+
+define float @test_sub_rp_h_f(half %a, float %b) {
+; CHECK-LABEL: test_sub_rp_h_f(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [test_sub_rp_h_f_param_0];
+; CHECK-NEXT:    ld.param.f32 %f1, [test_sub_rp_h_f_param_1];
+; CHECK-NEXT:    sub.rp.f32.f16 %f2, %rs1, %f1;
+; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
+; CHECK-NEXT:    ret;
+  %res = call float @llvm.nvvm.sub.rp.h.f(half %a, float %b)
+  ret float %res
+}
+
+define float @test_sub_rp_sat_h_f(half %a, float %b) {
+; CHECK-LABEL: test_sub_rp_sat_h_f(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [test_sub_rp_sat_h_f_param_0];
+; CHECK-NEXT:    ld.param.f32 %f1, [test_sub_rp_sat_h_f_param_1];
+; CHECK-NEXT:    sub.rp.sat.f32.f16 %f2, %rs1, %f1;
+; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
+; CHECK-NEXT:    ret;
+  %res = call float @llvm.nvvm.sub.rp.sat.h.f(half %a, float %b)
+  ret float %res
+}
+
+define float @test_sub_rn_bf_f(bfloat %a, float %b) {
+; CHECK-LABEL: test_sub_rn_bf_f(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [test_sub_rn_bf_f_param_0];
+; CHECK-NEXT:    ld.param.f32 %f1, [test_sub_rn_bf_f_param_1];
+; CHECK-NEXT:    sub.rn.f32.bf16 %f2, %rs1, %f1;
+; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
+; CHECK-NEXT:    ret;
+  %res = call float @llvm.nvvm.sub.rn.bf.f(bfloat %a, float %b)
+  ret float %res
+}
+
+define float @test_sub_rn_sat_bf_f(bfloat %a, float %b) {
+; CHECK-LABEL: test_sub_rn_sat_bf_f(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [test_sub_rn_sat_bf_f_param_0];
+; CHECK-NEXT:    ld.param.f32 %f1, [test_sub_rn_sat_bf_f_param_1];
+; CHECK-NEXT:    sub.rn.sat.f32.bf16 %f2, %rs1, %f1;
+; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
+; CHECK-NEXT:    ret;
+  %res = call float @llvm.nvvm.sub.rn.sat.bf.f(bfloat %a, float %b)
+  ret float %res
+}
+
+define float @test_sub_rz_bf_f(bfloat %a, float %b) {
+; CHECK-LABEL: test_sub_rz_bf_f(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [test_sub_rz_bf_f_param_0];
+; CHECK-NEXT:    ld.param.f32 %f1, [test_sub_rz_bf_f_param_1];
+; CHECK-NEXT:    sub.rz.f32.bf16 %f2, %rs1, %f1;
+; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
+; CHECK-NEXT:    ret;
+  %res = call float @llvm.nvvm.sub.rz.bf.f(bfloat %a, float %b)
+  ret float %res
+}
+
+define float @test_sub_rz_sat_bf_f(bfloat %a, float %b) {
+; CHECK-LABEL: test_sub_rz_sat_bf_f(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [test_sub_rz_sat_bf_f_param_0];
+; CHECK-NEXT:    ld.param.f32 %f1, [test_sub_rz_sat_bf_f_param_1];
+; CHECK-NEXT:    sub.rz.sat.f32.bf16 %f2, %rs1, %f1;
+; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
+; CHECK-NEXT:    ret;
+  %res = call float @llvm.nvvm.sub.rz.sat.bf.f(bfloat %a, float %b)
+  ret float %res
+}
+
+define float @test_sub_rm_bf_f(bfloat %a, float %b) {
+; CHECK-LABEL: test_sub_rm_bf_f(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [test_sub_rm_bf_f_param_0];
+; CHECK-NEXT:    ld.param.f32 %f1, [test_sub_rm_bf_f_param_1];
+; CHECK-NEXT:    sub.rm.f32.bf16 %f2, %rs1, %f1;
+; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
+; CHECK-NEXT:    ret;
+  %res = call float @llvm.nvvm.sub.rm.bf.f(bfloat %a, float %b)
+  ret float %res
+}
+
+define float @test_sub_rm_sat_bf_f(bfloat %a, float %b) {
+; CHECK-LABEL: test_sub_rm_sat_bf_f(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [test_sub_rm_sat_bf_f_param_0];
+; CHECK-NEXT:    ld.param.f32 %f1, [test_sub_rm_sat_bf_f_param_1];
+; CHECK-NEXT:    sub.rm.sat.f32.bf16 %f2, %rs1, %f1;
+; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
+; CHECK-NEXT:    ret;
+  %res = call float @llvm.nvvm.sub.rm.sat.bf.f(bfloat %a, float %b)
+  ret float %res
+}
+
+define float @test_sub_rp_bf_f(bfloat %a, float %b) {
+; CHECK-LABEL: test_sub_rp_bf_f(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [test_sub_rp_bf_f_param_0];
+; CHECK-NEXT:    ld.param.f32 %f1, [test_sub_rp_bf_f_param_1];
+; CHECK-NEXT:    sub.rp.f32.bf16 %f2, %rs1, %f1;
+; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
+; CHECK-NEXT:    ret;
+  %res = call float @llvm.nvvm.sub.rp.bf.f(bfloat %a, float %b)
+  ret float %res
+}
+
+define float @test_sub_rp_sat_bf_f(bfloat %a, float %b) {
+; CHECK-LABEL: test_sub_rp_sat_bf_f(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [test_sub_rp_sat_bf_f_param_0];
+; CHECK-NEXT:    ld.param.f32 %f1, [test_sub_rp_sat_bf_f_param_1];
+; CHECK-NEXT:    sub.rp.sat.f32.bf16 %f2, %rs1, %f1;
+; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
+; CHECK-NEXT:    ret;
+  %res = call float @llvm.nvvm.sub.rp.sat.bf.f(bfloat %a, float %b)
+  ret float %res
+}