[RISCV] Mark f16/bf16 lrint and llrint cost as invalid (llvm#113924)

lukel97 · web-flow · commit e989e31a4737 · 2024-10-30T17:21:18.000+02:00
We currently can't lower scalable vector lrint and llrint nodes for bf16
and f16, even with zvfh, and will crash.

Mark the cost as invalid for now to prevent the vectorizers from
emitting them.

Note that we can actually lower fixed-length vectors fine by scalarizing
them, but we were still undercosting these too so I've also included
them. I presume there's an opportunity to improve the codegen later on.
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -948,12 +948,17 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                     TTI::TargetCostKind CostKind) {
   auto *RetTy = ICA.getReturnType();
   switch (ICA.getID()) {
+  case Intrinsic::lrint:
+  case Intrinsic::llrint:
+    // We can't currently lower half or bfloat vector lrint/llrint.
+    if (auto *VecTy = dyn_cast<VectorType>(ICA.getArgTypes()[0]);
+        VecTy && VecTy->getElementType()->is16bitFPTy())
+      return InstructionCost::getInvalid();
+    [[fallthrough]];
   case Intrinsic::ceil:
   case Intrinsic::floor:
   case Intrinsic::trunc:
   case Intrinsic::rint:
-  case Intrinsic::lrint:
-  case Intrinsic::llrint:
   case Intrinsic::round:
   case Intrinsic::roundeven: {
     // These all use the same code.
diff --git a/llvm/test/Analysis/CostModel/RISCV/fround.ll b/llvm/test/Analysis/CostModel/RISCV/fround.ll
@@ -425,15 +425,15 @@ define void @rint_fp16() {
 define void @lrint() {
 ; CHECK-LABEL: 'lrint'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call i64 @llvm.lrint.i64.bf16(bfloat undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x i64> @llvm.lrint.v2i64.v2bf16(<2 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x i64> @llvm.lrint.v4i64.v4bf16(<4 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x i64> @llvm.lrint.v8i64.v8bf16(<8 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x i64> @llvm.lrint.v16i64.v16bf16(<16 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x i64> @llvm.lrint.nxv1i64.nxv1bf16(<vscale x 1 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x i64> @llvm.lrint.nxv2i64.nxv2bf16(<vscale x 2 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x i64> @llvm.lrint.nxv4i64.nxv4bf16(<vscale x 4 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x i64> @llvm.lrint.nxv8i64.nxv8bf16(<vscale x 8 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %10 = call <vscale x 16 x i64> @llvm.lrint.nxv16i64.nxv16bf16(<vscale x 16 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %2 = call <2 x i64> @llvm.lrint.v2i64.v2bf16(<2 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %3 = call <4 x i64> @llvm.lrint.v4i64.v4bf16(<4 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %4 = call <8 x i64> @llvm.lrint.v8i64.v8bf16(<8 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %5 = call <16 x i64> @llvm.lrint.v16i64.v16bf16(<16 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %6 = call <vscale x 1 x i64> @llvm.lrint.nxv1i64.nxv1bf16(<vscale x 1 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %7 = call <vscale x 2 x i64> @llvm.lrint.nxv2i64.nxv2bf16(<vscale x 2 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %8 = call <vscale x 4 x i64> @llvm.lrint.nxv4i64.nxv4bf16(<vscale x 4 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %9 = call <vscale x 8 x i64> @llvm.lrint.nxv8i64.nxv8bf16(<vscale x 8 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %10 = call <vscale x 16 x i64> @llvm.lrint.nxv16i64.nxv16bf16(<vscale x 16 x bfloat> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call i64 @llvm.lrint.i64.f32(float undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float> undef)
@@ -490,15 +490,15 @@ define void @lrint() {
 define void @lrint_fp16() {
 ; CHECK-LABEL: 'lrint_fp16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call i64 @llvm.lrint.i64.f16(half undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x i64> @llvm.lrint.v2i64.v2f16(<2 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x i64> @llvm.lrint.v4i64.v4f16(<4 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x i64> @llvm.lrint.v8i64.v8f16(<8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x i64> @llvm.lrint.v16i64.v16f16(<16 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x i64> @llvm.lrint.nxv1i64.nxv1f16(<vscale x 1 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x i64> @llvm.lrint.nxv2i64.nxv2f16(<vscale x 2 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x i64> @llvm.lrint.nxv4i64.nxv4f16(<vscale x 4 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x i64> @llvm.lrint.nxv8i64.nxv8f16(<vscale x 8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %10 = call <vscale x 16 x i64> @llvm.lrint.nxv16i64.nxv16f16(<vscale x 16 x half> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %2 = call <2 x i64> @llvm.lrint.v2i64.v2f16(<2 x half> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %3 = call <4 x i64> @llvm.lrint.v4i64.v4f16(<4 x half> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %4 = call <8 x i64> @llvm.lrint.v8i64.v8f16(<8 x half> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %5 = call <16 x i64> @llvm.lrint.v16i64.v16f16(<16 x half> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %6 = call <vscale x 1 x i64> @llvm.lrint.nxv1i64.nxv1f16(<vscale x 1 x half> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %7 = call <vscale x 2 x i64> @llvm.lrint.nxv2i64.nxv2f16(<vscale x 2 x half> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %8 = call <vscale x 4 x i64> @llvm.lrint.nxv4i64.nxv4f16(<vscale x 4 x half> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %9 = call <vscale x 8 x i64> @llvm.lrint.nxv8i64.nxv8f16(<vscale x 8 x half> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %10 = call <vscale x 16 x i64> @llvm.lrint.nxv16i64.nxv16f16(<vscale x 16 x half> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   call i64 @llvm.lrint.f16(half undef)
@@ -517,15 +517,15 @@ define void @lrint_fp16() {
 define void @llrint() {
 ; CHECK-LABEL: 'llrint'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call i64 @llvm.llrint.i64.bf16(bfloat undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x i64> @llvm.llrint.v2i64.v2bf16(<2 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x i64> @llvm.llrint.v4i64.v4bf16(<4 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x i64> @llvm.llrint.v8i64.v8bf16(<8 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x i64> @llvm.llrint.v16i64.v16bf16(<16 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x i64> @llvm.llrint.nxv1i64.nxv1bf16(<vscale x 1 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x i64> @llvm.llrint.nxv2i64.nxv2bf16(<vscale x 2 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x i64> @llvm.llrint.nxv4i64.nxv4bf16(<vscale x 4 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x i64> @llvm.llrint.nxv8i64.nxv8bf16(<vscale x 8 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %10 = call <vscale x 16 x i64> @llvm.llrint.nxv16i64.nxv16bf16(<vscale x 16 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %2 = call <2 x i64> @llvm.llrint.v2i64.v2bf16(<2 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %3 = call <4 x i64> @llvm.llrint.v4i64.v4bf16(<4 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %4 = call <8 x i64> @llvm.llrint.v8i64.v8bf16(<8 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %5 = call <16 x i64> @llvm.llrint.v16i64.v16bf16(<16 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %6 = call <vscale x 1 x i64> @llvm.llrint.nxv1i64.nxv1bf16(<vscale x 1 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %7 = call <vscale x 2 x i64> @llvm.llrint.nxv2i64.nxv2bf16(<vscale x 2 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %8 = call <vscale x 4 x i64> @llvm.llrint.nxv4i64.nxv4bf16(<vscale x 4 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %9 = call <vscale x 8 x i64> @llvm.llrint.nxv8i64.nxv8bf16(<vscale x 8 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %10 = call <vscale x 16 x i64> @llvm.llrint.nxv16i64.nxv16bf16(<vscale x 16 x bfloat> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call i64 @llvm.llrint.i64.f32(float undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
@@ -582,15 +582,15 @@ define void @llrint() {
 define void @llrint_fp16() {
 ; CHECK-LABEL: 'llrint_fp16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call i64 @llvm.llrint.i64.f16(half undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x i64> @llvm.llrint.nxv1i64.nxv1f16(<vscale x 1 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x i64> @llvm.llrint.nxv2i64.nxv2f16(<vscale x 2 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x i64> @llvm.llrint.nxv4i64.nxv4f16(<vscale x 4 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x i64> @llvm.llrint.nxv8i64.nxv8f16(<vscale x 8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %10 = call <vscale x 16 x i64> @llvm.llrint.nxv16i64.nxv16f16(<vscale x 16 x half> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %2 = call <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %3 = call <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %4 = call <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %5 = call <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %6 = call <vscale x 1 x i64> @llvm.llrint.nxv1i64.nxv1f16(<vscale x 1 x half> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %7 = call <vscale x 2 x i64> @llvm.llrint.nxv2i64.nxv2f16(<vscale x 2 x half> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %8 = call <vscale x 4 x i64> @llvm.llrint.nxv4i64.nxv4f16(<vscale x 4 x half> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %9 = call <vscale x 8 x i64> @llvm.llrint.nxv8i64.nxv8f16(<vscale x 8 x half> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %10 = call <vscale x 16 x i64> @llvm.llrint.nxv16i64.nxv16f16(<vscale x 16 x half> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   call i64 @llvm.llrint.f16(half undef)