
[MLIR] [Vector] ConstantFold MultiDReduction #122450

Open. Wants to merge 16 commits into base: main.
139 changes: 138 additions & 1 deletion mlir/lib/Dialect/Vector/IR/VectorOps.cpp
@@ -25,7 +25,6 @@
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/DialectImplementation.h"
#include "mlir/IR/IRMapping.h"
@@ -463,10 +462,148 @@ void vector::MultiDimReductionOp::build(OpBuilder &builder,
  build(builder, result, kind, source, acc, reductionDims);
}

/// TODO: Move to APFloat/APInt.
/// Computes `a` raised to the non-negative power `exponent` by iterative
/// square-and-multiply, i.e. in O(log(exponent)) multiplications.
static APFloat computePowerOf(const APFloat &a, int64_t exponent) {
  assert(exponent >= 0 && "negative exponents not supported.");
  APFloat result = APFloat::getOne(a.getSemantics());
  APFloat base = a;
  int64_t remainingExponent = exponent;
  // Consume the exponent one bit at a time: square the base for each bit and
  // multiply it into the result when the bit is set.
  while (remainingExponent > 0) {
    if (remainingExponent & 1)
      result = result * base;
    base = base * base;
    remainingExponent >>= 1;
  }
  return result;
}

static APInt computePowerOf(const APInt &a, int64_t exponent) {
  assert(exponent >= 0 && "negative exponents not supported.");
  APInt result(a.getBitWidth(), 1);
  APInt base = a;
  int64_t remainingExponent = exponent;
  while (remainingExponent > 0) {
    if (remainingExponent & 1)
      result *= base;
    base *= base;
    remainingExponent >>= 1;
  }
  return result;
}
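
As a quick sanity check of the helpers (a standalone sketch with made-up values, not part of the patch):

// Square-and-multiply consumes one exponent bit per iteration, so computing
// a^10 takes a handful of multiplies rather than nine.
assert(computePowerOf(APFloat(2.0f), 10).convertToFloat() == 1024.0f);
assert(computePowerOf(APInt(32, 3), 4) == APInt(32, 81)); // 3^4 == 81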

/// Computes the result of reducing a splat constant vector where the
/// accumulator value, `acc`, is also a splat constant. `times` is the number
/// of source elements combined into each result element, i.e. the number of
/// times the combining operation is applied.
static OpFoldResult computeConstantReduction(FloatAttr src, FloatAttr acc,
                                             int64_t times, CombiningKind kind,
                                             ShapedType dstType) {
  APFloat srcVal = src.getValue();
  APFloat accVal = acc.getValue();
  switch (kind) {
  case CombiningKind::ADD: {
    // Adding the same value `times` times folds to `acc + src * times`;
    // materialize `times` as an APFloat in the source semantics.
    APFloat n = APFloat(srcVal.getSemantics());
    n.convertFromAPInt(APInt(64, times, true), true,
                       APFloat::rmNearestTiesToEven);
Contributor:

Would this be a problem if the user expects a non-default rounding mode? We have been adding FMF and RM bottom-up in the IR, but it's lacking at the vector level, so I'm wondering if this would lead to an unexpected outcome. Perhaps @chelini, @kuhar could provide some feedback?
Worst case, I guess we could enable this folder under a flag...

Contributor Author:

I am not sure. How does the user currently get a non-default rounding mode? And it does not look like this is the only fold that would be affected. In Arith I see a TODO for adding optional attributes to specify it: https://mlir.llvm.org/docs/Dialects/ArithOps/#arithaddf-arithaddfop. So the rounding mode seems a problem separate from constant folding; once support for rounding modes is added, it can be handled here, along with the other affected folds.

Member (@kuhar, Jan 13, 2025):

For integer types, this should be clearly allowed.
For float, everything else being fine, I think in the absence of any rounding modes this should be permissible as well.

My worry would be that the reduction order is inherently unspecified for this op, and constant folding may produce a different order than runtime evaluation. In the current form, I can see that this is only applied in the splat case, which I think would only produce different results if the runtime implementation ended up doing partial reductions?
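
For illustration, a minimal standalone C++ sketch (hypothetical values, not from this patch) of how summation order alone can change a float result:

#include <cstdio>

int main() {
  // The same four values summed left-to-right versus as two partial sums
  // that are combined at the end (what a partial reduction would do).
  float v[4] = {1e8f, 1.0f, -1e8f, 1.0f};
  float sequential = ((v[0] + v[1]) + v[2]) + v[3];
  float pairwise = (v[0] + v[1]) + (v[2] + v[3]);
  std::printf("%f vs %f\n", sequential, pairwise); // 1.000000 vs 0.000000
  return 0;
}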

Contributor Author:

In the case of partial reductions, we would have less accuracy than this, right? So do we prefer that the default behavior be that our program runs slower and gives less accurate results?

Member:

> So do we prefer that the default behavior be that our program runs slower and gives less accurate results?

I don't think this is a fair way to put it. The concern is that the same computation gives two answers depending on whether the operands are compile-time constants or not, assuming a reasonable lowering. I think the argument you are trying to make is that partial reductions should not be considered a valid lowering, and I'm honestly not sure either way, because I could see this being expanded to something like gpu.subgroup_reduce, which may end up doing something very much hardware-dependent.

Member:

gpu::SubgroupReduceOp and vector::ReductionOp don't fold when the argument is a constant. Maybe it would help us get unstuck if we decide what to do with these two first? (This could be an RFC on Discourse.)

Member:

I went through the other vector dialect ops and I don't see any other 'math' op folding. Because what you propose here is something new, I think it deserves an RFC.

Contributor Author:

> because I could see this being expanded to something like gpu.subgroup_reduce that may end up doing something very much hardware-dependent.

The concern is that this may make something that is runtime/hardware-dependent (and it does not need to be) not hardware-dependent? Something that:

  1. Does not need to be runtime/hardware-dependent: it's a constant.
  2. Has no runtime cost: it would actually be faster to fold the constant.
  3. Has no precision cost: it would in fact be more accurate.

In this case it's splat-splat, but in general a partial reduction does not even return consistent results on the same hardware, because the order in which it is applied may change from run to run. How is that desirable? How is that even consistent? If partial ordering is to be canon for reductions, in what order should it be applied? I've seen discussions where this was a decision between being fast and not being hardware-dependent, or between being fast and being more accurate. This is neither. Why would we prefer to be needlessly hardware-dependent, less accurate, and slower? It's fine if some user somehow wants that, but why should it be the default?

Member:

Let's move this discussion to an RFC. You are proposing much more aggressive folds than any existing ones in vector, AFAICT.

    return DenseElementsAttr::get(dstType, {accVal + srcVal * n});
  }
  case CombiningKind::MUL:
    // Multiplying by the same value `times` times folds to
    // `acc * src^times`.
    return DenseElementsAttr::get(dstType,
                                  {accVal * computePowerOf(srcVal, times)});
  case CombiningKind::MINIMUMF:
    return DenseElementsAttr::get(dstType, {llvm::minimum(accVal, srcVal)});
  case CombiningKind::MAXIMUMF:
    return DenseElementsAttr::get(dstType, {llvm::maximum(accVal, srcVal)});
  case CombiningKind::MINNUMF:
    return DenseElementsAttr::get(dstType, {llvm::minnum(accVal, srcVal)});
  case CombiningKind::MAXNUMF:
    return DenseElementsAttr::get(dstType, {llvm::maxnum(accVal, srcVal)});
  default:
    return {};
  }
}
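
The four float min/max kinds differ mainly in NaN handling, which the minnumf/minimumf tests below exercise; a short standalone sketch of the distinction (illustrative values, not part of the patch):

APFloat nan = APFloat::getNaN(APFloat::IEEEsingle());
APFloat one(1.0f);
// minnum ignores a NaN operand; minimum propagates it.
assert(llvm::minnum(one, nan).convertToFloat() == 1.0f);
assert(llvm::minimum(one, nan).isNaN());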

static OpFoldResult computeConstantReduction(IntegerAttr src, IntegerAttr acc,
                                             int64_t times, CombiningKind kind,
                                             ShapedType dstType) {
  APInt srcVal = src.getValue();
  APInt accVal = acc.getValue();

  switch (kind) {
  case CombiningKind::ADD:
    return DenseElementsAttr::get(dstType, {accVal + srcVal * times});
  case CombiningKind::MUL:
    return DenseElementsAttr::get(dstType,
                                  {accVal * computePowerOf(srcVal, times)});
  case CombiningKind::MINSI:
    return DenseElementsAttr::get(dstType,
                                  {accVal.slt(srcVal) ? accVal : srcVal});
  case CombiningKind::MAXSI:
    return DenseElementsAttr::get(dstType,
                                  {accVal.sgt(srcVal) ? accVal : srcVal});
  case CombiningKind::MINUI:
    return DenseElementsAttr::get(dstType,
                                  {accVal.ult(srcVal) ? accVal : srcVal});
  case CombiningKind::MAXUI:
    return DenseElementsAttr::get(dstType,
                                  {accVal.ugt(srcVal) ? accVal : srcVal});
  case CombiningKind::AND:
    return DenseElementsAttr::get(dstType, {accVal & srcVal});
  case CombiningKind::OR:
    return DenseElementsAttr::get(dstType, {accVal | srcVal});
  case CombiningKind::XOR:
    // x ^ x == 0, so an even number of identical operands cancels out and
    // only the parity of `times` matters.
    return DenseElementsAttr::get(dstType,
                                  {(times & 0x1) ? accVal ^ srcVal : accVal});
  default:
    return {};
  }
}
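
The XOR case leans on parity alone; a minimal sketch (hypothetical helper name, values mirroring the tests below):

// The `times` identical source operands cancel pairwise under XOR, so the
// source survives only when `times` is odd.
static APInt reduceXorSplat(const APInt &acc, const APInt &src, int64_t times) {
  return (times & 1) ? acc ^ src : acc;
}
// reduceXorSplat(APInt(32, 0xFF), APInt(32, 0xA0A), 3) == 0xAF5 (2805)
// reduceXorSplat(APInt(32, 0xFF), APInt(32, 0xA0A), 4) == 0xFF  (255)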

OpFoldResult MultiDimReductionOp::fold(FoldAdaptor adaptor) {
  // Single parallel dim, this is a noop.
  if (getSourceVectorType().getRank() == 1 && !isReducedDim(0))
    return getSource();

  // Only fold when both the source and the accumulator are splat constants,
  // i.e. every combining step sees the same pair of values.
  auto srcAttr = dyn_cast_or_null<DenseElementsAttr>(adaptor.getSource());
  auto accAttr = dyn_cast_or_null<DenseElementsAttr>(adaptor.getAcc());
  if (!srcAttr || !accAttr || !srcAttr.isSplat() || !accAttr.isSplat())
    return {};

  // Each result element combines the accumulator with `times` source
  // elements, where `times` is the product of the reduced dimensions.
  ArrayRef<int64_t> reductionDims = getReductionDims();
  ArrayRef<int64_t> srcDims = getSourceVectorType().getShape();
  int64_t times = 1;
  for (int64_t dim : reductionDims)
    times *= srcDims[dim];

  CombiningKind kind = getKind();
  auto dstType = cast<ShapedType>(getDestType());
  Type dstEltType = dstType.getElementType();

  if (isa<FloatType>(dstEltType))
    return computeConstantReduction(srcAttr.getSplatValue<FloatAttr>(),
                                    accAttr.getSplatValue<FloatAttr>(), times,
                                    kind, dstType);
  if (isa<IntegerType>(dstEltType))
    return computeConstantReduction(srcAttr.getSplatValue<IntegerAttr>(),
                                    accAttr.getSplatValue<IntegerAttr>(), times,
                                    kind, dstType);

  return {};
}

81 changes: 81 additions & 0 deletions mlir/test/Dialect/Vector/constant-fold.mlir
@@ -11,3 +11,84 @@ func.func @fold_extract_transpose_negative(%arg0: vector<4x4xf16>) -> vector<4x4xf16> {
  %2 = vector.extract %1[0] : vector<4x4xf16> from vector<1x4x4xf16>
  return %2 : vector<4x4xf16>
}
}

// CHECK-LABEL: fold_multi_reduction_f32_add
func.func @fold_multi_reduction_f32_add() -> vector<1xf32> {
  %acc = arith.constant dense<0.000000e+00> : vector<1xf32>
  %0 = arith.constant dense<1.000000e+00> : vector<1x128x128xf32>
  // CHECK: %{{.*}} = arith.constant dense<1.638400e+04> : vector<1xf32>
  %1 = vector.multi_reduction <add>, %0, %acc [1, 2] : vector<1x128x128xf32> to vector<1xf32>
  return %1 : vector<1xf32>
}

// CHECK-LABEL: fold_multi_reduction_f32_mul
func.func @fold_multi_reduction_f32_mul() -> vector<1xf32> {
  %acc = arith.constant dense<1.000000e+00> : vector<1xf32>
  %0 = arith.constant dense<2.000000e+00> : vector<1x2x2xf32>
  // CHECK: %{{.*}} = arith.constant dense<1.600000e+01> : vector<1xf32>
  %1 = vector.multi_reduction <mul>, %0, %acc [1, 2] : vector<1x2x2xf32> to vector<1xf32>
  return %1 : vector<1xf32>
}

// CHECK-LABEL: fold_multi_reduction_f32_maximumf
func.func @fold_multi_reduction_f32_maximumf() -> vector<1xf32> {
  %acc = arith.constant dense<1.000000e+00> : vector<1xf32>
  %0 = arith.constant dense<2.000000e+00> : vector<1x2x2xf32>
  // CHECK: %{{.*}} = arith.constant dense<2.000000e+00> : vector<1xf32>
  %1 = vector.multi_reduction <maximumf>, %0, %acc [1, 2] : vector<1x2x2xf32> to vector<1xf32>
  return %1 : vector<1xf32>
}

// CHECK-LABEL: fold_multi_reduction_f32_minnumf
func.func @fold_multi_reduction_f32_minnumf() -> vector<1xf32> {
  %acc = arith.constant dense<1.000000e+00> : vector<1xf32>
  %0 = arith.constant dense<0xFFFFFFFF> : vector<1x2x2xf32>
  // CHECK: %{{.*}} = arith.constant dense<1.000000e+00> : vector<1xf32>
  %1 = vector.multi_reduction <minnumf>, %0, %acc [1, 2] : vector<1x2x2xf32> to vector<1xf32>
  return %1 : vector<1xf32>
}

// CHECK-LABEL: fold_multi_reduction_f32_minimumf
func.func @fold_multi_reduction_f32_minimumf() -> vector<1xf32> {
  %acc = arith.constant dense<1.000000e+00> : vector<1xf32>
  %0 = arith.constant dense<0xFFFFFFFF> : vector<1x2x2xf32>
  // CHECK: %{{.*}} = arith.constant dense<0xFFFFFFFF> : vector<1xf32>
  %1 = vector.multi_reduction <minimumf>, %0, %acc [1, 2] : vector<1x2x2xf32> to vector<1xf32>
  return %1 : vector<1xf32>
}

// CHECK-LABEL: fold_multi_reduction_i32_add
func.func @fold_multi_reduction_i32_add() -> vector<1xi32> {
  %acc = arith.constant dense<1> : vector<1xi32>
  %0 = arith.constant dense<1> : vector<1x128x128xi32>
  // CHECK: %{{.*}} = arith.constant dense<16385> : vector<1xi32>
  %1 = vector.multi_reduction <add>, %0, %acc [1, 2] : vector<1x128x128xi32> to vector<1xi32>
  return %1 : vector<1xi32>
}

// CHECK-LABEL: fold_multi_reduction_i32_xor_odd_num_elements
func.func @fold_multi_reduction_i32_xor_odd_num_elements() -> vector<1xi32> {
  %acc = arith.constant dense<0xFF> : vector<1xi32>
  %0 = arith.constant dense<0xA0A> : vector<1x3xi32>
  // CHECK: %{{.*}} = arith.constant dense<2805> : vector<1xi32>
  %1 = vector.multi_reduction <xor>, %0, %acc [1] : vector<1x3xi32> to vector<1xi32>
  return %1 : vector<1xi32>
}

// CHECK-LABEL: fold_multi_reduction_i32_xor_even_num_elements
func.func @fold_multi_reduction_i32_xor_even_num_elements() -> vector<1xi32> {
  %acc = arith.constant dense<0xFF> : vector<1xi32>
  %0 = arith.constant dense<0xA0A> : vector<1x4xi32>
  // CHECK: %{{.*}} = arith.constant dense<255> : vector<1xi32>
  %1 = vector.multi_reduction <xor>, %0, %acc [1] : vector<1x4xi32> to vector<1xi32>
  return %1 : vector<1xi32>
}

// CHECK-LABEL: fold_multi_reduction_i64_add
func.func @fold_multi_reduction_i64_add() -> vector<1xi64> {
  %acc = arith.constant dense<1> : vector<1xi64>
  %0 = arith.constant dense<1> : vector<1x128x128xi64>
  // CHECK: %{{.*}} = arith.constant dense<16385> : vector<1xi64>
  %1 = vector.multi_reduction <add>, %0, %acc [1, 2] : vector<1x128x128xi64> to vector<1xi64>
  return %1 : vector<1xi64>
}
}
Loading