
Commit d30554b

[mlir][xegpu] SIMT distribution patterns for XeGPU CreateNdTdesc, LoadNd, StoreNd and Dpas Ops. (#135271)
This PR adds the SIMT distribution patterns for create_nd_tdesc, load_nd, store_nd and dpas XeGPU ops.
1 parent 1531dfc commit d30554b

File tree

11 files changed, +1582 −507 lines


mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td (-5 lines)

@@ -189,11 +189,6 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc",
         return scatter_attr.getChunkSize().getInt();
       return 1;
     }
-
-    // This returns a vector type that represents the fragment of data owned by
-    // a work item in SIMT mode if this tensor descriptor is used in a XeGPU
-    // load/store operation.
-    FailureOr<VectorType> getDistributedVectorType();
   }];

   let hasCustomAssemblyFormat = true;

mlir/include/mlir/Dialect/XeGPU/Transforms/Transforms.h (+2 lines)

@@ -16,6 +16,8 @@ namespace xegpu {

 /// Appends patterns for folding aliasing ops into XeGPU ops into `patterns`.
 void populateXeGPUFoldAliasOpsPatterns(RewritePatternSet &patterns);
+/// Appends patterns for XeGPU SIMT distribution into `patterns`.
+void populateXeGPUSubgroupDistributePatterns(RewritePatternSet &patterns);

 } // namespace xegpu
 } // namespace mlir
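The new entry point mirrors the existing `populateXeGPUFoldAliasOpsPatterns` helper. Below is a minimal sketch of how a pass might consume it, assuming a plain greedy rewrite driver; the helper name `runXeGPUSubgroupDistribution` and the driver choice are illustrative assumptions (the pass added by this commit may wire the patterns differently, for example through the vector distribution infrastructure, and the driver spelling may differ across MLIR revisions).

```cpp
#include "mlir/Dialect/XeGPU/Transforms/Transforms.h"
#include "mlir/IR/Operation.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

using namespace mlir;

// Hypothetical driver: collect the SIMT distribution patterns added by this
// commit and apply them greedily to `op`.
static LogicalResult runXeGPUSubgroupDistribution(Operation *op) {
  RewritePatternSet patterns(op->getContext());
  xegpu::populateXeGPUSubgroupDistributePatterns(patterns);
  // Greedy application is only one possible driver; the real pass may differ.
  return applyPatternsAndFoldGreedily(op, std::move(patterns));
}
```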
mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h (new file, +57 lines)

@@ -0,0 +1,57 @@
+//===- XeGPUUtils.h - Vector Utilities --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_XEGPU_UTILS_XEGPUUTILS_H_
+#define MLIR_DIALECT_XEGPU_UTILS_XEGPUUTILS_H_
+
+#include "mlir/IR/BuiltinTypes.h"
+namespace mlir {
+
+class VectorType;
+namespace xegpu {
+class LayoutAttr;
+class TensorDescType;
+} // namespace xegpu
+
+namespace xegpu {
+
+/// If tensor descriptor has a layout attribute it is used in SIMT mode.
+/// In this mode, the distributed vector shape is determined as follows:
+/// Definitions:
+///        lane_data_size = lane_data[0] × lane_data[1]
+///        subgroup_size = lane_layout[0] × lane_layout[1]
+///        distribution_unit_size = subgroup_size × lane_data_size
+///
+/// Case 1: Regular loads/stores.
+/// The following conditions must be met:
+///        * tensor_desc[0] == lane_layout[0]
+/// Distributed vector is a 1D vector with shape:
+///        [chunk_size]
+///
+/// Case 2: Block loads/stores
+/// Additional definitions:
+///        tensor_size = tensor_desc[0] * .. * tensor_desc[r-1] * array_length
+///        n_distribution_units = tensor_size / distribution_unit_size
+///        fragment_size = n_distribution_units * lane_data_size
+/// Given above definitions, the following conditions must be met:
+///        * tensor_desc[0] % (lane_layout[0] × lane_data[0]) == 0
+///        * tensor_desc[1] % (lane_layout[1] × lane_data[1]) == 0
+/// Distributed vector is a 1D vector with shape:
+///        [fragment_size]
+FailureOr<VectorType> getDistributedVectorType(xegpu::TensorDescType tdescTy);
+
+/// Helper to get the distributed vector type for a given vector type according
+/// to a given LayoutAttr.
+FailureOr<VectorType> getDistributedVectorType(VectorType originalType,
+                                               LayoutAttr layout);
+
+} // namespace xegpu
+
+} // namespace mlir
+
+#endif // MLIR_DIALECT_XEGPU_UTILS_XEGPUUTILS_H_
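To make the "Case 2" rule above concrete, here is a small self-contained arithmetic sketch; it deliberately avoids the MLIR API, and the helper name `fragmentSize` and the example shape/layout values are illustrative assumptions. An 8x16 f16 block descriptor with `lane_layout = [1, 16]`, `lane_data = [1, 1]` and `array_length = 1` gives tensor_size = 128 and distribution_unit_size = 16, hence an 8-element fragment, i.e. `vector<8xf16>` per lane.

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

// fragment_size = (tensor_size / distribution_unit_size) * lane_data_size,
// following the definitions in the doc comment above.
static int64_t fragmentSize(const std::vector<int64_t> &tdescShape,
                            const std::vector<int64_t> &laneLayout,
                            const std::vector<int64_t> &laneData,
                            int64_t arrayLength) {
  int64_t tensorSize = arrayLength, subgroupSize = 1, laneDataSize = 1;
  for (size_t i = 0; i < tdescShape.size(); ++i) {
    // Distributability condition: every dim must be divisible by
    // lane_layout[i] * lane_data[i].
    assert(tdescShape[i] % (laneLayout[i] * laneData[i]) == 0);
    tensorSize *= tdescShape[i];
    subgroupSize *= laneLayout[i];
    laneDataSize *= laneData[i];
  }
  int64_t distributionUnitSize = subgroupSize * laneDataSize;
  return tensorSize / distributionUnitSize * laneDataSize;
}

int main() {
  // 8x16 block descriptor, lane_layout = [1, 16], lane_data = [1, 1],
  // array_length = 1  ->  fragment of 8 elements per lane (vector<8xf16>).
  assert(fragmentSize({8, 16}, {1, 16}, {1, 1}, 1) == 8);
  return 0;
}
```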

mlir/lib/Dialect/XeGPU/CMakeLists.txt (+1 line)

@@ -1,2 +1,3 @@
 add_subdirectory(IR)
 add_subdirectory(Transforms)
+add_subdirectory(Utils)

mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp (-68 lines)

@@ -376,74 +376,6 @@ LogicalResult TensorDescType::verify(
   return success();
 }

-// If tensor descriptor has a layout attribute it is used in SIMT mode.
-// In this mode, the distributed vector shape is determined as follows:
-// Definitions:
-//        lane_data_size = lane_data[0] × lane_data[1]
-//        subgroup_size = lane_layout[0] × lane_layout[1]
-//        distribution_unit_size = subgroup_size × lane_data_size
-// ---------------------------------------------------------------------
-// Case 1: Regular loads/stores.
-// ---------------------------------------------------------------------
-// The following conditions must be met:
-//        * tensor_desc[0] == lane_layout[0]
-// Distributed vector is a 1D vector with shape:
-//        [chunk_size]
-// ---------------------------------------------------------------------
-// Case 2: Block loads/stores
-// ---------------------------------------------------------------------
-// Additional definitions:
-//        tensor_size = tensor_desc[0] * .. * tensor_desc[r-1] * array_length
-//        n_distribution_units = tensor_size / distribution_unit_size
-//        fragment_size = n_distribution_units * lane_data_size
-// Given above definitions, the following conditions must be met:
-//        * tensor_desc[0] % (lane_layout[0] × lane_data[0]) == 0
-//        * tensor_desc[1] % (lane_layout[1] × lane_data[1]) == 0
-// Distributed vector is a 1D vector with shape:
-//        [fragment_size]
-FailureOr<VectorType> TensorDescType::getDistributedVectorType() {
-  auto layout = llvm::dyn_cast_if_present<LayoutAttr>(getLayout());
-  // It only works for subgroup level layout, which only has lane_layout
-  // and lane_data, and is to distribute a SIMD code into SIMT code.
-  if (!layout || !layout.isSgLayout())
-    return failure();
-
-  SmallVector<int64_t> laneData(layout.getLaneData().asArrayRef());
-  SmallVector<int64_t> laneLayout(layout.getLaneLayout().asArrayRef());
-  auto tdescShape = getShape();
-
-  // compute sgSize by multiply elements of laneLayout
-  // e.g. for 2D layout, sgSize = laneLayout[0] * laneLayout[1]
-  // e.g. for 1D layout, sgSize = laneLayout[0]
-  auto sgSize = std::accumulate(laneLayout.begin(), laneLayout.end(), 1,
-                                std::multiplies<int64_t>());
-
-  // Case 1: regular loads/stores
-  auto scatterAttr = getEncodingAsScatterTensorDescAttr();
-  if (scatterAttr) {
-    auto chunkSize = scatterAttr.getChunkSize().getInt();
-    // Verify if the first dimension of the tensor descriptor shape is
-    // distributable.
-    assert(tdescShape[0] == laneLayout[0] &&
-           "tensor descriptor shape is not distributable");
-    return VectorType::get({chunkSize}, getElementType());
-  }
-
-  // Case 2: block loads/stores
-  // Check if the tensor descriptor shape is distributable.
-  int64_t tensorSize = 1;
-  for (auto [tdescDim, laneDim, laneDataDim] :
-       llvm::zip_equal(tdescShape, laneLayout, laneData)) {
-    assert((tdescDim % (laneDim * laneDataDim) == 0) &&
-           "tensor descriptor shape is not distributable");
-    tensorSize *= tdescDim;
-  }
-  // tensorSize must be adjusted for array_length.
-  tensorSize *= getArrayLength();
-
-  return VectorType::get({tensorSize / sgSize}, getElementType());
-}
-
 } // namespace xegpu
 } // namespace mlir
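Note that the distribution logic removed here does not disappear: together with the new Utils subdirectory and the MLIRXeGPUUtils dependency added below, the commit appears to move the same computation behind the free getDistributedVectorType helpers declared in XeGPUUtils.h above, instead of keeping it as a TensorDescType member.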

mlir/lib/Dialect/XeGPU/Transforms/CMakeLists.txt (+3 lines)

@@ -16,4 +16,7 @@ add_mlir_dialect_library(MLIRXeGPUTransforms
   MLIRPass
   MLIRTransforms
   MLIRGPUDialect
+  MLIRXeGPUUtils
+  MLIRGPUUtils
+  MLIRVectorTransforms
 )
