[Flang][OpenMP] Add global address space to globals for target device #119585
Conversation
@llvm/pr-subscribers-flang-codegen @llvm/pr-subscribers-flang-fir-hlfir

Author: None (agozillon)

Changes

Currently we do not add the appropriate global address space to globals in the target device pass. This PR adds at least a preliminary number of these address space markings and then adds the appropriate casts where necessary (primarily from global to program). This allows for more correct IR that the backends (in particular AMDGPU) can treat more aptly for optimisations and code correctness.

This is 1/3 of the required PRs to enable declare target to mapping; look at PR 3/3 to check for full green passes (this one will fail a number of tests due to some dependencies).

Co-authored-by: Raghu Maddhipatla <raghu.maddhipatla@amd.com>

Full diff: https://github.com/llvm/llvm-project/pull/119585.diff

7 Files Affected:
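To illustrate the intent before diving into the diff, here is a rough hand-written sketch of the lowering this enables for an AMDGPU target. It mirrors the updated convert-to-llvm.fir expectations below, but it is not taken verbatim from the patch; the symbol and function names are made up.

  // FIR input: a global and a use of its address, with no address space.
  fir.global internal @_QFEx (0:i32) : i32
  func.func @_QPuse_x() {
    %0 = fir.address_of(@_QFEx) : !fir.ref<i32>
    return
  }

  // After --fir-to-llvm-ir for amdgcn-amd-amdhsa: the global carries the
  // global address space (1), and the use is cast back to the program
  // address space (0) so existing pointer-typed consumers keep working.
  llvm.mlir.global internal @_QFEx(0 : i32) {addr_space = 1 : i32} : i32
  llvm.func @_QPuse_x() {
    %0 = llvm.mlir.addressof @_QFEx : !llvm.ptr<1>
    %1 = llvm.addrspacecast %0 : !llvm.ptr<1> to !llvm.ptr
    llvm.return
  }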
diff --git a/flang/include/flang/Optimizer/Builder/FIRBuilder.h b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
index b772c523caba49..0b7cbe92f4d879 100644
--- a/flang/include/flang/Optimizer/Builder/FIRBuilder.h
+++ b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
@@ -774,6 +774,10 @@ elideLengthsAlreadyInType(mlir::Type type, mlir::ValueRange lenParams);
/// Get the address space which should be used for allocas
uint64_t getAllocaAddressSpace(mlir::DataLayout *dataLayout);
+uint64_t getGlobalAddressSpace(mlir::DataLayout *dataLayout);
+
+uint64_t getProgramAddressSpace(mlir::DataLayout *dataLayout);
+
} // namespace fir::factory
#endif // FORTRAN_OPTIMIZER_BUILDER_FIRBUILDER_H
diff --git a/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h b/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h
index 35749dae5d7e9f..1659ede937f017 100644
--- a/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h
+++ b/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h
@@ -185,6 +185,9 @@ class ConvertFIRToLLVMPattern : public mlir::ConvertToLLVMPattern {
unsigned
getProgramAddressSpace(mlir::ConversionPatternRewriter &rewriter) const;
+ unsigned
+ getGlobalAddressSpace(mlir::ConversionPatternRewriter &rewriter) const;
+
const fir::FIRToLLVMPassOptions &options;
using ConvertToLLVMPattern::match;
diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
index 3a39c455015f9f..ff98d13ad86c24 100644
--- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp
+++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
@@ -1721,3 +1721,17 @@ uint64_t fir::factory::getAllocaAddressSpace(mlir::DataLayout *dataLayout) {
return mlir::cast<mlir::IntegerAttr>(addrSpace).getUInt();
return 0;
}
+
+uint64_t fir::factory::getGlobalAddressSpace(mlir::DataLayout *dataLayout) {
+ if (dataLayout)
+ if (mlir::Attribute addrSpace = dataLayout->getGlobalMemorySpace())
+ return mlir::cast<mlir::IntegerAttr>(addrSpace).getUInt();
+ return 0;
+}
+
+uint64_t fir::factory::getProgramAddressSpace(mlir::DataLayout *dataLayout) {
+ if (dataLayout)
+ if (mlir::Attribute addrSpace = dataLayout->getProgramMemorySpace())
+ return mlir::cast<mlir::IntegerAttr>(addrSpace).getUInt();
+ return 0;
+}
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 5345d64c330f06..1b79b9d9975dde 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -132,6 +132,38 @@ addLLVMOpBundleAttrs(mlir::ConversionPatternRewriter &rewriter,
}
namespace {
+
+mlir::Value replaceWithAddrOfOrASCast(mlir::ConversionPatternRewriter &rewriter,
+ mlir::Location loc,
+ std::uint64_t globalAS,
+ std::uint64_t programAS,
+ llvm::StringRef symName, mlir::Type type,
+ mlir::Operation *replaceOp = nullptr) {
+ if (mlir::isa<mlir::LLVM::LLVMPointerType>(type)) {
+ if (globalAS != programAS) {
+ auto llvmAddrOp = rewriter.create<mlir::LLVM::AddressOfOp>(
+ loc, getLlvmPtrType(rewriter.getContext(), globalAS), symName);
+ if (replaceOp)
+ return rewriter.replaceOpWithNewOp<mlir::LLVM::AddrSpaceCastOp>(
+ replaceOp, ::getLlvmPtrType(rewriter.getContext(), programAS),
+ llvmAddrOp);
+ return rewriter.create<mlir::LLVM::AddrSpaceCastOp>(
+ loc, getLlvmPtrType(rewriter.getContext(), programAS), llvmAddrOp);
+ }
+
+ if (replaceOp)
+ return rewriter.replaceOpWithNewOp<mlir::LLVM::AddressOfOp>(
+ replaceOp, getLlvmPtrType(rewriter.getContext(), globalAS), symName);
+ return rewriter.create<mlir::LLVM::AddressOfOp>(
+ loc, getLlvmPtrType(rewriter.getContext(), globalAS), symName);
+ }
+
+ if (replaceOp)
+ return rewriter.replaceOpWithNewOp<mlir::LLVM::AddressOfOp>(replaceOp, type,
+ symName);
+ return rewriter.create<mlir::LLVM::AddressOfOp>(loc, type, symName);
+}
+
/// Lower `fir.address_of` operation to `llvm.address_of` operation.
struct AddrOfOpConversion : public fir::FIROpConversion<fir::AddrOfOp> {
using FIROpConversion::FIROpConversion;
@@ -139,9 +171,15 @@ struct AddrOfOpConversion : public fir::FIROpConversion<fir::AddrOfOp> {
llvm::LogicalResult
matchAndRewrite(fir::AddrOfOp addr, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const override {
- auto ty = convertType(addr.getType());
- rewriter.replaceOpWithNewOp<mlir::LLVM::AddressOfOp>(
- addr, ty, addr.getSymbol().getRootReference().getValue());
+ auto global = addr->getParentOfType<mlir::ModuleOp>()
+ .lookupSymbol<mlir::LLVM::GlobalOp>(addr.getSymbol());
+ replaceWithAddrOfOrASCast(
+ rewriter, addr->getLoc(),
+ global ? global.getAddrSpace() : getGlobalAddressSpace(rewriter),
+ getProgramAddressSpace(rewriter),
+ global ? global.getSymName()
+ : addr.getSymbol().getRootReference().getValue(),
+ convertType(addr.getType()), addr);
return mlir::success();
}
};
@@ -1255,14 +1293,19 @@ struct EmboxCommonConversion : public fir::FIROpConversion<OP> {
? fir::NameUniquer::getTypeDescriptorAssemblyName(recType.getName())
: fir::NameUniquer::getTypeDescriptorName(recType.getName());
mlir::Type llvmPtrTy = ::getLlvmPtrType(mod.getContext());
+ mlir::DataLayout dataLayout(mod);
if (auto global = mod.template lookupSymbol<fir::GlobalOp>(name)) {
- return rewriter.create<mlir::LLVM::AddressOfOp>(loc, llvmPtrTy,
- global.getSymName());
+ return replaceWithAddrOfOrASCast(
+ rewriter, loc, fir::factory::getGlobalAddressSpace(&dataLayout),
+ fir::factory::getProgramAddressSpace(&dataLayout),
+ global.getSymName(), llvmPtrTy);
}
if (auto global = mod.template lookupSymbol<mlir::LLVM::GlobalOp>(name)) {
// The global may have already been translated to LLVM.
- return rewriter.create<mlir::LLVM::AddressOfOp>(loc, llvmPtrTy,
- global.getSymName());
+ return replaceWithAddrOfOrASCast(
+ rewriter, loc, global.getAddrSpace(),
+ fir::factory::getProgramAddressSpace(&dataLayout),
+ global.getSymName(), llvmPtrTy);
}
// Type info derived types do not have type descriptors since they are the
// types defining type descriptors.
@@ -2759,12 +2802,16 @@ struct TypeDescOpConversion : public fir::FIROpConversion<fir::TypeDescOp> {
: fir::NameUniquer::getTypeDescriptorName(recordType.getName());
auto llvmPtrTy = ::getLlvmPtrType(typeDescOp.getContext());
if (auto global = module.lookupSymbol<mlir::LLVM::GlobalOp>(typeDescName)) {
- rewriter.replaceOpWithNewOp<mlir::LLVM::AddressOfOp>(
- typeDescOp, llvmPtrTy, global.getSymName());
+ replaceWithAddrOfOrASCast(rewriter, typeDescOp->getLoc(),
+ global.getAddrSpace(),
+ getProgramAddressSpace(rewriter),
+ global.getSymName(), llvmPtrTy, typeDescOp);
return mlir::success();
} else if (auto global = module.lookupSymbol<fir::GlobalOp>(typeDescName)) {
- rewriter.replaceOpWithNewOp<mlir::LLVM::AddressOfOp>(
- typeDescOp, llvmPtrTy, global.getSymName());
+ replaceWithAddrOfOrASCast(rewriter, typeDescOp->getLoc(),
+ getGlobalAddressSpace(rewriter),
+ getProgramAddressSpace(rewriter),
+ global.getSymName(), llvmPtrTy, typeDescOp);
return mlir::success();
}
return mlir::failure();
@@ -2855,8 +2902,8 @@ struct GlobalOpConversion : public fir::FIROpConversion<fir::GlobalOp> {
mlir::SymbolRefAttr comdat;
llvm::ArrayRef<mlir::NamedAttribute> attrs;
auto g = rewriter.create<mlir::LLVM::GlobalOp>(
- loc, tyAttr, isConst, linkage, global.getSymName(), initAttr, 0, 0,
- false, false, comdat, attrs, dbgExprs);
+ loc, tyAttr, isConst, linkage, global.getSymName(), initAttr, 0,
+ getGlobalAddressSpace(rewriter), false, false, comdat, attrs, dbgExprs);
if (global.getAlignment() && *global.getAlignment() > 0)
g.setAlignment(*global.getAlignment());
diff --git a/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp b/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp
index 12021deb4bd97a..a352e00649c23c 100644
--- a/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp
+++ b/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp
@@ -346,7 +346,10 @@ unsigned ConvertFIRToLLVMPattern::getAllocaAddressSpace(
mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
assert(parentOp != nullptr &&
"expected insertion block to have parent operation");
- if (auto module = parentOp->getParentOfType<mlir::ModuleOp>())
+ auto module = mlir::isa<mlir::ModuleOp>(parentOp)
+ ? mlir::cast<mlir::ModuleOp>(parentOp)
+ : parentOp->getParentOfType<mlir::ModuleOp>();
+ if (module)
if (mlir::Attribute addrSpace =
mlir::DataLayout(module).getAllocaMemorySpace())
return llvm::cast<mlir::IntegerAttr>(addrSpace).getUInt();
@@ -358,11 +361,29 @@ unsigned ConvertFIRToLLVMPattern::getProgramAddressSpace(
mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
assert(parentOp != nullptr &&
"expected insertion block to have parent operation");
- if (auto module = parentOp->getParentOfType<mlir::ModuleOp>())
+ auto module = mlir::isa<mlir::ModuleOp>(parentOp)
+ ? mlir::cast<mlir::ModuleOp>(parentOp)
+ : parentOp->getParentOfType<mlir::ModuleOp>();
+ if (module)
if (mlir::Attribute addrSpace =
mlir::DataLayout(module).getProgramMemorySpace())
return llvm::cast<mlir::IntegerAttr>(addrSpace).getUInt();
return defaultAddressSpace;
}
+unsigned ConvertFIRToLLVMPattern::getGlobalAddressSpace(
+ mlir::ConversionPatternRewriter &rewriter) const {
+ mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
+ assert(parentOp != nullptr &&
+ "expected insertion block to have parent operation");
+ auto module = mlir::isa<mlir::ModuleOp>(parentOp)
+ ? mlir::cast<mlir::ModuleOp>(parentOp)
+ : parentOp->getParentOfType<mlir::ModuleOp>();
+ if (module)
+ if (mlir::Attribute addrSpace =
+ mlir::DataLayout(module).getGlobalMemorySpace())
+ return llvm::cast<mlir::IntegerAttr>(addrSpace).getUInt();
+ return defaultAddressSpace;
+}
+
} // namespace fir
diff --git a/flang/test/Fir/convert-to-llvm.fir b/flang/test/Fir/convert-to-llvm.fir
index 4c9f965e1241a0..a5d741d7f566ad 100644
--- a/flang/test/Fir/convert-to-llvm.fir
+++ b/flang/test/Fir/convert-to-llvm.fir
@@ -3,8 +3,8 @@
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=i386-unknown-linux-gnu" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT,GENERIC
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=powerpc64le-unknown-linux-gnu" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT,GENERIC
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=x86_64-pc-win32" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT,GENERIC
-// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=aarch64-apple-darwin" %s | FileCheck %s --check-prefixes=CHECK,CHECK-NO-COMDAT,GENERIC
-// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=amdgcn-amd-amdhsa, datalayout=e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-P0" %s | FileCheck -check-prefixes=CHECK,AMDGPU %s
+// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=aarch64-apple-darwin" %s | FileCheck %s --check-prefixes=CHECK,CHECK-NO-COMDAT,GENERIC
+// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=amdgcn-amd-amdhsa, datalayout=e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" %s | FileCheck -check-prefixes=CHECK,AMDGPU %s
//===================================================
// SUMMARY: Tests for FIR --> LLVM MLIR conversion
@@ -17,7 +17,10 @@ fir.global @g_i0 : i32 {
fir.has_value %1 : i32
}
-// CHECK: llvm.mlir.global external @g_i0() {addr_space = 0 : i32} : i32 {
+// CHECK: llvm.mlir.global external @g_i0()
+// GENERIC-SAME: {addr_space = 0 : i32}
+// AMDGPU-SAME: {addr_space = 1 : i32}
+// CHECK-SAME: i32 {
// CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : i32) : i32
// CHECK: llvm.return %[[C0]] : i32
// CHECK: }
@@ -29,7 +32,10 @@ fir.global @g_ci5 constant : i32 {
fir.has_value %c : i32
}
-// CHECK: llvm.mlir.global external constant @g_ci5() {addr_space = 0 : i32} : i32 {
+// CHECK: llvm.mlir.global external constant @g_ci5()
+// GENERIC-SAME: {addr_space = 0 : i32}
+// AMDGPU-SAME: {addr_space = 1 : i32}
+// CHECK-SAME: i32 {
// CHECK: %[[C5:.*]] = llvm.mlir.constant(5 : i32) : i32
// CHECK: llvm.return %[[C5]] : i32
// CHECK: }
@@ -37,17 +43,26 @@ fir.global @g_ci5 constant : i32 {
// -----
fir.global internal @i_i515 (515:i32) : i32
-// CHECK: llvm.mlir.global internal @i_i515(515 : i32) {addr_space = 0 : i32} : i32
+// CHECK: llvm.mlir.global internal @i_i515(515 : i32)
+// GENERIC-SAME: {addr_space = 0 : i32}
+// AMDGPU-SAME: {addr_space = 1 : i32}
+// CHECK-SAME: : i32
// -----
fir.global common @C_i511 (0:i32) : i32
-// CHECK: llvm.mlir.global common @C_i511(0 : i32) {addr_space = 0 : i32} : i32
+// CHECK: llvm.mlir.global common @C_i511(0 : i32)
+// GENERIC-SAME: {addr_space = 0 : i32}
+// AMDGPU-SAME: {addr_space = 1 : i32}
+// CHECK-SAME: : i32
// -----
fir.global weak @w_i86 (86:i32) : i32
-// CHECK: llvm.mlir.global weak @w_i86(86 : i32) {addr_space = 0 : i32} : i32
+// CHECK: llvm.mlir.global weak @w_i86(86 : i32)
+// GENERIC-SAME: {addr_space = 0 : i32}
+// AMDGPU-SAME: {addr_space = 1 : i32}
+// CHECK-SAME: : i32
// -----
@@ -69,9 +84,13 @@ fir.global @symbol : i64 {
fir.has_value %0 : i64
}
-// CHECK: %{{.*}} = llvm.mlir.addressof @[[SYMBOL:.*]] : !llvm.ptr
+// CHECK: %[[ADDROF:.*]] = llvm.mlir.addressof @[[SYMBOL:.*]] : !llvm.ptr
+// AMDGPU: %{{.*}} = llvm.addrspacecast %[[ADDROF]] : !llvm.ptr<1> to !llvm.ptr
-// CHECK: llvm.mlir.global external @[[SYMBOL]]() {addr_space = 0 : i32} : i64 {
+// CHECK: llvm.mlir.global external @[[SYMBOL]]()
+// GENERIC-SAME: {addr_space = 0 : i32}
+// AMDGPU-SAME: {addr_space = 1 : i32}
+// CHECK-SAME: i64 {
// CHECK: %{{.*}} = llvm.mlir.constant(1 : i64) : i64
// CHECK: llvm.return %{{.*}} : i64
// CHECK: }
@@ -88,7 +107,10 @@ fir.global internal @_QEmultiarray : !fir.array<32x32xi32> {
fir.has_value %2 : !fir.array<32x32xi32>
}
-// CHECK: llvm.mlir.global internal @_QEmultiarray() {addr_space = 0 : i32} : !llvm.array<32 x array<32 x i32>> {
+// CHECK: llvm.mlir.global internal @_QEmultiarray()
+// GENERIC-SAME: {addr_space = 0 : i32}
+// AMDGPU-SAME: {addr_space = 1 : i32}
+// CHECK-SAME: : !llvm.array<32 x array<32 x i32>> {
// CHECK: %[[CST:.*]] = llvm.mlir.constant(dense<1> : vector<32x32xi32>) : !llvm.array<32 x array<32 x i32>>
// CHECK: llvm.return %[[CST]] : !llvm.array<32 x array<32 x i32>>
// CHECK: }
@@ -105,7 +127,10 @@ fir.global internal @_QEmultiarray : !fir.array<32xi32> {
fir.has_value %2 : !fir.array<32xi32>
}
-// CHECK: llvm.mlir.global internal @_QEmultiarray() {addr_space = 0 : i32} : !llvm.array<32 x i32> {
+// CHECK: llvm.mlir.global internal @_QEmultiarray()
+// GENERIC-SAME: {addr_space = 0 : i32}
+// AMDGPU-SAME: {addr_space = 1 : i32}
+// CHECK-SAME: : !llvm.array<32 x i32> {
// CHECK: %[[CST:.*]] = llvm.mlir.constant(1 : i32) : i32
// CHECK: %{{.*}} = llvm.mlir.undef : !llvm.array<32 x i32>
// CHECK: %{{.*}} = llvm.insertvalue %[[CST]], %{{.*}}[5] : !llvm.array<32 x i32>
@@ -1787,7 +1812,9 @@ func.func @embox1(%arg0: !fir.ref<!fir.type<_QMtest_dinitTtseq{i:i32}>>) {
// CHECK: %{{.*}} = llvm.insertvalue %[[VERSION]], %{{.*}}[2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)>
// CHECK: %[[TYPE_CODE_I8:.*]] = llvm.trunc %[[TYPE_CODE]] : i32 to i8
// CHECK: %{{.*}} = llvm.insertvalue %[[TYPE_CODE_I8]], %{{.*}}[4] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)>
-// CHECK: %[[TDESC:.*]] = llvm.mlir.addressof @_QMtest_dinitE.dt.tseq : !llvm.ptr
+// GENERIC: %[[TDESC:.*]] = llvm.mlir.addressof @_QMtest_dinitE.dt.tseq : !llvm.ptr
+// AMDGPU: %[[ADDROF:.*]] = llvm.mlir.addressof @_QMtest_dinitE.dt.tseq : !llvm.ptr<1>
+// AMDGPU: %[[TDESC:.*]] = llvm.addrspacecast %[[ADDROF]] : !llvm.ptr<1> to !llvm.ptr
// CHECK: %{{.*}} = llvm.insertvalue %[[TDESC]], %{{.*}}[7] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)>
// -----
@@ -2775,7 +2802,10 @@ func.func @coordinate_array_unknown_size_1d(%arg0: !fir.ptr<!fir.array<? x i32>>
fir.global common @c_(dense<0> : vector<4294967296xi8>) : !fir.array<4294967296xi8>
-// CHECK: llvm.mlir.global common @c_(dense<0> : vector<4294967296xi8>) {addr_space = 0 : i32} : !llvm.array<4294967296 x i8>
+// CHECK: llvm.mlir.global common @c_(dense<0> : vector<4294967296xi8>)
+// GENERIC-SAME: {addr_space = 0 : i32}
+// AMDGPU-SAME: {addr_space = 1 : i32}
+// CHECK-SAME: !llvm.array<4294967296 x i8>
// -----
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 21004e6a15d495..ba23635160e062 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -6741,6 +6741,12 @@ FunctionCallee OpenMPIRBuilder::createDispatchDeinitFunction() {
return getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_dispatch_deinit);
}
+static Value *removeASCastIfPresent(Value *V) {
+ if (Operator::getOpcode(V) == Instruction::AddrSpaceCast)
+ return cast<Operator>(V)->getOperand(0);
+ return V;
+}
+
static Expected<Function *> createOutlinedFunction(
OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, StringRef FuncName,
SmallVectorImpl<Value *> &Inputs,
@@ -6902,9 +6908,9 @@ static Expected<Function *> createOutlinedFunction(
// preceding mapped arguments that refer to the same global that may be
// seperate segments. To prevent this, we defer global processing until all
// other processing has been performed.
- if (llvm::isa<llvm::GlobalValue>(std::get<0>(InArg)) ||
- llvm::isa<llvm::GlobalObject>(std::get<0>(InArg)) ||
- llvm::isa<llvm::GlobalVariable>(std::get<0>(InArg))) {
+ if (llvm::isa<llvm::GlobalValue>(removeASCastIfPresent(Input)) ||
+ llvm::isa<llvm::GlobalObject>(removeASCastIfPresent(Input)) ||
+ llvm::isa<llvm::GlobalVariable>(removeASCastIfPresent(Input))) {
DeferredReplacement.push_back(std::make_pair(Input, InputCopy));
continue;
}
Small ping for some attention on this PR if at all possible please :-)
Thanks for working on this. I recently hit this issue (the address space of globals) as well while working on debug support for declare target variables. Looks mostly OK to me apart from a few nits.
    return rewriter.replaceOpWithNewOp<mlir::LLVM::AddressOfOp>(replaceOp, type,
                                                                symName);
  return rewriter.create<mlir::LLVM::AddressOfOp>(loc, type, symName);
}
The replaceOp makes this function a bit more difficult to read. I was wondering if having 2 helper functions would have been cleaner.
Split it into two separate functions, hopefully it's a little more readable and is what you expected! :-)
@@ -1255,14 +1293,19 @@ struct EmboxCommonConversion : public fir::FIROpConversion<OP> {
        ? fir::NameUniquer::getTypeDescriptorAssemblyName(recType.getName())
        : fir::NameUniquer::getTypeDescriptorName(recType.getName());
    mlir::Type llvmPtrTy = ::getLlvmPtrType(mod.getContext());
    mlir::DataLayout dataLayout(mod);
The construction of mlir::DataLayout may be expensive, so I wonder if there is a way to use an already constructed one.
Added a call to getOrSetDataLayout! So it should retrieve one if it's been constructed already, or create one otherwise (more likely retrieve one, as the function is called when the codegen passes are set up).
  auto module = mlir::isa<mlir::ModuleOp>(parentOp)
                    ? mlir::cast<mlir::ModuleOp>(parentOp)
                    : parentOp->getParentOfType<mlir::ModuleOp>();
  if (module)
This change and the one below are not exactly required for this PR. I guess they are small enough to include here, but they could go in a separate PR.
Thank you, I would like to keep them in this PR: the main reason I found this problem is that it does result in an error in this PR stack, and I think they do fit in the overall scope of this particular PR of adding/addressing various quirks with the address spaces :-)
@abidh Addressed your comments as best I could, hopefully they're reasonable changes! :-) Unfortunately, I've also done a rebase which breaks the PR, as it brings in some incompatible changes that will need to be fixed in a separate PR; it's related to extending the getDataLayout utilities to support the new GPU module that's occasionally in use. It doesn't make sense to add that in this PR I think, so I'll do so in a separate one I'll open in the near future (hopefully today) :-) And thank you very much for the review! It took me a bit to get around to addressing the comments, as I'd hoped the other segments of the stack would have received some reviews by now so I could do it all at once; alas, it was not to be!
Thanks. I skimmed through your changes and they look ok to me.
I have some minor nits and suggestions to hopefully improve code readability, but LGTM.
      mod, /*allowDefaultLayout*/ mlir::isa<mlir::gpu::GPUModuleOp>(mod)
               ? false
               : true);
Suggested change:
      mod, /*allowDefaultLayout=*/!mlir::isa<mlir::gpu::GPUModuleOp>(mod));
// We allow the module to be set to a default layout if it's a regular module
// however, we prevent this if it's a GPU module, as the datalayout in these
// cases will currently be the union of the GPU Module and the parent builtin
// module, with the GPU module overriding the parent where there are duplicates.
// However, if we force the default layout onto a GPU module, with no datalayout
// it'll result in issues as the infrastructure does not support the union of
// two layouts with builtin data layout entries currently (and it doesn't look
// like it was intended to).
Suggested change:
// We allow the module to be set to a default layout if it's a regular module.
// However, we prevent this if it's a GPU module, as the datalayout in these
// cases will currently be the union of the GPU Module and the parent builtin
// module, with the GPU module overriding the parent where there are duplicates.
// However, if we force the default layout onto a GPU module, with no datalayout
// it'll result in issues as the infrastructure does not support the union of
// two layouts with builtin data layout entries.
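For anyone skimming the thread, a rough illustrative sketch of the situation that comment describes (my own example, not from the patch; the module name and the exact dlti entries are assumptions): a gpu.module nested inside the builtin module, each carrying its own data layout entries, where queries inside the gpu.module see the union of the two specs with the inner entries winning on overlap.

  // Illustrative only: hypothetical data layout entries on both modules.
  module attributes {dlti.dl_spec = #dlti.dl_spec<
      #dlti.dl_entry<"dlti.global_memory_space", 0 : ui32>,
      #dlti.dl_entry<"dlti.program_memory_space", 0 : ui32>>} {
    gpu.module @device_code attributes {dlti.dl_spec = #dlti.dl_spec<
        #dlti.dl_entry<"dlti.global_memory_space", 1 : ui32>,
        #dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>} {
      // Inside here the effective layout should combine both specs:
      // global_memory_space = 1 and alloca_memory_space = 5 from the inner
      // spec, program_memory_space = 0 from the parent. Forcing a default
      // layout onto the gpu.module would clash with that combination.
    }
  }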
  if (mlir::isa<mlir::LLVM::LLVMPointerType>(type)) {
    if (globalAS != programAS) {
      auto llvmAddrOp = rewriter.create<mlir::LLVM::AddressOfOp>(
          loc, getLlvmPtrType(rewriter.getContext(), globalAS), symName);
      return rewriter.replaceOpWithNewOp<mlir::LLVM::AddrSpaceCastOp>(
          replaceOp, ::getLlvmPtrType(rewriter.getContext(), programAS),
          llvmAddrOp);
    }
    return rewriter.replaceOpWithNewOp<mlir::LLVM::AddressOfOp>(
        replaceOp, getLlvmPtrType(rewriter.getContext(), globalAS), symName);
  }
  return rewriter.replaceOpWithNewOp<mlir::LLVM::AddressOfOp>(replaceOp, type,
                                                              symName);
Nit: Feel free to disagree, but it seems like the logic here could be simplified a bit with something like this:
Suggested change:
  bool isPointerType = mlir::isa<mlir::LLVM::LLVMPointerType>(type);
  if (isPointerType)
    type = getLlvmPtrType(rewriter.getContext(), globalAS);
  auto llvmAddrOp = rewriter.create<mlir::LLVM::AddressOfOp>(loc, type, symName);
  if (isPointerType && globalAS != programAS)
    return rewriter.replaceOpWithNewOp<mlir::LLVM::AddrSpaceCastOp>(
        replaceOp, getLlvmPtrType(rewriter.getContext(), programAS), llvmAddrOp);
  return rewriter.replaceOp(replaceOp, llvmAddrOp);
  if (globalAS != programAS) {
    auto llvmAddrOp = rewriter.create<mlir::LLVM::AddressOfOp>(
        loc, getLlvmPtrType(rewriter.getContext(), globalAS), symName);
    return rewriter.create<mlir::LLVM::AddrSpaceCastOp>(
        loc, getLlvmPtrType(rewriter.getContext(), programAS), llvmAddrOp);
  }
  return rewriter.create<mlir::LLVM::AddressOfOp>(
      loc, getLlvmPtrType(rewriter.getContext(), globalAS), symName);
Suggested change:
  auto llvmAddrOp = rewriter.create<mlir::LLVM::AddressOfOp>(
      loc, getLlvmPtrType(rewriter.getContext(), globalAS), symName);
  if (globalAS != programAS)
    return rewriter.create<mlir::LLVM::AddrSpaceCastOp>(
        loc, getLlvmPtrType(rewriter.getContext(), programAS), llvmAddrOp);
  return llvmAddrOp;