Skip to content

[Flang][OpenMP] Add global address space to globals for target device #119585

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions flang/include/flang/Optimizer/Builder/FIRBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -804,6 +804,10 @@ elideLengthsAlreadyInType(mlir::Type type, mlir::ValueRange lenParams);
/// Get the address space which should be used for allocas
uint64_t getAllocaAddressSpace(mlir::DataLayout *dataLayout);

uint64_t getGlobalAddressSpace(mlir::DataLayout *dataLayout);

uint64_t getProgramAddressSpace(mlir::DataLayout *dataLayout);

} // namespace fir::factory

#endif // FORTRAN_OPTIMIZER_BUILDER_FIRBUILDER_H
3 changes: 3 additions & 0 deletions flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,9 @@ class ConvertFIRToLLVMPattern : public mlir::ConvertToLLVMPattern {
unsigned
getProgramAddressSpace(mlir::ConversionPatternRewriter &rewriter) const;

unsigned
getGlobalAddressSpace(mlir::ConversionPatternRewriter &rewriter) const;

const fir::FIRToLLVMPassOptions &options;

using ConvertToLLVMPattern::match;
Expand Down
14 changes: 14 additions & 0 deletions flang/lib/Optimizer/Builder/FIRBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1740,3 +1740,17 @@ uint64_t fir::factory::getAllocaAddressSpace(mlir::DataLayout *dataLayout) {
return mlir::cast<mlir::IntegerAttr>(addrSpace).getUInt();
return 0;
}

uint64_t fir::factory::getGlobalAddressSpace(mlir::DataLayout *dataLayout) {
if (dataLayout)
if (mlir::Attribute addrSpace = dataLayout->getGlobalMemorySpace())
return mlir::cast<mlir::IntegerAttr>(addrSpace).getUInt();
return 0;
}

uint64_t fir::factory::getProgramAddressSpace(mlir::DataLayout *dataLayout) {
if (dataLayout)
if (mlir::Attribute addrSpace = dataLayout->getProgramMemorySpace())
return mlir::cast<mlir::IntegerAttr>(addrSpace).getUInt();
return 0;
}
99 changes: 86 additions & 13 deletions flang/lib/Optimizer/CodeGen/CodeGen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,16 +134,65 @@ addLLVMOpBundleAttrs(mlir::ConversionPatternRewriter &rewriter,
}

namespace {

// Creates an existing operation with an AddressOfOp or an AddrSpaceCastOp
// depending on the existing address spaces of the type.
mlir::Value createAddrOfOrASCast(mlir::ConversionPatternRewriter &rewriter,
mlir::Location loc, std::uint64_t globalAS,
std::uint64_t programAS,
llvm::StringRef symName, mlir::Type type) {
if (mlir::isa<mlir::LLVM::LLVMPointerType>(type)) {
if (globalAS != programAS) {
auto llvmAddrOp = rewriter.create<mlir::LLVM::AddressOfOp>(
loc, getLlvmPtrType(rewriter.getContext(), globalAS), symName);
return rewriter.create<mlir::LLVM::AddrSpaceCastOp>(
loc, getLlvmPtrType(rewriter.getContext(), programAS), llvmAddrOp);
}
return rewriter.create<mlir::LLVM::AddressOfOp>(
loc, getLlvmPtrType(rewriter.getContext(), globalAS), symName);
Comment on lines +145 to +152
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (globalAS != programAS) {
auto llvmAddrOp = rewriter.create<mlir::LLVM::AddressOfOp>(
loc, getLlvmPtrType(rewriter.getContext(), globalAS), symName);
return rewriter.create<mlir::LLVM::AddrSpaceCastOp>(
loc, getLlvmPtrType(rewriter.getContext(), programAS), llvmAddrOp);
}
return rewriter.create<mlir::LLVM::AddressOfOp>(
loc, getLlvmPtrType(rewriter.getContext(), globalAS), symName);
auto llvmAddrOp = rewriter.create<mlir::LLVM::AddressOfOp>(
loc, getLlvmPtrType(rewriter.getContext(), globalAS), symName);
if (globalAS != programAS)
return rewriter.create<mlir::LLVM::AddrSpaceCastOp>(
loc, getLlvmPtrType(rewriter.getContext(), programAS), llvmAddrOp);
return llvmAddrOp;

}
return rewriter.create<mlir::LLVM::AddressOfOp>(loc, type, symName);
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The replaceOp makes this function a bit more difficult to read. I was wondering if having 2 helper functions would have been cleaner.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Split it into two seperate functions, hopefully it's a little more readable and is what you expected! :-)


// Replaces an existing operation with an AddressOfOp or an AddrSpaceCastOp
// depending on the existing address spaces of the type.
mlir::Value replaceWithAddrOfOrASCast(mlir::ConversionPatternRewriter &rewriter,
mlir::Location loc,
std::uint64_t globalAS,
std::uint64_t programAS,
llvm::StringRef symName, mlir::Type type,
mlir::Operation *replaceOp) {
if (mlir::isa<mlir::LLVM::LLVMPointerType>(type)) {
if (globalAS != programAS) {
auto llvmAddrOp = rewriter.create<mlir::LLVM::AddressOfOp>(
loc, getLlvmPtrType(rewriter.getContext(), globalAS), symName);
return rewriter.replaceOpWithNewOp<mlir::LLVM::AddrSpaceCastOp>(
replaceOp, ::getLlvmPtrType(rewriter.getContext(), programAS),
llvmAddrOp);
}
return rewriter.replaceOpWithNewOp<mlir::LLVM::AddressOfOp>(
replaceOp, getLlvmPtrType(rewriter.getContext(), globalAS), symName);
}
return rewriter.replaceOpWithNewOp<mlir::LLVM::AddressOfOp>(replaceOp, type,
symName);
Comment on lines +165 to +177
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Feel free to disagree, but it seems like the logic here could be simplified a bit with something like this:

Suggested change
if (mlir::isa<mlir::LLVM::LLVMPointerType>(type)) {
if (globalAS != programAS) {
auto llvmAddrOp = rewriter.create<mlir::LLVM::AddressOfOp>(
loc, getLlvmPtrType(rewriter.getContext(), globalAS), symName);
return rewriter.replaceOpWithNewOp<mlir::LLVM::AddrSpaceCastOp>(
replaceOp, ::getLlvmPtrType(rewriter.getContext(), programAS),
llvmAddrOp);
}
return rewriter.replaceOpWithNewOp<mlir::LLVM::AddressOfOp>(
replaceOp, getLlvmPtrType(rewriter.getContext(), globalAS), symName);
}
return rewriter.replaceOpWithNewOp<mlir::LLVM::AddressOfOp>(replaceOp, type,
symName);
bool isPointerType = mlir::isa<mlir::LLVM::LLVMPointerType>(type);
if (isPointerType)
type = getLlvmPtrType(rewriter.getContext(), globalAS)
auto llvmAddrOp = rewriter.create<mlir::LLVM::AddressOfOp>(
loc, type, symName);
if (isPointerType && globalAS != programAS)
return rewriter.replaceOpWithNewOp<mlir::LLVM::AddrSpaceCastOp>(
replaceOp, getLlvmPtrType(rewriter.getContext(), programAS),
llvmAddrOp);
return rewriter.replaceOp(replaceOp, llvmAddrOp);

}

/// Lower `fir.address_of` operation to `llvm.address_of` operation.
struct AddrOfOpConversion : public fir::FIROpConversion<fir::AddrOfOp> {
using FIROpConversion::FIROpConversion;

llvm::LogicalResult
matchAndRewrite(fir::AddrOfOp addr, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const override {
auto ty = convertType(addr.getType());
rewriter.replaceOpWithNewOp<mlir::LLVM::AddressOfOp>(
addr, ty, addr.getSymbol().getRootReference().getValue());
auto global = addr->getParentOfType<mlir::ModuleOp>()
.lookupSymbol<mlir::LLVM::GlobalOp>(addr.getSymbol());
replaceWithAddrOfOrASCast(
rewriter, addr->getLoc(),
global ? global.getAddrSpace() : getGlobalAddressSpace(rewriter),
getProgramAddressSpace(rewriter),
global ? global.getSymName()
: addr.getSymbol().getRootReference().getValue(),
convertType(addr.getType()), addr);
return mlir::success();
}
};
Expand Down Expand Up @@ -1350,14 +1399,34 @@ struct EmboxCommonConversion : public fir::FIROpConversion<OP> {
? fir::NameUniquer::getTypeDescriptorAssemblyName(recType.getName())
: fir::NameUniquer::getTypeDescriptorName(recType.getName());
mlir::Type llvmPtrTy = ::getLlvmPtrType(mod.getContext());

// We allow the module to be set to a default layout if it's a regular module
// however, we prevent this if it's a GPU module, as the datalayout in these
// cases will currently be the union of the GPU Module and the parent builtin
// module, with the GPU module overriding the parent where there are duplicates.
// However, if we force the default layout onto a GPU module, with no datalayout
// it'll result in issues as the infrastructure does not support the union of
// two layouts with builtin data layout entries currently (and it doesn't look
// like it was intended to).
Comment on lines +1403 to +1410
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// We allow the module to be set to a default layout if it's a regular module
// however, we prevent this if it's a GPU module, as the datalayout in these
// cases will currently be the union of the GPU Module and the parent builtin
// module, with the GPU module overriding the parent where there are duplicates.
// However, if we force the default layout onto a GPU module, with no datalayout
// it'll result in issues as the infrastructure does not support the union of
// two layouts with builtin data layout entries currently (and it doesn't look
// like it was intended to).
// We allow the module to be set to a default layout if it's a regular module.
// However, we prevent this if it's a GPU module, as the datalayout in these
// cases will currently be the union of the GPU Module and the parent builtin
// module, with the GPU module overriding the parent where there are duplicates.
// However, if we force the default layout onto a GPU module, with no datalayout
// it'll result in issues as the infrastructure does not support the union of
// two layouts with builtin data layout entries.

std::optional<mlir::DataLayout> dataLayout =
fir::support::getOrSetMLIRDataLayout(
mod, /*allowDefaultLayout*/ mlir::isa<mlir::gpu::GPUModuleOp>(mod)
? false
: true);
Comment on lines +1413 to +1415
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
mod, /*allowDefaultLayout*/ mlir::isa<mlir::gpu::GPUModuleOp>(mod)
? false
: true);
mod, /*allowDefaultLayout=*/!mlir::isa<mlir::gpu::GPUModuleOp>(mod));

assert(dataLayout.has_value() && "Module missing DataLayout information");

if (auto global = mod.template lookupSymbol<fir::GlobalOp>(name)) {
return rewriter.create<mlir::LLVM::AddressOfOp>(loc, llvmPtrTy,
global.getSymName());
return createAddrOfOrASCast(
rewriter, loc, fir::factory::getGlobalAddressSpace(&*dataLayout),
fir::factory::getProgramAddressSpace(&*dataLayout),
global.getSymName(), llvmPtrTy);
}
if (auto global = mod.template lookupSymbol<mlir::LLVM::GlobalOp>(name)) {
// The global may have already been translated to LLVM.
return rewriter.create<mlir::LLVM::AddressOfOp>(loc, llvmPtrTy,
global.getSymName());
return createAddrOfOrASCast(
rewriter, loc, global.getAddrSpace(),
fir::factory::getProgramAddressSpace(&*dataLayout),
global.getSymName(), llvmPtrTy);
}
// Type info derived types do not have type descriptors since they are the
// types defining type descriptors.
Expand Down Expand Up @@ -2896,12 +2965,16 @@ struct TypeDescOpConversion : public fir::FIROpConversion<fir::TypeDescOp> {
: fir::NameUniquer::getTypeDescriptorName(recordType.getName());
auto llvmPtrTy = ::getLlvmPtrType(typeDescOp.getContext());
if (auto global = module.lookupSymbol<mlir::LLVM::GlobalOp>(typeDescName)) {
rewriter.replaceOpWithNewOp<mlir::LLVM::AddressOfOp>(
typeDescOp, llvmPtrTy, global.getSymName());
replaceWithAddrOfOrASCast(rewriter, typeDescOp->getLoc(),
global.getAddrSpace(),
getProgramAddressSpace(rewriter),
global.getSymName(), llvmPtrTy, typeDescOp);
return mlir::success();
} else if (auto global = module.lookupSymbol<fir::GlobalOp>(typeDescName)) {
rewriter.replaceOpWithNewOp<mlir::LLVM::AddressOfOp>(
typeDescOp, llvmPtrTy, global.getSymName());
replaceWithAddrOfOrASCast(rewriter, typeDescOp->getLoc(),
getGlobalAddressSpace(rewriter),
getProgramAddressSpace(rewriter),
global.getSymName(), llvmPtrTy, typeDescOp);
return mlir::success();
}
return mlir::failure();
Expand Down Expand Up @@ -2992,8 +3065,8 @@ struct GlobalOpConversion : public fir::FIROpConversion<fir::GlobalOp> {
mlir::SymbolRefAttr comdat;
llvm::ArrayRef<mlir::NamedAttribute> attrs;
auto g = rewriter.create<mlir::LLVM::GlobalOp>(
loc, tyAttr, isConst, linkage, global.getSymName(), initAttr, 0, 0,
false, false, comdat, attrs, dbgExprs);
loc, tyAttr, isConst, linkage, global.getSymName(), initAttr, 0,
getGlobalAddressSpace(rewriter), false, false, comdat, attrs, dbgExprs);

if (global.getAlignment() && *global.getAlignment() > 0)
g.setAlignment(*global.getAlignment());
Expand Down
25 changes: 23 additions & 2 deletions flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,10 @@ unsigned ConvertFIRToLLVMPattern::getAllocaAddressSpace(
mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
assert(parentOp != nullptr &&
"expected insertion block to have parent operation");
if (auto module = parentOp->getParentOfType<mlir::ModuleOp>())
auto module = mlir::isa<mlir::ModuleOp>(parentOp)
? mlir::cast<mlir::ModuleOp>(parentOp)
: parentOp->getParentOfType<mlir::ModuleOp>();
if (module)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change and the one below is not exactly required for this PR. I guess they are small enough to include in this but could go in a separate PR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you, I would like to keep them in this PR, as the main reason I found this problem is that it does result in an error in this PR stack and they do fit in the overall scope of this particular PR I think in adding/addressing various quirks with the address spaces :-)

if (mlir::Attribute addrSpace =
mlir::DataLayout(module).getAllocaMemorySpace())
return llvm::cast<mlir::IntegerAttr>(addrSpace).getUInt();
Expand All @@ -358,11 +361,29 @@ unsigned ConvertFIRToLLVMPattern::getProgramAddressSpace(
mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
assert(parentOp != nullptr &&
"expected insertion block to have parent operation");
if (auto module = parentOp->getParentOfType<mlir::ModuleOp>())
auto module = mlir::isa<mlir::ModuleOp>(parentOp)
? mlir::cast<mlir::ModuleOp>(parentOp)
: parentOp->getParentOfType<mlir::ModuleOp>();
if (module)
if (mlir::Attribute addrSpace =
mlir::DataLayout(module).getProgramMemorySpace())
return llvm::cast<mlir::IntegerAttr>(addrSpace).getUInt();
return defaultAddressSpace;
}

unsigned ConvertFIRToLLVMPattern::getGlobalAddressSpace(
mlir::ConversionPatternRewriter &rewriter) const {
mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
assert(parentOp != nullptr &&
"expected insertion block to have parent operation");
auto module = mlir::isa<mlir::ModuleOp>(parentOp)
? mlir::cast<mlir::ModuleOp>(parentOp)
: parentOp->getParentOfType<mlir::ModuleOp>();
if (module)
if (mlir::Attribute addrSpace =
mlir::DataLayout(module).getGlobalMemorySpace())
return llvm::cast<mlir::IntegerAttr>(addrSpace).getUInt();
return defaultAddressSpace;
}

} // namespace fir
56 changes: 43 additions & 13 deletions flang/test/Fir/convert-to-llvm.fir
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=i386-unknown-linux-gnu" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT,GENERIC
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=powerpc64le-unknown-linux-gnu" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT,GENERIC
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=x86_64-pc-win32" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT,GENERIC
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=aarch64-apple-darwin" %s | FileCheck %s --check-prefixes=CHECK,CHECK-NO-COMDAT,GENERIC
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=amdgcn-amd-amdhsa, datalayout=e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-P0" %s | FileCheck -check-prefixes=CHECK,AMDGPU %s
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=aarch64-apple-darwin" %s | FileCheck %s --check-prefixes=CHECK,CHECK-NO-COMDAT,GENERIC
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=amdgcn-amd-amdhsa, datalayout=e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" %s | FileCheck -check-prefixes=CHECK,AMDGPU %s

//===================================================
// SUMMARY: Tests for FIR --> LLVM MLIR conversion
Expand All @@ -17,7 +17,10 @@ fir.global @g_i0 : i32 {
fir.has_value %1 : i32
}

// CHECK: llvm.mlir.global external @g_i0() {addr_space = 0 : i32} : i32 {
// CHECK: llvm.mlir.global external @g_i0()
// GENERIC-SAME: {addr_space = 0 : i32}
// AMDGPU-SAME: {addr_space = 1 : i32}
// CHECK-SAME: i32 {
// CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : i32) : i32
// CHECK: llvm.return %[[C0]] : i32
// CHECK: }
Expand All @@ -29,25 +32,37 @@ fir.global @g_ci5 constant : i32 {
fir.has_value %c : i32
}

// CHECK: llvm.mlir.global external constant @g_ci5() {addr_space = 0 : i32} : i32 {
// CHECK: llvm.mlir.global external constant @g_ci5()
// GENERIC-SAME: {addr_space = 0 : i32}
// AMDGPU-SAME: {addr_space = 1 : i32}
// CHECK-SAME: i32 {
// CHECK: %[[C5:.*]] = llvm.mlir.constant(5 : i32) : i32
// CHECK: llvm.return %[[C5]] : i32
// CHECK: }

// -----

fir.global internal @i_i515 (515:i32) : i32
// CHECK: llvm.mlir.global internal @i_i515(515 : i32) {addr_space = 0 : i32} : i32
// CHECK: llvm.mlir.global internal @i_i515(515 : i32)
// GENERIC-SAME: {addr_space = 0 : i32}
// AMDGPU-SAME: {addr_space = 1 : i32}
// CHECK-SAME: : i32

// -----

fir.global common @C_i511 (0:i32) : i32
// CHECK: llvm.mlir.global common @C_i511(0 : i32) {addr_space = 0 : i32} : i32
// CHECK: llvm.mlir.global common @C_i511(0 : i32)
// GENERIC-SAME: {addr_space = 0 : i32}
// AMDGPU-SAME: {addr_space = 1 : i32}
// CHECK-SAME: : i32

// -----

fir.global weak @w_i86 (86:i32) : i32
// CHECK: llvm.mlir.global weak @w_i86(86 : i32) {addr_space = 0 : i32} : i32
// CHECK: llvm.mlir.global weak @w_i86(86 : i32)
// GENERIC-SAME: {addr_space = 0 : i32}
// AMDGPU-SAME: {addr_space = 1 : i32}
// CHECK-SAME: : i32

// -----

Expand All @@ -69,9 +84,13 @@ fir.global @symbol : i64 {
fir.has_value %0 : i64
}

// CHECK: %{{.*}} = llvm.mlir.addressof @[[SYMBOL:.*]] : !llvm.ptr
// CHECK: %[[ADDROF:.*]] = llvm.mlir.addressof @[[SYMBOL:.*]] : !llvm.ptr
// AMDGPU: %{{.*}} = llvm.addrspacecast %[[ADDROF]] : !llvm.ptr<1> to !llvm.ptr

// CHECK: llvm.mlir.global external @[[SYMBOL]]() {addr_space = 0 : i32} : i64 {
// CHECK: llvm.mlir.global external @[[SYMBOL]]()
// GENERIC-SAME: {addr_space = 0 : i32}
// AMDGPU-SAME: {addr_space = 1 : i32}
// CHECK-SAME: i64 {
// CHECK: %{{.*}} = llvm.mlir.constant(1 : i64) : i64
// CHECK: llvm.return %{{.*}} : i64
// CHECK: }
Expand All @@ -88,7 +107,10 @@ fir.global internal @_QEmultiarray : !fir.array<32x32xi32> {
fir.has_value %2 : !fir.array<32x32xi32>
}

// CHECK: llvm.mlir.global internal @_QEmultiarray() {addr_space = 0 : i32} : !llvm.array<32 x array<32 x i32>> {
// CHECK: llvm.mlir.global internal @_QEmultiarray()
// GENERIC-SAME: {addr_space = 0 : i32}
// AMDGPU-SAME: {addr_space = 1 : i32}
// CHECK-SAME: : !llvm.array<32 x array<32 x i32>> {
// CHECK: %[[CST:.*]] = llvm.mlir.constant(dense<1> : vector<32x32xi32>) : !llvm.array<32 x array<32 x i32>>
// CHECK: llvm.return %[[CST]] : !llvm.array<32 x array<32 x i32>>
// CHECK: }
Expand All @@ -105,7 +127,10 @@ fir.global internal @_QEmultiarray : !fir.array<32xi32> {
fir.has_value %2 : !fir.array<32xi32>
}

// CHECK: llvm.mlir.global internal @_QEmultiarray() {addr_space = 0 : i32} : !llvm.array<32 x i32> {
// CHECK: llvm.mlir.global internal @_QEmultiarray()
// GENERIC-SAME: {addr_space = 0 : i32}
// AMDGPU-SAME: {addr_space = 1 : i32}
// CHECK-SAME: : !llvm.array<32 x i32> {
// CHECK: %[[CST:.*]] = llvm.mlir.constant(1 : i32) : i32
// CHECK: %{{.*}} = llvm.mlir.undef : !llvm.array<32 x i32>
// CHECK: %{{.*}} = llvm.insertvalue %[[CST]], %{{.*}}[5] : !llvm.array<32 x i32>
Expand Down Expand Up @@ -1787,7 +1812,9 @@ func.func @embox1(%arg0: !fir.ref<!fir.type<_QMtest_dinitTtseq{i:i32}>>) {
// CHECK: %{{.*}} = llvm.insertvalue %[[VERSION]], %{{.*}}[2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)>
// CHECK: %[[TYPE_CODE_I8:.*]] = llvm.trunc %[[TYPE_CODE]] : i32 to i8
// CHECK: %{{.*}} = llvm.insertvalue %[[TYPE_CODE_I8]], %{{.*}}[4] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)>
// CHECK: %[[TDESC:.*]] = llvm.mlir.addressof @_QMtest_dinitE.dt.tseq : !llvm.ptr
// GENERIC: %[[TDESC:.*]] = llvm.mlir.addressof @_QMtest_dinitE.dt.tseq : !llvm.ptr
// AMDGPU: %[[ADDROF:.*]] = llvm.mlir.addressof @_QMtest_dinitE.dt.tseq : !llvm.ptr<1>
// AMDGPU: %[[TDESC:.*]] = llvm.addrspacecast %[[ADDROF]] : !llvm.ptr<1> to !llvm.ptr
// CHECK: %{{.*}} = llvm.insertvalue %[[TDESC]], %{{.*}}[7] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)>

// -----
Expand Down Expand Up @@ -2775,7 +2802,10 @@ func.func @coordinate_array_unknown_size_1d(%arg0: !fir.ptr<!fir.array<? x i32>>

fir.global common @c_(dense<0> : vector<4294967296xi8>) : !fir.array<4294967296xi8>

// CHECK: llvm.mlir.global common @c_(dense<0> : vector<4294967296xi8>) {addr_space = 0 : i32} : !llvm.array<4294967296 x i8>
// CHECK: llvm.mlir.global common @c_(dense<0> : vector<4294967296xi8>)
// GENERIC-SAME: {addr_space = 0 : i32}
// AMDGPU-SAME: {addr_space = 1 : i32}
// CHECK-SAME: !llvm.array<4294967296 x i8>

// -----

Expand Down
12 changes: 9 additions & 3 deletions llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6743,6 +6743,12 @@ FunctionCallee OpenMPIRBuilder::createDispatchDeinitFunction() {
return getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_dispatch_deinit);
}

static Value *removeASCastIfPresent(Value *V) {
if (Operator::getOpcode(V) == Instruction::AddrSpaceCast)
return cast<Operator>(V)->getOperand(0);
return V;
}

static Expected<Function *> createOutlinedFunction(
OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
Expand Down Expand Up @@ -6926,9 +6932,9 @@ static Expected<Function *> createOutlinedFunction(
// preceding mapped arguments that refer to the same global that may be
// seperate segments. To prevent this, we defer global processing until all
// other processing has been performed.
if (llvm::isa<llvm::GlobalValue>(std::get<0>(InArg)) ||
llvm::isa<llvm::GlobalObject>(std::get<0>(InArg)) ||
llvm::isa<llvm::GlobalVariable>(std::get<0>(InArg))) {
if (llvm::isa<llvm::GlobalValue>(removeASCastIfPresent(Input)) ||
llvm::isa<llvm::GlobalObject>(removeASCastIfPresent(Input)) ||
llvm::isa<llvm::GlobalVariable>(removeASCastIfPresent(Input))) {
DeferredReplacement.push_back(std::make_pair(Input, InputCopy));
continue;
}
Expand Down
Loading