Skip to content

Commit c7263e7

Browse files
[Flang][OpenMP] Add global address space to globals for target device
Currently we do not add the appropriate global address space to globals in the target device pass. This PR adds at least a preliminary set of these address space markings and then adds the appropriate casts where necessary (primarily from global to program). This allows for more correct IR that the backends (in particular AMDGPU) can treat more aptly for optimisations and code correctness. Co-authored-by: Raghu Maddhipatla <raghu.maddhipatla@amd.com>
1 parent 9ba27ca commit c7263e7

File tree

7 files changed

+182
-31
lines changed

7 files changed

+182
-31
lines changed

flang/include/flang/Optimizer/Builder/FIRBuilder.h

+4
Original file line numberDiff line numberDiff line change
@@ -804,6 +804,10 @@ elideLengthsAlreadyInType(mlir::Type type, mlir::ValueRange lenParams);
804804
/// Get the address space which should be used for allocas
805805
uint64_t getAllocaAddressSpace(mlir::DataLayout *dataLayout);
806806

807+
uint64_t getGlobalAddressSpace(mlir::DataLayout *dataLayout);
808+
809+
uint64_t getProgramAddressSpace(mlir::DataLayout *dataLayout);
810+
807811
} // namespace fir::factory
808812

809813
#endif // FORTRAN_OPTIMIZER_BUILDER_FIRBUILDER_H

flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h

+3
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,9 @@ class ConvertFIRToLLVMPattern : public mlir::ConvertToLLVMPattern {
185185
unsigned
186186
getProgramAddressSpace(mlir::ConversionPatternRewriter &rewriter) const;
187187

188+
unsigned
189+
getGlobalAddressSpace(mlir::ConversionPatternRewriter &rewriter) const;
190+
188191
const fir::FIRToLLVMPassOptions &options;
189192

190193
using ConvertToLLVMPattern::match;

flang/lib/Optimizer/Builder/FIRBuilder.cpp

+14
Original file line numberDiff line numberDiff line change
@@ -1740,3 +1740,17 @@ uint64_t fir::factory::getAllocaAddressSpace(mlir::DataLayout *dataLayout) {
17401740
return mlir::cast<mlir::IntegerAttr>(addrSpace).getUInt();
17411741
return 0;
17421742
}
1743+
1744+
/// Get the address space which should be used for globals.
/// Returns the integer value of the global memory space attribute reported by
/// \p dataLayout, or 0 when \p dataLayout is null or reports no such attribute.
uint64_t fir::factory::getGlobalAddressSpace(mlir::DataLayout *dataLayout) {
1745+
if (dataLayout)
1746+
if (mlir::Attribute addrSpace = dataLayout->getGlobalMemorySpace())
1747+
return mlir::cast<mlir::IntegerAttr>(addrSpace).getUInt();
1748+
// No data layout, or no global memory space entry: fall back to 0.
return 0;
1749+
}
1750+
1751+
/// Get the program address space.
/// Returns the integer value of the program memory space attribute reported by
/// \p dataLayout, or 0 when \p dataLayout is null or reports no such attribute.
uint64_t fir::factory::getProgramAddressSpace(mlir::DataLayout *dataLayout) {
1752+
if (dataLayout)
1753+
if (mlir::Attribute addrSpace = dataLayout->getProgramMemorySpace())
1754+
return mlir::cast<mlir::IntegerAttr>(addrSpace).getUInt();
1755+
// No data layout, or no program memory space entry: fall back to 0.
return 0;
1756+
}

flang/lib/Optimizer/CodeGen/CodeGen.cpp

+86-13
Original file line numberDiff line numberDiff line change
@@ -134,16 +134,65 @@ addLLVMOpBundleAttrs(mlir::ConversionPatternRewriter &rewriter,
134134
}
135135

136136
namespace {
137+
138+
// Creates an AddressOfOp for `symName`, followed by an AddrSpaceCastOp when
139+
// the global and program address spaces differ.
//
// When `type` is an LLVM pointer type:
//  - if `globalAS != programAS`, the address is taken as a pointer in
//    `globalAS` and then cast to a pointer in `programAS`; the cast result is
//    returned;
//  - otherwise the AddressOfOp (pointer in `globalAS`) is returned directly.
// When `type` is not an LLVM pointer type, an AddressOfOp of exactly `type`
// is created and returned; `globalAS` and `programAS` are not used.
140+
mlir::Value createAddrOfOrASCast(mlir::ConversionPatternRewriter &rewriter,
141+
mlir::Location loc, std::uint64_t globalAS,
142+
std::uint64_t programAS,
143+
llvm::StringRef symName, mlir::Type type) {
144+
if (mlir::isa<mlir::LLVM::LLVMPointerType>(type)) {
145+
if (globalAS != programAS) {
146+
auto llvmAddrOp = rewriter.create<mlir::LLVM::AddressOfOp>(
147+
loc, getLlvmPtrType(rewriter.getContext(), globalAS), symName);
148+
return rewriter.create<mlir::LLVM::AddrSpaceCastOp>(
149+
loc, getLlvmPtrType(rewriter.getContext(), programAS), llvmAddrOp);
150+
}
151+
return rewriter.create<mlir::LLVM::AddressOfOp>(
152+
loc, getLlvmPtrType(rewriter.getContext(), globalAS), symName);
153+
}
154+
return rewriter.create<mlir::LLVM::AddressOfOp>(loc, type, symName);
155+
}
156+
157+
// Replaces `replaceOp` with an AddressOfOp for `symName`, or with an
158+
// AddrSpaceCastOp of a newly created AddressOfOp when the global and program
// address spaces differ.
//
// When `type` is an LLVM pointer type:
//  - if `globalAS != programAS`, a new AddressOfOp (pointer in `globalAS`) is
//    created at `loc` and `replaceOp` is replaced by a cast of it to a pointer
//    in `programAS`;
//  - otherwise `replaceOp` is replaced by an AddressOfOp producing a pointer
//    in `globalAS`.
// When `type` is not an LLVM pointer type, `replaceOp` is replaced by an
// AddressOfOp of exactly `type`.
// Returns the result of the operation that replaced `replaceOp`.
159+
mlir::Value replaceWithAddrOfOrASCast(mlir::ConversionPatternRewriter &rewriter,
160+
mlir::Location loc,
161+
std::uint64_t globalAS,
162+
std::uint64_t programAS,
163+
llvm::StringRef symName, mlir::Type type,
164+
mlir::Operation *replaceOp) {
165+
if (mlir::isa<mlir::LLVM::LLVMPointerType>(type)) {
166+
if (globalAS != programAS) {
167+
auto llvmAddrOp = rewriter.create<mlir::LLVM::AddressOfOp>(
168+
loc, getLlvmPtrType(rewriter.getContext(), globalAS), symName);
169+
return rewriter.replaceOpWithNewOp<mlir::LLVM::AddrSpaceCastOp>(
170+
replaceOp, ::getLlvmPtrType(rewriter.getContext(), programAS),
171+
llvmAddrOp);
172+
}
173+
return rewriter.replaceOpWithNewOp<mlir::LLVM::AddressOfOp>(
174+
replaceOp, getLlvmPtrType(rewriter.getContext(), globalAS), symName);
175+
}
176+
return rewriter.replaceOpWithNewOp<mlir::LLVM::AddressOfOp>(replaceOp, type,
177+
symName);
178+
}
179+
137180
/// Lower `fir.address_of` operation to `llvm.address_of` operation.
138181
struct AddrOfOpConversion : public fir::FIROpConversion<fir::AddrOfOp> {
139182
using FIROpConversion::FIROpConversion;
140183

141184
llvm::LogicalResult
142185
matchAndRewrite(fir::AddrOfOp addr, OpAdaptor adaptor,
143186
mlir::ConversionPatternRewriter &rewriter) const override {
144-
auto ty = convertType(addr.getType());
145-
rewriter.replaceOpWithNewOp<mlir::LLVM::AddressOfOp>(
146-
addr, ty, addr.getSymbol().getRootReference().getValue());
187+
auto global = addr->getParentOfType<mlir::ModuleOp>()
188+
.lookupSymbol<mlir::LLVM::GlobalOp>(addr.getSymbol());
189+
replaceWithAddrOfOrASCast(
190+
rewriter, addr->getLoc(),
191+
global ? global.getAddrSpace() : getGlobalAddressSpace(rewriter),
192+
getProgramAddressSpace(rewriter),
193+
global ? global.getSymName()
194+
: addr.getSymbol().getRootReference().getValue(),
195+
convertType(addr.getType()), addr);
147196
return mlir::success();
148197
}
149198
};
@@ -1350,14 +1399,34 @@ struct EmboxCommonConversion : public fir::FIROpConversion<OP> {
13501399
? fir::NameUniquer::getTypeDescriptorAssemblyName(recType.getName())
13511400
: fir::NameUniquer::getTypeDescriptorName(recType.getName());
13521401
mlir::Type llvmPtrTy = ::getLlvmPtrType(mod.getContext());
1402+
1403+
// We allow the module to be set to a default layout if it's a regular module
1404+
// however, we prevent this if it's a GPU module, as the datalayout in these
1405+
// cases will currently be the union of the GPU Module and the parent builtin
1406+
// module, with the GPU module overriding the parent where there are duplicates.
1407+
// However, if we force the default layout onto a GPU module, with no datalayout
1408+
// it'll result in issues as the infrastructure does not support the union of
1409+
// two layouts with builtin data layout entries currently (and it doesn't look
1410+
// like it was intended to).
1411+
std::optional<mlir::DataLayout> dataLayout =
1412+
fir::support::getOrSetMLIRDataLayout(
1413+
mod, /*allowDefaultLayout*/ mlir::isa<mlir::gpu::GPUModuleOp>(mod)
1414+
? false
1415+
: true);
1416+
assert(dataLayout.has_value() && "Module missing DataLayout information");
1417+
13531418
if (auto global = mod.template lookupSymbol<fir::GlobalOp>(name)) {
1354-
return rewriter.create<mlir::LLVM::AddressOfOp>(loc, llvmPtrTy,
1355-
global.getSymName());
1419+
return createAddrOfOrASCast(
1420+
rewriter, loc, fir::factory::getGlobalAddressSpace(&*dataLayout),
1421+
fir::factory::getProgramAddressSpace(&*dataLayout),
1422+
global.getSymName(), llvmPtrTy);
13561423
}
13571424
if (auto global = mod.template lookupSymbol<mlir::LLVM::GlobalOp>(name)) {
13581425
// The global may have already been translated to LLVM.
1359-
return rewriter.create<mlir::LLVM::AddressOfOp>(loc, llvmPtrTy,
1360-
global.getSymName());
1426+
return createAddrOfOrASCast(
1427+
rewriter, loc, global.getAddrSpace(),
1428+
fir::factory::getProgramAddressSpace(&*dataLayout),
1429+
global.getSymName(), llvmPtrTy);
13611430
}
13621431
// Type info derived types do not have type descriptors since they are the
13631432
// types defining type descriptors.
@@ -2896,12 +2965,16 @@ struct TypeDescOpConversion : public fir::FIROpConversion<fir::TypeDescOp> {
28962965
: fir::NameUniquer::getTypeDescriptorName(recordType.getName());
28972966
auto llvmPtrTy = ::getLlvmPtrType(typeDescOp.getContext());
28982967
if (auto global = module.lookupSymbol<mlir::LLVM::GlobalOp>(typeDescName)) {
2899-
rewriter.replaceOpWithNewOp<mlir::LLVM::AddressOfOp>(
2900-
typeDescOp, llvmPtrTy, global.getSymName());
2968+
replaceWithAddrOfOrASCast(rewriter, typeDescOp->getLoc(),
2969+
global.getAddrSpace(),
2970+
getProgramAddressSpace(rewriter),
2971+
global.getSymName(), llvmPtrTy, typeDescOp);
29012972
return mlir::success();
29022973
} else if (auto global = module.lookupSymbol<fir::GlobalOp>(typeDescName)) {
2903-
rewriter.replaceOpWithNewOp<mlir::LLVM::AddressOfOp>(
2904-
typeDescOp, llvmPtrTy, global.getSymName());
2974+
replaceWithAddrOfOrASCast(rewriter, typeDescOp->getLoc(),
2975+
getGlobalAddressSpace(rewriter),
2976+
getProgramAddressSpace(rewriter),
2977+
global.getSymName(), llvmPtrTy, typeDescOp);
29052978
return mlir::success();
29062979
}
29072980
return mlir::failure();
@@ -2992,8 +3065,8 @@ struct GlobalOpConversion : public fir::FIROpConversion<fir::GlobalOp> {
29923065
mlir::SymbolRefAttr comdat;
29933066
llvm::ArrayRef<mlir::NamedAttribute> attrs;
29943067
auto g = rewriter.create<mlir::LLVM::GlobalOp>(
2995-
loc, tyAttr, isConst, linkage, global.getSymName(), initAttr, 0, 0,
2996-
false, false, comdat, attrs, dbgExprs);
3068+
loc, tyAttr, isConst, linkage, global.getSymName(), initAttr, 0,
3069+
getGlobalAddressSpace(rewriter), false, false, comdat, attrs, dbgExprs);
29973070

29983071
if (global.getAlignment() && *global.getAlignment() > 0)
29993072
g.setAlignment(*global.getAlignment());

flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp

+23-2
Original file line numberDiff line numberDiff line change
@@ -346,7 +346,10 @@ unsigned ConvertFIRToLLVMPattern::getAllocaAddressSpace(
346346
mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
347347
assert(parentOp != nullptr &&
348348
"expected insertion block to have parent operation");
349-
if (auto module = parentOp->getParentOfType<mlir::ModuleOp>())
349+
auto module = mlir::isa<mlir::ModuleOp>(parentOp)
350+
? mlir::cast<mlir::ModuleOp>(parentOp)
351+
: parentOp->getParentOfType<mlir::ModuleOp>();
352+
if (module)
350353
if (mlir::Attribute addrSpace =
351354
mlir::DataLayout(module).getAllocaMemorySpace())
352355
return llvm::cast<mlir::IntegerAttr>(addrSpace).getUInt();
@@ -358,11 +361,29 @@ unsigned ConvertFIRToLLVMPattern::getProgramAddressSpace(
358361
mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
359362
assert(parentOp != nullptr &&
360363
"expected insertion block to have parent operation");
361-
if (auto module = parentOp->getParentOfType<mlir::ModuleOp>())
364+
auto module = mlir::isa<mlir::ModuleOp>(parentOp)
365+
? mlir::cast<mlir::ModuleOp>(parentOp)
366+
: parentOp->getParentOfType<mlir::ModuleOp>();
367+
if (module)
362368
if (mlir::Attribute addrSpace =
363369
mlir::DataLayout(module).getProgramMemorySpace())
364370
return llvm::cast<mlir::IntegerAttr>(addrSpace).getUInt();
365371
return defaultAddressSpace;
366372
}
367373

374+
/// Returns the address space to use for globals at the rewriter's current
/// insertion point. The value is read from the global memory space of the
/// data layout of the module found from the insertion block's parent
/// operation; `defaultAddressSpace` is returned when no module is found or
/// the data layout has no global memory space attribute.
unsigned ConvertFIRToLLVMPattern::getGlobalAddressSpace(
375+
mlir::ConversionPatternRewriter &rewriter) const {
376+
mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
377+
assert(parentOp != nullptr &&
378+
"expected insertion block to have parent operation");
379+
// The parent operation may itself be the module (e.g. when the insertion
// point is directly in the module body); otherwise look for an enclosing
// ModuleOp.
auto module = mlir::isa<mlir::ModuleOp>(parentOp)
380+
? mlir::cast<mlir::ModuleOp>(parentOp)
381+
: parentOp->getParentOfType<mlir::ModuleOp>();
382+
if (module)
383+
if (mlir::Attribute addrSpace =
384+
mlir::DataLayout(module).getGlobalMemorySpace())
385+
return llvm::cast<mlir::IntegerAttr>(addrSpace).getUInt();
386+
return defaultAddressSpace;
387+
}
388+
368389
} // namespace fir

flang/test/Fir/convert-to-llvm.fir

+43-13
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=i386-unknown-linux-gnu" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT,GENERIC
44
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=powerpc64le-unknown-linux-gnu" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT,GENERIC
55
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=x86_64-pc-win32" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT,GENERIC
6-
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=aarch64-apple-darwin" %s | FileCheck %s --check-prefixes=CHECK,CHECK-NO-COMDAT,GENERIC
7-
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=amdgcn-amd-amdhsa, datalayout=e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-P0" %s | FileCheck -check-prefixes=CHECK,AMDGPU %s
6+
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=aarch64-apple-darwin" %s | FileCheck %s --check-prefixes=CHECK,CHECK-NO-COMDAT,GENERIC
7+
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=amdgcn-amd-amdhsa, datalayout=e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" %s | FileCheck -check-prefixes=CHECK,AMDGPU %s
88

99
//===================================================
1010
// SUMMARY: Tests for FIR --> LLVM MLIR conversion
@@ -17,7 +17,10 @@ fir.global @g_i0 : i32 {
1717
fir.has_value %1 : i32
1818
}
1919

20-
// CHECK: llvm.mlir.global external @g_i0() {addr_space = 0 : i32} : i32 {
20+
// CHECK: llvm.mlir.global external @g_i0()
21+
// GENERIC-SAME: {addr_space = 0 : i32}
22+
// AMDGPU-SAME: {addr_space = 1 : i32}
23+
// CHECK-SAME: i32 {
2124
// CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : i32) : i32
2225
// CHECK: llvm.return %[[C0]] : i32
2326
// CHECK: }
@@ -29,25 +32,37 @@ fir.global @g_ci5 constant : i32 {
2932
fir.has_value %c : i32
3033
}
3134

32-
// CHECK: llvm.mlir.global external constant @g_ci5() {addr_space = 0 : i32} : i32 {
35+
// CHECK: llvm.mlir.global external constant @g_ci5()
36+
// GENERIC-SAME: {addr_space = 0 : i32}
37+
// AMDGPU-SAME: {addr_space = 1 : i32}
38+
// CHECK-SAME: i32 {
3339
// CHECK: %[[C5:.*]] = llvm.mlir.constant(5 : i32) : i32
3440
// CHECK: llvm.return %[[C5]] : i32
3541
// CHECK: }
3642

3743
// -----
3844

3945
fir.global internal @i_i515 (515:i32) : i32
40-
// CHECK: llvm.mlir.global internal @i_i515(515 : i32) {addr_space = 0 : i32} : i32
46+
// CHECK: llvm.mlir.global internal @i_i515(515 : i32)
47+
// GENERIC-SAME: {addr_space = 0 : i32}
48+
// AMDGPU-SAME: {addr_space = 1 : i32}
49+
// CHECK-SAME: : i32
4150

4251
// -----
4352

4453
fir.global common @C_i511 (0:i32) : i32
45-
// CHECK: llvm.mlir.global common @C_i511(0 : i32) {addr_space = 0 : i32} : i32
54+
// CHECK: llvm.mlir.global common @C_i511(0 : i32)
55+
// GENERIC-SAME: {addr_space = 0 : i32}
56+
// AMDGPU-SAME: {addr_space = 1 : i32}
57+
// CHECK-SAME: : i32
4658

4759
// -----
4860

4961
fir.global weak @w_i86 (86:i32) : i32
50-
// CHECK: llvm.mlir.global weak @w_i86(86 : i32) {addr_space = 0 : i32} : i32
62+
// CHECK: llvm.mlir.global weak @w_i86(86 : i32)
63+
// GENERIC-SAME: {addr_space = 0 : i32}
64+
// AMDGPU-SAME: {addr_space = 1 : i32}
65+
// CHECK-SAME: : i32
5166

5267
// -----
5368

@@ -69,9 +84,13 @@ fir.global @symbol : i64 {
6984
fir.has_value %0 : i64
7085
}
7186

72-
// CHECK: %{{.*}} = llvm.mlir.addressof @[[SYMBOL:.*]] : !llvm.ptr
87+
// CHECK: %[[ADDROF:.*]] = llvm.mlir.addressof @[[SYMBOL:.*]] : !llvm.ptr
88+
// AMDGPU: %{{.*}} = llvm.addrspacecast %[[ADDROF]] : !llvm.ptr<1> to !llvm.ptr
7389

74-
// CHECK: llvm.mlir.global external @[[SYMBOL]]() {addr_space = 0 : i32} : i64 {
90+
// CHECK: llvm.mlir.global external @[[SYMBOL]]()
91+
// GENERIC-SAME: {addr_space = 0 : i32}
92+
// AMDGPU-SAME: {addr_space = 1 : i32}
93+
// CHECK-SAME: i64 {
7594
// CHECK: %{{.*}} = llvm.mlir.constant(1 : i64) : i64
7695
// CHECK: llvm.return %{{.*}} : i64
7796
// CHECK: }
@@ -88,7 +107,10 @@ fir.global internal @_QEmultiarray : !fir.array<32x32xi32> {
88107
fir.has_value %2 : !fir.array<32x32xi32>
89108
}
90109

91-
// CHECK: llvm.mlir.global internal @_QEmultiarray() {addr_space = 0 : i32} : !llvm.array<32 x array<32 x i32>> {
110+
// CHECK: llvm.mlir.global internal @_QEmultiarray()
111+
// GENERIC-SAME: {addr_space = 0 : i32}
112+
// AMDGPU-SAME: {addr_space = 1 : i32}
113+
// CHECK-SAME: : !llvm.array<32 x array<32 x i32>> {
92114
// CHECK: %[[CST:.*]] = llvm.mlir.constant(dense<1> : vector<32x32xi32>) : !llvm.array<32 x array<32 x i32>>
93115
// CHECK: llvm.return %[[CST]] : !llvm.array<32 x array<32 x i32>>
94116
// CHECK: }
@@ -105,7 +127,10 @@ fir.global internal @_QEmultiarray : !fir.array<32xi32> {
105127
fir.has_value %2 : !fir.array<32xi32>
106128
}
107129

108-
// CHECK: llvm.mlir.global internal @_QEmultiarray() {addr_space = 0 : i32} : !llvm.array<32 x i32> {
130+
// CHECK: llvm.mlir.global internal @_QEmultiarray()
131+
// GENERIC-SAME: {addr_space = 0 : i32}
132+
// AMDGPU-SAME: {addr_space = 1 : i32}
133+
// CHECK-SAME: : !llvm.array<32 x i32> {
109134
// CHECK: %[[CST:.*]] = llvm.mlir.constant(1 : i32) : i32
110135
// CHECK: %{{.*}} = llvm.mlir.undef : !llvm.array<32 x i32>
111136
// CHECK: %{{.*}} = llvm.insertvalue %[[CST]], %{{.*}}[5] : !llvm.array<32 x i32>
@@ -1787,7 +1812,9 @@ func.func @embox1(%arg0: !fir.ref<!fir.type<_QMtest_dinitTtseq{i:i32}>>) {
17871812
// CHECK: %{{.*}} = llvm.insertvalue %[[VERSION]], %{{.*}}[2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)>
17881813
// CHECK: %[[TYPE_CODE_I8:.*]] = llvm.trunc %[[TYPE_CODE]] : i32 to i8
17891814
// CHECK: %{{.*}} = llvm.insertvalue %[[TYPE_CODE_I8]], %{{.*}}[4] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)>
1790-
// CHECK: %[[TDESC:.*]] = llvm.mlir.addressof @_QMtest_dinitE.dt.tseq : !llvm.ptr
1815+
// GENERIC: %[[TDESC:.*]] = llvm.mlir.addressof @_QMtest_dinitE.dt.tseq : !llvm.ptr
1816+
// AMDGPU: %[[ADDROF:.*]] = llvm.mlir.addressof @_QMtest_dinitE.dt.tseq : !llvm.ptr<1>
1817+
// AMDGPU: %[[TDESC:.*]] = llvm.addrspacecast %[[ADDROF]] : !llvm.ptr<1> to !llvm.ptr
17911818
// CHECK: %{{.*}} = llvm.insertvalue %[[TDESC]], %{{.*}}[7] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)>
17921819

17931820
// -----
@@ -2775,7 +2802,10 @@ func.func @coordinate_array_unknown_size_1d(%arg0: !fir.ptr<!fir.array<? x i32>>
27752802

27762803
fir.global common @c_(dense<0> : vector<4294967296xi8>) : !fir.array<4294967296xi8>
27772804

2778-
// CHECK: llvm.mlir.global common @c_(dense<0> : vector<4294967296xi8>) {addr_space = 0 : i32} : !llvm.array<4294967296 x i8>
2805+
// CHECK: llvm.mlir.global common @c_(dense<0> : vector<4294967296xi8>)
2806+
// GENERIC-SAME: {addr_space = 0 : i32}
2807+
// AMDGPU-SAME: {addr_space = 1 : i32}
2808+
// CHECK-SAME: !llvm.array<4294967296 x i8>
27792809

27802810
// -----
27812811

llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

+9-3
Original file line numberDiff line numberDiff line change
@@ -6743,6 +6743,12 @@ FunctionCallee OpenMPIRBuilder::createDispatchDeinitFunction() {
67436743
return getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_dispatch_deinit);
67446744
}
67456745

6746+
/// Looks through a single address-space cast: if \p V has the AddrSpaceCast
/// opcode, returns its first operand (the value being cast); otherwise returns
/// \p V unchanged.
static Value *removeASCastIfPresent(Value *V) {
6747+
if (Operator::getOpcode(V) == Instruction::AddrSpaceCast)
6748+
return cast<Operator>(V)->getOperand(0);
6749+
return V;
6750+
}
6751+
67466752
static Expected<Function *> createOutlinedFunction(
67476753
OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
67486754
const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
@@ -6926,9 +6932,9 @@ static Expected<Function *> createOutlinedFunction(
69266932
// preceding mapped arguments that refer to the same global that may be
69276933
// separate segments. To prevent this, we defer global processing until all
69286934
// other processing has been performed.
6929-
if (llvm::isa<llvm::GlobalValue>(std::get<0>(InArg)) ||
6930-
llvm::isa<llvm::GlobalObject>(std::get<0>(InArg)) ||
6931-
llvm::isa<llvm::GlobalVariable>(std::get<0>(InArg))) {
6935+
if (llvm::isa<llvm::GlobalValue>(removeASCastIfPresent(Input)) ||
6936+
llvm::isa<llvm::GlobalObject>(removeASCastIfPresent(Input)) ||
6937+
llvm::isa<llvm::GlobalVariable>(removeASCastIfPresent(Input))) {
69326938
DeferredReplacement.push_back(std::make_pair(Input, InputCopy));
69336939
continue;
69346940
}

0 commit comments

Comments
 (0)