Skip to content

Commit d18c299

Browse files
[Flang][OpenMP] Add global address space to globals for target device
Currently we do not add the appropriate global address space to globals in the target device pass. This PR adds a preliminary set of these address space markings and then adds the appropriate casts where necessary (primarily from global to program). This allows for more correct IR that the backends (in particular AMDGPU) can treat more aptly for optimisations and code correctness. Co-authored-by: Raghu Maddhipatla <raghu.maddhipatla@amd.com>
1 parent 9ba27ca commit d18c299

File tree

7 files changed

+174
-31
lines changed

7 files changed

+174
-31
lines changed

flang/include/flang/Optimizer/Builder/FIRBuilder.h

+4
Original file line numberDiff line numberDiff line change
@@ -804,6 +804,10 @@ elideLengthsAlreadyInType(mlir::Type type, mlir::ValueRange lenParams);
804804
/// Get the address space which should be used for allocas
805805
uint64_t getAllocaAddressSpace(mlir::DataLayout *dataLayout);
806806

807+
/// Get the address space which should be used for globals
uint64_t getGlobalAddressSpace(mlir::DataLayout *dataLayout);
808+
809+
/// Get the program memory address space described by the data layout
uint64_t getProgramAddressSpace(mlir::DataLayout *dataLayout);
810+
807811
} // namespace fir::factory
808812

809813
#endif // FORTRAN_OPTIMIZER_BUILDER_FIRBUILDER_H

flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h

+3
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,9 @@ class ConvertFIRToLLVMPattern : public mlir::ConvertToLLVMPattern {
185185
unsigned
186186
getProgramAddressSpace(mlir::ConversionPatternRewriter &rewriter) const;
187187

188+
/// Get the global memory space from the data layout of the module that
/// encloses the rewriter's insertion block, or the default address space
/// when no module or no global memory space is found.
unsigned
189+
getGlobalAddressSpace(mlir::ConversionPatternRewriter &rewriter) const;
190+
188191
const fir::FIRToLLVMPassOptions &options;
189192

190193
using ConvertToLLVMPattern::match;

flang/lib/Optimizer/Builder/FIRBuilder.cpp

+14
Original file line numberDiff line numberDiff line change
@@ -1740,3 +1740,17 @@ uint64_t fir::factory::getAllocaAddressSpace(mlir::DataLayout *dataLayout) {
17401740
return mlir::cast<mlir::IntegerAttr>(addrSpace).getUInt();
17411741
return 0;
17421742
}
1743+
1744+
/// Returns the global memory space recorded in \p dataLayout as an unsigned
/// integer. Falls back to 0 when \p dataLayout is null or when
/// getGlobalMemorySpace() yields a null attribute.
uint64_t fir::factory::getGlobalAddressSpace(mlir::DataLayout *dataLayout) {
1745+
if (dataLayout)
1746+
if (mlir::Attribute addrSpace = dataLayout->getGlobalMemorySpace())
1747+
// A non-null attribute is expected to be an IntegerAttr (cast, not dyn_cast).
return mlir::cast<mlir::IntegerAttr>(addrSpace).getUInt();
1748+
return 0;
1749+
}
1750+
1751+
/// Returns the program memory space recorded in \p dataLayout as an unsigned
/// integer. Falls back to 0 when \p dataLayout is null or when
/// getProgramMemorySpace() yields a null attribute.
uint64_t fir::factory::getProgramAddressSpace(mlir::DataLayout *dataLayout) {
1752+
if (dataLayout)
1753+
if (mlir::Attribute addrSpace = dataLayout->getProgramMemorySpace())
1754+
// A non-null attribute is expected to be an IntegerAttr (cast, not dyn_cast).
return mlir::cast<mlir::IntegerAttr>(addrSpace).getUInt();
1755+
return 0;
1756+
}

flang/lib/Optimizer/CodeGen/CodeGen.cpp

+78-13
Original file line numberDiff line numberDiff line change
@@ -134,16 +134,65 @@ addLLVMOpBundleAttrs(mlir::ConversionPatternRewriter &rewriter,
134134
}
135135

136136
namespace {
137+
138+
// Creates an AddressOfOp for `symName` and, when needed, an AddrSpaceCastOp
139+
// on top of it, depending on the address spaces and on the requested type.
140+
// When `type` is an LLVM pointer type, the address is taken in `globalAS` and
// cast to `programAS` if the two differ; any other `type` is passed to the
// AddressOfOp unchanged. Returns the result of the last operation created.
mlir::Value createAddrOfOrASCast(mlir::ConversionPatternRewriter &rewriter,
141+
mlir::Location loc, std::uint64_t globalAS,
142+
std::uint64_t programAS,
143+
llvm::StringRef symName, mlir::Type type) {
144+
if (mlir::isa<mlir::LLVM::LLVMPointerType>(type)) {
145+
// Address spaces differ: take the address in the global address space, then
// cast the resulting pointer to the program address space.
if (globalAS != programAS) {
146+
auto llvmAddrOp = rewriter.create<mlir::LLVM::AddressOfOp>(
147+
loc, getLlvmPtrType(rewriter.getContext(), globalAS), symName);
148+
return rewriter.create<mlir::LLVM::AddrSpaceCastOp>(
149+
loc, getLlvmPtrType(rewriter.getContext(), programAS), llvmAddrOp);
150+
}
151+
// Same address space: a plain AddressOfOp is enough, no cast required.
return rewriter.create<mlir::LLVM::AddressOfOp>(
152+
loc, getLlvmPtrType(rewriter.getContext(), globalAS), symName);
153+
}
154+
// Not an LLVM pointer type: use the caller-provided type as-is.
return rewriter.create<mlir::LLVM::AddressOfOp>(loc, type, symName);
155+
}
156+
157+
// Replaces an existing operation with an AddressOfOp or an AddrSpaceCastOp
158+
// depending on the existing address spaces of the type.
159+
// When `type` is an LLVM pointer type and `globalAS` differs from `programAS`,
// a new AddressOfOp in `globalAS` is created at `loc` and `replaceOp` is
// replaced by a cast of it to `programAS`; otherwise `replaceOp` is replaced
// directly by an AddressOfOp. Returns the result of the replacement operation.
mlir::Value replaceWithAddrOfOrASCast(mlir::ConversionPatternRewriter &rewriter,
160+
mlir::Location loc,
161+
std::uint64_t globalAS,
162+
std::uint64_t programAS,
163+
llvm::StringRef symName, mlir::Type type,
164+
mlir::Operation *replaceOp) {
165+
if (mlir::isa<mlir::LLVM::LLVMPointerType>(type)) {
166+
// Address spaces differ: the AddressOfOp is an extra op feeding the cast;
// only the cast takes the place of `replaceOp`.
if (globalAS != programAS) {
167+
auto llvmAddrOp = rewriter.create<mlir::LLVM::AddressOfOp>(
168+
loc, getLlvmPtrType(rewriter.getContext(), globalAS), symName);
169+
return rewriter.replaceOpWithNewOp<mlir::LLVM::AddrSpaceCastOp>(
170+
replaceOp, ::getLlvmPtrType(rewriter.getContext(), programAS),
171+
llvmAddrOp);
172+
}
173+
// Same address space: replace directly with an AddressOfOp, no cast.
return rewriter.replaceOpWithNewOp<mlir::LLVM::AddressOfOp>(
174+
replaceOp, getLlvmPtrType(rewriter.getContext(), globalAS), symName);
175+
}
176+
// Not an LLVM pointer type: use the caller-provided type as-is.
return rewriter.replaceOpWithNewOp<mlir::LLVM::AddressOfOp>(replaceOp, type,
177+
symName);
178+
}
179+
137180
/// Lower `fir.address_of` operation to `llvm.address_of` operation.
138181
struct AddrOfOpConversion : public fir::FIROpConversion<fir::AddrOfOp> {
139182
using FIROpConversion::FIROpConversion;
140183

141184
llvm::LogicalResult
142185
matchAndRewrite(fir::AddrOfOp addr, OpAdaptor adaptor,
143186
mlir::ConversionPatternRewriter &rewriter) const override {
144-
auto ty = convertType(addr.getType());
145-
rewriter.replaceOpWithNewOp<mlir::LLVM::AddressOfOp>(
146-
addr, ty, addr.getSymbol().getRootReference().getValue());
187+
auto global = addr->getParentOfType<mlir::ModuleOp>()
188+
.lookupSymbol<mlir::LLVM::GlobalOp>(addr.getSymbol());
189+
replaceWithAddrOfOrASCast(
190+
rewriter, addr->getLoc(),
191+
global ? global.getAddrSpace() : getGlobalAddressSpace(rewriter),
192+
getProgramAddressSpace(rewriter),
193+
global ? global.getSymName()
194+
: addr.getSymbol().getRootReference().getValue(),
195+
convertType(addr.getType()), addr);
147196
return mlir::success();
148197
}
149198
};
@@ -1350,14 +1399,26 @@ struct EmboxCommonConversion : public fir::FIROpConversion<OP> {
13501399
? fir::NameUniquer::getTypeDescriptorAssemblyName(recType.getName())
13511400
: fir::NameUniquer::getTypeDescriptorName(recType.getName());
13521401
mlir::Type llvmPtrTy = ::getLlvmPtrType(mod.getContext());
1402+
1403+
// As we set allowDefaultLayout to true, there should be no chance the
1404+
// optional returns null even if the module has no layout information;
1405+
// however, assert just in case, since the optional is dereferenced below.
1406+
std::optional<mlir::DataLayout> dataLayout =
1407+
fir::support::getOrSetDataLayout(mod, /*allowDefaultLayout=*/true);
1408+
assert(dataLayout.has_value() && "Module missing DataLayout information");
1409+
13531410
if (auto global = mod.template lookupSymbol<fir::GlobalOp>(name)) {
1354-
return rewriter.create<mlir::LLVM::AddressOfOp>(loc, llvmPtrTy,
1355-
global.getSymName());
1411+
return createAddrOfOrASCast(
1412+
rewriter, loc, fir::factory::getGlobalAddressSpace(&*dataLayout),
1413+
fir::factory::getProgramAddressSpace(&*dataLayout),
1414+
global.getSymName(), llvmPtrTy);
13561415
}
13571416
if (auto global = mod.template lookupSymbol<mlir::LLVM::GlobalOp>(name)) {
13581417
// The global may have already been translated to LLVM.
1359-
return rewriter.create<mlir::LLVM::AddressOfOp>(loc, llvmPtrTy,
1360-
global.getSymName());
1418+
return createAddrOfOrASCast(
1419+
rewriter, loc, global.getAddrSpace(),
1420+
fir::factory::getProgramAddressSpace(&*dataLayout),
1421+
global.getSymName(), llvmPtrTy);
13611422
}
13621423
// Type info derived types do not have type descriptors since they are the
13631424
// types defining type descriptors.
@@ -2896,12 +2957,16 @@ struct TypeDescOpConversion : public fir::FIROpConversion<fir::TypeDescOp> {
28962957
: fir::NameUniquer::getTypeDescriptorName(recordType.getName());
28972958
auto llvmPtrTy = ::getLlvmPtrType(typeDescOp.getContext());
28982959
if (auto global = module.lookupSymbol<mlir::LLVM::GlobalOp>(typeDescName)) {
2899-
rewriter.replaceOpWithNewOp<mlir::LLVM::AddressOfOp>(
2900-
typeDescOp, llvmPtrTy, global.getSymName());
2960+
replaceWithAddrOfOrASCast(rewriter, typeDescOp->getLoc(),
2961+
global.getAddrSpace(),
2962+
getProgramAddressSpace(rewriter),
2963+
global.getSymName(), llvmPtrTy, typeDescOp);
29012964
return mlir::success();
29022965
} else if (auto global = module.lookupSymbol<fir::GlobalOp>(typeDescName)) {
2903-
rewriter.replaceOpWithNewOp<mlir::LLVM::AddressOfOp>(
2904-
typeDescOp, llvmPtrTy, global.getSymName());
2966+
replaceWithAddrOfOrASCast(rewriter, typeDescOp->getLoc(),
2967+
getGlobalAddressSpace(rewriter),
2968+
getProgramAddressSpace(rewriter),
2969+
global.getSymName(), llvmPtrTy, typeDescOp);
29052970
return mlir::success();
29062971
}
29072972
return mlir::failure();
@@ -2992,8 +3057,8 @@ struct GlobalOpConversion : public fir::FIROpConversion<fir::GlobalOp> {
29923057
mlir::SymbolRefAttr comdat;
29933058
llvm::ArrayRef<mlir::NamedAttribute> attrs;
29943059
auto g = rewriter.create<mlir::LLVM::GlobalOp>(
2995-
loc, tyAttr, isConst, linkage, global.getSymName(), initAttr, 0, 0,
2996-
false, false, comdat, attrs, dbgExprs);
3060+
loc, tyAttr, isConst, linkage, global.getSymName(), initAttr, 0,
3061+
getGlobalAddressSpace(rewriter), false, false, comdat, attrs, dbgExprs);
29973062

29983063
if (global.getAlignment() && *global.getAlignment() > 0)
29993064
g.setAlignment(*global.getAlignment());

flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp

+23-2
Original file line numberDiff line numberDiff line change
@@ -346,7 +346,10 @@ unsigned ConvertFIRToLLVMPattern::getAllocaAddressSpace(
346346
mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
347347
assert(parentOp != nullptr &&
348348
"expected insertion block to have parent operation");
349-
if (auto module = parentOp->getParentOfType<mlir::ModuleOp>())
349+
auto module = mlir::isa<mlir::ModuleOp>(parentOp)
350+
? mlir::cast<mlir::ModuleOp>(parentOp)
351+
: parentOp->getParentOfType<mlir::ModuleOp>();
352+
if (module)
350353
if (mlir::Attribute addrSpace =
351354
mlir::DataLayout(module).getAllocaMemorySpace())
352355
return llvm::cast<mlir::IntegerAttr>(addrSpace).getUInt();
@@ -358,11 +361,29 @@ unsigned ConvertFIRToLLVMPattern::getProgramAddressSpace(
358361
mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
359362
assert(parentOp != nullptr &&
360363
"expected insertion block to have parent operation");
361-
if (auto module = parentOp->getParentOfType<mlir::ModuleOp>())
364+
auto module = mlir::isa<mlir::ModuleOp>(parentOp)
365+
? mlir::cast<mlir::ModuleOp>(parentOp)
366+
: parentOp->getParentOfType<mlir::ModuleOp>();
367+
if (module)
362368
if (mlir::Attribute addrSpace =
363369
mlir::DataLayout(module).getProgramMemorySpace())
364370
return llvm::cast<mlir::IntegerAttr>(addrSpace).getUInt();
365371
return defaultAddressSpace;
366372
}
367373

374+
/// Returns the global memory space from the data layout of the module that
/// contains the rewriter's current insertion block. Falls back to
/// defaultAddressSpace when no module is found or when the data layout yields
/// a null global memory space attribute.
unsigned ConvertFIRToLLVMPattern::getGlobalAddressSpace(
375+
mlir::ConversionPatternRewriter &rewriter) const {
376+
mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
377+
assert(parentOp != nullptr &&
378+
"expected insertion block to have parent operation");
379+
// The parent op may itself be the module; handle that case explicitly
// (presumably because getParentOfType only inspects ancestors — confirm).
auto module = mlir::isa<mlir::ModuleOp>(parentOp)
380+
? mlir::cast<mlir::ModuleOp>(parentOp)
381+
: parentOp->getParentOfType<mlir::ModuleOp>();
382+
if (module)
383+
if (mlir::Attribute addrSpace =
384+
mlir::DataLayout(module).getGlobalMemorySpace())
385+
return llvm::cast<mlir::IntegerAttr>(addrSpace).getUInt();
386+
return defaultAddressSpace;
387+
}
388+
368389
} // namespace fir

flang/test/Fir/convert-to-llvm.fir

+43-13
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=i386-unknown-linux-gnu" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT,GENERIC
44
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=powerpc64le-unknown-linux-gnu" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT,GENERIC
55
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=x86_64-pc-win32" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT,GENERIC
6-
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=aarch64-apple-darwin" %s | FileCheck %s --check-prefixes=CHECK,CHECK-NO-COMDAT,GENERIC
7-
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=amdgcn-amd-amdhsa, datalayout=e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-P0" %s | FileCheck -check-prefixes=CHECK,AMDGPU %s
6+
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=aarch64-apple-darwin" %s | FileCheck %s --check-prefixes=CHECK,CHECK-NO-COMDAT,GENERIC
7+
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=amdgcn-amd-amdhsa, datalayout=e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" %s | FileCheck -check-prefixes=CHECK,AMDGPU %s
88

99
//===================================================
1010
// SUMMARY: Tests for FIR --> LLVM MLIR conversion
@@ -17,7 +17,10 @@ fir.global @g_i0 : i32 {
1717
fir.has_value %1 : i32
1818
}
1919

20-
// CHECK: llvm.mlir.global external @g_i0() {addr_space = 0 : i32} : i32 {
20+
// CHECK: llvm.mlir.global external @g_i0()
21+
// GENERIC-SAME: {addr_space = 0 : i32}
22+
// AMDGPU-SAME: {addr_space = 1 : i32}
23+
// CHECK-SAME: i32 {
2124
// CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : i32) : i32
2225
// CHECK: llvm.return %[[C0]] : i32
2326
// CHECK: }
@@ -29,25 +32,37 @@ fir.global @g_ci5 constant : i32 {
2932
fir.has_value %c : i32
3033
}
3134

32-
// CHECK: llvm.mlir.global external constant @g_ci5() {addr_space = 0 : i32} : i32 {
35+
// CHECK: llvm.mlir.global external constant @g_ci5()
36+
// GENERIC-SAME: {addr_space = 0 : i32}
37+
// AMDGPU-SAME: {addr_space = 1 : i32}
38+
// CHECK-SAME: i32 {
3339
// CHECK: %[[C5:.*]] = llvm.mlir.constant(5 : i32) : i32
3440
// CHECK: llvm.return %[[C5]] : i32
3541
// CHECK: }
3642

3743
// -----
3844

3945
fir.global internal @i_i515 (515:i32) : i32
40-
// CHECK: llvm.mlir.global internal @i_i515(515 : i32) {addr_space = 0 : i32} : i32
46+
// CHECK: llvm.mlir.global internal @i_i515(515 : i32)
47+
// GENERIC-SAME: {addr_space = 0 : i32}
48+
// AMDGPU-SAME: {addr_space = 1 : i32}
49+
// CHECK-SAME: : i32
4150

4251
// -----
4352

4453
fir.global common @C_i511 (0:i32) : i32
45-
// CHECK: llvm.mlir.global common @C_i511(0 : i32) {addr_space = 0 : i32} : i32
54+
// CHECK: llvm.mlir.global common @C_i511(0 : i32)
55+
// GENERIC-SAME: {addr_space = 0 : i32}
56+
// AMDGPU-SAME: {addr_space = 1 : i32}
57+
// CHECK-SAME: : i32
4658

4759
// -----
4860

4961
fir.global weak @w_i86 (86:i32) : i32
50-
// CHECK: llvm.mlir.global weak @w_i86(86 : i32) {addr_space = 0 : i32} : i32
62+
// CHECK: llvm.mlir.global weak @w_i86(86 : i32)
63+
// GENERIC-SAME: {addr_space = 0 : i32}
64+
// AMDGPU-SAME: {addr_space = 1 : i32}
65+
// CHECK-SAME: : i32
5166

5267
// -----
5368

@@ -69,9 +84,13 @@ fir.global @symbol : i64 {
6984
fir.has_value %0 : i64
7085
}
7186

72-
// CHECK: %{{.*}} = llvm.mlir.addressof @[[SYMBOL:.*]] : !llvm.ptr
87+
// CHECK: %[[ADDROF:.*]] = llvm.mlir.addressof @[[SYMBOL:.*]] : !llvm.ptr
88+
// AMDGPU: %{{.*}} = llvm.addrspacecast %[[ADDROF]] : !llvm.ptr<1> to !llvm.ptr
7389

74-
// CHECK: llvm.mlir.global external @[[SYMBOL]]() {addr_space = 0 : i32} : i64 {
90+
// CHECK: llvm.mlir.global external @[[SYMBOL]]()
91+
// GENERIC-SAME: {addr_space = 0 : i32}
92+
// AMDGPU-SAME: {addr_space = 1 : i32}
93+
// CHECK-SAME: i64 {
7594
// CHECK: %{{.*}} = llvm.mlir.constant(1 : i64) : i64
7695
// CHECK: llvm.return %{{.*}} : i64
7796
// CHECK: }
@@ -88,7 +107,10 @@ fir.global internal @_QEmultiarray : !fir.array<32x32xi32> {
88107
fir.has_value %2 : !fir.array<32x32xi32>
89108
}
90109

91-
// CHECK: llvm.mlir.global internal @_QEmultiarray() {addr_space = 0 : i32} : !llvm.array<32 x array<32 x i32>> {
110+
// CHECK: llvm.mlir.global internal @_QEmultiarray()
111+
// GENERIC-SAME: {addr_space = 0 : i32}
112+
// AMDGPU-SAME: {addr_space = 1 : i32}
113+
// CHECK-SAME: : !llvm.array<32 x array<32 x i32>> {
92114
// CHECK: %[[CST:.*]] = llvm.mlir.constant(dense<1> : vector<32x32xi32>) : !llvm.array<32 x array<32 x i32>>
93115
// CHECK: llvm.return %[[CST]] : !llvm.array<32 x array<32 x i32>>
94116
// CHECK: }
@@ -105,7 +127,10 @@ fir.global internal @_QEmultiarray : !fir.array<32xi32> {
105127
fir.has_value %2 : !fir.array<32xi32>
106128
}
107129

108-
// CHECK: llvm.mlir.global internal @_QEmultiarray() {addr_space = 0 : i32} : !llvm.array<32 x i32> {
130+
// CHECK: llvm.mlir.global internal @_QEmultiarray()
131+
// GENERIC-SAME: {addr_space = 0 : i32}
132+
// AMDGPU-SAME: {addr_space = 1 : i32}
133+
// CHECK-SAME: : !llvm.array<32 x i32> {
109134
// CHECK: %[[CST:.*]] = llvm.mlir.constant(1 : i32) : i32
110135
// CHECK: %{{.*}} = llvm.mlir.undef : !llvm.array<32 x i32>
111136
// CHECK: %{{.*}} = llvm.insertvalue %[[CST]], %{{.*}}[5] : !llvm.array<32 x i32>
@@ -1787,7 +1812,9 @@ func.func @embox1(%arg0: !fir.ref<!fir.type<_QMtest_dinitTtseq{i:i32}>>) {
17871812
// CHECK: %{{.*}} = llvm.insertvalue %[[VERSION]], %{{.*}}[2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)>
17881813
// CHECK: %[[TYPE_CODE_I8:.*]] = llvm.trunc %[[TYPE_CODE]] : i32 to i8
17891814
// CHECK: %{{.*}} = llvm.insertvalue %[[TYPE_CODE_I8]], %{{.*}}[4] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)>
1790-
// CHECK: %[[TDESC:.*]] = llvm.mlir.addressof @_QMtest_dinitE.dt.tseq : !llvm.ptr
1815+
// GENERIC: %[[TDESC:.*]] = llvm.mlir.addressof @_QMtest_dinitE.dt.tseq : !llvm.ptr
1816+
// AMDGPU: %[[ADDROF:.*]] = llvm.mlir.addressof @_QMtest_dinitE.dt.tseq : !llvm.ptr<1>
1817+
// AMDGPU: %[[TDESC:.*]] = llvm.addrspacecast %[[ADDROF]] : !llvm.ptr<1> to !llvm.ptr
17911818
// CHECK: %{{.*}} = llvm.insertvalue %[[TDESC]], %{{.*}}[7] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)>
17921819

17931820
// -----
@@ -2775,7 +2802,10 @@ func.func @coordinate_array_unknown_size_1d(%arg0: !fir.ptr<!fir.array<? x i32>>
27752802

27762803
fir.global common @c_(dense<0> : vector<4294967296xi8>) : !fir.array<4294967296xi8>
27772804

2778-
// CHECK: llvm.mlir.global common @c_(dense<0> : vector<4294967296xi8>) {addr_space = 0 : i32} : !llvm.array<4294967296 x i8>
2805+
// CHECK: llvm.mlir.global common @c_(dense<0> : vector<4294967296xi8>)
2806+
// GENERIC-SAME: {addr_space = 0 : i32}
2807+
// AMDGPU-SAME: {addr_space = 1 : i32}
2808+
// CHECK-SAME: !llvm.array<4294967296 x i8>
27792809

27802810
// -----
27812811

llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

+9-3
Original file line numberDiff line numberDiff line change
@@ -6743,6 +6743,12 @@ FunctionCallee OpenMPIRBuilder::createDispatchDeinitFunction() {
67436743
return getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_dispatch_deinit);
67446744
}
67456745

6746+
/// Looks through a single address space cast: if \p V has the AddrSpaceCast
/// opcode, returns its first operand (the value being cast); otherwise
/// returns \p V unchanged. Only one level of cast is stripped.
static Value *removeASCastIfPresent(Value *V) {
6747+
if (Operator::getOpcode(V) == Instruction::AddrSpaceCast)
6748+
return cast<Operator>(V)->getOperand(0);
6749+
return V;
6750+
}
6751+
67466752
static Expected<Function *> createOutlinedFunction(
67476753
OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
67486754
const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
@@ -6926,9 +6932,9 @@ static Expected<Function *> createOutlinedFunction(
69266932
// preceding mapped arguments that refer to the same global that may be
69276933
// separate segments. To prevent this, we defer global processing until all
69286934
// other processing has been performed.
6929-
if (llvm::isa<llvm::GlobalValue>(std::get<0>(InArg)) ||
6930-
llvm::isa<llvm::GlobalObject>(std::get<0>(InArg)) ||
6931-
llvm::isa<llvm::GlobalVariable>(std::get<0>(InArg))) {
6935+
if (llvm::isa<llvm::GlobalValue>(removeASCastIfPresent(Input)) ||
6936+
llvm::isa<llvm::GlobalObject>(removeASCastIfPresent(Input)) ||
6937+
llvm::isa<llvm::GlobalVariable>(removeASCastIfPresent(Input))) {
69326938
DeferredReplacement.push_back(std::make_pair(Input, InputCopy));
69336939
continue;
69346940
}

0 commit comments

Comments
 (0)