Skip to content

Commit 4f57274

Browse files
committed
[Flang][MLIR] Add basic initial support for alloca and program address space handling in FIR->LLVMIR codegen
This is a slightly more slimmed down and up-to-date version of the older PR from here: https://reviews.llvm.org/D144203, written by @jsjodin, which has already undergone some review. This PR places allocas in the alloca address space specified by the provided datalayout (default is 0 for all address spaces), and then casts these allocas to the program address space if that address space is different from the allocation address space. For most architectures' data layouts, this will be a no-op, as they have a flat address space. But in the case of AMDGPU it will result in allocas being placed in the correct address space (5, private), and then cast into the correct program address space (0, generic). This results in correct (partially, a follow-up PR will be forthcoming soon) generation of allocations inside of device code. This PR is in addition to the work by @skatrak in this PR: #69599 and adds separate and necessary functionality of casting allocas from their address space to the program address space. Both are independent PRs, although there is some minor overlap, e.g. this PR incorporates some of the useful helper functions from 69599, so whichever lands first will need a minor rebase. Co-author: jsjodin
1 parent 79e17cd commit 4f57274

File tree

3 files changed

+159
-42
lines changed

3 files changed

+159
-42
lines changed

flang/include/flang/Optimizer/CodeGen/CGPasses.td

+2
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ def FIRToLLVMLowering : Pass<"fir-to-llvm-ir", "mlir::ModuleOp"> {
2727
let options = [
2828
Option<"forcedTargetTriple", "target", "std::string", /*default=*/"",
2929
"Override module's target triple.">,
30+
Option<"forcedDataLayout", "datalayout", "std::string", /*default=*/"",
31+
"Override module's data layout.">,
3032
Option<"applyTBAA", "apply-tbaa", "bool", /*default=*/"false",
3133
"Attach TBAA tags to memory accessing operations.">
3234
];

flang/lib/Optimizer/CodeGen/CodeGen.cpp

+83-10
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
#include "mlir/IR/Matchers.h"
4545
#include "mlir/Pass/Pass.h"
4646
#include "mlir/Pass/PassManager.h"
47+
#include "mlir/Target/LLVMIR/Import.h"
4748
#include "mlir/Target/LLVMIR/ModuleTranslation.h"
4849
#include "llvm/ADT/ArrayRef.h"
4950
#include "llvm/ADT/TypeSwitch.h"
@@ -67,8 +68,41 @@ static constexpr unsigned defaultAlign = 8;
6768
static constexpr unsigned kAttrPointer = CFI_attribute_pointer;
6869
static constexpr unsigned kAttrAllocatable = CFI_attribute_allocatable;
6970

70-
static inline mlir::Type getLlvmPtrType(mlir::MLIRContext *context) {
71-
return mlir::LLVM::LLVMPointerType::get(context);
71+
static inline unsigned getAllocaAddressSpace(mlir::ModuleOp module) {
72+
if (mlir::Attribute addrSpace =
73+
mlir::DataLayout(module).getAllocaMemorySpace())
74+
return addrSpace.cast<mlir::IntegerAttr>().getUInt();
75+
76+
return 0u;
77+
}
78+
79+
static inline unsigned getProgramAddressSpace(mlir::ModuleOp module) {
80+
if (mlir::Attribute addrSpace =
81+
mlir::DataLayout(module).getProgramMemorySpace())
82+
return addrSpace.cast<mlir::IntegerAttr>().getUInt();
83+
84+
return 0u;
85+
}
86+
87+
static inline unsigned
88+
getAllocaAddressSpace(mlir::ConversionPatternRewriter &rewriter) {
89+
mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
90+
return parentOp ? ::getAllocaAddressSpace(
91+
parentOp->getParentOfType<mlir::ModuleOp>())
92+
: 0u;
93+
}
94+
95+
static inline unsigned
96+
getProgramAddressSpace(mlir::ConversionPatternRewriter &rewriter) {
97+
mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
98+
return parentOp ? ::getProgramAddressSpace(
99+
parentOp->getParentOfType<mlir::ModuleOp>())
100+
: 0u;
101+
}
102+
103+
static inline mlir::Type getLlvmPtrType(mlir::MLIRContext *context,
104+
unsigned addressSpace = 0) {
105+
return mlir::LLVM::LLVMPointerType::get(context, addressSpace);
72106
}
73107

74108
static inline mlir::Type getI8Type(mlir::MLIRContext *context) {
@@ -369,7 +403,7 @@ class FIROpConversion : public mlir::ConvertOpToLLVMPattern<FromOp> {
369403
}
370404

371405
// Generate an alloca of size 1 for an object of type \p llvmObjectTy.
372-
mlir::LLVM::AllocaOp
406+
mlir::Value
373407
genAllocaWithType(mlir::Location loc, mlir::Type llvmObjectTy,
374408
unsigned alignment,
375409
mlir::ConversionPatternRewriter &rewriter) const {
@@ -378,9 +412,23 @@ class FIROpConversion : public mlir::ConvertOpToLLVMPattern<FromOp> {
378412
mlir::Block *insertBlock = getBlockForAllocaInsert(parentOp);
379413
rewriter.setInsertionPointToStart(insertBlock);
380414
auto size = genI32Constant(loc, rewriter, 1);
381-
mlir::Type llvmPtrTy = ::getLlvmPtrType(llvmObjectTy.getContext());
382-
auto al = rewriter.create<mlir::LLVM::AllocaOp>(
383-
loc, llvmPtrTy, llvmObjectTy, size, alignment);
415+
unsigned allocaAs = getAllocaAddressSpace(rewriter);
416+
unsigned programAs = getProgramAddressSpace(rewriter);
417+
418+
mlir::Value al = rewriter.create<mlir::LLVM::AllocaOp>(
419+
loc, ::getLlvmPtrType(llvmObjectTy.getContext(), allocaAs),
420+
llvmObjectTy, size, alignment);
421+
422+
// If our allocation address space is not the same as the program address
423+
// space, then we must emit a cast to the program address space before use.
424+
// An example case would be on AMDGPU, where the allocation address space is
425+
// the numeric value 5 (private), and the program address space is 0
426+
// (generic).
427+
if (allocaAs != programAs) {
428+
al = rewriter.create<mlir::LLVM::AddrSpaceCastOp>(
429+
loc, ::getLlvmPtrType(llvmObjectTy.getContext(), programAs), al);
430+
}
431+
384432
rewriter.restoreInsertionPoint(thisPt);
385433
return al;
386434
}
@@ -532,20 +580,34 @@ struct AllocaOpConversion : public FIROpConversion<fir::AllocaOp> {
532580
size = rewriter.create<mlir::LLVM::MulOp>(
533581
loc, ity, size, integerCast(loc, rewriter, ity, operands[i]));
534582
}
535-
mlir::Type llvmPtrTy = ::getLlvmPtrType(alloc.getContext());
583+
584+
unsigned allocaAs = getAllocaAddressSpace(rewriter);
585+
unsigned programAs = getProgramAddressSpace(rewriter);
586+
536587
// NOTE: we used to pass alloc->getAttrs() in the builder for non opaque
537588
// pointers! Only propagate pinned and bindc_name to help debugging, but
538589
// this should have no functional purpose (and passing the operand segment
539590
// attribute like before is certainly bad).
540591
auto llvmAlloc = rewriter.create<mlir::LLVM::AllocaOp>(
541-
loc, llvmPtrTy, llvmObjectType, size);
592+
loc, ::getLlvmPtrType(alloc.getContext(), allocaAs), llvmObjectType,
593+
size);
542594
if (alloc.getPinned())
543595
llvmAlloc->setDiscardableAttr(alloc.getPinnedAttrName(),
544596
alloc.getPinnedAttr());
545597
if (alloc.getBindcName())
546598
llvmAlloc->setDiscardableAttr(alloc.getBindcNameAttrName(),
547599
alloc.getBindcNameAttr());
548-
rewriter.replaceOp(alloc, llvmAlloc);
600+
if (allocaAs == programAs) {
601+
rewriter.replaceOp(alloc, llvmAlloc);
602+
} else {
603+
// If our allocation address space is not the same as the program address
604+
// space, then we must emit a cast to the program address space before
605+
// use. An example case would be on AMDGPU, where the allocation address
606+
// space is the numeric value 5 (private), and the program address space
607+
// is 0 (generic).
608+
rewriter.replaceOpWithNewOp<mlir::LLVM::AddrSpaceCastOp>(
609+
alloc, ::getLlvmPtrType(alloc.getContext(), programAs), llvmAlloc);
610+
}
549611
return mlir::success();
550612
}
551613
};
@@ -3114,7 +3176,7 @@ struct LoadOpConversion : public FIROpConversion<fir::LoadOp> {
31143176
auto storeOp =
31153177
rewriter.create<mlir::LLVM::StoreOp>(loc, boxValue, newBoxStorage);
31163178
attachTBAATag(storeOp, boxTy, boxTy, nullptr);
3117-
rewriter.replaceOp(load, newBoxStorage.getResult());
3179+
rewriter.replaceOp(load, newBoxStorage);
31183180
} else {
31193181
auto loadOp = rewriter.create<mlir::LLVM::LoadOp>(
31203182
load.getLoc(), llvmLoadTy, adaptor.getOperands(), load->getAttrs());
@@ -3808,6 +3870,17 @@ class FIRToLLVMLowering
38083870
if (!forcedTargetTriple.empty())
38093871
fir::setTargetTriple(mod, forcedTargetTriple);
38103872

3873+
if (!forcedDataLayout.empty()) {
3874+
llvm::DataLayout dl(forcedDataLayout);
3875+
mlir::MLIRContext *context = mod.getContext();
3876+
mod->setAttr(
3877+
mlir::LLVM::LLVMDialect::getDataLayoutAttrName(),
3878+
mlir::StringAttr::get(context, dl.getStringRepresentation()));
3879+
mlir::DataLayoutSpecInterface dlSpec =
3880+
mlir::translateDataLayout(dl, context);
3881+
mod->setAttr(mlir::DLTIDialect::kDataLayoutAttrName, dlSpec);
3882+
}
3883+
38113884
// Run dynamic pass pipeline for converting Math dialect
38123885
// operations into other dialects (llvm, func, etc.).
38133886
// Some conversions of Math operations cannot be done

0 commit comments

Comments
 (0)