-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[flang] Implement !DIR$ IVDEP directive #133728
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-flang-semantics @llvm/pr-subscribers-flang-fir-hlfir Author: Jean-Didier PAILLEUX (JDPailleux) ChangesThis directive tells the compiler to ignore vector dependencies in the following loop and it must be placed before a Sometimes the compiler may not have sufficient information to decide whether a particular loop is vectorizable due to potential dependencies between iterations and the directive is here to tell to the compiler that vectorization is safe with This directive is also equivalent to Patch is 33.94 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/133728.diff 19 Files Affected:
diff --git a/flang/include/flang/Optimizer/Builder/FIRBuilder.h b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
index 003b4358572c1..46b362e9c6c01 100644
--- a/flang/include/flang/Optimizer/Builder/FIRBuilder.h
+++ b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
@@ -735,7 +735,8 @@ void genScalarAssignment(fir::FirOpBuilder &builder, mlir::Location loc,
const fir::ExtendedValue &lhs,
const fir::ExtendedValue &rhs,
bool needFinalization = false,
- bool isTemporaryLHS = false);
+ bool isTemporaryLHS = false,
+ std::optional<mlir::ArrayAttr> accessGroups = nullptr);
/// Assign \p rhs to \p lhs. Both \p rhs and \p lhs must be scalar derived
/// types. The assignment follows Fortran intrinsic assignment semantic for
diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index c8d8ab41552c2..871f4cb1ff847 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -303,7 +303,8 @@ def fir_LoadOp : fir_OneResultOp<"load", [FirAliasTagOpInterface]> {
}];
let arguments = (ins Arg<AnyReferenceLike, "", [MemRead]>:$memref,
- OptionalAttr<LLVM_TBAATagArrayAttr>:$tbaa);
+ OptionalAttr<LLVM_TBAATagArrayAttr>:$tbaa,
+ OptionalAttr<LLVM_AccessGroupArrayAttr>:$accessGroups);
let builders = [OpBuilder<(ins "mlir::Value":$refVal)>,
OpBuilder<(ins "mlir::Type":$resTy, "mlir::Value":$refVal)>];
@@ -335,8 +336,9 @@ def fir_StoreOp : fir_Op<"store", [FirAliasTagOpInterface]> {
}];
let arguments = (ins AnyType:$value,
- Arg<AnyReferenceLike, "", [MemWrite]>:$memref,
- OptionalAttr<LLVM_TBAATagArrayAttr>:$tbaa);
+ Arg<AnyReferenceLike, "", [MemWrite]>:$memref,
+ OptionalAttr<LLVM_TBAATagArrayAttr>:$tbaa,
+ OptionalAttr<LLVM_AccessGroupArrayAttr>:$accessGroups);
let builders = [OpBuilder<(ins "mlir::Value":$value, "mlir::Value":$memref)>];
@@ -2484,15 +2486,13 @@ def fir_CallOp : fir_Op<"call",
```
}];
- let arguments = (ins
- OptionalAttr<SymbolRefAttr>:$callee,
- Variadic<AnyType>:$args,
- OptionalAttr<DictArrayAttr>:$arg_attrs,
- OptionalAttr<DictArrayAttr>:$res_attrs,
- OptionalAttr<fir_FortranProcedureFlagsAttr>:$procedure_attrs,
- DefaultValuedAttr<Arith_FastMathAttr,
- "::mlir::arith::FastMathFlags::none">:$fastmath
- );
+ let arguments = (ins OptionalAttr<SymbolRefAttr>:$callee,
+ Variadic<AnyType>:$args, OptionalAttr<DictArrayAttr>:$arg_attrs,
+ OptionalAttr<DictArrayAttr>:$res_attrs,
+ OptionalAttr<fir_FortranProcedureFlagsAttr>:$procedure_attrs,
+ OptionalAttr<LLVM_AccessGroupArrayAttr>:$accessGroups,
+ DefaultValuedAttr<Arith_FastMathAttr,
+ "::mlir::arith::FastMathFlags::none">:$fastmath);
let results = (outs Variadic<AnyType>);
let hasCustomAssemblyFormat = 1;
diff --git a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td
index f69930d5b53b3..2d07bc92df60c 100644
--- a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td
+++ b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td
@@ -21,6 +21,7 @@ include "flang/Optimizer/Dialect/FIRAttr.td"
include "flang/Optimizer/Dialect/FortranVariableInterface.td"
include "mlir/Dialect/Arith/IR/ArithBase.td"
include "mlir/Dialect/Arith/IR/ArithOpsInterfaces.td"
+include "mlir/Dialect/LLVMIR/LLVMAttrDefs.td"
include "mlir/IR/BuiltinAttributes.td"
// Base class for FIR operations.
diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h
index 9bff2dab974ec..e6f7523868f96 100644
--- a/flang/include/flang/Parser/dump-parse-tree.h
+++ b/flang/include/flang/Parser/dump-parse-tree.h
@@ -204,6 +204,7 @@ class ParseTreeDumper {
NODE(parser, CompilerDirective)
NODE(CompilerDirective, AssumeAligned)
NODE(CompilerDirective, IgnoreTKR)
+ NODE(CompilerDirective, IVDep)
NODE(CompilerDirective, LoopCount)
NODE(CompilerDirective, NameValue)
NODE(CompilerDirective, Unrecognized)
diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h
index 029c3de354b66..e76cdc15babff 100644
--- a/flang/include/flang/Parser/parse-tree.h
+++ b/flang/include/flang/Parser/parse-tree.h
@@ -3351,6 +3351,7 @@ struct StmtFunctionStmt {
// !DIR$ name[=value] [, name[=value]]... = can be :
// !DIR$ UNROLL [N]
// !DIR$ UNROLL_AND_JAM [N]
+// !DIR$ IVDEP
// !DIR$ <anything else>
struct CompilerDirective {
UNION_CLASS_BOILERPLATE(CompilerDirective);
@@ -3376,10 +3377,12 @@ struct CompilerDirective {
struct UnrollAndJam {
WRAPPER_CLASS_BOILERPLATE(UnrollAndJam, std::optional<std::uint64_t>);
};
+ EMPTY_CLASS(IVDep);
EMPTY_CLASS(Unrecognized);
CharBlock source;
std::variant<std::list<IgnoreTKR>, LoopCount, std::list<AssumeAligned>,
- VectorAlways, std::list<NameValue>, Unroll, UnrollAndJam, Unrecognized>
+ VectorAlways, std::list<NameValue>, Unroll, UnrollAndJam, Unrecognized,
+ IVDep>
u;
};
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 6e6e88a32517c..cc6f2d9a73949 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -2046,6 +2046,37 @@ class FirConverter : public Fortran::lower::AbstractConverter {
// so no clean-up needs to be generated for these entities.
}
+ // Add attribute(s) on operations in fir::DoLoopOp if necessary.
+ void attachAttributesToDoLoopOperations(fir::DoLoopOp &doLoop) {
+ if (!doLoop.getOperation())
+ return;
+ if (auto loopAnnotAttr = doLoop.getLoopAnnotationAttr()) {
+ if (loopAnnotAttr.getParallelAccesses().size()) {
+ mlir::LLVM::AccessGroupAttr accessGroupAttr =
+ loopAnnotAttr.getParallelAccesses().front();
+ for (mlir::Block &block : doLoop.getRegion()) {
+ mlir::ArrayAttr attrs =
+ mlir::ArrayAttr::get(builder->getContext(), {accessGroupAttr});
+ for (mlir::Operation &op : block.getOperations()) {
+ if (fir::StoreOp storeOp = mlir::dyn_cast<fir::StoreOp>(op)) {
+ storeOp.setAccessGroupsAttr(attrs);
+ } else if (fir::LoadOp loadOp = mlir::dyn_cast<fir::LoadOp>(op)) {
+ loadOp.setAccessGroupsAttr(attrs);
+ } else if (hlfir::AssignOp assignOp =
+ mlir::dyn_cast<hlfir::AssignOp>(op)) {
+ // In some loops, the HLFIR AssignOp operation can be translated
+ // into FIR operation(s) containing StoreOp. It is therefore
+ // necessary to forward the AccessGroups attribute.
+ assignOp.getOperation()->setAttr("access_groups", attrs);
+ } else if (fir::CallOp callOp = mlir::dyn_cast<fir::CallOp>(op)) {
+ callOp.setAccessGroupsAttr(attrs);
+ }
+ }
+ }
+ }
+ }
+ }
+
/// Generate FIR for a DO construct. There are six variants:
/// - unstructured infinite and while loops
/// - structured and unstructured increment loops
@@ -2155,6 +2186,10 @@ class FirConverter : public Fortran::lower::AbstractConverter {
// This call may generate a branch in some contexts.
genFIR(endDoEval, unstructuredContext);
+
+ // Add attribute(s) on operations in fir::DoLoopOp if necessary
+ for (IncrementLoopInfo &info : incrementLoopNestInfo)
+ attachAttributesToDoLoopOperations(info.doLoop);
}
/// Generate FIR to evaluate loop control values (lower, upper and step).
@@ -2235,22 +2270,28 @@ class FirConverter : public Fortran::lower::AbstractConverter {
{}, {}, {}, {});
}
+ // Enabling loop vectorization attribute.
+ mlir::LLVM::LoopVectorizeAttr genLoopVectorizeAttr(bool enable = true) {
+ mlir::BoolAttr disableAttr =
+ mlir::BoolAttr::get(builder->getContext(), !enable);
+ return mlir::LLVM::LoopVectorizeAttr::get(builder->getContext(),
+ /*disable=*/disableAttr, {}, {},
+ {}, {}, {}, {});
+ }
+
void addLoopAnnotationAttr(
IncrementLoopInfo &info,
llvm::SmallVectorImpl<const Fortran::parser::CompilerDirective *> &dirs) {
mlir::LLVM::LoopVectorizeAttr va;
mlir::LLVM::LoopUnrollAttr ua;
mlir::LLVM::LoopUnrollAndJamAttr uja;
+ llvm::SmallVector<mlir::LLVM::AccessGroupAttr> aga;
bool has_attrs = false;
for (const auto *dir : dirs) {
Fortran::common::visit(
Fortran::common::visitors{
[&](const Fortran::parser::CompilerDirective::VectorAlways &) {
- mlir::BoolAttr falseAttr =
- mlir::BoolAttr::get(builder->getContext(), false);
- va = mlir::LLVM::LoopVectorizeAttr::get(builder->getContext(),
- /*disable=*/falseAttr,
- {}, {}, {}, {}, {}, {});
+ va = genLoopVectorizeAttr();
has_attrs = true;
},
[&](const Fortran::parser::CompilerDirective::Unroll &u) {
@@ -2261,12 +2302,19 @@ class FirConverter : public Fortran::lower::AbstractConverter {
uja = genLoopUnrollAndJamAttr(u.v);
has_attrs = true;
},
+ [&](const Fortran::parser::CompilerDirective::IVDep &iv) {
+ va = genLoopVectorizeAttr();
+ aga.push_back(
+ mlir::LLVM::AccessGroupAttr::get(builder->getContext()));
+ has_attrs = true;
+ },
[&](const auto &) {}},
dir->u);
}
mlir::LLVM::LoopAnnotationAttr la = mlir::LLVM::LoopAnnotationAttr::get(
builder->getContext(), {}, /*vectorize=*/va, {}, /*unroll*/ ua,
- /*unroll_and_jam*/ uja, {}, {}, {}, {}, {}, {}, {}, {}, {}, {});
+ /*unroll_and_jam*/ uja, {}, {}, {}, {}, {}, {}, {}, {}, {},
+ /*parallelAccesses*/ aga);
if (has_attrs)
info.doLoop.setLoopAnnotationAttr(la);
}
@@ -2925,6 +2973,9 @@ class FirConverter : public Fortran::lower::AbstractConverter {
[&](const Fortran::parser::CompilerDirective::UnrollAndJam &) {
attachDirectiveToLoop(dir, &eval);
},
+ [&](const Fortran::parser::CompilerDirective::IVDep &) {
+ attachDirectiveToLoop(dir, &eval);
+ },
[&](const auto &) {}},
dir.u);
}
diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index 6a0f4d1090adc..b1907356161ee 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -649,7 +649,8 @@ Fortran::lower::genCallOpAndResult(
// Standard procedure call with fir.call.
auto call = builder.create<fir::CallOp>(
loc, funcType.getResults(), funcSymbolAttr, operands,
- /*arg_attrs=*/nullptr, /*res_attrs=*/nullptr, procAttrs);
+ /*arg_attrs=*/nullptr, /*res_attrs=*/nullptr, procAttrs,
+ /*accessGroups*/ mlir::ArrayAttr{});
callNumResults = call.getNumResults();
if (callNumResults != 0)
diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
index b7f8a8d3a9d56..75d131506ea40 100644
--- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp
+++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
@@ -1304,12 +1304,11 @@ fir::ExtendedValue fir::factory::arraySectionElementToExtendedValue(
return fir::factory::componentToExtendedValue(builder, loc, element);
}
-void fir::factory::genScalarAssignment(fir::FirOpBuilder &builder,
- mlir::Location loc,
- const fir::ExtendedValue &lhs,
- const fir::ExtendedValue &rhs,
- bool needFinalization,
- bool isTemporaryLHS) {
+void fir::factory::genScalarAssignment(
+ fir::FirOpBuilder &builder, mlir::Location loc,
+ const fir::ExtendedValue &lhs, const fir::ExtendedValue &rhs,
+ bool needFinalization, bool isTemporaryLHS,
+ std::optional<mlir::ArrayAttr> accessGroups) {
assert(lhs.rank() == 0 && rhs.rank() == 0 && "must be scalars");
auto type = fir::unwrapSequenceType(
fir::unwrapPassByRefType(fir::getBase(lhs).getType()));
@@ -1331,7 +1330,9 @@ void fir::factory::genScalarAssignment(fir::FirOpBuilder &builder,
mlir::Value lhsAddr = fir::getBase(lhs);
rhsVal = builder.createConvert(loc, fir::unwrapRefType(lhsAddr.getType()),
rhsVal);
- builder.create<fir::StoreOp>(loc, rhsVal, lhsAddr);
+ fir::StoreOp store = builder.create<fir::StoreOp>(loc, rhsVal, lhsAddr);
+ if (accessGroups)
+ store.setAccessGroupsAttr(*accessGroups);
}
}
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index b54b497ee4ba1..83d68557e2101 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -634,6 +634,10 @@ struct CallOpConversion : public fir::FIROpConversion<fir::CallOp> {
if (mlir::ArrayAttr resAttrs = call.getResAttrsAttr())
llvmCall.setResAttrsAttr(resAttrs);
+ if (std::optional<mlir::ArrayAttr> optionalAccessGroups =
+ call.getAccessGroups())
+ llvmCall.setAccessGroups(*optionalAccessGroups);
+
if (memAttr)
llvmCall.setMemoryEffectsAttr(
mlir::cast<mlir::LLVM::MemoryEffectsAttr>(memAttr));
@@ -3267,6 +3271,11 @@ struct LoadOpConversion : public fir::FIROpConversion<fir::LoadOp> {
loadOp.setTBAATags(*optionalTag);
else
attachTBAATag(loadOp, load.getType(), load.getType(), nullptr);
+
+ if (std::optional<mlir::ArrayAttr> optionalAccessGroups =
+ load.getAccessGroups())
+ loadOp.setAccessGroups(*optionalAccessGroups);
+
rewriter.replaceOp(load, loadOp.getResult());
}
return mlir::success();
@@ -3550,7 +3559,12 @@ struct StoreOpConversion : public fir::FIROpConversion<fir::StoreOp> {
newOp = rewriter.create<mlir::LLVM::MemcpyOp>(
loc, llvmMemref, llvmValue, boxSize, /*isVolatile=*/false);
} else {
- newOp = rewriter.create<mlir::LLVM::StoreOp>(loc, llvmValue, llvmMemref);
+ mlir::LLVM::StoreOp storeOp =
+ rewriter.create<mlir::LLVM::StoreOp>(loc, llvmValue, llvmMemref);
+ if (std::optional<mlir::ArrayAttr> optionalAccessGroups =
+ store.getAccessGroups())
+ storeOp.setAccessGroups(*optionalAccessGroups);
+ newOp = storeOp;
}
if (std::optional<mlir::ArrayAttr> optionalTag = store.getTbaa())
newOp.setTBAATags(*optionalTag);
diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
index 1e8a7354da561..3d0d32fa9ade0 100644
--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -3948,7 +3948,7 @@ llvm::LogicalResult fir::StoreOp::verify() {
void fir::StoreOp::build(mlir::OpBuilder &builder, mlir::OperationState &result,
mlir::Value value, mlir::Value memref) {
- build(builder, result, value, memref, {});
+ build(builder, result, value, memref, {}, {});
}
//===----------------------------------------------------------------------===//
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp
index 496a5560ac615..ea631dd1d72db 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp
@@ -148,13 +148,17 @@ class AssignOpConversion : public mlir::OpRewritePattern<hlfir::AssignOp> {
!assignOp.isTemporaryLHS() &&
mlir::isa<fir::RecordType>(fir::getElementTypeOf(lhsExv));
+ std::optional<mlir::ArrayAttr> accessGroups = nullptr;
+ if (auto attrs = assignOp.getOperation()->getAttrOfType<mlir::ArrayAttr>(
+ "access_groups"))
+ accessGroups = attrs;
// genScalarAssignment() must take care of potential overlap
// between LHS and RHS. Note that the overlap is possible
// also for components of LHS/RHS, and the Assign() runtime
// must take care of it.
- fir::factory::genScalarAssignment(builder, loc, lhsExv, rhsExv,
- needFinalization,
- assignOp.isTemporaryLHS());
+ fir::factory::genScalarAssignment(
+ builder, loc, lhsExv, rhsExv, needFinalization,
+ assignOp.isTemporaryLHS(), accessGroups);
}
rewriter.eraseOp(assignOp);
return mlir::success();
diff --git a/flang/lib/Optimizer/Transforms/PolymorphicOpConversion.cpp b/flang/lib/Optimizer/Transforms/PolymorphicOpConversion.cpp
index 0c78a878cdc53..d3194c352c765 100644
--- a/flang/lib/Optimizer/Transforms/PolymorphicOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/PolymorphicOpConversion.cpp
@@ -207,7 +207,8 @@ struct DispatchOpConv : public OpConversionPattern<fir::DispatchOp> {
args.append(dispatch.getArgs().begin(), dispatch.getArgs().end());
rewriter.replaceOpWithNewOp<fir::CallOp>(
dispatch, resTypes, nullptr, args, dispatch.getArgAttrsAttr(),
- dispatch.getResAttrsAttr(), dispatch.getProcedureAttrsAttr());
+ dispatch.getResAttrsAttr(), dispatch.getProcedureAttrsAttr(),
+ mlir::ArrayAttr{});
return mlir::success();
}
diff --git a/flang/lib/Parser/Fortran-parsers.cpp b/flang/lib/Parser/Fortran-parsers.cpp
index cfe9ecb29b0b7..329661653484d 100644
--- a/flang/lib/Parser/Fortran-parsers.cpp
+++ b/flang/lib/Parser/Fortran-parsers.cpp
@@ -1294,6 +1294,7 @@ TYPE_PARSER(construct<StatOrErrmsg>("STAT =" >> statVariable) ||
// !DIR$ LOOP COUNT (n1[, n2]...)
// !DIR$ name[=value] [, name[=value]]...
// !DIR$ UNROLL [n]
+// !DIR$ IVDEP
// !DIR$ <anything else>
constexpr auto ignore_tkr{
"IGNORE_TKR" >> optionalList(construct<CompilerDirective::IgnoreTKR>(
@@ -1310,6 +1311,7 @@ constexpr auto unroll{
"UNROLL" >> construct<CompilerDirective::Unroll>(maybe(digitString64))};
constexpr auto unrollAndJam{"UNROLL_AND_JAM" >>
construct<CompilerDirective::UnrollAndJam>(maybe(digitString64))};
+constexpr auto ivdep{"IVDEP" >> construct<CompilerDirective::IVDep>()};
TYPE_PARSER(beginDirective >> "DIR$ "_tok >>
sourced((construct<CompilerDirective>(ignore_tkr) ||
construct<CompilerDirective>(loopCount) ||
@@ -1317,6 +1319,7 @@ TYPE_PARSER(beginDirective >> "DIR$ "_tok >>
construct<CompilerDirective>(vectorAlways) ||
construct<CompilerDirective>(unrollAndJam) ||
construct<CompilerDirective>(unroll) ||
+ construct<CompilerDirective>(ivdep) ||
construct<CompilerDirective>(
many(construct<CompilerDirective::NameValue>(
name, maybe(("="_tok || ":"_tok) >> digitString64))))) /
diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp
index 8cdbf8ed2a672..afa4d1989fc33 100644
--- a/flang/lib/Parser/unparse.cpp
+++ b/flang/lib/Parser/unparse.cpp
@@ -1855,6 +1855,7 @@ class UnparseVisitor {
Word("!DIR$ UNROLL_AND_JAM");
Walk(" ", unrollAndJam.v);
},
+ [&](const CompilerDirective::IVDep &) { Word("!DIR$ IVDEP"); },
[&](const CompilerDirective::Unrecognized &) {
Word("!DIR$ ");
Word(x.source.ToString());
diff --git a/flang/lib/Semantics/canonicalize-directives.cpp b/flang/lib/Semantics/canonicalize-directives.cpp
index 1a0a0d145b3e2..0db7a87f6dc84 100644
--- a/flang/lib/Semantics/canonicalize-directives.cpp
+++ b/flang/lib/Semantics/canonicalize-directives.cpp
@@ -57,7 +57,8 @@ static bool IsExecutionDirective(const parser::CompilerDirective &dir) {
return std::holds_alternative<parser::CompilerDirective::VectorAlways>(
dir.u) ||
std::holds_alternative<parser::CompilerDirective::Unroll>(dir.u) ||
- std::holds_alternative<parser::CompilerDirective::UnrollAndJam>(d...
[truncated]
|
@llvm/pr-subscribers-flang-codegen Author: Jean-Didier PAILLEUX (JDPailleux) ChangesThis directive tells the compiler to ignore vector dependencies in the following loop and it must be placed before a Sometimes the compiler may not have sufficient information to decide whether a particular loop is vectorizable due to potential dependencies between iterations and the directive is here to tell to the compiler that vectorization is safe with This directive is also equivalent to Patch is 33.94 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/133728.diff 19 Files Affected:
diff --git a/flang/include/flang/Optimizer/Builder/FIRBuilder.h b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
index 003b4358572c1..46b362e9c6c01 100644
--- a/flang/include/flang/Optimizer/Builder/FIRBuilder.h
+++ b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
@@ -735,7 +735,8 @@ void genScalarAssignment(fir::FirOpBuilder &builder, mlir::Location loc,
const fir::ExtendedValue &lhs,
const fir::ExtendedValue &rhs,
bool needFinalization = false,
- bool isTemporaryLHS = false);
+ bool isTemporaryLHS = false,
+ std::optional<mlir::ArrayAttr> accessGroups = nullptr);
/// Assign \p rhs to \p lhs. Both \p rhs and \p lhs must be scalar derived
/// types. The assignment follows Fortran intrinsic assignment semantic for
diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index c8d8ab41552c2..871f4cb1ff847 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -303,7 +303,8 @@ def fir_LoadOp : fir_OneResultOp<"load", [FirAliasTagOpInterface]> {
}];
let arguments = (ins Arg<AnyReferenceLike, "", [MemRead]>:$memref,
- OptionalAttr<LLVM_TBAATagArrayAttr>:$tbaa);
+ OptionalAttr<LLVM_TBAATagArrayAttr>:$tbaa,
+ OptionalAttr<LLVM_AccessGroupArrayAttr>:$accessGroups);
let builders = [OpBuilder<(ins "mlir::Value":$refVal)>,
OpBuilder<(ins "mlir::Type":$resTy, "mlir::Value":$refVal)>];
@@ -335,8 +336,9 @@ def fir_StoreOp : fir_Op<"store", [FirAliasTagOpInterface]> {
}];
let arguments = (ins AnyType:$value,
- Arg<AnyReferenceLike, "", [MemWrite]>:$memref,
- OptionalAttr<LLVM_TBAATagArrayAttr>:$tbaa);
+ Arg<AnyReferenceLike, "", [MemWrite]>:$memref,
+ OptionalAttr<LLVM_TBAATagArrayAttr>:$tbaa,
+ OptionalAttr<LLVM_AccessGroupArrayAttr>:$accessGroups);
let builders = [OpBuilder<(ins "mlir::Value":$value, "mlir::Value":$memref)>];
@@ -2484,15 +2486,13 @@ def fir_CallOp : fir_Op<"call",
```
}];
- let arguments = (ins
- OptionalAttr<SymbolRefAttr>:$callee,
- Variadic<AnyType>:$args,
- OptionalAttr<DictArrayAttr>:$arg_attrs,
- OptionalAttr<DictArrayAttr>:$res_attrs,
- OptionalAttr<fir_FortranProcedureFlagsAttr>:$procedure_attrs,
- DefaultValuedAttr<Arith_FastMathAttr,
- "::mlir::arith::FastMathFlags::none">:$fastmath
- );
+ let arguments = (ins OptionalAttr<SymbolRefAttr>:$callee,
+ Variadic<AnyType>:$args, OptionalAttr<DictArrayAttr>:$arg_attrs,
+ OptionalAttr<DictArrayAttr>:$res_attrs,
+ OptionalAttr<fir_FortranProcedureFlagsAttr>:$procedure_attrs,
+ OptionalAttr<LLVM_AccessGroupArrayAttr>:$accessGroups,
+ DefaultValuedAttr<Arith_FastMathAttr,
+ "::mlir::arith::FastMathFlags::none">:$fastmath);
let results = (outs Variadic<AnyType>);
let hasCustomAssemblyFormat = 1;
diff --git a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td
index f69930d5b53b3..2d07bc92df60c 100644
--- a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td
+++ b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td
@@ -21,6 +21,7 @@ include "flang/Optimizer/Dialect/FIRAttr.td"
include "flang/Optimizer/Dialect/FortranVariableInterface.td"
include "mlir/Dialect/Arith/IR/ArithBase.td"
include "mlir/Dialect/Arith/IR/ArithOpsInterfaces.td"
+include "mlir/Dialect/LLVMIR/LLVMAttrDefs.td"
include "mlir/IR/BuiltinAttributes.td"
// Base class for FIR operations.
diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h
index 9bff2dab974ec..e6f7523868f96 100644
--- a/flang/include/flang/Parser/dump-parse-tree.h
+++ b/flang/include/flang/Parser/dump-parse-tree.h
@@ -204,6 +204,7 @@ class ParseTreeDumper {
NODE(parser, CompilerDirective)
NODE(CompilerDirective, AssumeAligned)
NODE(CompilerDirective, IgnoreTKR)
+ NODE(CompilerDirective, IVDep)
NODE(CompilerDirective, LoopCount)
NODE(CompilerDirective, NameValue)
NODE(CompilerDirective, Unrecognized)
diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h
index 029c3de354b66..e76cdc15babff 100644
--- a/flang/include/flang/Parser/parse-tree.h
+++ b/flang/include/flang/Parser/parse-tree.h
@@ -3351,6 +3351,7 @@ struct StmtFunctionStmt {
// !DIR$ name[=value] [, name[=value]]... = can be :
// !DIR$ UNROLL [N]
// !DIR$ UNROLL_AND_JAM [N]
+// !DIR$ IVDEP
// !DIR$ <anything else>
struct CompilerDirective {
UNION_CLASS_BOILERPLATE(CompilerDirective);
@@ -3376,10 +3377,12 @@ struct CompilerDirective {
struct UnrollAndJam {
WRAPPER_CLASS_BOILERPLATE(UnrollAndJam, std::optional<std::uint64_t>);
};
+ EMPTY_CLASS(IVDep);
EMPTY_CLASS(Unrecognized);
CharBlock source;
std::variant<std::list<IgnoreTKR>, LoopCount, std::list<AssumeAligned>,
- VectorAlways, std::list<NameValue>, Unroll, UnrollAndJam, Unrecognized>
+ VectorAlways, std::list<NameValue>, Unroll, UnrollAndJam, Unrecognized,
+ IVDep>
u;
};
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 6e6e88a32517c..cc6f2d9a73949 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -2046,6 +2046,37 @@ class FirConverter : public Fortran::lower::AbstractConverter {
// so no clean-up needs to be generated for these entities.
}
+ // Add attribute(s) on operations in fir::DoLoopOp if necessary.
+ void attachAttributesToDoLoopOperations(fir::DoLoopOp &doLoop) {
+ if (!doLoop.getOperation())
+ return;
+ if (auto loopAnnotAttr = doLoop.getLoopAnnotationAttr()) {
+ if (loopAnnotAttr.getParallelAccesses().size()) {
+ mlir::LLVM::AccessGroupAttr accessGroupAttr =
+ loopAnnotAttr.getParallelAccesses().front();
+ for (mlir::Block &block : doLoop.getRegion()) {
+ mlir::ArrayAttr attrs =
+ mlir::ArrayAttr::get(builder->getContext(), {accessGroupAttr});
+ for (mlir::Operation &op : block.getOperations()) {
+ if (fir::StoreOp storeOp = mlir::dyn_cast<fir::StoreOp>(op)) {
+ storeOp.setAccessGroupsAttr(attrs);
+ } else if (fir::LoadOp loadOp = mlir::dyn_cast<fir::LoadOp>(op)) {
+ loadOp.setAccessGroupsAttr(attrs);
+ } else if (hlfir::AssignOp assignOp =
+ mlir::dyn_cast<hlfir::AssignOp>(op)) {
+ // In some loops, the HLFIR AssignOp operation can be translated
+ // into FIR operation(s) containing StoreOp. It is therefore
+ // necessary to forward the AccessGroups attribute.
+ assignOp.getOperation()->setAttr("access_groups", attrs);
+ } else if (fir::CallOp callOp = mlir::dyn_cast<fir::CallOp>(op)) {
+ callOp.setAccessGroupsAttr(attrs);
+ }
+ }
+ }
+ }
+ }
+ }
+
/// Generate FIR for a DO construct. There are six variants:
/// - unstructured infinite and while loops
/// - structured and unstructured increment loops
@@ -2155,6 +2186,10 @@ class FirConverter : public Fortran::lower::AbstractConverter {
// This call may generate a branch in some contexts.
genFIR(endDoEval, unstructuredContext);
+
+ // Add attribute(s) on operations in fir::DoLoopOp if necessary
+ for (IncrementLoopInfo &info : incrementLoopNestInfo)
+ attachAttributesToDoLoopOperations(info.doLoop);
}
/// Generate FIR to evaluate loop control values (lower, upper and step).
@@ -2235,22 +2270,28 @@ class FirConverter : public Fortran::lower::AbstractConverter {
{}, {}, {}, {});
}
+ // Enabling loop vectorization attribute.
+ mlir::LLVM::LoopVectorizeAttr genLoopVectorizeAttr(bool enable = true) {
+ mlir::BoolAttr disableAttr =
+ mlir::BoolAttr::get(builder->getContext(), !enable);
+ return mlir::LLVM::LoopVectorizeAttr::get(builder->getContext(),
+ /*disable=*/disableAttr, {}, {},
+ {}, {}, {}, {});
+ }
+
void addLoopAnnotationAttr(
IncrementLoopInfo &info,
llvm::SmallVectorImpl<const Fortran::parser::CompilerDirective *> &dirs) {
mlir::LLVM::LoopVectorizeAttr va;
mlir::LLVM::LoopUnrollAttr ua;
mlir::LLVM::LoopUnrollAndJamAttr uja;
+ llvm::SmallVector<mlir::LLVM::AccessGroupAttr> aga;
bool has_attrs = false;
for (const auto *dir : dirs) {
Fortran::common::visit(
Fortran::common::visitors{
[&](const Fortran::parser::CompilerDirective::VectorAlways &) {
- mlir::BoolAttr falseAttr =
- mlir::BoolAttr::get(builder->getContext(), false);
- va = mlir::LLVM::LoopVectorizeAttr::get(builder->getContext(),
- /*disable=*/falseAttr,
- {}, {}, {}, {}, {}, {});
+ va = genLoopVectorizeAttr();
has_attrs = true;
},
[&](const Fortran::parser::CompilerDirective::Unroll &u) {
@@ -2261,12 +2302,19 @@ class FirConverter : public Fortran::lower::AbstractConverter {
uja = genLoopUnrollAndJamAttr(u.v);
has_attrs = true;
},
+ [&](const Fortran::parser::CompilerDirective::IVDep &iv) {
+ va = genLoopVectorizeAttr();
+ aga.push_back(
+ mlir::LLVM::AccessGroupAttr::get(builder->getContext()));
+ has_attrs = true;
+ },
[&](const auto &) {}},
dir->u);
}
mlir::LLVM::LoopAnnotationAttr la = mlir::LLVM::LoopAnnotationAttr::get(
builder->getContext(), {}, /*vectorize=*/va, {}, /*unroll*/ ua,
- /*unroll_and_jam*/ uja, {}, {}, {}, {}, {}, {}, {}, {}, {}, {});
+ /*unroll_and_jam*/ uja, {}, {}, {}, {}, {}, {}, {}, {}, {},
+ /*parallelAccesses*/ aga);
if (has_attrs)
info.doLoop.setLoopAnnotationAttr(la);
}
@@ -2925,6 +2973,9 @@ class FirConverter : public Fortran::lower::AbstractConverter {
[&](const Fortran::parser::CompilerDirective::UnrollAndJam &) {
attachDirectiveToLoop(dir, &eval);
},
+ [&](const Fortran::parser::CompilerDirective::IVDep &) {
+ attachDirectiveToLoop(dir, &eval);
+ },
[&](const auto &) {}},
dir.u);
}
diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index 6a0f4d1090adc..b1907356161ee 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -649,7 +649,8 @@ Fortran::lower::genCallOpAndResult(
// Standard procedure call with fir.call.
auto call = builder.create<fir::CallOp>(
loc, funcType.getResults(), funcSymbolAttr, operands,
- /*arg_attrs=*/nullptr, /*res_attrs=*/nullptr, procAttrs);
+ /*arg_attrs=*/nullptr, /*res_attrs=*/nullptr, procAttrs,
+ /*accessGroups*/ mlir::ArrayAttr{});
callNumResults = call.getNumResults();
if (callNumResults != 0)
diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
index b7f8a8d3a9d56..75d131506ea40 100644
--- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp
+++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
@@ -1304,12 +1304,11 @@ fir::ExtendedValue fir::factory::arraySectionElementToExtendedValue(
return fir::factory::componentToExtendedValue(builder, loc, element);
}
-void fir::factory::genScalarAssignment(fir::FirOpBuilder &builder,
- mlir::Location loc,
- const fir::ExtendedValue &lhs,
- const fir::ExtendedValue &rhs,
- bool needFinalization,
- bool isTemporaryLHS) {
+void fir::factory::genScalarAssignment(
+ fir::FirOpBuilder &builder, mlir::Location loc,
+ const fir::ExtendedValue &lhs, const fir::ExtendedValue &rhs,
+ bool needFinalization, bool isTemporaryLHS,
+ std::optional<mlir::ArrayAttr> accessGroups) {
assert(lhs.rank() == 0 && rhs.rank() == 0 && "must be scalars");
auto type = fir::unwrapSequenceType(
fir::unwrapPassByRefType(fir::getBase(lhs).getType()));
@@ -1331,7 +1330,9 @@ void fir::factory::genScalarAssignment(fir::FirOpBuilder &builder,
mlir::Value lhsAddr = fir::getBase(lhs);
rhsVal = builder.createConvert(loc, fir::unwrapRefType(lhsAddr.getType()),
rhsVal);
- builder.create<fir::StoreOp>(loc, rhsVal, lhsAddr);
+ fir::StoreOp store = builder.create<fir::StoreOp>(loc, rhsVal, lhsAddr);
+ if (accessGroups)
+ store.setAccessGroupsAttr(*accessGroups);
}
}
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index b54b497ee4ba1..83d68557e2101 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -634,6 +634,10 @@ struct CallOpConversion : public fir::FIROpConversion<fir::CallOp> {
if (mlir::ArrayAttr resAttrs = call.getResAttrsAttr())
llvmCall.setResAttrsAttr(resAttrs);
+ if (std::optional<mlir::ArrayAttr> optionalAccessGroups =
+ call.getAccessGroups())
+ llvmCall.setAccessGroups(*optionalAccessGroups);
+
if (memAttr)
llvmCall.setMemoryEffectsAttr(
mlir::cast<mlir::LLVM::MemoryEffectsAttr>(memAttr));
@@ -3267,6 +3271,11 @@ struct LoadOpConversion : public fir::FIROpConversion<fir::LoadOp> {
loadOp.setTBAATags(*optionalTag);
else
attachTBAATag(loadOp, load.getType(), load.getType(), nullptr);
+
+ if (std::optional<mlir::ArrayAttr> optionalAccessGroups =
+ load.getAccessGroups())
+ loadOp.setAccessGroups(*optionalAccessGroups);
+
rewriter.replaceOp(load, loadOp.getResult());
}
return mlir::success();
@@ -3550,7 +3559,12 @@ struct StoreOpConversion : public fir::FIROpConversion<fir::StoreOp> {
newOp = rewriter.create<mlir::LLVM::MemcpyOp>(
loc, llvmMemref, llvmValue, boxSize, /*isVolatile=*/false);
} else {
- newOp = rewriter.create<mlir::LLVM::StoreOp>(loc, llvmValue, llvmMemref);
+ mlir::LLVM::StoreOp storeOp =
+ rewriter.create<mlir::LLVM::StoreOp>(loc, llvmValue, llvmMemref);
+ if (std::optional<mlir::ArrayAttr> optionalAccessGroups =
+ store.getAccessGroups())
+ storeOp.setAccessGroups(*optionalAccessGroups);
+ newOp = storeOp;
}
if (std::optional<mlir::ArrayAttr> optionalTag = store.getTbaa())
newOp.setTBAATags(*optionalTag);
diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
index 1e8a7354da561..3d0d32fa9ade0 100644
--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -3948,7 +3948,7 @@ llvm::LogicalResult fir::StoreOp::verify() {
void fir::StoreOp::build(mlir::OpBuilder &builder, mlir::OperationState &result,
mlir::Value value, mlir::Value memref) {
- build(builder, result, value, memref, {});
+ build(builder, result, value, memref, {}, {});
}
//===----------------------------------------------------------------------===//
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp
index 496a5560ac615..ea631dd1d72db 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp
@@ -148,13 +148,17 @@ class AssignOpConversion : public mlir::OpRewritePattern<hlfir::AssignOp> {
!assignOp.isTemporaryLHS() &&
mlir::isa<fir::RecordType>(fir::getElementTypeOf(lhsExv));
+ std::optional<mlir::ArrayAttr> accessGroups = nullptr;
+ if (auto attrs = assignOp.getOperation()->getAttrOfType<mlir::ArrayAttr>(
+ "access_groups"))
+ accessGroups = attrs;
// genScalarAssignment() must take care of potential overlap
// between LHS and RHS. Note that the overlap is possible
// also for components of LHS/RHS, and the Assign() runtime
// must take care of it.
- fir::factory::genScalarAssignment(builder, loc, lhsExv, rhsExv,
- needFinalization,
- assignOp.isTemporaryLHS());
+ fir::factory::genScalarAssignment(
+ builder, loc, lhsExv, rhsExv, needFinalization,
+ assignOp.isTemporaryLHS(), accessGroups);
}
rewriter.eraseOp(assignOp);
return mlir::success();
diff --git a/flang/lib/Optimizer/Transforms/PolymorphicOpConversion.cpp b/flang/lib/Optimizer/Transforms/PolymorphicOpConversion.cpp
index 0c78a878cdc53..d3194c352c765 100644
--- a/flang/lib/Optimizer/Transforms/PolymorphicOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/PolymorphicOpConversion.cpp
@@ -207,7 +207,8 @@ struct DispatchOpConv : public OpConversionPattern<fir::DispatchOp> {
args.append(dispatch.getArgs().begin(), dispatch.getArgs().end());
rewriter.replaceOpWithNewOp<fir::CallOp>(
dispatch, resTypes, nullptr, args, dispatch.getArgAttrsAttr(),
- dispatch.getResAttrsAttr(), dispatch.getProcedureAttrsAttr());
+ dispatch.getResAttrsAttr(), dispatch.getProcedureAttrsAttr(),
+ mlir::ArrayAttr{});
return mlir::success();
}
diff --git a/flang/lib/Parser/Fortran-parsers.cpp b/flang/lib/Parser/Fortran-parsers.cpp
index cfe9ecb29b0b7..329661653484d 100644
--- a/flang/lib/Parser/Fortran-parsers.cpp
+++ b/flang/lib/Parser/Fortran-parsers.cpp
@@ -1294,6 +1294,7 @@ TYPE_PARSER(construct<StatOrErrmsg>("STAT =" >> statVariable) ||
// !DIR$ LOOP COUNT (n1[, n2]...)
// !DIR$ name[=value] [, name[=value]]...
// !DIR$ UNROLL [n]
+// !DIR$ IVDEP
// !DIR$ <anything else>
constexpr auto ignore_tkr{
"IGNORE_TKR" >> optionalList(construct<CompilerDirective::IgnoreTKR>(
@@ -1310,6 +1311,7 @@ constexpr auto unroll{
"UNROLL" >> construct<CompilerDirective::Unroll>(maybe(digitString64))};
constexpr auto unrollAndJam{"UNROLL_AND_JAM" >>
construct<CompilerDirective::UnrollAndJam>(maybe(digitString64))};
+constexpr auto ivdep{"IVDEP" >> construct<CompilerDirective::IVDep>()};
TYPE_PARSER(beginDirective >> "DIR$ "_tok >>
sourced((construct<CompilerDirective>(ignore_tkr) ||
construct<CompilerDirective>(loopCount) ||
@@ -1317,6 +1319,7 @@ TYPE_PARSER(beginDirective >> "DIR$ "_tok >>
construct<CompilerDirective>(vectorAlways) ||
construct<CompilerDirective>(unrollAndJam) ||
construct<CompilerDirective>(unroll) ||
+ construct<CompilerDirective>(ivdep) ||
construct<CompilerDirective>(
many(construct<CompilerDirective::NameValue>(
name, maybe(("="_tok || ":"_tok) >> digitString64))))) /
diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp
index 8cdbf8ed2a672..afa4d1989fc33 100644
--- a/flang/lib/Parser/unparse.cpp
+++ b/flang/lib/Parser/unparse.cpp
@@ -1855,6 +1855,7 @@ class UnparseVisitor {
Word("!DIR$ UNROLL_AND_JAM");
Walk(" ", unrollAndJam.v);
},
+ [&](const CompilerDirective::IVDep &) { Word("!DIR$ IVDEP"); },
[&](const CompilerDirective::Unrecognized &) {
Word("!DIR$ ");
Word(x.source.ToString());
diff --git a/flang/lib/Semantics/canonicalize-directives.cpp b/flang/lib/Semantics/canonicalize-directives.cpp
index 1a0a0d145b3e2..0db7a87f6dc84 100644
--- a/flang/lib/Semantics/canonicalize-directives.cpp
+++ b/flang/lib/Semantics/canonicalize-directives.cpp
@@ -57,7 +57,8 @@ static bool IsExecutionDirective(const parser::CompilerDirective &dir) {
return std::holds_alternative<parser::CompilerDirective::VectorAlways>(
dir.u) ||
std::holds_alternative<parser::CompilerDirective::Unroll>(dir.u) ||
- std::holds_alternative<parser::CompilerDirective::UnrollAndJam>(d...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A couple of drive-through comments.
void attachAttributesToDoLoopOperations(fir::DoLoopOp &doLoop) { | ||
if (!doLoop.getOperation()) | ||
return; | ||
if (auto loopAnnotAttr = doLoop.getLoopAnnotationAttr()) { | ||
if (loopAnnotAttr.getParallelAccesses().size()) { | ||
mlir::LLVM::AccessGroupAttr accessGroupAttr = | ||
loopAnnotAttr.getParallelAccesses().front(); | ||
for (mlir::Block &block : doLoop.getRegion()) { | ||
mlir::ArrayAttr attrs = | ||
mlir::ArrayAttr::get(builder->getContext(), {accessGroupAttr}); | ||
for (mlir::Operation &op : block.getOperations()) { | ||
if (fir::StoreOp storeOp = mlir::dyn_cast<fir::StoreOp>(op)) { | ||
storeOp.setAccessGroupsAttr(attrs); | ||
} else if (fir::LoadOp loadOp = mlir::dyn_cast<fir::LoadOp>(op)) { | ||
loadOp.setAccessGroupsAttr(attrs); | ||
} else if (hlfir::AssignOp assignOp = | ||
mlir::dyn_cast<hlfir::AssignOp>(op)) { | ||
// In some loops, the HLFIR AssignOp operation can be translated | ||
// into FIR operation(s) containing StoreOp. It is therefore | ||
// necessary to forward the AccessGroups attribute. | ||
assignOp.getOperation()->setAttr("access_groups", attrs); | ||
} else if (fir::CallOp callOp = mlir::dyn_cast<fir::CallOp>(op)) { | ||
callOp.setAccessGroupsAttr(attrs); | ||
} | ||
} | ||
} | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need to go into operations that are nested inside other operations?
subroutine sb(A)
integer, intent(inout) :: A(:)
integer :: i
!dir$ ivdep
do i=1,100
if (i .gt. 10) then
A(i) = A(i) + i
end if
end do
end subroutine
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, that's right ! So I also have to go through the operations in depth
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is walk()
what you were looking for?
doLoop.walk([&] (auto *op) {
if (mlir::isa<StoreOp, LoadOp, CallOp>(op))
// ...
});
// or
doLoop.walk([&] (hlfir::AssignOp assignOp) {
// ...
});
assignOp.isTemporaryLHS()); | ||
fir::factory::genScalarAssignment( | ||
builder, loc, lhsExv, rhsExv, needFinalization, | ||
assignOp.isTemporaryLHS(), accessGroups); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There are a few other places in this file where load
and store
ops are created, should they also be tagged with the access groups?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think so. The access group
applies on loads
and stores
from assignments in a loop. But I'll make a new pass on it and add tests in that case if needed.
@Leporacanthicus had the parsing portions of this PR in an earlier patch #101045. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for your work so far on this
@@ -21,6 +21,7 @@ include "flang/Optimizer/Dialect/FIRAttr.td" | |||
include "flang/Optimizer/Dialect/FortranVariableInterface.td" | |||
include "mlir/Dialect/Arith/IR/ArithBase.td" | |||
include "mlir/Dialect/Arith/IR/ArithOpsInterfaces.td" | |||
include "mlir/Dialect/LLVMIR/LLVMAttrDefs.td" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: What is this needed for?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, I don't need it ! Sorry
} else if (hlfir::AssignOp assignOp = | ||
mlir::dyn_cast<hlfir::AssignOp>(op)) { | ||
// In some loops, the HLFIR AssignOp operation can be translated | ||
// into FIR operation(s) containing StoreOp. It is therefore | ||
// necessary to forward the AccessGroups attribute. | ||
assignOp.getOperation()->setAttr("access_groups", attrs); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Tracking everything that could generate a store sounds difficult to maintain in the long term. I wonder if this should instead be added by a pass late in the pipeline after all of these stores have been generated.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Adding a pass that checks for all fir.do_loop
with the parallelAccesses
attribute and do the thing in this code here? It can be done, yes, and it could actually be simpler to manage yes.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks! I'm not familiar enough with the access group metadata to give better feedback, but I agree with @tblah in wondering if this should happen later, after store ops have already been created from all the other ops that can produce them.
! CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[ANNOTATION:.*]] | ||
do i=1,10 | ||
a(i)=i | ||
!CHECK: store i32 {{.*}}, ptr {{.*}}, align 4, !llvm.access.group [[DISTRINCT:.*]] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: DISTINCT
instead of DISTRINCT
void attachAttributesToDoLoopOperations(fir::DoLoopOp &doLoop) { | ||
if (!doLoop.getOperation()) | ||
return; | ||
if (auto loopAnnotAttr = doLoop.getLoopAnnotationAttr()) { | ||
if (loopAnnotAttr.getParallelAccesses().size()) { | ||
mlir::LLVM::AccessGroupAttr accessGroupAttr = | ||
loopAnnotAttr.getParallelAccesses().front(); | ||
for (mlir::Block &block : doLoop.getRegion()) { | ||
mlir::ArrayAttr attrs = | ||
mlir::ArrayAttr::get(builder->getContext(), {accessGroupAttr}); | ||
for (mlir::Operation &op : block.getOperations()) { | ||
if (fir::StoreOp storeOp = mlir::dyn_cast<fir::StoreOp>(op)) { | ||
storeOp.setAccessGroupsAttr(attrs); | ||
} else if (fir::LoadOp loadOp = mlir::dyn_cast<fir::LoadOp>(op)) { | ||
loadOp.setAccessGroupsAttr(attrs); | ||
} else if (hlfir::AssignOp assignOp = | ||
mlir::dyn_cast<hlfir::AssignOp>(op)) { | ||
// In some loops, the HLFIR AssignOp operation can be translated | ||
// into FIR operation(s) containing StoreOp. It is therefore | ||
// necessary to forward the AccessGroups attribute. | ||
assignOp.getOperation()->setAttr("access_groups", attrs); | ||
} else if (fir::CallOp callOp = mlir::dyn_cast<fir::CallOp>(op)) { | ||
callOp.setAccessGroupsAttr(attrs); | ||
} | ||
} | ||
} | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is walk()
what you were looking for?
doLoop.walk([&] (auto *op) {
if (mlir::isa<StoreOp, LoadOp, CallOp>(op))
// ...
});
// or
doLoop.walk([&] (hlfir::AssignOp assignOp) {
// ...
});
@JDPailleux do you have any update on this? |
Hi, sorry for the late reply (First I was waiting response from #101045 and after I was unavailable). I'll push an updated version soon. |
Hi @JDPailleux, Thanks for the patch. but flang will generate below metadata: IMO, the behavior of clang and flang should match. In AMD downstream compiler we have below support for different directives
Let me know your opinion. |
This directive tells the compiler to ignore vector dependencies in the following loop and it must be placed before a
do loop
.Sometimes the compiler may not have sufficient information to decide whether a particular loop is vectorizable due to potential dependencies between iterations and the directive is here to tell to the compiler that vectorization is safe with
parallelAccesses
metadata.This directive is also equivalent to
#pragma clang loop assume(safety)
in C++