diff --git a/flang/include/flang/Optimizer/Builder/FIRBuilder.h b/flang/include/flang/Optimizer/Builder/FIRBuilder.h index 003b4358572c1..46b362e9c6c01 100644 --- a/flang/include/flang/Optimizer/Builder/FIRBuilder.h +++ b/flang/include/flang/Optimizer/Builder/FIRBuilder.h @@ -735,7 +735,8 @@ void genScalarAssignment(fir::FirOpBuilder &builder, mlir::Location loc, const fir::ExtendedValue &lhs, const fir::ExtendedValue &rhs, bool needFinalization = false, - bool isTemporaryLHS = false); + bool isTemporaryLHS = false, + std::optional accessGroups = nullptr); /// Assign \p rhs to \p lhs. Both \p rhs and \p lhs must be scalar derived /// types. The assignment follows Fortran intrinsic assignment semantic for diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td index c8d8ab41552c2..871f4cb1ff847 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.td +++ b/flang/include/flang/Optimizer/Dialect/FIROps.td @@ -303,7 +303,8 @@ def fir_LoadOp : fir_OneResultOp<"load", [FirAliasTagOpInterface]> { }]; let arguments = (ins Arg:$memref, - OptionalAttr:$tbaa); + OptionalAttr:$tbaa, + OptionalAttr:$accessGroups); let builders = [OpBuilder<(ins "mlir::Value":$refVal)>, OpBuilder<(ins "mlir::Type":$resTy, "mlir::Value":$refVal)>]; @@ -335,8 +336,9 @@ def fir_StoreOp : fir_Op<"store", [FirAliasTagOpInterface]> { }]; let arguments = (ins AnyType:$value, - Arg:$memref, - OptionalAttr:$tbaa); + Arg:$memref, + OptionalAttr:$tbaa, + OptionalAttr:$accessGroups); let builders = [OpBuilder<(ins "mlir::Value":$value, "mlir::Value":$memref)>]; @@ -2484,15 +2486,13 @@ def fir_CallOp : fir_Op<"call", ``` }]; - let arguments = (ins - OptionalAttr:$callee, - Variadic:$args, - OptionalAttr:$arg_attrs, - OptionalAttr:$res_attrs, - OptionalAttr:$procedure_attrs, - DefaultValuedAttr:$fastmath - ); + let arguments = (ins OptionalAttr:$callee, + Variadic:$args, OptionalAttr:$arg_attrs, + OptionalAttr:$res_attrs, + OptionalAttr:$procedure_attrs, + OptionalAttr:$accessGroups, + DefaultValuedAttr:$fastmath); let results = (outs Variadic); let hasCustomAssemblyFormat = 1; diff --git a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td index f69930d5b53b3..2d07bc92df60c 100644 --- a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td +++ b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td @@ -21,6 +21,7 @@ include "flang/Optimizer/Dialect/FIRAttr.td" include "flang/Optimizer/Dialect/FortranVariableInterface.td" include "mlir/Dialect/Arith/IR/ArithBase.td" include "mlir/Dialect/Arith/IR/ArithOpsInterfaces.td" +include "mlir/Dialect/LLVMIR/LLVMAttrDefs.td" include "mlir/IR/BuiltinAttributes.td" // Base class for FIR operations. diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index 9bff2dab974ec..e6f7523868f96 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -204,6 +204,7 @@ class ParseTreeDumper { NODE(parser, CompilerDirective) NODE(CompilerDirective, AssumeAligned) NODE(CompilerDirective, IgnoreTKR) + NODE(CompilerDirective, IVDep) NODE(CompilerDirective, LoopCount) NODE(CompilerDirective, NameValue) NODE(CompilerDirective, Unrecognized) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 029c3de354b66..e76cdc15babff 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -3351,6 +3351,7 @@ struct StmtFunctionStmt { // !DIR$ name[=value] [, name[=value]]... = can be : // !DIR$ UNROLL [N] // !DIR$ UNROLL_AND_JAM [N] +// !DIR$ IVDEP // !DIR$ struct CompilerDirective { UNION_CLASS_BOILERPLATE(CompilerDirective); @@ -3376,10 +3377,12 @@ struct CompilerDirective { struct UnrollAndJam { WRAPPER_CLASS_BOILERPLATE(UnrollAndJam, std::optional); }; + EMPTY_CLASS(IVDep); EMPTY_CLASS(Unrecognized); CharBlock source; std::variant, LoopCount, std::list, - VectorAlways, std::list, Unroll, UnrollAndJam, Unrecognized> + VectorAlways, std::list, Unroll, UnrollAndJam, Unrecognized, + IVDep> u; }; diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 6e6e88a32517c..cc6f2d9a73949 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -2046,6 +2046,37 @@ class FirConverter : public Fortran::lower::AbstractConverter { // so no clean-up needs to be generated for these entities. } + // Add attribute(s) on operations in fir::DoLoopOp if necessary. + void attachAttributesToDoLoopOperations(fir::DoLoopOp &doLoop) { + if (!doLoop.getOperation()) + return; + if (auto loopAnnotAttr = doLoop.getLoopAnnotationAttr()) { + if (loopAnnotAttr.getParallelAccesses().size()) { + mlir::LLVM::AccessGroupAttr accessGroupAttr = + loopAnnotAttr.getParallelAccesses().front(); + for (mlir::Block &block : doLoop.getRegion()) { + mlir::ArrayAttr attrs = + mlir::ArrayAttr::get(builder->getContext(), {accessGroupAttr}); + for (mlir::Operation &op : block.getOperations()) { + if (fir::StoreOp storeOp = mlir::dyn_cast(op)) { + storeOp.setAccessGroupsAttr(attrs); + } else if (fir::LoadOp loadOp = mlir::dyn_cast(op)) { + loadOp.setAccessGroupsAttr(attrs); + } else if (hlfir::AssignOp assignOp = + mlir::dyn_cast(op)) { + // In some loops, the HLFIR AssignOp operation can be translated + // into FIR operation(s) containing StoreOp. It is therefore + // necessary to forward the AccessGroups attribute. + assignOp.getOperation()->setAttr("access_groups", attrs); + } else if (fir::CallOp callOp = mlir::dyn_cast(op)) { + callOp.setAccessGroupsAttr(attrs); + } + } + } + } + } + } + /// Generate FIR for a DO construct. There are six variants: /// - unstructured infinite and while loops /// - structured and unstructured increment loops @@ -2155,6 +2186,10 @@ class FirConverter : public Fortran::lower::AbstractConverter { // This call may generate a branch in some contexts. genFIR(endDoEval, unstructuredContext); + + // Add attribute(s) on operations in fir::DoLoopOp if necessary + for (IncrementLoopInfo &info : incrementLoopNestInfo) + attachAttributesToDoLoopOperations(info.doLoop); } /// Generate FIR to evaluate loop control values (lower, upper and step). @@ -2235,22 +2270,28 @@ class FirConverter : public Fortran::lower::AbstractConverter { {}, {}, {}, {}); } + // Enabling loop vectorization attribute. + mlir::LLVM::LoopVectorizeAttr genLoopVectorizeAttr(bool enable = true) { + mlir::BoolAttr disableAttr = + mlir::BoolAttr::get(builder->getContext(), !enable); + return mlir::LLVM::LoopVectorizeAttr::get(builder->getContext(), + /*disable=*/disableAttr, {}, {}, + {}, {}, {}, {}); + } + void addLoopAnnotationAttr( IncrementLoopInfo &info, llvm::SmallVectorImpl &dirs) { mlir::LLVM::LoopVectorizeAttr va; mlir::LLVM::LoopUnrollAttr ua; mlir::LLVM::LoopUnrollAndJamAttr uja; + llvm::SmallVector aga; bool has_attrs = false; for (const auto *dir : dirs) { Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::parser::CompilerDirective::VectorAlways &) { - mlir::BoolAttr falseAttr = - mlir::BoolAttr::get(builder->getContext(), false); - va = mlir::LLVM::LoopVectorizeAttr::get(builder->getContext(), - /*disable=*/falseAttr, - {}, {}, {}, {}, {}, {}); + va = genLoopVectorizeAttr(); has_attrs = true; }, [&](const Fortran::parser::CompilerDirective::Unroll &u) { @@ -2261,12 +2302,19 @@ class FirConverter : public Fortran::lower::AbstractConverter { uja = genLoopUnrollAndJamAttr(u.v); has_attrs = true; }, + [&](const Fortran::parser::CompilerDirective::IVDep &iv) { + va = genLoopVectorizeAttr(); + aga.push_back( + mlir::LLVM::AccessGroupAttr::get(builder->getContext())); + has_attrs = true; + }, [&](const auto &) {}}, dir->u); } mlir::LLVM::LoopAnnotationAttr la = mlir::LLVM::LoopAnnotationAttr::get( builder->getContext(), {}, /*vectorize=*/va, {}, /*unroll*/ ua, - /*unroll_and_jam*/ uja, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}); + /*unroll_and_jam*/ uja, {}, {}, {}, {}, {}, {}, {}, {}, {}, + /*parallelAccesses*/ aga); if (has_attrs) info.doLoop.setLoopAnnotationAttr(la); } @@ -2925,6 +2973,9 @@ class FirConverter : public Fortran::lower::AbstractConverter { [&](const Fortran::parser::CompilerDirective::UnrollAndJam &) { attachDirectiveToLoop(dir, &eval); }, + [&](const Fortran::parser::CompilerDirective::IVDep &) { + attachDirectiveToLoop(dir, &eval); + }, [&](const auto &) {}}, dir.u); } diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp index 6a0f4d1090adc..b1907356161ee 100644 --- a/flang/lib/Lower/ConvertCall.cpp +++ b/flang/lib/Lower/ConvertCall.cpp @@ -649,7 +649,8 @@ Fortran::lower::genCallOpAndResult( // Standard procedure call with fir.call. auto call = builder.create( loc, funcType.getResults(), funcSymbolAttr, operands, - /*arg_attrs=*/nullptr, /*res_attrs=*/nullptr, procAttrs); + /*arg_attrs=*/nullptr, /*res_attrs=*/nullptr, procAttrs, + /*accessGroups*/ mlir::ArrayAttr{}); callNumResults = call.getNumResults(); if (callNumResults != 0) diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp index b7f8a8d3a9d56..75d131506ea40 100644 --- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp +++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp @@ -1304,12 +1304,11 @@ fir::ExtendedValue fir::factory::arraySectionElementToExtendedValue( return fir::factory::componentToExtendedValue(builder, loc, element); } -void fir::factory::genScalarAssignment(fir::FirOpBuilder &builder, - mlir::Location loc, - const fir::ExtendedValue &lhs, - const fir::ExtendedValue &rhs, - bool needFinalization, - bool isTemporaryLHS) { +void fir::factory::genScalarAssignment( + fir::FirOpBuilder &builder, mlir::Location loc, + const fir::ExtendedValue &lhs, const fir::ExtendedValue &rhs, + bool needFinalization, bool isTemporaryLHS, + std::optional accessGroups) { assert(lhs.rank() == 0 && rhs.rank() == 0 && "must be scalars"); auto type = fir::unwrapSequenceType( fir::unwrapPassByRefType(fir::getBase(lhs).getType())); @@ -1331,7 +1330,9 @@ void fir::factory::genScalarAssignment(fir::FirOpBuilder &builder, mlir::Value lhsAddr = fir::getBase(lhs); rhsVal = builder.createConvert(loc, fir::unwrapRefType(lhsAddr.getType()), rhsVal); - builder.create(loc, rhsVal, lhsAddr); + fir::StoreOp store = builder.create(loc, rhsVal, lhsAddr); + if (accessGroups) + store.setAccessGroupsAttr(*accessGroups); } } diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index b54b497ee4ba1..83d68557e2101 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -634,6 +634,10 @@ struct CallOpConversion : public fir::FIROpConversion { if (mlir::ArrayAttr resAttrs = call.getResAttrsAttr()) llvmCall.setResAttrsAttr(resAttrs); + if (std::optional optionalAccessGroups = + call.getAccessGroups()) + llvmCall.setAccessGroups(*optionalAccessGroups); + if (memAttr) llvmCall.setMemoryEffectsAttr( mlir::cast(memAttr)); @@ -3267,6 +3271,11 @@ struct LoadOpConversion : public fir::FIROpConversion { loadOp.setTBAATags(*optionalTag); else attachTBAATag(loadOp, load.getType(), load.getType(), nullptr); + + if (std::optional optionalAccessGroups = + load.getAccessGroups()) + loadOp.setAccessGroups(*optionalAccessGroups); + rewriter.replaceOp(load, loadOp.getResult()); } return mlir::success(); @@ -3550,7 +3559,12 @@ struct StoreOpConversion : public fir::FIROpConversion { newOp = rewriter.create( loc, llvmMemref, llvmValue, boxSize, /*isVolatile=*/false); } else { - newOp = rewriter.create(loc, llvmValue, llvmMemref); + mlir::LLVM::StoreOp storeOp = + rewriter.create(loc, llvmValue, llvmMemref); + if (std::optional optionalAccessGroups = + store.getAccessGroups()) + storeOp.setAccessGroups(*optionalAccessGroups); + newOp = storeOp; } if (std::optional optionalTag = store.getTbaa()) newOp.setTBAATags(*optionalTag); diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp index 1e8a7354da561..3d0d32fa9ade0 100644 --- a/flang/lib/Optimizer/Dialect/FIROps.cpp +++ b/flang/lib/Optimizer/Dialect/FIROps.cpp @@ -3948,7 +3948,7 @@ llvm::LogicalResult fir::StoreOp::verify() { void fir::StoreOp::build(mlir::OpBuilder &builder, mlir::OperationState &result, mlir::Value value, mlir::Value memref) { - build(builder, result, value, memref, {}); + build(builder, result, value, memref, {}, {}); } //===----------------------------------------------------------------------===// diff --git a/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp index 496a5560ac615..ea631dd1d72db 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp @@ -148,13 +148,17 @@ class AssignOpConversion : public mlir::OpRewritePattern { !assignOp.isTemporaryLHS() && mlir::isa(fir::getElementTypeOf(lhsExv)); + std::optional accessGroups = nullptr; + if (auto attrs = assignOp.getOperation()->getAttrOfType( + "access_groups")) + accessGroups = attrs; // genScalarAssignment() must take care of potential overlap // between LHS and RHS. Note that the overlap is possible // also for components of LHS/RHS, and the Assign() runtime // must take care of it. - fir::factory::genScalarAssignment(builder, loc, lhsExv, rhsExv, - needFinalization, - assignOp.isTemporaryLHS()); + fir::factory::genScalarAssignment( + builder, loc, lhsExv, rhsExv, needFinalization, + assignOp.isTemporaryLHS(), accessGroups); } rewriter.eraseOp(assignOp); return mlir::success(); diff --git a/flang/lib/Optimizer/Transforms/PolymorphicOpConversion.cpp b/flang/lib/Optimizer/Transforms/PolymorphicOpConversion.cpp index 0c78a878cdc53..d3194c352c765 100644 --- a/flang/lib/Optimizer/Transforms/PolymorphicOpConversion.cpp +++ b/flang/lib/Optimizer/Transforms/PolymorphicOpConversion.cpp @@ -207,7 +207,8 @@ struct DispatchOpConv : public OpConversionPattern { args.append(dispatch.getArgs().begin(), dispatch.getArgs().end()); rewriter.replaceOpWithNewOp( dispatch, resTypes, nullptr, args, dispatch.getArgAttrsAttr(), - dispatch.getResAttrsAttr(), dispatch.getProcedureAttrsAttr()); + dispatch.getResAttrsAttr(), dispatch.getProcedureAttrsAttr(), + mlir::ArrayAttr{}); return mlir::success(); } diff --git a/flang/lib/Parser/Fortran-parsers.cpp b/flang/lib/Parser/Fortran-parsers.cpp index cfe9ecb29b0b7..329661653484d 100644 --- a/flang/lib/Parser/Fortran-parsers.cpp +++ b/flang/lib/Parser/Fortran-parsers.cpp @@ -1294,6 +1294,7 @@ TYPE_PARSER(construct("STAT =" >> statVariable) || // !DIR$ LOOP COUNT (n1[, n2]...) // !DIR$ name[=value] [, name[=value]]... // !DIR$ UNROLL [n] +// !DIR$ IVDEP // !DIR$ constexpr auto ignore_tkr{ "IGNORE_TKR" >> optionalList(construct( @@ -1310,6 +1311,7 @@ constexpr auto unroll{ "UNROLL" >> construct(maybe(digitString64))}; constexpr auto unrollAndJam{"UNROLL_AND_JAM" >> construct(maybe(digitString64))}; +constexpr auto ivdep{"IVDEP" >> construct()}; TYPE_PARSER(beginDirective >> "DIR$ "_tok >> sourced((construct(ignore_tkr) || construct(loopCount) || @@ -1317,6 +1319,7 @@ TYPE_PARSER(beginDirective >> "DIR$ "_tok >> construct(vectorAlways) || construct(unrollAndJam) || construct(unroll) || + construct(ivdep) || construct( many(construct( name, maybe(("="_tok || ":"_tok) >> digitString64))))) / diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index 8cdbf8ed2a672..afa4d1989fc33 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -1855,6 +1855,7 @@ class UnparseVisitor { Word("!DIR$ UNROLL_AND_JAM"); Walk(" ", unrollAndJam.v); }, + [&](const CompilerDirective::IVDep &) { Word("!DIR$ IVDEP"); }, [&](const CompilerDirective::Unrecognized &) { Word("!DIR$ "); Word(x.source.ToString()); diff --git a/flang/lib/Semantics/canonicalize-directives.cpp b/flang/lib/Semantics/canonicalize-directives.cpp index 1a0a0d145b3e2..0db7a87f6dc84 100644 --- a/flang/lib/Semantics/canonicalize-directives.cpp +++ b/flang/lib/Semantics/canonicalize-directives.cpp @@ -57,7 +57,8 @@ static bool IsExecutionDirective(const parser::CompilerDirective &dir) { return std::holds_alternative( dir.u) || std::holds_alternative(dir.u) || - std::holds_alternative(dir.u); + std::holds_alternative(dir.u) || + std::holds_alternative(dir.u); } void CanonicalizationOfDirectives::Post(parser::SpecificationPart &spec) { @@ -119,6 +120,9 @@ void CanonicalizationOfDirectives::Post(parser::Block &block) { [&](parser::CompilerDirective::UnrollAndJam &) { CheckLoopDirective(*dir, block, it); }, + [&](parser::CompilerDirective::IVDep &) { + CheckLoopDirective(*dir, block, it); + }, [&](auto &) {}}, dir->u); } diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index 8ba476ec547fc..8da368a25d3cd 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -9573,7 +9573,8 @@ void ResolveNamesVisitor::Post(const parser::AssignedGotoStmt &x) { void ResolveNamesVisitor::Post(const parser::CompilerDirective &x) { if (std::holds_alternative(x.u) || std::holds_alternative(x.u) || - std::holds_alternative(x.u)) { + std::holds_alternative(x.u) || + std::holds_alternative(x.u)) { return; } if (const auto *tkr{ diff --git a/flang/test/Integration/ivdep.f90 b/flang/test/Integration/ivdep.f90 new file mode 100644 index 0000000000000..48441379c4780 --- /dev/null +++ b/flang/test/Integration/ivdep.f90 @@ -0,0 +1,115 @@ +! RUN: %flang_fc1 -emit-llvm -o - %s | FileCheck %s + +! CHECK-LABEL: ivdep_test1 +subroutine ivdep_test1 + integer :: a(10) + !dir$ ivdep + ! CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[ANNOTATION:.*]] + do i=1,10 + a(i)=i + !CHECK: store i32 {{.*}}, ptr {{.*}}, align 4, !llvm.access.group [[DISTRINCT:.*]] + !CHECK: %[[VAL_8:.*]] = load i32, ptr {{.*}}, align 4, !llvm.access.group [[DISTRINCT]] + !CHECK: %[[VAL_9:.*]] = sext i32 %[[VAL_8]] to i64 + !CHECK: %[[VAL_10:.*]] = sub nsw i64 %[[VAL_9]], 1 + !CHECK: %[[VAL_11:.*]] = mul nsw i64 %[[VAL_10]], 1 + !CHECK: %[[VAL_12:.*]] = mul nsw i64 %[[VAL_11]], 1 + !CHECK: %[[VAL_13:.*]] = add nsw i64 %[[VAL_12]], 0 + !CHECK: %[[VAL_14:.*]] = getelementptr i32, ptr {{.*}}, i64 %[[VAL_13]] + !CHECK: store i32 %[[VAL_8]], ptr %[[VAL_14]], align 4, !llvm.access.group [[DISTRINCT]] + !CHECK: %[[VAL_15:.*]] = load i32, ptr {{.*}}, align 4, !llvm.access.group [[DISTRINCT]] + !CHECK: %[[VAL_16:.*]] = add nsw i32 %[[VAL_15]], 1 + !CHECK: %[[VAL_17:.*]] = sub i64 {{.*}}, 1 + !CHECK: br label {{.*}} + end do +end subroutine ivdep_test1 + + +! CHECK-LABEL: ivdep_test2 +subroutine ivdep_test2 + integer :: a(10), b(10), c(10) + !dir$ ivdep + !dir$ unknown + ! CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[ANNOTATION1:.*]] + do i=1,10 + a(i)=b(i)+c(i) + !CHECK: store i32 {{.*}}, ptr {{.*}}, align 4, !llvm.access.group [[DISTRINCT1:.*]] + !CHECK: %[[VAL_10:.*]] = load i32, ptr {{.*}}, align 4, !llvm.access.group [[DISTRINCT1]] + !CHECK: %[[VAL_11:.*]] = sext i32 %[[VAL_10]] to i64 + !CHECK: %[[VAL_12:.*]] = sub nsw i64 %[[VAL_11]], 1 + !CHECK: %[[VAL_13:.*]] = mul nsw i64 %[[VAL_12]], 1 + !CHECK: %[[VAL_14:.*]] = mul nsw i64 %[[VAL_13]], 1 + !CHECK: %[[VAL_15:.*]] = add nsw i64 %[[VAL_14]], 0 + !CHECK: %[[VAL_16:.*]] = getelementptr i32, ptr {{.*}}, i64 %[[VAL_15]] + !CHECK: %[[VAL_17:.*]] = load i32, ptr {{.*}}, align 4, !llvm.access.group [[DISTRINCT1]] + !CHECK: %[[VAL_18:.*]] = sub nsw i64 %[[VAL_11]], 1 + !CHECK: %[[VAL_19:.*]] = mul nsw i64 %[[VAL_18]], 1 + !CHECK: %[[VAL_20:.*]] = mul nsw i64 %[[VAL_19]], 1 + !CHECK: %[[VAL_21:.*]] = add nsw i64 %[[VAL_20]], 0 + !CHECK: %[[VAL_22:.*]] = getelementptr i32, ptr {{.*}}, i64 %[[VAL_21]] + !CHECK: %[[VAL_23:.*]] = load i32, ptr {{.*}}, align 4, !llvm.access.group [[DISTRINCT1]] + !CHECK: %[[VAL_24:.*]] = add i32 %[[VAL_17]], %[[VAL_23]] + !CHECK: %[[VAL_25:.*]] = sub nsw i64 %[[VAL_11]], 1 + !CHECK: %[[VAL_26:.*]] = mul nsw i64 %[[VAL_25]], 1 + !CHECK: %[[VAL_27:.*]] = mul nsw i64 %[[VAL_26]], 1 + !CHECK: %[[VAL_28:.*]] = add nsw i64 %[[VAL_27]], 0 + !CHECK: %[[VAL_29:.*]] = getelementptr i32, ptr {{.*}}, i64 %[[VAL_28]] + !CHECK: store i32 %[[VAL_24]], ptr %[[VAL_29]], align 4, !llvm.access.group [[DISTRINCT1]] + !CHECK: %[[VAL_30:.*]] = load i32, ptr {{.*}}, align 4, !llvm.access.group [[DISTRINCT1]] + !CHECK: %[[VAL_31:.*]] = add nsw i32 %[[VAL_30]], 1 + !CHECK: %[[VAL_32:.*]] = sub i64 {{.*}}, 1 + !CHECK: br label {{.*}} + end do +end subroutine ivdep_test2 + + +! CHECK-LABEL: ivdep_test3 +subroutine ivdep_test3 + integer :: a(10), b(10), c(10) + !dir$ ivdep + ! CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[ANNOTATION2:.*]] + do i=1,10 + a(i)=b(i)+c(i) + call foo() + !CHECK: store i32 {{.*}}, ptr {{.*}}, align 4, !llvm.access.group [[DISTRINCT2:.*]] + !CHECK: %[[VAL_10:.*]] = load i32, ptr {{.*}}, align 4, !llvm.access.group [[DISTRINCT2]] + !CHECK: %[[VAL_11:.*]] = sext i32 %[[VAL_10]] to i64 + !CHECK: %[[VAL_12:.*]] = sub nsw i64 %[[VAL_11]], 1 + !CHECK: %[[VAL_13:.*]] = mul nsw i64 %[[VAL_12]], 1 + !CHECK: %[[VAL_14:.*]] = mul nsw i64 %[[VAL_13]], 1 + !CHECK: %[[VAL_15:.*]] = add nsw i64 %[[VAL_14]], 0 + !CHECK: %[[VAL_16:.*]] = getelementptr i32, ptr {{.*}}, i64 %[[VAL_15]] + !CHECK: %[[VAL_17:.*]] = load i32, ptr {{.*}}, align 4, !llvm.access.group [[DISTRINCT2]] + !CHECK: %[[VAL_18:.*]] = sub nsw i64 %[[VAL_11]], 1 + !CHECK: %[[VAL_19:.*]] = mul nsw i64 %[[VAL_18]], 1 + !CHECK: %[[VAL_20:.*]] = mul nsw i64 %[[VAL_19]], 1 + !CHECK: %[[VAL_21:.*]] = add nsw i64 %[[VAL_20]], 0 + !CHECK: %[[VAL_22:.*]] = getelementptr i32, ptr {{.*}}, i64 %[[VAL_21]] + !CHECK: %[[VAL_23:.*]] = load i32, ptr {{.*}}, align 4, !llvm.access.group [[DISTRINCT2]] + !CHECK: %[[VAL_24:.*]] = add i32 %[[VAL_17]], %[[VAL_23]] + !CHECK: %[[VAL_25:.*]] = sub nsw i64 %[[VAL_11]], 1 + !CHECK: %[[VAL_26:.*]] = mul nsw i64 %[[VAL_25]], 1 + !CHECK: %[[VAL_27:.*]] = mul nsw i64 %[[VAL_26]], 1 + !CHECK: %[[VAL_28:.*]] = add nsw i64 %[[VAL_27]], 0 + !CHECK: %[[VAL_29:.*]] = getelementptr i32, ptr {{.*}}, i64 %[[VAL_28]] + !CHECK: store i32 %[[VAL_24]], ptr %[[VAL_29]], align 4, !llvm.access.group [[DISTRINCT2]] + !CHECK: call void @_QFivdep_test3Pfoo(), !llvm.access.group [[DISTRINCT2]] + !CHECK: %[[VAL_30:.*]] = load i32, ptr {{.*}}, align 4, !llvm.access.group [[DISTRINCT2]] + !CHECK: %[[VAL_31:.*]] = add nsw i32 %[[VAL_30]], 1 + !CHECK: %[[VAL_32:.*]] = sub i64 {{.*}}, 1 + !CHECK: br label {{.*}} + end do + contains + subroutine foo() + end subroutine +end subroutine ivdep_test3 + +! CHECK: ![[ANNOTATION]] = distinct !{![[ANNOTATION]], ![[VECTORIZE:.*]], ![[PARALLEL_ACCESSES:.*]]} +! CHECK: ![[VECTORIZE]] = !{!"llvm.loop.vectorize.enable", i1 true} +! CHECK: ![[PARALLEL_ACCESSES]] = !{!"llvm.loop.parallel_accesses", [[DISTRINCT]]} +! CHECK: [[DISTRINCT]] = distinct !{} +! CHECK: ![[ANNOTATION1]] = distinct !{![[ANNOTATION1]], ![[VECTORIZE:.*]], ![[PARALLEL_ACCESSES1:.*]]} +! CHECK: ![[PARALLEL_ACCESSES1]] = !{!"llvm.loop.parallel_accesses", [[DISTRINCT1]]} +! CHECK: [[DISTRINCT1]] = distinct !{} +! CHECK: ![[ANNOTATION2]] = distinct !{![[ANNOTATION2]], ![[VECTORIZE:.*]], ![[PARALLEL_ACCESSES2:.*]]} +! CHECK: ![[PARALLEL_ACCESSES2]] = !{!"llvm.loop.parallel_accesses", [[DISTRINCT2]]} +! CHECK: [[DISTRINCT2]] = distinct !{} diff --git a/flang/test/Lower/ivdep.f90 b/flang/test/Lower/ivdep.f90 new file mode 100644 index 0000000000000..a3cf22d2e970f --- /dev/null +++ b/flang/test/Lower/ivdep.f90 @@ -0,0 +1,96 @@ +! RUN: %flang_fc1 -emit-hlfir -o - %s | FileCheck %s + +! CHECK: #access_group = #llvm.access_group> +! CHECK: #access_group1 = #llvm.access_group> +! CHECK: #access_group2 = #llvm.access_group> +! CHECK: #loop_vectorize = #llvm.loop_vectorize +! CHECK: #loop_annotation = #llvm.loop_annotation +! CHECK: #loop_annotation1 = #llvm.loop_annotation +! CHECK: #loop_annotation2 = #llvm.loop_annotation + +! CHECK-LABEL: ivdep_test1 +subroutine ivdep_test1 + integer :: a(10) + !dir$ ivdep + !CHECK: fir.do_loop {{.*}} attributes {loopAnnotation = #loop_annotation} + do i=1,10 + a(i)=i + !CHECK: fir.store %[[ARG1:.*]] to %[[VAL_4:.*]]#0 {accessGroups = [#access_group]} + !CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_4]]#0 {accessGroups = [#access_group]} + !CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_4]]#0 {accessGroups = [#access_group]} + !CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (i32) -> i64 + !CHECK: %[[VAL_12:.*]] = hlfir.designate %[[VAL_2:.*]]#0 (%[[VAL_11]]) : (!fir.ref>, i64) + !CHECK: hlfir.assign %[[VAL_9]] to %[[VAL_12]] {access_groups = [#access_group]} : i32, !fir.ref + !CHECK: %[[VAL_13:.*]] = arith.addi %[[ARG0:.*]], %[[C1:.*]] overflow : index + !CHECK: %[[VAL_14:.*]] = fir.convert %[[C1]] : (index) -> i32 + !CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_4]]#0 {accessGroups = [#access_group]} + !CHECK: %[[VAL_16:.*]] = arith.addi %[[VAL_15]], %[[VAL_14]] overflow : i32 + !CHECK: fir.result %[[VAL_13]], %[[VAL_16]] + end do +end subroutine ivdep_test1 + + +! CHECK-LABEL: ivdep_test2 +subroutine ivdep_test2 + integer :: a(10), b(10), c(10) + !dir$ ivdep + !dir$ unknown + !CHECK: fir.do_loop {{.*}} attributes {loopAnnotation = #loop_annotation1} + do i=1,10 + a(i)=b(i)+c(i) + !CHECK: fir.store %[[ARG1:.*]] to %[[VAL_10:.*]]#0 {accessGroups = [#access_group1]} + !CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_10]]#0 {accessGroups = [#access_group1]} + !CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64 + !CHECK: %[[VAL_17:.*]] = hlfir.designate %[[VAL_5:.*]]#0 (%[[VAL_16]]) : (!fir.ref>, i64) + !CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_17]] {accessGroups = [#access_group1]} + !CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_10]]#0 {accessGroups = [#access_group1]} + !CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (i32) -> i64 + !CHECK: %[[VAL_21:.*]] = hlfir.designate %[[VAL_8:.*]]#0 (%[[VAL_20]]) : (!fir.ref>, i64) + !CHECK: %[[VAL_22:.*]] = fir.load %[[VAL_21]] {accessGroups = [#access_group1]} + !CHECK: %[[VAL_23:.*]] = arith.addi %[[VAL_18]], %[[VAL_22]] : i32 + !CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_10]]#0 {accessGroups = [#access_group1]} + !CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (i32) -> i64 + !CHECK: %[[VAL_26:.*]] = hlfir.designate %[[VAL_2:.*]]#0 (%[[VAL_25]]) : (!fir.ref>, i64) + !CHECK: hlfir.assign %[[VAL_23]] to %[[VAL_26]] {access_groups = [#access_group1]} : i32, !fir.ref + !CHECK: %[[VAL_27:.*]] = arith.addi %[[ARG0:.*]], %[[C1:.*]] overflow : index + !CHECK: %[[VAL_28:.*]] = fir.convert %[[C1]] : (index) -> i32 + !CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_10]]#0 {accessGroups = [#access_group1]} + !CHECK: %[[VAL_30:.*]] = arith.addi %[[VAL_29]], %[[VAL_28]] overflow : i32 + !CHECK: fir.result %[[VAL_27]], %[[VAL_30]] + end do +end subroutine ivdep_test2 + + +! CHECK-LABEL: ivdep_test3 +subroutine ivdep_test3 + integer :: a(10), b(10), c(10) + !dir$ ivdep + !CHECK: fir.do_loop {{.*}} attributes {loopAnnotation = #loop_annotation2} + do i=1,10 + a(i)=b(i)+c(i) + call foo() + !CHECK: fir.store %[[ARG1:.*]] to %[[VAL_10:.*]]#0 {accessGroups = [#access_group2]} + !CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_10]]#0 {accessGroups = [#access_group2]} + !CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64 + !CHECK: %[[VAL_17:.*]] = hlfir.designate %[[VAL_5:.*]]#0 (%[[VAL_16]]) : (!fir.ref>, i64) + !CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_17]] {accessGroups = [#access_group2]} + !CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_10]]#0 {accessGroups = [#access_group2]} + !CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (i32) -> i64 + !CHECK: %[[VAL_21:.*]] = hlfir.designate %[[VAL_8:.*]]#0 (%[[VAL_20]]) : (!fir.ref>, i64) + !CHECK: %[[VAL_22:.*]] = fir.load %[[VAL_21]] {accessGroups = [#access_group2]} + !CHECK: %[[VAL_23:.*]] = arith.addi %[[VAL_18]], %[[VAL_22]] : i32 + !CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_10]]#0 {accessGroups = [#access_group2]} + !CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (i32) -> i64 + !CHECK: %[[VAL_26:.*]] = hlfir.designate %[[VAL_2:.*]]#0 (%[[VAL_25]]) : (!fir.ref>, i64) + !CHECK: hlfir.assign %[[VAL_23]] to %[[VAL_26]] {access_groups = [#access_group2]} : i32, !fir.ref + !CHECK: fir.call @_QFivdep_test3Pfoo() fastmath {accessGroups = [#access_group2]} + !CHECK: %[[VAL_27:.*]] = arith.addi %[[ARG0:.*]], %[[C1:.*]] overflow : index + !CHECK: %[[VAL_28:.*]] = fir.convert %[[C1]] : (index) -> i32 + !CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_10]]#0 {accessGroups = [#access_group2]} + !CHECK: %[[VAL_30:.*]] = arith.addi %[[VAL_29]], %[[VAL_28]] overflow : i32 + !CHECK: fir.result %[[VAL_27]], %[[VAL_30]] + end do + contains + subroutine foo() + end subroutine +end subroutine ivdep_test3 diff --git a/flang/test/Parser/compiler-directives.f90 b/flang/test/Parser/compiler-directives.f90 index d1e386a01dd4d..5822be0df5bc1 100644 --- a/flang/test/Parser/compiler-directives.f90 +++ b/flang/test/Parser/compiler-directives.f90 @@ -57,3 +57,10 @@ subroutine unroll_and_jam do i=1,10 enddo end subroutine + +subroutine ivdep + !dir$ ivdep + ! CHECK: !DIR$ IVDEP + do i=1,10 + enddo +end subroutine