diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp index b852b9fbc9c52..725e0e2adb821 100644 --- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp @@ -962,7 +962,8 @@ class AArch64MCPlusBuilder : public MCPlusBuilder { if (isTB(Inst) || isCB(Inst)) { Inst.setOpcode(getInvertedBranchOpcode(Inst.getOpcode())); assert(Inst.getOpcode() != 0 && "Invalid branch instruction"); - } else if (Inst.getOpcode() == AArch64::Bcc) { + } else if (Inst.getOpcode() == AArch64::Bcc || + Inst.getOpcode() == AArch64::BCcc) { Inst.getOperand(0).setImm(AArch64CC::getInvertedCondCode( static_cast(Inst.getOperand(0).getImm()))); assert(Inst.getOperand(0).getImm() != AArch64CC::AL && @@ -991,6 +992,8 @@ class AArch64MCPlusBuilder : public MCPlusBuilder { case AArch64::B: return 28; case AArch64::BL: return 28; case AArch64::Bcc: return 21; + case AArch64::BCcc: + return 21; } } diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 294210c6ac140..8f68f144691cc 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -3102,6 +3102,35 @@ flow conditions such as in ``if`` and ``switch`` statements. Query for this feature with ``__has_builtin(__builtin_unpredictable)``. +``__builtin_consistent`` +------------------------ + +``__builtin_consistent`` is used to indicate that the value of an expression is +very likely to be consistent, e.g. a branch that uses the expression as its +condition will behave very consistently and is very unlikely to change direction. + +**Syntax**: + +.. code-block:: c++ + + __builtin_consistent(long long) + +**Example of use**: + +.. 
code-block:: c++ + + if (__builtin_consistent(x > 0)) { + foo(); + } + +**Description**: + +The ``__builtin_consistent()`` builtin used with control flow conditions will +provide information about consistent branch behavior, which can be used to +generate more efficient conditional branch instructions if the target supports them +(like AArch64 FEAT_HBC ``BC.cond``). + +Query for this feature with ``__has_builtin(__builtin_consistent)``. ``__builtin_expect`` -------------------- diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def index ec39e926889b9..948964c90dc76 100644 --- a/clang/include/clang/Basic/Builtins.def +++ b/clang/include/clang/Basic/Builtins.def @@ -667,8 +667,9 @@ BUILTIN(__builtin___printf_chk, "iicC*R.", "Fp:1:") BUILTIN(__builtin___vfprintf_chk, "iP*RicC*Ra", "FP:2:") BUILTIN(__builtin___vprintf_chk, "iicC*Ra", "FP:1:") -BUILTIN(__builtin_unpredictable, "LiLi" , "nc") -BUILTIN(__builtin_expect, "LiLiLi" , "ncE") +BUILTIN(__builtin_unpredictable, "LiLi", "nc") +BUILTIN(__builtin_consistent, "LiLi", "nc") +BUILTIN(__builtin_expect, "LiLiLi", "ncE") BUILTIN(__builtin_expect_with_probability, "LiLiLid", "ncE") BUILTIN(__builtin_prefetch, "vvC*.", "nc") BUILTIN(__builtin_readcyclecounter, "ULLi", "n") diff --git a/clang/lib/Analysis/CalledOnceCheck.cpp b/clang/lib/Analysis/CalledOnceCheck.cpp index 5b4fc24b6f0e2..d1725f6151dba 100644 --- a/clang/lib/Analysis/CalledOnceCheck.cpp +++ b/clang/lib/Analysis/CalledOnceCheck.cpp @@ -365,6 +365,7 @@ class DeclRefFinder } case Builtin::BI__builtin_unpredictable: + case Builtin::BI__builtin_consistent: return Visit(CE->getArg(0)); default: diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 09309a3937fb6..63ccd42f9e313 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -3137,10 +3137,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, "cast"); return RValue::get(Result); } - 
case Builtin::BI__builtin_unpredictable: { - // Always return the argument of __builtin_unpredictable. LLVM does not - // handle this builtin. Metadata for this builtin should be added directly - // to instructions such as branches or switches that use it. + case Builtin::BI__builtin_unpredictable: + case Builtin::BI__builtin_consistent: { + // Always return the argument of __builtin_unpredictable and + // __builtin_consistent. LLVM does not handle these builtins. Metadata for + // these builtins should be added directly to instructions such as branches + // or switches that use it. return RValue::get(EmitScalarExpr(E->getArg(0))); } case Builtin::BI__builtin_expect: { diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index c719df1bfa050..2aa4b8253c7af 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -2059,16 +2059,21 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) { EmitBlock(SwitchExit.getBlock(), true); incrementProfileCounter(&S); - // If the switch has a condition wrapped by __builtin_unpredictable, - // create metadata that specifies that the switch is unpredictable. - // Don't bother if not optimizing because that metadata would not be used. + // If the switch has a condition wrapped by __builtin_unpredictable or + // __builtin_consistent, create metadata that specifies that the switch is + // unpredictable or consistent correspondingly. Don't bother if not optimizing + // because that metadata would not be used. 
auto *Call = dyn_cast(S.getCond()); if (Call && CGM.getCodeGenOpts().OptimizationLevel != 0) { auto *FD = dyn_cast_or_null(Call->getCalleeDecl()); - if (FD && FD->getBuiltinID() == Builtin::BI__builtin_unpredictable) { + if (FD) { llvm::MDBuilder MDHelper(getLLVMContext()); - SwitchInsn->setMetadata(llvm::LLVMContext::MD_unpredictable, - MDHelper.createUnpredictable()); + if (FD->getBuiltinID() == Builtin::BI__builtin_unpredictable) + SwitchInsn->setMetadata(llvm::LLVMContext::MD_unpredictable, + MDHelper.createUnpredictable()); + if (FD->getBuiltinID() == Builtin::BI__builtin_consistent) + SwitchInsn->setMetadata(llvm::LLVMContext::MD_consistent, + MDHelper.createConsistent()); } } diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 0f2b9055b88eb..a372913f8732c 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -1908,16 +1908,21 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, llvm::MDNode *Weights = nullptr; llvm::MDNode *Unpredictable = nullptr; + llvm::MDNode *Consistent = nullptr; - // If the branch has a condition wrapped by __builtin_unpredictable, - // create metadata that specifies that the branch is unpredictable. - // Don't bother if not optimizing because that metadata would not be used. + // If the branch has a condition wrapped by __builtin_unpredictable or + // __builtin_consistent, create metadata that specifies that the branch is + // unpredictable or consistent correspondingly. Don't bother if not optimizing + // because that metadata would not be used. 
auto *Call = dyn_cast(Cond->IgnoreImpCasts()); if (Call && CGM.getCodeGenOpts().OptimizationLevel != 0) { auto *FD = dyn_cast_or_null(Call->getCalleeDecl()); - if (FD && FD->getBuiltinID() == Builtin::BI__builtin_unpredictable) { + if (FD) { llvm::MDBuilder MDHelper(getLLVMContext()); - Unpredictable = MDHelper.createUnpredictable(); + if (FD->getBuiltinID() == Builtin::BI__builtin_unpredictable) + Unpredictable = MDHelper.createUnpredictable(); + if (FD->getBuiltinID() == Builtin::BI__builtin_consistent) + Consistent = MDHelper.createConsistent(); } } @@ -1932,7 +1937,8 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, Weights = createProfileWeights(TrueCount, CurrentCount - TrueCount); } - Builder.CreateCondBr(CondV, TrueBlock, FalseBlock, Weights, Unpredictable); + Builder.CreateCondBr(CondV, TrueBlock, FalseBlock, Weights, Unpredictable, + Consistent); } /// ErrorUnsupported - Print out an error that codegen doesn't support the diff --git a/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp index 4a56156de4b27..3de5c6676a732 100644 --- a/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp @@ -63,12 +63,13 @@ bool BuiltinFunctionChecker::evalCall(const CallEvent &Call, } case Builtin::BI__builtin_unpredictable: + case Builtin::BI__builtin_consistent: case Builtin::BI__builtin_expect: case Builtin::BI__builtin_expect_with_probability: case Builtin::BI__builtin_assume_aligned: case Builtin::BI__builtin_addressof: case Builtin::BI__builtin_function_start: { - // For __builtin_unpredictable, __builtin_expect, + // For __builtin_unpredictable, __builtin_consistent, __builtin_expect, // __builtin_expect_with_probability and __builtin_assume_aligned, // just return the value of the subexpression. 
// __builtin_addressof is going from a reference to a pointer, but those diff --git a/clang/test/CodeGen/builtin-consistent.c b/clang/test/CodeGen/builtin-consistent.c new file mode 100644 index 0000000000000..3f1be494acf14 --- /dev/null +++ b/clang/test/CodeGen/builtin-consistent.c @@ -0,0 +1,37 @@ +// RUN: %clang_cc1 -triple aarch64-unknown-unknown -emit-llvm -disable-llvm-passes -o - %s -O1 | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-unknown-unknown -emit-llvm -o - %s -O0 | FileCheck %s --check-prefix=CHECK_O0 + +void f(void); +void g(void); +void consistent_branch(int x) { +// CHECK-LABEL: define{{.*}} void @consistent_branch( +// CHECK-NOT: builtin_consistent +// CHECK: !consistent [[METADATA:.+]] +// CHECK_O0-NOT: builtin_consistent +// CHECK_O0-NOT: !consistent + if (__builtin_consistent(x > 0)) + f(); + + if (x || __builtin_consistent(x != 0)) + g(); +} + +int consistent_switch(int x) { +// CHECK-LABEL: @consistent_switch( +// CHECK-NOT: builtin_consistent +// CHECK: !consistent [[METADATA:.+]] +// CHECK_O0-NOT: builtin_consistent +// CHECK_O0-NOT: !consistent + switch(__builtin_consistent(x)) { + default: + return x; + case 0: + case 1: + case 2: + return 1; + case 3: + return x-1; + }; +} +// CHECK: [[METADATA]] = !{i1 true} + diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h index bd72ac23fc9c0..1fa27026d80f8 100644 --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -114,6 +114,7 @@ class MachineInstr // this instruction. Unpredictable = 1 << 16, // Instruction with unpredictable condition. NoConvergent = 1 << 17, // Call does not require convergence guarantees. + Consistent = 1 << 18, // Instruction condition behaves consistently. 
}; private: diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index 4df56aac4aa17..58680867953f8 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -398,6 +398,8 @@ struct SDNodeFlags { bool NoFPExcept : 1; // Instructions with attached 'unpredictable' metadata on IR level. bool Unpredictable : 1; + // Instructions with attached 'consistent' metadata on IR level. + bool Consistent : 1; public: /// Default constructor turns off all optimization flags. @@ -405,7 +407,8 @@ struct SDNodeFlags { : NoUnsignedWrap(false), NoSignedWrap(false), Exact(false), NonNeg(false), NoNaNs(false), NoInfs(false), NoSignedZeros(false), AllowReciprocal(false), AllowContract(false), ApproximateFuncs(false), - AllowReassociation(false), NoFPExcept(false), Unpredictable(false) {} + AllowReassociation(false), NoFPExcept(false), Unpredictable(false), + Consistent(false) {} /// Propagate the fast-math-flags from an IR FPMathOperator. void copyFMF(const FPMathOperator &FPMO) { @@ -432,6 +435,7 @@ struct SDNodeFlags { void setAllowReassociation(bool b) { AllowReassociation = b; } void setNoFPExcept(bool b) { NoFPExcept = b; } void setUnpredictable(bool b) { Unpredictable = b; } + void setConsistent(bool b) { Consistent = b; } // These are accessors for each flag. bool hasNoUnsignedWrap() const { return NoUnsignedWrap; } @@ -447,6 +451,7 @@ struct SDNodeFlags { bool hasAllowReassociation() const { return AllowReassociation; } bool hasNoFPExcept() const { return NoFPExcept; } bool hasUnpredictable() const { return Unpredictable; } + bool hasConsistent() const { return Consistent; } /// Clear any flags in this flag set that aren't also set in Flags. All /// flags will be cleared if Flags are undefined. 
@@ -464,6 +469,7 @@ struct SDNodeFlags { AllowReassociation &= Flags.AllowReassociation; NoFPExcept &= Flags.NoFPExcept; Unpredictable &= Flags.Unpredictable; + Consistent &= Flags.Consistent; } }; diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 8e7499ac626a7..e893baa90c14d 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -698,7 +698,8 @@ class TargetInstrInfo : public MCInstrInfo { /// If \p BytesRemoved is non-null, report the change in code size from the /// removed instructions. virtual unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const { + int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const { llvm_unreachable("Target didn't implement TargetInstrInfo::removeBranch!"); } @@ -718,8 +719,8 @@ class TargetInstrInfo : public MCInstrInfo { virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const { + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const { llvm_unreachable("Target didn't implement TargetInstrInfo::insertBranch!"); } diff --git a/llvm/include/llvm/IR/FixedMetadataKinds.def b/llvm/include/llvm/IR/FixedMetadataKinds.def index b375d0f091206..6dd0627c028bd 100644 --- a/llvm/include/llvm/IR/FixedMetadataKinds.def +++ b/llvm/include/llvm/IR/FixedMetadataKinds.def @@ -51,3 +51,4 @@ LLVM_FIXED_MD_KIND(MD_kcfi_type, "kcfi_type", 36) LLVM_FIXED_MD_KIND(MD_pcsections, "pcsections", 37) LLVM_FIXED_MD_KIND(MD_DIAssignID, "DIAssignID", 38) LLVM_FIXED_MD_KIND(MD_coro_outside_frame, "coro.outside.frame", 39) +LLVM_FIXED_MD_KIND(MD_consistent, "consistent", 40) diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index e3c4e76f90a4c..0cd6a73408457 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ 
-1064,15 +1064,18 @@ class IRBuilderBase { //===--------------------------------------------------------------------===// private: - /// Helper to add branch weight and unpredictable metadata onto an - /// instruction. + /// Helper to add branch weight, unpredictable and consistent metadata onto + /// an instruction. /// \returns The annotated instruction. template - InstTy *addBranchMetadata(InstTy *I, MDNode *Weights, MDNode *Unpredictable) { + InstTy *addBranchMetadata(InstTy *I, MDNode *Weights, MDNode *Unpred, + MDNode *Consist) { if (Weights) I->setMetadata(LLVMContext::MD_prof, Weights); - if (Unpredictable) - I->setMetadata(LLVMContext::MD_unpredictable, Unpredictable); + if (Unpred) + I->setMetadata(LLVMContext::MD_unpredictable, Unpred); + if (Consist) + I->setMetadata(LLVMContext::MD_consistent, Consist); return I; } @@ -1110,9 +1113,10 @@ class IRBuilderBase { /// instruction. BranchInst *CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights = nullptr, - MDNode *Unpredictable = nullptr) { + MDNode *Unpredictable = nullptr, + MDNode *Consistent = nullptr) { return Insert(addBranchMetadata(BranchInst::Create(True, False, Cond), - BranchWeights, Unpredictable)); + BranchWeights, Unpredictable, Consistent)); } /// Create a conditional 'br Cond, TrueDest, FalseDest' @@ -1121,7 +1125,8 @@ class IRBuilderBase { Instruction *MDSrc) { BranchInst *Br = BranchInst::Create(True, False, Cond); if (MDSrc) { - unsigned WL[4] = {LLVMContext::MD_prof, LLVMContext::MD_unpredictable, + unsigned WL[5] = {LLVMContext::MD_prof, LLVMContext::MD_unpredictable, + LLVMContext::MD_consistent, LLVMContext::MD_make_implicit, LLVMContext::MD_dbg}; Br->copyMetadata(*MDSrc, WL); } @@ -1133,9 +1138,10 @@ class IRBuilderBase { /// allocation). 
SwitchInst *CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases = 10, MDNode *BranchWeights = nullptr, - MDNode *Unpredictable = nullptr) { + MDNode *Unpredictable = nullptr, + MDNode *Consistent = nullptr) { return Insert(addBranchMetadata(SwitchInst::Create(V, Dest, NumCases), - BranchWeights, Unpredictable)); + BranchWeights, Unpredictable, Consistent)); } /// Create an indirect branch instruction with the specified address diff --git a/llvm/include/llvm/IR/MDBuilder.h b/llvm/include/llvm/IR/MDBuilder.h index 39165453de16b..f3ec99568cfa4 100644 --- a/llvm/include/llvm/IR/MDBuilder.h +++ b/llvm/include/llvm/IR/MDBuilder.h @@ -67,6 +67,9 @@ class MDBuilder { /// Return metadata specifying that a branch or switch is unpredictable. MDNode *createUnpredictable(); + /// Return metadata specifying that a branch or switch behaves consistently. + MDNode *createConsistent(); + /// Return metadata containing the entry \p Count for a function, a boolean /// \Synthetic indicating whether the counts were synthetized, and the /// GUIDs stored in \p Imports that need to be imported for sample PGO, to diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp index 0801296cab49f..e8fca44a816d9 100644 --- a/llvm/lib/CodeGen/BranchFolding.cpp +++ b/llvm/lib/CodeGen/BranchFolding.cpp @@ -465,8 +465,10 @@ static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB, MachineBasicBlock *NextBB = &*I; if (TBB == NextBB && !Cond.empty() && !FBB) { if (!TII->reverseBranchCondition(Cond)) { - TII->removeBranch(*CurMBB); - TII->insertBranch(*CurMBB, SuccBB, nullptr, Cond, dl); + bool IsConsistent = false; + TII->removeBranch(*CurMBB, nullptr, &IsConsistent); + TII->insertBranch(*CurMBB, SuccBB, nullptr, Cond, dl, nullptr, + IsConsistent); return; } } @@ -1116,12 +1118,13 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // Remove the unconditional branch at the end, if any. 
if (TBB && (Cond.empty() || FBB)) { + bool IsConsistent = false; DebugLoc dl = PBB->findBranchDebugLoc(); - TII->removeBranch(*PBB); + TII->removeBranch(*PBB, nullptr, &IsConsistent); if (!Cond.empty()) // reinsert conditional branch only, for now - TII->insertBranch(*PBB, (TBB == IBB) ? FBB : TBB, nullptr, - NewCond, dl); + TII->insertBranch(*PBB, (TBB == IBB) ? FBB : TBB, nullptr, NewCond, + dl, nullptr, IsConsistent); } MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(*PBB), PBB)); @@ -1443,9 +1446,11 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { // If the prior block branches somewhere else on the condition and here if // the condition is false, remove the uncond second branch. if (PriorFBB == MBB) { + bool IsConsistent = false; DebugLoc dl = getBranchDebugLoc(PrevBB); - TII->removeBranch(PrevBB); - TII->insertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl); + TII->removeBranch(PrevBB, nullptr, &IsConsistent); + TII->insertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl, nullptr, + IsConsistent); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1457,9 +1462,11 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { if (PriorTBB == MBB) { SmallVector NewPriorCond(PriorCond); if (!TII->reverseBranchCondition(NewPriorCond)) { + bool IsConsistent = false; DebugLoc dl = getBranchDebugLoc(PrevBB); - TII->removeBranch(PrevBB); - TII->insertBranch(PrevBB, PriorFBB, nullptr, NewPriorCond, dl); + TII->removeBranch(PrevBB, nullptr, &IsConsistent); + TII->insertBranch(PrevBB, PriorFBB, nullptr, NewPriorCond, dl, nullptr, + IsConsistent); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1495,9 +1502,11 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { LLVM_DEBUG(dbgs() << "\nMoving MBB: " << *MBB << "To make fallthrough to: " << *PriorTBB << "\n"); + bool IsConsistent = false; DebugLoc dl = getBranchDebugLoc(PrevBB); - TII->removeBranch(PrevBB); - TII->insertBranch(PrevBB, MBB, nullptr, NewPriorCond, dl); 
+ TII->removeBranch(PrevBB, nullptr, &IsConsistent); + TII->insertBranch(PrevBB, MBB, nullptr, NewPriorCond, dl, nullptr, + IsConsistent); // Move this block to the end of the function. MBB->moveAfter(&MF.back()); @@ -1558,9 +1567,11 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) { SmallVector NewCond(CurCond); if (!TII->reverseBranchCondition(NewCond)) { + bool IsConsistent = false; DebugLoc dl = getBranchDebugLoc(*MBB); - TII->removeBranch(*MBB); - TII->insertBranch(*MBB, CurFBB, CurTBB, NewCond, dl); + TII->removeBranch(*MBB, nullptr, &IsConsistent); + TII->insertBranch(*MBB, CurFBB, CurTBB, NewCond, dl, nullptr, + IsConsistent); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1606,9 +1617,11 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { assert(!PriorFBB && "Machine CFG out of date!"); PriorFBB = MBB; } + bool IsConsistent = false; DebugLoc pdl = getBranchDebugLoc(PrevBB); - TII->removeBranch(PrevBB); - TII->insertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, pdl); + TII->removeBranch(PrevBB, nullptr, &IsConsistent); + TII->insertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, pdl, + nullptr, IsConsistent); } // Iterate through all the predecessors, revectoring each in-turn. @@ -1654,7 +1667,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { } // Add the branch back if the block is more than just an uncond branch. 
- TII->insertBranch(*MBB, CurTBB, nullptr, CurCond, dl); + TII->insertBranch(*MBB, CurTBB, nullptr, CurCond, dl, nullptr, 0); } } diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp index fee237104022e..75db4141e486f 100644 --- a/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/llvm/lib/CodeGen/MIRPrinter.cpp @@ -805,6 +805,8 @@ void MIPrinter::print(const MachineInstr &MI) { OS << "unpredictable "; if (MI.getFlag(MachineInstr::NoConvergent)) OS << "noconvergent "; + if (MI.getFlag(MachineInstr::Consistent)) + OS << "consistent "; OS << TII->getName(MI.getOpcode()); if (I < E) diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index d9e22685faf5f..03cc56313d54d 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -719,23 +719,25 @@ void MachineBasicBlock::updateTerminator( // If the unconditional successor block is not the current layout // successor, insert a branch to jump to it. if (!isLayoutSuccessor(PreviousLayoutSuccessor)) - TII->insertBranch(*this, PreviousLayoutSuccessor, nullptr, Cond, DL); + TII->insertBranch(*this, PreviousLayoutSuccessor, nullptr, Cond, DL, + nullptr, 0); } return; } if (FBB) { + bool IsConsistent = false; // The block has a non-fallthrough conditional branch. If one of its // successors is its layout successor, rewrite it to a fallthrough // conditional branch. 
if (isLayoutSuccessor(TBB)) { if (TII->reverseBranchCondition(Cond)) return; - TII->removeBranch(*this); - TII->insertBranch(*this, FBB, nullptr, Cond, DL); + TII->removeBranch(*this, nullptr, &IsConsistent); + TII->insertBranch(*this, FBB, nullptr, Cond, DL, nullptr, IsConsistent); } else if (isLayoutSuccessor(FBB)) { - TII->removeBranch(*this); - TII->insertBranch(*this, TBB, nullptr, Cond, DL); + TII->removeBranch(*this, nullptr, &IsConsistent); + TII->insertBranch(*this, TBB, nullptr, Cond, DL, nullptr, IsConsistent); } return; } @@ -757,6 +759,7 @@ void MachineBasicBlock::updateTerminator( return; } + bool IsConsistent = false; // The block has a fallthrough conditional branch. if (isLayoutSuccessor(TBB)) { if (TII->reverseBranchCondition(Cond)) { @@ -765,11 +768,13 @@ void MachineBasicBlock::updateTerminator( TII->insertBranch(*this, PreviousLayoutSuccessor, nullptr, Cond, DL); return; } - TII->removeBranch(*this); - TII->insertBranch(*this, PreviousLayoutSuccessor, nullptr, Cond, DL); + TII->removeBranch(*this, nullptr, &IsConsistent); + TII->insertBranch(*this, PreviousLayoutSuccessor, nullptr, Cond, DL, + nullptr, IsConsistent); } else if (!isLayoutSuccessor(PreviousLayoutSuccessor)) { - TII->removeBranch(*this); - TII->insertBranch(*this, TBB, PreviousLayoutSuccessor, Cond, DL); + TII->removeBranch(*this, nullptr, &IsConsistent); + TII->insertBranch(*this, TBB, PreviousLayoutSuccessor, Cond, DL, nullptr, + IsConsistent); } } @@ -1218,7 +1223,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge( SlotIndexUpdateDelegate SlotUpdater(*MF, Indexes); SmallVector Cond; const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo(); - TII->insertBranch(*NMBB, Succ, nullptr, Cond, DL); + TII->insertBranch(*NMBB, Succ, nullptr, Cond, DL, nullptr, 0); } // Fix PHI nodes in Succ so they refer to NMBB instead of this. 
diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index 9e7b4df2576fe..520a31ff8e1be 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -586,6 +586,9 @@ uint32_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) { if (I.getMetadata(LLVMContext::MD_unpredictable)) MIFlags |= MachineInstr::MIFlag::Unpredictable; + if (I.getMetadata(LLVMContext::MD_consistent)) + MIFlags |= MachineInstr::MIFlag::Consistent; + return MIFlags; } @@ -1693,6 +1696,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << "nofpexcept "; if (getFlag(MachineInstr::NoMerge)) OS << "nomerge "; + if (getFlag(MachineInstr::Consistent)) + OS << "consistent "; // Print the opcode name. if (TII) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index a867d88f76c0c..3dec1f636a6f1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -17702,6 +17702,9 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { } SDValue DAGCombiner::visitBRCOND(SDNode *N) { + unsigned BrOpcode = N->getOpcode(); + SDNodeFlags Flags; + Flags.setConsistent(N->getFlags().hasConsistent()); SDValue Chain = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); @@ -17709,8 +17712,8 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are // nondeterministic jumps). 
if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) { - return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, - N1->getOperand(0), N2); + return DAG.getNode(BrOpcode, SDLoc(N), MVT::Other, Chain, N1->getOperand(0), + N2, Flags); } // Variant of the previous fold where there is a SETCC in between: @@ -17758,8 +17761,9 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { if (Updated) return DAG.getNode( - ISD::BRCOND, SDLoc(N), MVT::Other, Chain, - DAG.getSetCC(SDLoc(N1), N1->getValueType(0), S0, S1, Cond), N2); + BrOpcode, SDLoc(N), MVT::Other, Chain, + DAG.getSetCC(SDLoc(N1), N1->getValueType(0), S0, S1, Cond), N2, + Flags); } // If N is a constant we could fold this into a fallthrough or unconditional @@ -17773,9 +17777,9 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { if (N1.getOpcode() == ISD::SETCC && TLI.isOperationLegalOrCustom(ISD::BR_CC, N1.getOperand(0).getValueType())) { - return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, - Chain, N1.getOperand(2), - N1.getOperand(0), N1.getOperand(1), N2); + SDValue Ops[] = {Chain, N1.getOperand(2), N1.getOperand(0), + N1.getOperand(1), N2}; + return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, Ops, Flags); } if (N1.hasOneUse()) { @@ -17783,8 +17787,8 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes. 
HandleSDNode ChainHandle(Chain); if (SDValue NewN1 = rebuildSetCC(N1)) - return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, - ChainHandle.getValue(), NewN1, N2); + return DAG.getNode(BrOpcode, SDLoc(N), MVT::Other, ChainHandle.getValue(), + NewN1, N2, Flags); } return SDValue(); @@ -17906,11 +17910,13 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) { if (Simp.getNode()) AddToWorklist(Simp.getNode()); // fold to a simpler setcc - if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC) - return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, - N->getOperand(0), Simp.getOperand(2), - Simp.getOperand(0), Simp.getOperand(1), - N->getOperand(4)); + if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC) { + SDNodeFlags Flags; + Flags.setConsistent(N->getFlags().hasConsistent()); + SDValue Ops[] = {N->getOperand(0), Simp.getOperand(2), Simp.getOperand(0), + Simp.getOperand(1), N->getOperand(4)}; + return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::Other, Ops, Flags); + } return SDValue(); } diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index a27febe15db83..406fae0f22aaf 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -1083,6 +1083,9 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, MI->setFlag(MachineInstr::MIFlag::Unpredictable); } + if (Node->getFlags().hasConsistent()) + MIB.getInstr()->setFlag(MachineInstr::MIFlag::Consistent); + // Emit all of the actual operands of this instruction, adding them to the // instruction as appropriate. bool HasOptPRefs = NumDefs > NumResults; diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index ad5a4506efbd8..433204f4bc188 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1061,7 +1061,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { : Opc == ISD::SETCCCARRY ? 
3 : (Opc == ISD::SETCC || Opc == ISD::VP_SETCC) ? 2 : 1; - unsigned CompareOperand = Opc == ISD::BR_CC ? 2 + unsigned CompareOperand = (Opc == ISD::BR_CC) ? 2 : Opc == ISD::STRICT_FSETCC ? 1 : Opc == ISD::STRICT_FSETCCS ? 1 : 0; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index aab0d5c5a348b..ad8ae961a57fd 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2548,7 +2548,8 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { } // Emit the branch for this block. - visitSwitchCase(SL->SwitchCases[0], BrMBB); + visitSwitchCase(SL->SwitchCases[0], BrMBB, + I.hasMetadata(LLVMContext::MD_consistent)); SL->SwitchCases.erase(SL->SwitchCases.begin()); return; } @@ -2568,13 +2569,14 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // Use visitSwitchCase to actually insert the fast branch sequence for this // cond branch. - visitSwitchCase(CB, BrMBB); + visitSwitchCase(CB, BrMBB, I.hasMetadata(LLVMContext::MD_consistent)); } /// visitSwitchCase - Emits the necessary code to represent a single node in /// the binary search tree resulting from lowering a switch instruction. 
void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, - MachineBasicBlock *SwitchBB) { + MachineBasicBlock *SwitchBB, + bool IsConsistent) { SDValue Cond; SDValue CondLHS = getValue(CB.CmpLHS); SDLoc dl = CB.DL; @@ -2652,9 +2654,10 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True); } - SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, - MVT::Other, getControlRoot(), Cond, - DAG.getBasicBlock(CB.TrueBB)); + SDNodeFlags Flags; + Flags.setConsistent(IsConsistent); + SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, MVT::Other, getControlRoot(), + Cond, DAG.getBasicBlock(CB.TrueBB), Flags); setValue(CurInst, BrCond); @@ -2887,7 +2890,8 @@ SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) { /// visitBitTestHeader - This function emits necessary code to produce value /// suitable for "bit tests" void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, - MachineBasicBlock *SwitchBB) { + MachineBasicBlock *SwitchBB, + bool IsConsistent) { SDLoc dl = getCurSDLoc(); // Subtract the minimum value. @@ -2935,9 +2939,10 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, RangeSub.getValueType()), RangeSub, DAG.getConstant(B.Range, dl, RangeSub.getValueType()), ISD::SETUGT); - + SDNodeFlags Flags; + Flags.setConsistent(IsConsistent); Root = DAG.getNode(ISD::BRCOND, dl, MVT::Other, Root, RangeCmp, - DAG.getBasicBlock(B.Default)); + DAG.getBasicBlock(B.Default), Flags); } // Avoid emitting unnecessary branches to the next block. @@ -3404,6 +3409,9 @@ void SelectionDAGBuilder::visitSelect(const User &I) { Flags.setUnpredictable( cast(I).getMetadata(LLVMContext::MD_unpredictable)); + Flags.setConsistent( + cast(I).getMetadata(LLVMContext::MD_consistent)); + // Min/max matching is only viable if all output VTs are the same. 
if (all_equal(ValueVTs)) { EVT VT = ValueVTs[0]; @@ -11357,9 +11365,12 @@ void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) { HasTailCall = true; } -void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, +void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, + const SwitchInst &SI, MachineBasicBlock *SwitchMBB, MachineBasicBlock *DefaultMBB) { + const Value *Cond = SI.getCondition(); + bool IsConsistent = SI.getMetadata(LLVMContext::MD_consistent); MachineFunction *CurMF = FuncInfo.MF; MachineBasicBlock *NextMBB = nullptr; MachineFunction::iterator BBI(W.MBB); @@ -11412,9 +11423,11 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, addSuccessorWithProb(SwitchMBB, DefaultMBB); // Insert the true branch. + SDNodeFlags Flags; + Flags.setConsistent(IsConsistent); SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other, getControlRoot(), Cond, - DAG.getBasicBlock(Small.MBB)); + DAG.getBasicBlock(Small.MBB), Flags); // Insert the false branch. BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond, DAG.getBasicBlock(DefaultMBB)); @@ -11571,7 +11584,7 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, // If we're in the right place, emit the bit test header right now. 
if (CurMBB == SwitchMBB) { - visitBitTestHeader(*BTB, SwitchMBB); + visitBitTestHeader(*BTB, SwitchMBB, IsConsistent); BTB->Emitted = true; } break; @@ -11602,7 +11615,7 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, getCurSDLoc(), I->Prob, UnhandledProbs); if (CurMBB == SwitchMBB) - visitSwitchCase(CB, SwitchMBB); + visitSwitchCase(CB, SwitchMBB, IsConsistent); else SL->SwitchCases.push_back(CB); @@ -11627,7 +11640,7 @@ unsigned SelectionDAGBuilder::caseClusterRank(const CaseCluster &CC, void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, const SwitchWorkListItem &W, - Value *Cond, + const SwitchInst &SI, MachineBasicBlock *SwitchMBB) { assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) && "Clusters not sorted?"); @@ -11729,7 +11742,7 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, WorkList.push_back( {LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultProb / 2}); // Put Cond in a virtual register to make it available from the new blocks. - ExportFromCurrentBlock(Cond); + ExportFromCurrentBlock(SI.getCondition()); } // Similarly, we will branch to the RHS if Value >= Pivot. If RHS is a @@ -11745,15 +11758,15 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, WorkList.push_back( {RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultProb / 2}); // Put Cond in a virtual register to make it available from the new blocks. - ExportFromCurrentBlock(Cond); + ExportFromCurrentBlock(SI.getCondition()); } // Create the CaseBlock record that will be used to lower the branch. 
- CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB, - getCurSDLoc(), LeftProb, RightProb); + CaseBlock CB(ISD::SETLT, SI.getCondition(), Pivot, nullptr, LeftMBB, RightMBB, + W.MBB, getCurSDLoc(), LeftProb, RightProb); if (W.MBB == SwitchMBB) - visitSwitchCase(CB, SwitchMBB); + visitSwitchCase(CB, SwitchMBB, SI.getMetadata(LLVMContext::MD_consistent)); else SL->SwitchCases.push_back(CB); } @@ -11815,7 +11828,7 @@ MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster( auto PeeledCaseIt = Clusters.begin() + PeeledCaseIndex; SwitchWorkListItem W = {SwitchMBB, PeeledCaseIt, PeeledCaseIt, nullptr, nullptr, TopCaseProb.getCompl()}; - lowerWorkItem(W, SI.getCondition(), SwitchMBB, PeeledSwitchMBB); + lowerWorkItem(W, SI, SwitchMBB, PeeledSwitchMBB); Clusters.erase(PeeledCaseIt); for (CaseCluster &CC : Clusters) { @@ -11908,11 +11921,11 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { if (NumClusters > 3 && TM.getOptLevel() != CodeGenOptLevel::None && !DefaultMBB->getParent()->getFunction().hasMinSize()) { // For optimized builds, lower large range as a balanced binary tree. - splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB); + splitWorkItem(WorkList, W, SI, SwitchMBB); continue; } - lowerWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB); + lowerWorkItem(W, SI, SwitchMBB, DefaultMBB); } } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index a97884f0efb9a..a426c1c871fc9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -222,11 +222,11 @@ class SelectionDAGBuilder { /// Emit comparison and split W into two subtrees. void splitWorkItem(SwitchCG::SwitchWorkList &WorkList, - const SwitchCG::SwitchWorkListItem &W, Value *Cond, - MachineBasicBlock *SwitchMBB); + const SwitchCG::SwitchWorkListItem &W, + const SwitchInst &SI, MachineBasicBlock *SwitchMBB); /// Lower W. 
- void lowerWorkItem(SwitchCG::SwitchWorkListItem W, Value *Cond, + void lowerWorkItem(SwitchCG::SwitchWorkListItem W, const SwitchInst &SI, MachineBasicBlock *SwitchMBB, MachineBasicBlock *DefaultMBB); @@ -525,12 +525,14 @@ class SelectionDAGBuilder { BranchProbability Prob = BranchProbability::getUnknown()); public: - void visitSwitchCase(SwitchCG::CaseBlock &CB, MachineBasicBlock *SwitchBB); + void visitSwitchCase(SwitchCG::CaseBlock &CB, MachineBasicBlock *SwitchBB, + bool IsConsistent = false); void visitSPDescriptorParent(StackProtectorDescriptor &SPD, MachineBasicBlock *ParentBB); void visitSPDescriptorFailure(StackProtectorDescriptor &SPD); void visitBitTestHeader(SwitchCG::BitTestBlock &B, - MachineBasicBlock *SwitchBB); + MachineBasicBlock *SwitchBB, + bool IsConsistent = false); void visitBitTestCase(SwitchCG::BitTestBlock &BB, MachineBasicBlock *NextMBB, BranchProbability BranchProbToNext, unsigned Reg, SwitchCG::BitTestCase &B, MachineBasicBlock *SwitchBB); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 78cc60084068a..ad4e4850bdc75 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -624,6 +624,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (getFlags().hasNoFPExcept()) OS << " nofpexcept"; + if (getFlags().hasConsistent()) + OS << " consistent"; + if (const MachineSDNode *MN = dyn_cast(this)) { if (!MN->memoperands_empty()) { OS << "<"; diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp index b321d8b325fe0..9ad1d24f79a69 100644 --- a/llvm/lib/IR/IRBuilder.cpp +++ b/llvm/lib/IR/IRBuilder.cpp @@ -1125,7 +1125,8 @@ Value *IRBuilderBase::CreateSelect(Value *C, Value *True, Value *False, if (MDFrom) { MDNode *Prof = MDFrom->getMetadata(LLVMContext::MD_prof); MDNode *Unpred = MDFrom->getMetadata(LLVMContext::MD_unpredictable); - Sel = 
addBranchMetadata(Sel, Prof, Unpred); + MDNode *Consist = MDFrom->getMetadata(LLVMContext::MD_consistent); + Sel = addBranchMetadata(Sel, Prof, Unpred, Consist); } if (isa(Sel)) setFPAttrs(Sel, nullptr /* MDNode* */, FMF); diff --git a/llvm/lib/IR/MDBuilder.cpp b/llvm/lib/IR/MDBuilder.cpp index 2490b3012bdc2..ec0e2ec93d30e 100644 --- a/llvm/lib/IR/MDBuilder.cpp +++ b/llvm/lib/IR/MDBuilder.cpp @@ -56,6 +56,11 @@ MDNode *MDBuilder::createUnpredictable() { return MDNode::get(Context, std::nullopt); } +MDNode *MDBuilder::createConsistent() { + return MDNode::get( + Context, createConstant(ConstantInt::get(Type::getInt1Ty(Context), 1))); +} + MDNode *MDBuilder::createFunctionEntryCount( uint64_t Count, bool Synthetic, const DenseSet *Imports) { diff --git a/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp b/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp index da72e35a248eb..10f7f6e9923dd 100644 --- a/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp +++ b/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp @@ -132,7 +132,11 @@ MachineInstr *AArch64CondBrTuning::convertToCondBr(MachineInstr &MI) { CC = AArch64CC::MI; break; } - return BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::Bcc)) + return BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), + MI.getFlag(MachineInstr::MIFlag::Consistent) && + TII->getSubtarget().hasHBC() + ? 
TII->get(AArch64::BCcc) + : TII->get(AArch64::Bcc)) .addImm(CC) .addMBB(TargetMBB); } diff --git a/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp index 1c20e24e41d7e..c412c8d76aed0 100644 --- a/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp +++ b/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp @@ -149,7 +149,7 @@ MachineInstr *AArch64ConditionOptimizer::findSuitableCompare( if (Term == MBB->end()) return nullptr; - if (Term->getOpcode() != AArch64::Bcc) + if (Term->getOpcode() != AArch64::Bcc && Term->getOpcode() != AArch64::BCcc) return nullptr; // Since we may modify cmp of this MBB, make sure NZCV does not live out. diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp index 9b8162ce8dd4d..76033c746c280 100644 --- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp +++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp @@ -2449,16 +2449,30 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { // Emit the extra branch for FCMP_UEQ and FCMP_ONE. if (ExtraCC != AArch64CC::AL) { + if (BI->getMetadata(LLVMContext::MD_consistent) && Subtarget->hasHBC()) + // For branches with consistent metadata emit conditional branches + // with a hint that it will behave very consistently if target + // supports HBC + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, + TII.get(AArch64::BCcc)) + .addImm(ExtraCC) + .addMBB(TBB); + else + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc)) + .addImm(ExtraCC) + .addMBB(TBB); + } + // Emit the branch. + if (BI->getMetadata(LLVMContext::MD_consistent) && Subtarget->hasHBC()) { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BCcc)) + .addImm(CC) + .addMBB(TBB); + } else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc)) - .addImm(ExtraCC) + .addImm(CC) .addMBB(TBB); } - // Emit the branch. 
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc)) - .addImm(CC) - .addMBB(TBB); - finishCondBranch(BI->getParent(), TBB, FBB); return true; } @@ -2485,10 +2499,17 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { if (!CondReg) return false; - // Emit the branch. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc)) - .addImm(CC) - .addMBB(TBB); + if (BI->getMetadata(LLVMContext::MD_consistent) && Subtarget->hasHBC()) { + // Emit conditional branch with a consistent behaviour hint + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BCcc)) + .addImm(CC) + .addMBB(TBB); + } else { + // Emit the branch. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc)) + .addImm(CC) + .addMBB(TBB); + } finishCondBranch(BI->getParent(), TBB, FBB); return true; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 3bff2845b7a13..945328d0e7e38 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2346,6 +2346,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::LOADgot) MAKE_CASE(AArch64ISD::RET_GLUE) MAKE_CASE(AArch64ISD::BRCOND) + MAKE_CASE(AArch64ISD::BRCCOND) MAKE_CASE(AArch64ISD::CSEL) MAKE_CASE(AArch64ISD::CSINV) MAKE_CASE(AArch64ISD::CSNEG) @@ -8604,6 +8605,11 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue RHS = Op.getOperand(3); SDValue Dest = Op.getOperand(4); SDLoc dl(Op); + SDNodeFlags Flags; + bool IsConsistent = Op.getNode()->getFlags().hasConsistent(); + Flags.setConsistent(IsConsistent); + unsigned BRCondOpc = IsConsistent && Subtarget->hasHBC() ? 
AArch64ISD::BRCCOND + : AArch64ISD::BRCOND; MachineFunction &MF = DAG.getMachineFunction(); // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions @@ -8643,8 +8649,8 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { OFCC = getInvertedCondCode(OFCC); SDValue CCVal = DAG.getConstant(OFCC, dl, MVT::i32); - return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal, - Overflow); + SDValue Ops[] = {Chain, Dest, CCVal, Overflow}; + return DAG.getNode(BRCondOpc, dl, MVT::Other, Ops, Flags); } if (LHS.getValueType().isInteger()) { @@ -8665,12 +8671,13 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { isPowerOf2_64(LHS.getConstantOperandVal(1))) { SDValue Test = LHS.getOperand(0); uint64_t Mask = LHS.getConstantOperandVal(1); - return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test, - DAG.getConstant(Log2_64(Mask), dl, MVT::i64), - Dest); + SDValue Ops[] = {Chain, Test, + DAG.getConstant(Log2_64(Mask), dl, MVT::i64), Dest}; + return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Ops, Flags); } - return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest); + return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest, + Flags); } else if (CC == ISD::SETNE) { // See if we can use a TBZ to fold in an AND as well. // TBZ has a smaller branch displacement than CBZ. 
If the offset is @@ -8681,20 +8688,22 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { isPowerOf2_64(LHS.getConstantOperandVal(1))) { SDValue Test = LHS.getOperand(0); uint64_t Mask = LHS.getConstantOperandVal(1); - return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test, - DAG.getConstant(Log2_64(Mask), dl, MVT::i64), - Dest); + SDValue Ops[] = {Chain, Test, + DAG.getConstant(Log2_64(Mask), dl, MVT::i64), Dest}; + return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Ops, Flags); } - return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest); + return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest, + Flags); } else if (CC == ISD::SETLT && LHS.getOpcode() != ISD::AND) { // Don't combine AND since emitComparison converts the AND to an ANDS // (a.k.a. TST) and the test in the test bit and branch instruction // becomes redundant. This would also increase register pressure. uint64_t SignBitPos; std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS); - return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS, - DAG.getConstant(SignBitPos, dl, MVT::i64), Dest); + SDValue Ops[] = {Chain, LHS, DAG.getConstant(SignBitPos, dl, MVT::i64), + Dest}; + return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Ops, Flags); } } if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT && @@ -8704,14 +8713,15 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { // becomes redundant. This would also increase register pressure. 
uint64_t SignBitPos; std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS); - return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS, - DAG.getConstant(SignBitPos, dl, MVT::i64), Dest); + SDValue Ops[] = {Chain, LHS, DAG.getConstant(SignBitPos, dl, MVT::i64), + Dest}; + return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Ops, Flags); } SDValue CCVal; SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl); - return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal, - Cmp); + SDValue Ops[] = {Chain, Dest, CCVal, Cmp}; + return DAG.getNode(BRCondOpc, dl, MVT::Other, Ops, Flags); } assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::bf16 || @@ -8723,12 +8733,12 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { AArch64CC::CondCode CC1, CC2; changeFPCCToAArch64CC(CC, CC1, CC2); SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32); - SDValue BR1 = - DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp); + SDValue BR1Ops[] = {Chain, Dest, CC1Val, Cmp}; + SDValue BR1 = DAG.getNode(BRCondOpc, dl, MVT::Other, BR1Ops, Flags); if (CC2 != AArch64CC::AL) { SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32); - return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val, - Cmp); + SDValue Ops[] = {BR1, Dest, CC2Val, Cmp}; + return DAG.getNode(BRCondOpc, dl, MVT::Other, Ops, Flags); } return BR1; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 7332a95615a4d..a6968587a2f96 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -72,6 +72,7 @@ enum NodeType : unsigned { // Offset Table, TLS record). RET_GLUE, // Return with a glue operand. Operand 0 is the chain operand. BRCOND, // Conditional branch instruction; "b.cond". + BRCCOND, // Hinted Conditional Branch "BC.cond". CSEL, CSINV, // Conditional select invert. CSNEG, // Conditional select negate. 
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index f88f5a240a1fd..a3015a31b83e7 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -2089,10 +2089,10 @@ def am_brcond : Operand { let OperandType = "OPERAND_PCREL"; } -class BranchCond +class BranchCond : I<(outs), (ins ccode:$cond, am_brcond:$target), mnemonic, ".$cond\t$target", "", - [(AArch64brcond bb:$target, imm:$cond, NZCV)]>, Sched<[WriteBr]> { + [(node bb:$target, imm:$cond, NZCV)]>, Sched<[WriteBr]> { let isBranch = 1; let isTerminator = 1; let Uses = [NZCV]; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 6fdf5363bae29..7a24b11fc2b8d 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -180,6 +180,7 @@ static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, default: llvm_unreachable("Unknown branch instruction?"); case AArch64::Bcc: + case AArch64::BCcc: Target = LastInst->getOperand(1).getMBB(); Cond.push_back(LastInst->getOperand(0)); break; @@ -221,6 +222,7 @@ static unsigned getBranchDisplacementBits(unsigned Opc) { case AArch64::CBZX: return CBZDisplacementBits; case AArch64::Bcc: + case AArch64::BCcc: return BCCDisplacementBits; } } @@ -250,6 +252,7 @@ AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const { case AArch64::CBZX: case AArch64::CBNZX: case AArch64::Bcc: + case AArch64::BCcc: return MI.getOperand(1).getMBB(); } } @@ -535,7 +538,10 @@ bool AArch64InstrInfo::reverseBranchCondition( } unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { + int *BytesRemoved, + bool *IsConsistent) const { + if (IsConsistent) + *IsConsistent = false; MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); if (I == MBB.end()) return 0; @@ -544,6 +550,10 @@ unsigned 
AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB, !isCondBranchOpcode(I->getOpcode())) return 0; + if (I->getOpcode() == AArch64::BCcc) + if (IsConsistent) + *IsConsistent = true; + // Remove the branch. I->eraseFromParent(); @@ -561,6 +571,10 @@ unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB, return 1; } + if (I->getOpcode() == AArch64::BCcc) + if (IsConsistent) + *IsConsistent = true; + // Remove the branch. I->eraseFromParent(); if (BytesRemoved) @@ -569,12 +583,16 @@ unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB, return 2; } -void AArch64InstrInfo::instantiateCondBranch( - MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB, - ArrayRef Cond) const { +void AArch64InstrInfo::instantiateCondBranch(MachineBasicBlock &MBB, + const DebugLoc &DL, + MachineBasicBlock *TBB, + ArrayRef Cond, + bool IsConsistent) const { if (Cond[0].getImm() != -1) { // Regular Bcc - BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB); + BuildMI(&MBB, DL, get(IsConsistent ? AArch64::BCcc : AArch64::Bcc)) + .addImm(Cond[0].getImm()) + .addMBB(TBB); } else { // Folded compare-and-branch // Note that we use addOperand instead of addReg to keep the flags. @@ -586,9 +604,12 @@ void AArch64InstrInfo::instantiateCondBranch( } } -unsigned AArch64InstrInfo::insertBranch( - MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - ArrayRef Cond, const DebugLoc &DL, int *BytesAdded) const { +unsigned AArch64InstrInfo::insertBranch(MachineBasicBlock &MBB, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + ArrayRef Cond, + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { // Shouldn't be a fall through. assert(TBB && "insertBranch must not be told to insert a fallthrough"); @@ -596,7 +617,7 @@ unsigned AArch64InstrInfo::insertBranch( if (Cond.empty()) // Unconditional branch? 
BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB); else - instantiateCondBranch(MBB, DL, TBB, Cond); + instantiateCondBranch(MBB, DL, TBB, Cond, IsConsistent); if (BytesAdded) *BytesAdded = 4; @@ -605,7 +626,7 @@ unsigned AArch64InstrInfo::insertBranch( } // Two-way conditional branch. - instantiateCondBranch(MBB, DL, TBB, Cond); + instantiateCondBranch(MBB, DL, TBB, Cond, IsConsistent); BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB); if (BytesAdded) @@ -1618,7 +1639,8 @@ findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) { default: return -1; - case AArch64::Bcc: { + case AArch64::Bcc: + case AArch64::BCcc: { int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV); assert(Idx >= 2); return Idx - 2; @@ -7873,6 +7895,7 @@ bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const { default: llvm_unreachable("Unknown branch instruction?"); case AArch64::Bcc: + case AArch64::BCcc: return false; case AArch64::CBZW: case AArch64::CBZX: diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index a934103c90cbf..0a513aa5254d3 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -45,6 +45,8 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo { /// always be able to get register info as well (through this method). 
const AArch64RegisterInfo &getRegisterInfo() const { return RI; } + const AArch64Subtarget &getSubtarget() const { return Subtarget; } + unsigned getInstSizeInBytes(const MachineInstr &MI) const override; bool isAsCheapAsAMove(const MachineInstr &MI) const override; @@ -238,12 +240,12 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo { bool analyzeBranchPredicate(MachineBasicBlock &MBB, MachineBranchPredicate &MBP, bool AllowModify) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; bool reverseBranchCondition(SmallVectorImpl &Cond) const override; bool canInsertSelect(const MachineBasicBlock &, ArrayRef Cond, @@ -403,7 +405,8 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo { void instantiateCondBranch(MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB, - ArrayRef Cond) const; + ArrayRef Cond, + bool consistent) const; bool substituteCmpToZero(MachineInstr &CmpInstr, unsigned SrcReg, const MachineRegisterInfo &MRI) const; bool removeCmpToZeroOrOne(MachineInstr &CmpInstr, unsigned SrcReg, @@ -510,6 +513,7 @@ static inline bool isUncondBranchOpcode(int Opc) { return Opc == AArch64::B; } static inline bool isCondBranchOpcode(int Opc) { switch (Opc) { case AArch64::Bcc: + case AArch64::BCcc: case AArch64::CBZW: case AArch64::CBZX: case AArch64::CBNZW: diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 290c79f7bacdb..198ae55aea218 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ 
b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -637,6 +637,8 @@ def AArch64call_rvmarker: SDNode<"AArch64ISD::CALL_RVMARKER", def AArch64brcond : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond, [SDNPHasChain]>; +def AArch64brccond : SDNode<"AArch64ISD::BRCCOND", SDT_AArch64Brcond, + [SDNPHasChain]>; def AArch64cbz : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz, [SDNPHasChain]>; def AArch64cbnz : SDNode<"AArch64ISD::CBNZ", SDT_AArch64cbz, @@ -2782,12 +2784,12 @@ def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym), //===----------------------------------------------------------------------===// // Conditional branch (immediate) instruction. //===----------------------------------------------------------------------===// -def Bcc : BranchCond<0, "b">; +def Bcc : BranchCond<0, "b", AArch64brcond>; // Armv8.8-A variant form which hints to the branch predictor that // this branch is very likely to go the same way nearly all the time // (even though it is not known at compile time _which_ way that is). -def BCcc : BranchCond<1, "bc">, Requires<[HasHBC]>; +def BCcc : BranchCond<1, "bc", AArch64brccond>, Requires<[HasHBC]>; //===----------------------------------------------------------------------===// // Compare-and-branch instructions. diff --git a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp index 05d60872bf51a..a0b96f74a0289 100644 --- a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp +++ b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp @@ -21,7 +21,8 @@ using namespace llvm; /// CMN, CMP, TST followed by Bcc static bool isArithmeticBccPair(const MachineInstr *FirstMI, const MachineInstr &SecondMI, bool CmpOnly) { - if (SecondMI.getOpcode() != AArch64::Bcc) + if (SecondMI.getOpcode() != AArch64::Bcc && + SecondMI.getOpcode() != AArch64::BCcc) return false; // Assume the 1st instr to be a wildcard if it is unspecified. 
diff --git a/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp b/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp index 1494312886a40..0624a0b6b0264 100644 --- a/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp +++ b/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp @@ -137,7 +137,7 @@ bool AArch64RedundantCopyElimination::knownRegValInBlock( } // Otherwise, must be a conditional branch. - if (Opc != AArch64::Bcc) + if (Opc != AArch64::Bcc && Opc != AArch64::BCcc) return false; // Must be an equality check (i.e., == or !=). diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index bdaae4dd724d5..796f75d040c47 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -1675,9 +1675,16 @@ bool AArch64InstructionSelector::selectCompareBranchFedByFCmp( AArch64CC::CondCode CC1, CC2; changeFCMPPredToAArch64CC(static_cast(Pred), CC1, CC2); MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); - MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB); - if (CC2 != AArch64CC::AL) - MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB); + if (I.getFlag(MachineInstr::MIFlag::Consistent) && STI.hasHBC()) + MIB.buildInstr(AArch64::BCcc, {}, {}).addImm(CC1).addMBB(DestMBB); + else + MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB); + if (CC2 != AArch64CC::AL) { + if (I.getFlag(MachineInstr::MIFlag::Consistent) && STI.hasHBC()) + MIB.buildInstr(AArch64::BCcc, {}, {}).addImm(CC2).addMBB(DestMBB); + else + MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB); + } I.eraseFromParent(); return true; } @@ -1790,7 +1797,10 @@ bool AArch64InstructionSelector::selectCompareBranchFedByICmp( emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB); const AArch64CC::CondCode CC = changeICMPPredToAArch64CC( 
static_cast(PredOp.getPredicate())); - MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB); + if (I.getFlag(MachineInstr::MIFlag::Consistent) && STI.hasHBC()) + MIB.buildInstr(AArch64::BCcc, {}, {}).addImm(CC).addMBB(DestMBB); + else + MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB); I.eraseFromParent(); return true; } @@ -1821,9 +1831,12 @@ bool AArch64InstructionSelector::selectCompareBranch( auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1); constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); - auto Bcc = MIB.buildInstr(AArch64::Bcc) - .addImm(AArch64CC::NE) - .addMBB(I.getOperand(1).getMBB()); + auto Bcc = + MIB.buildInstr(I.getFlag(MachineInstr::MIFlag::Consistent) && STI.hasHBC() + ? AArch64::BCcc + : AArch64::Bcc) + .addImm(AArch64CC::NE) + .addMBB(I.getOperand(1).getMBB()); I.eraseFromParent(); return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI); } diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp index 7f874b245b8f4..cf9e601a73072 100644 --- a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp @@ -730,8 +730,8 @@ unsigned R600InstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded) const { + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { assert(TBB && "insertBranch must not be told to insert a fallthrough"); assert(!BytesAdded && "code size not handled"); @@ -773,8 +773,8 @@ unsigned R600InstrInfo::insertBranch(MachineBasicBlock &MBB, } } -unsigned R600InstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { +unsigned R600InstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved, + bool *IsConsistent) const { assert(!BytesRemoved && "code size not handled"); // Note : we leave PRED* instructions there. 
diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.h b/llvm/lib/Target/AMDGPU/R600InstrInfo.h index f720e4656348c..9d9acb576bfea 100644 --- a/llvm/lib/Target/AMDGPU/R600InstrInfo.h +++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.h @@ -170,11 +170,11 @@ class R600InstrInfo final : public R600GenInstrInfo { unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; bool isPredicated(const MachineInstr &MI) const override; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 027b695c3bb1a..a000b567c88c0 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3036,8 +3036,8 @@ bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, return analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify); } -unsigned SIInstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { +unsigned SIInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved, + bool *IsConsistent) const { unsigned Count = 0; unsigned RemovedSize = 0; for (MachineInstr &MI : llvm::make_early_inc_range(MBB.terminators())) { @@ -3066,8 +3066,8 @@ unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded) const { + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { if (!FBB && Cond.empty()) { BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH)) .addMBB(TBB); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 
29f549fc29a3c..aefd6f4fadfc6 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -338,13 +338,13 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { SmallVectorImpl &Cond, bool AllowModify = false) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; bool reverseBranchCondition( SmallVectorImpl &Cond) const override; diff --git a/llvm/lib/Target/ARC/ARCInstrInfo.cpp b/llvm/lib/Target/ARC/ARCInstrInfo.cpp index fe78a98837cf9..bceb525927f3c 100644 --- a/llvm/lib/Target/ARC/ARCInstrInfo.cpp +++ b/llvm/lib/Target/ARC/ARCInstrInfo.cpp @@ -251,8 +251,8 @@ bool ARCInstrInfo::analyzeBranch(MachineBasicBlock &MBB, return false; } -unsigned ARCInstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { +unsigned ARCInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved, + bool *IsConsistent) const { assert(!BytesRemoved && "Code size not handled"); MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); if (I == MBB.end()) @@ -370,7 +370,8 @@ unsigned ARCInstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, int *BytesAdded) const { + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { assert(!BytesAdded && "Code size not handled."); // Shouldn't be a fall through. 
diff --git a/llvm/lib/Target/ARC/ARCInstrInfo.h b/llvm/lib/Target/ARC/ARCInstrInfo.h index c55c9535ec296..031fbcd5d0487 100644 --- a/llvm/lib/Target/ARC/ARCInstrInfo.h +++ b/llvm/lib/Target/ARC/ARCInstrInfo.h @@ -57,11 +57,11 @@ class ARCInstrInfo : public ARCGenInstrInfo { unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &, - int *BytesAdded = nullptr) const override; + const DebugLoc &, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &, MCRegister DestReg, MCRegister SrcReg, diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 4c78379ccf5c4..d249b165dfc1f 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -469,7 +469,8 @@ bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB, } unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { + int *BytesRemoved, + bool *IsConsistent) const { assert(!BytesRemoved && "code size not handled"); MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); @@ -499,8 +500,8 @@ unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded) const { + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { assert(!BytesAdded && "code size not handled"); ARMFunctionInfo *AFI = MBB.getParent()->getInfo(); int BOpc = !AFI->isThumbFunction() diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index 5efcc1a0d9fc0..9fc6b12b52ce8 100644 --- 
a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -143,12 +143,12 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo { MachineBasicBlock *&FBB, SmallVectorImpl &Cond, bool AllowModify = false) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; bool reverseBranchCondition(SmallVectorImpl &Cond) const override; diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.cpp b/llvm/lib/Target/AVR/AVRInstrInfo.cpp index 2640ad9e36267..3829706dd8e50 100644 --- a/llvm/lib/Target/AVR/AVRInstrInfo.cpp +++ b/llvm/lib/Target/AVR/AVRInstrInfo.cpp @@ -398,7 +398,8 @@ unsigned AVRInstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, int *BytesAdded) const { + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { if (BytesAdded) *BytesAdded = 0; @@ -435,8 +436,8 @@ unsigned AVRInstrInfo::insertBranch(MachineBasicBlock &MBB, return Count; } -unsigned AVRInstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { +unsigned AVRInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved, + bool *IsConsistent) const { if (BytesRemoved) *BytesRemoved = 0; diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.h b/llvm/lib/Target/AVR/AVRInstrInfo.h index 290177f5eec66..209b244b786aa 100644 --- a/llvm/lib/Target/AVR/AVRInstrInfo.h +++ b/llvm/lib/Target/AVR/AVRInstrInfo.h @@ -99,10 +99,10 @@ class AVRInstrInfo : public AVRGenInstrInfo { bool AllowModify = false) const override; unsigned 
insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; bool reverseBranchCondition(SmallVectorImpl &Cond) const override; diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.cpp b/llvm/lib/Target/BPF/BPFInstrInfo.cpp index 2209f1f1462b4..bc852e61d033f 100644 --- a/llvm/lib/Target/BPF/BPFInstrInfo.cpp +++ b/llvm/lib/Target/BPF/BPFInstrInfo.cpp @@ -221,8 +221,8 @@ unsigned BPFInstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded) const { + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { assert(!BytesAdded && "code size not handled"); // Shouldn't be a fall through. 
@@ -238,8 +238,8 @@ unsigned BPFInstrInfo::insertBranch(MachineBasicBlock &MBB, llvm_unreachable("Unexpected conditional branch"); } -unsigned BPFInstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { +unsigned BPFInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved, + bool *IsConsistent) const { assert(!BytesRemoved && "code size not handled"); MachineBasicBlock::iterator I = MBB.end(); diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.h b/llvm/lib/Target/BPF/BPFInstrInfo.h index 354aca1bd2f93..d9e40493d267d 100644 --- a/llvm/lib/Target/BPF/BPFInstrInfo.h +++ b/llvm/lib/Target/BPF/BPFInstrInfo.h @@ -52,12 +52,13 @@ class BPFInstrInfo : public BPFGenInstrInfo { SmallVectorImpl &Cond, bool AllowModify) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; + private: void expandMEMCPY(MachineBasicBlock::iterator) const; diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp b/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp index e5581bcdc3975..3b2f7bc722bc1 100644 --- a/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp +++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp @@ -110,8 +110,8 @@ bool CSKYInstrInfo::analyzeBranch(MachineBasicBlock &MBB, return true; } -unsigned CSKYInstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { +unsigned CSKYInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved, + bool *IsConsistent) const { if (BytesRemoved) *BytesRemoved = 0; MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); @@ -151,9 +151,12 @@ CSKYInstrInfo::getBranchDestBlock(const 
MachineInstr &MI) const { return MI.getOperand(NumOp - 1).getMBB(); } -unsigned CSKYInstrInfo::insertBranch( - MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - ArrayRef Cond, const DebugLoc &DL, int *BytesAdded) const { +unsigned CSKYInstrInfo::insertBranch(MachineBasicBlock &MBB, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + ArrayRef Cond, + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { if (BytesAdded) *BytesAdded = 0; diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.h b/llvm/lib/Target/CSKY/CSKYInstrInfo.h index dbb69a7a87980..5a44544b77841 100644 --- a/llvm/lib/Target/CSKY/CSKYInstrInfo.h +++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.h @@ -59,16 +59,16 @@ class CSKYInstrInfo : public CSKYGenInstrInfo { unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl &Cond, bool AllowModify = false) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; bool reverseBranchCondition(SmallVectorImpl &Cond) const override; diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index 6f0210763bc5f..a4bc553e82d85 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -603,7 +603,8 @@ bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB, } unsigned HexagonInstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { + int *BytesRemoved, + bool *IsConsistent) const { assert(!BytesRemoved && "code size not 
handled"); LLVM_DEBUG(dbgs() << "\nRemoving branches out of " << printMBBReference(MBB)); @@ -629,9 +630,9 @@ unsigned HexagonInstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded) const { - unsigned BOpc = Hexagon::J2_jump; + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { + unsigned BOpc = Hexagon::J2_jump; unsigned BccOpc = Hexagon::J2_jumpt; assert(validateBranchCond(Cond) && "Invalid branching condition"); assert(TBB && "insertBranch must not be told to insert a fallthrough"); diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h index 0bc0877f6e706..b08cc389d2fca 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h @@ -111,8 +111,8 @@ class HexagonInstrInfo : public HexagonGenInstrInfo { /// Remove the branching code at the end of the specific MBB. /// This is only invoked in cases where analyzeBranch returns success. It /// returns the number of instructions that were removed. - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; /// Insert branch code into the end of the specified MachineBasicBlock. /// The operands to this method are the same as those @@ -126,8 +126,8 @@ class HexagonInstrInfo : public HexagonGenInstrInfo { /// merging needs to be disabled. unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; /// Analyze loop L, which must be a single-basic-block loop, and if the /// conditions can be understood enough produce a PipelinerLoopInfo object. 
diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp index aa7e8846406dd..02e1fe3d5d576 100644 --- a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp +++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp @@ -657,8 +657,8 @@ unsigned LanaiInstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TrueBlock, MachineBasicBlock *FalseBlock, ArrayRef Condition, - const DebugLoc &DL, - int *BytesAdded) const { + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { // Shouldn't be a fall through. assert(TrueBlock && "insertBranch must not be told to insert a fallthrough"); assert(!BytesAdded && "code size not handled"); @@ -685,8 +685,8 @@ unsigned LanaiInstrInfo::insertBranch(MachineBasicBlock &MBB, return 2; } -unsigned LanaiInstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { +unsigned LanaiInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved, + bool *IsConsistent) const { assert(!BytesRemoved && "code size not handled"); MachineBasicBlock::iterator Instruction = MBB.end(); diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.h b/llvm/lib/Target/Lanai/LanaiInstrInfo.h index 62f6240c6e468..54a4e396fc70a 100644 --- a/llvm/lib/Target/Lanai/LanaiInstrInfo.h +++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.h @@ -89,8 +89,8 @@ class LanaiInstrInfo : public LanaiGenInstrInfo { SmallVectorImpl &Condition, bool AllowModify) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; // For a comparison instruction, return the source registers in SrcReg and // SrcReg2 if having two register operands, and the value it compares against @@ -138,9 +138,9 @@ class LanaiInstrInfo : public LanaiGenInstrInfo { unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TrueBlock, MachineBasicBlock *FalseBlock, - ArrayRef Condition, 
- const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + ArrayRef Condition, const DebugLoc &DL, + int *BytesAdded = nullptr, + bool IsConsistent = false) const override; }; static inline bool isSPLSOpcode(unsigned Opcode) { diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp index ddd1c9943fac0..fd193c9680fba 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp @@ -325,7 +325,8 @@ bool LoongArchInstrInfo::isBranchOffsetInRange(unsigned BranchOp, } unsigned LoongArchInstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { + int *BytesRemoved, + bool *IsConsistent) const { if (BytesRemoved) *BytesRemoved = 0; MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); @@ -357,9 +358,12 @@ unsigned LoongArchInstrInfo::removeBranch(MachineBasicBlock &MBB, // Inserts a branch into the end of the specific MachineBasicBlock, returning // the number of instructions inserted. 
-unsigned LoongArchInstrInfo::insertBranch( - MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - ArrayRef Cond, const DebugLoc &DL, int *BytesAdded) const { +unsigned LoongArchInstrInfo::insertBranch(MachineBasicBlock &MBB, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + ArrayRef Cond, + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { if (BytesAdded) *BytesAdded = 0; diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h index 4b145d0baa417..2d198447cb4a6 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h @@ -62,13 +62,13 @@ class LoongArchInstrInfo : public LoongArchGenInstrInfo { bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &dl, - int *BytesAdded = nullptr) const override; + const DebugLoc &dl, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.cpp b/llvm/lib/Target/M68k/M68kInstrInfo.cpp index d56fef9e9029a..aad64fc72c411 100644 --- a/llvm/lib/Target/M68k/M68kInstrInfo.cpp +++ b/llvm/lib/Target/M68k/M68kInstrInfo.cpp @@ -254,8 +254,8 @@ bool M68kInstrInfo::analyzeBranch(MachineBasicBlock &MBB, return AnalyzeBranchImpl(MBB, TBB, FBB, Cond, AllowModify); } -unsigned M68kInstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { +unsigned M68kInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved, + bool *IsConsistent) const { assert(!BytesRemoved && "code 
size not handled"); MachineBasicBlock::iterator I = MBB.end(); @@ -277,9 +277,12 @@ unsigned M68kInstrInfo::removeBranch(MachineBasicBlock &MBB, return Count; } -unsigned M68kInstrInfo::insertBranch( - MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - ArrayRef Cond, const DebugLoc &DL, int *BytesAdded) const { +unsigned M68kInstrInfo::insertBranch(MachineBasicBlock &MBB, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + ArrayRef Cond, + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { // Shouldn't be a fall through. assert(TBB && "InsertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 1 || Cond.size() == 0) && diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.h b/llvm/lib/Target/M68k/M68kInstrInfo.h index 577967f2fdfc9..afa2faf71de6a 100644 --- a/llvm/lib/Target/M68k/M68kInstrInfo.h +++ b/llvm/lib/Target/M68k/M68kInstrInfo.h @@ -261,13 +261,13 @@ class M68kInstrInfo : public M68kGenInstrInfo { SmallVectorImpl &Cond, bool AllowModify) const; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, diff --git a/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp b/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp index 7405716516643..fb983245c6039 100644 --- a/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -104,7 +104,8 @@ void MSP430InstrInfo::copyPhysReg(MachineBasicBlock &MBB, } unsigned 
MSP430InstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { + int *BytesRemoved, + bool *IsConsistent) const { assert(!BytesRemoved && "code size not handled"); MachineBasicBlock::iterator I = MBB.end(); @@ -254,8 +255,8 @@ unsigned MSP430InstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded) const { + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { // Shouldn't be a fall through. assert(TBB && "insertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 1 || Cond.size() == 0) && diff --git a/llvm/lib/Target/MSP430/MSP430InstrInfo.h b/llvm/lib/Target/MSP430/MSP430InstrInfo.h index b8d015a21cd15..397e74da63b22 100644 --- a/llvm/lib/Target/MSP430/MSP430InstrInfo.h +++ b/llvm/lib/Target/MSP430/MSP430InstrInfo.h @@ -61,12 +61,12 @@ class MSP430InstrInfo : public MSP430GenInstrInfo { SmallVectorImpl &Cond, bool AllowModify) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; int64_t getFramePoppedByCallee(const MachineInstr &I) const { assert(isFrameInstr(I) && "Not a frame instruction"); diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.cpp b/llvm/lib/Target/Mips/MipsInstrInfo.cpp index 392cc15d7943a..99addbd15d7b1 100644 --- a/llvm/lib/Target/Mips/MipsInstrInfo.cpp +++ b/llvm/lib/Target/Mips/MipsInstrInfo.cpp @@ -135,8 +135,8 @@ unsigned MipsInstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc 
&DL, - int *BytesAdded) const { + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { // Shouldn't be a fall through. assert(TBB && "insertBranch must not be told to insert a fallthrough"); assert(!BytesAdded && "code size not handled"); @@ -165,8 +165,8 @@ unsigned MipsInstrInfo::insertBranch(MachineBasicBlock &MBB, return 1; } -unsigned MipsInstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { +unsigned MipsInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved, + bool *IsConsistent) const { assert(!BytesRemoved && "code size not handled"); MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend(); diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.h b/llvm/lib/Target/Mips/MipsInstrInfo.h index dc4b9d99b39d2..c193ba2d560f2 100644 --- a/llvm/lib/Target/Mips/MipsInstrInfo.h +++ b/llvm/lib/Target/Mips/MipsInstrInfo.h @@ -65,13 +65,13 @@ class MipsInstrInfo : public MipsGenInstrInfo { SmallVectorImpl &Cond, bool AllowModify) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; bool reverseBranchCondition(SmallVectorImpl &Cond) const override; diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp index b0d792b5ee3fe..e1690c6306280 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -146,8 +146,8 @@ bool NVPTXInstrInfo::analyzeBranch(MachineBasicBlock &MBB, return true; } -unsigned NVPTXInstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { +unsigned 
NVPTXInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved, + bool *IsConsistent) const { assert(!BytesRemoved && "code size not handled"); MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin()) @@ -176,8 +176,8 @@ unsigned NVPTXInstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded) const { + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { assert(!BytesAdded && "code size not handled"); // Shouldn't be a fall through. diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h index cd068a0939300..69a6d919c6214 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h @@ -60,12 +60,12 @@ class NVPTXInstrInfo : public NVPTXGenInstrInfo { MachineBasicBlock *&FBB, SmallVectorImpl &Cond, bool AllowModify) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; }; } // namespace llvm diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index e1bb2f72657f8..4f70c6d32fd23 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -1440,8 +1440,8 @@ bool PPCInstrInfo::analyzeBranch(MachineBasicBlock &MBB, return true; } -unsigned PPCInstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { +unsigned PPCInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved, + bool *IsConsistent) const { assert(!BytesRemoved && 
"code size not handled"); MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); @@ -1476,8 +1476,8 @@ unsigned PPCInstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded) const { + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { // Shouldn't be a fall through. assert(TBB && "insertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 2 || Cond.size() == 0) && diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h index 1f59e994d9cb1..50e476258388d 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -396,12 +396,12 @@ class PPCInstrInfo : public PPCGenInstrInfo { MachineBasicBlock *&FBB, SmallVectorImpl &Cond, bool AllowModify) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; // Select analysis. 
bool canInsertSelect(const MachineBasicBlock &, ArrayRef Cond, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 9271f807a8483..c8a0db51b270f 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -1018,8 +1018,8 @@ bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB, return true; } -unsigned RISCVInstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { +unsigned RISCVInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved, + bool *IsConsistent) const { if (BytesRemoved) *BytesRemoved = 0; MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); @@ -1052,9 +1052,12 @@ unsigned RISCVInstrInfo::removeBranch(MachineBasicBlock &MBB, // Inserts a branch into the end of the specific MachineBasicBlock, returning // the number of instructions inserted. -unsigned RISCVInstrInfo::insertBranch( - MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - ArrayRef Cond, const DebugLoc &DL, int *BytesAdded) const { +unsigned RISCVInstrInfo::insertBranch(MachineBasicBlock &MBB, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + ArrayRef Cond, + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { if (BytesAdded) *BytesAdded = 0; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h index b33d8c2856159..96b14fb662d08 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -108,16 +108,16 @@ class RISCVInstrInfo : public RISCVGenInstrInfo { unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &dl, - int *BytesAdded = nullptr) const override; + const DebugLoc &dl, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc 
&DL, int64_t BrOffset, RegScavenger *RS) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; bool reverseBranchCondition(SmallVectorImpl &Cond) const override; diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp index 42317453a2370..5c59295ef21a7 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp @@ -200,8 +200,8 @@ bool SPIRVInstrInfo::analyzeBranch(MachineBasicBlock &MBB, // returns the number of instructions that were removed. // If \p BytesRemoved is non-null, report the change in code size from the // removed instructions. -unsigned SPIRVInstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { +unsigned SPIRVInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved, + bool *IsConsistent) const { report_fatal_error("Branch removal not supported, as MBB info not propagated" " to OpPhi instructions. Try using -O0 instead."); } @@ -219,9 +219,12 @@ unsigned SPIRVInstrInfo::removeBranch(MachineBasicBlock &MBB, // // The CFG information in MBB.Predecessors and MBB.Successors must be valid // before calling this function. -unsigned SPIRVInstrInfo::insertBranch( - MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - ArrayRef Cond, const DebugLoc &DL, int *BytesAdded) const { +unsigned SPIRVInstrInfo::insertBranch(MachineBasicBlock &MBB, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + ArrayRef Cond, + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { report_fatal_error("Branch insertion not supported, as MBB info not " "propagated to OpPhi instructions. 
Try using " "-O0 instead."); diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h index c01e30e109bd5..706846716ef93 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h +++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h @@ -41,13 +41,13 @@ class SPIRVInstrInfo : public SPIRVGenInstrInfo { SmallVectorImpl &Cond, bool AllowModify = false) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc) const override; diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/llvm/lib/Target/Sparc/SparcInstrInfo.cpp index 90662cd87dcf1..db2236dca290c 100644 --- a/llvm/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/llvm/lib/Target/Sparc/SparcInstrInfo.cpp @@ -328,8 +328,8 @@ unsigned SparcInstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded) const { + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { assert(TBB && "insertBranch must not be told to insert a fallthrough"); assert((Cond.size() <= 3) && "Sparc branch conditions should have at most three components!"); @@ -364,8 +364,8 @@ unsigned SparcInstrInfo::insertBranch(MachineBasicBlock &MBB, return 2; } -unsigned SparcInstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { +unsigned SparcInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved, + bool *IsConsistent) const { 
MachineBasicBlock::iterator I = MBB.end(); unsigned Count = 0; int Removed = 0; diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.h b/llvm/lib/Target/Sparc/SparcInstrInfo.h index 7056d6babe17b..470199b3c2093 100644 --- a/llvm/lib/Target/Sparc/SparcInstrInfo.h +++ b/llvm/lib/Target/Sparc/SparcInstrInfo.h @@ -71,13 +71,13 @@ class SparcInstrInfo : public SparcGenInstrInfo { SmallVectorImpl &Cond, bool AllowModify = false) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; bool reverseBranchCondition(SmallVectorImpl &Cond) const override; diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp index ac8c395f9064f..8377471ae85d8 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -447,7 +447,8 @@ bool SystemZInstrInfo::analyzeBranch(MachineBasicBlock &MBB, } unsigned SystemZInstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { + int *BytesRemoved, + bool *IsConsistent) const { assert(!BytesRemoved && "code size not handled"); // Most of the code and comments here are boilerplate. @@ -482,8 +483,8 @@ unsigned SystemZInstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded) const { + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { // In this function we output 32-bit branches, which should always // have enough range. 
They can be shortened and relaxed by later code // in the pipeline, if desired. diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h index bb883ea464d37..a2c8ecf344c5a 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h @@ -238,12 +238,12 @@ class SystemZInstrInfo : public SystemZGenInstrInfo { MachineBasicBlock *&FBB, SmallVectorImpl &Cond, bool AllowModify) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &Mask, int64_t &Value) const override; diff --git a/llvm/lib/Target/VE/VEInstrInfo.cpp b/llvm/lib/Target/VE/VEInstrInfo.cpp index ebb9e21389c37..620e8ec0e7e4b 100644 --- a/llvm/lib/Target/VE/VEInstrInfo.cpp +++ b/llvm/lib/Target/VE/VEInstrInfo.cpp @@ -227,7 +227,8 @@ unsigned VEInstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, int *BytesAdded) const { + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { assert(TBB && "insertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 3 || Cond.size() == 0) && "VE branch conditions should have three component!"); @@ -288,8 +289,8 @@ unsigned VEInstrInfo::insertBranch(MachineBasicBlock &MBB, return 2; } -unsigned VEInstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { +unsigned VEInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved, + 
bool *IsConsistent) const { assert(!BytesRemoved && "code size not handled"); MachineBasicBlock::iterator I = MBB.end(); diff --git a/llvm/lib/Target/VE/VEInstrInfo.h b/llvm/lib/Target/VE/VEInstrInfo.h index 4fe56f24116f8..4b68534178c50 100644 --- a/llvm/lib/Target/VE/VEInstrInfo.h +++ b/llvm/lib/Target/VE/VEInstrInfo.h @@ -67,13 +67,13 @@ class VEInstrInfo : public VEGenInstrInfo { SmallVectorImpl &Cond, bool AllowModify = false) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; bool reverseBranchCondition(SmallVectorImpl &Cond) const override; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp index 32a4accd040eb..6f4369a45b27d 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp @@ -135,7 +135,8 @@ bool WebAssemblyInstrInfo::analyzeBranch(MachineBasicBlock &MBB, } unsigned WebAssemblyInstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { + int *BytesRemoved, + bool *IsConsistent) const { assert(!BytesRemoved && "code size not handled"); MachineBasicBlock::instr_iterator I = MBB.instr_end(); @@ -156,9 +157,12 @@ unsigned WebAssemblyInstrInfo::removeBranch(MachineBasicBlock &MBB, return Count; } -unsigned WebAssemblyInstrInfo::insertBranch( - MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - ArrayRef Cond, const DebugLoc &DL, int *BytesAdded) const { +unsigned WebAssemblyInstrInfo::insertBranch(MachineBasicBlock &MBB, + 
MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + ArrayRef Cond, + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { assert(!BytesAdded && "code size not handled"); if (Cond.empty()) { diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h index c1e1a790c60e2..7b6362b9dd36f 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h @@ -56,12 +56,12 @@ class WebAssemblyInstrInfo final : public WebAssemblyGenInstrInfo { MachineBasicBlock *&FBB, SmallVectorImpl &Cond, bool AllowModify = false) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; bool reverseBranchCondition(SmallVectorImpl &Cond) const override; diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 56e3ac79b5957..fd8fe2bbaed51 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -3373,8 +3373,8 @@ bool X86InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB, return true; } -unsigned X86InstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { +unsigned X86InstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved, + bool *IsConsistent) const { assert(!BytesRemoved && "code size not handled"); MachineBasicBlock::iterator I = MBB.end(); @@ -3400,8 +3400,8 @@ unsigned X86InstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int 
*BytesAdded) const { + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { // Shouldn't be a fall through. assert(TBB && "insertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 1 || Cond.size() == 0) && diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index b0a2d2b890743..c27713a95bc43 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -360,12 +360,12 @@ class X86InstrInfo final : public X86GenInstrInfo { TargetInstrInfo::MachineBranchPredicate &MBP, bool AllowModify = false) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; bool canInsertSelect(const MachineBasicBlock &, ArrayRef Cond, Register, Register, Register, int &, int &, int &) const override; diff --git a/llvm/lib/Target/XCore/XCoreInstrInfo.cpp b/llvm/lib/Target/XCore/XCoreInstrInfo.cpp index d8a8e2cddf154..dbfffb711f2bf 100644 --- a/llvm/lib/Target/XCore/XCoreInstrInfo.cpp +++ b/llvm/lib/Target/XCore/XCoreInstrInfo.cpp @@ -272,8 +272,8 @@ unsigned XCoreInstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded) const { + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { // Shouldn't be a fall through. 
assert(TBB && "insertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 2 || Cond.size() == 0) && @@ -302,8 +302,8 @@ unsigned XCoreInstrInfo::insertBranch(MachineBasicBlock &MBB, return 2; } -unsigned -XCoreInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved) const { +unsigned XCoreInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved, + bool *IsConsistent) const { assert(!BytesRemoved && "code size not handled"); MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); diff --git a/llvm/lib/Target/XCore/XCoreInstrInfo.h b/llvm/lib/Target/XCore/XCoreInstrInfo.h index 9bf7e2dcccb7d..ece4bfa0f23d9 100644 --- a/llvm/lib/Target/XCore/XCoreInstrInfo.h +++ b/llvm/lib/Target/XCore/XCoreInstrInfo.h @@ -56,11 +56,11 @@ class XCoreInstrInfo : public XCoreGenInstrInfo { unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 6009558efca06..29a41d012849e 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -5401,7 +5401,6 @@ bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI, !isa(SI->getDefaultDest()->getFirstNonPHIOrDbg()); auto *BB = SI->getParent(); - // Partition the cases into two sets with different destinations. BasicBlock *DestA = HasDefault ? 
SI->getDefaultDest() : nullptr; BasicBlock *DestB = nullptr; @@ -5465,7 +5464,9 @@ bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI, Cmp = ConstantInt::getTrue(SI->getContext()); else Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch"); - BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest); + BranchInst *NewBI = + Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest, nullptr, nullptr, + SI->getMetadata(LLVMContext::MD_consistent)); // Update weight for the newly-created conditional branch. if (hasBranchWeightMD(*SI)) { @@ -6675,8 +6676,9 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, } else { Value *Cmp = Builder.CreateICmpULT( TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize)); - RangeCheckBranch = - Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest()); + RangeCheckBranch = Builder.CreateCondBr( + Cmp, LookupBB, SI->getDefaultDest(), nullptr, nullptr, + SI->getMetadata(LLVMContext::MD_consistent)); if (DTU) Updates.push_back({DominatorTree::Insert, BB, LookupBB}); } diff --git a/llvm/test/CodeGen/AArch64/cond-br-tuning.ll b/llvm/test/CodeGen/AArch64/cond-br-tuning.ll index dc00c41892ba8..08beaa492d689 100644 --- a/llvm/test/CodeGen/AArch64/cond-br-tuning.ll +++ b/llvm/test/CodeGen/AArch64/cond-br-tuning.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -debugify-and-strip-all-safe < %s -O3 -mtriple=aarch64-eabi -verify-machineinstrs | FileCheck %s +; RUN: llc -debugify-and-strip-all-safe < %s -O3 -mtriple=aarch64-eabi -mattr=+hbc -verify-machineinstrs | FileCheck %s -check-prefix=HBC target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64-linaro-linux-gnueabi" @@ -13,9 +14,16 @@ define void @test_add_cbz(i32 %a, i32 %b, ptr %ptr) { ; CHECK-NEXT: cset w8, eq ; CHECK-NEXT: str w8, [x2] ; CHECK-NEXT: ret +; +; HBC-LABEL: test_add_cbz: +; HBC: // %bb.0: // %common.ret +; HBC-NEXT: cmn w0, w1 +; 
HBC-NEXT: cset w8, eq +; HBC-NEXT: str w8, [x2] +; HBC-NEXT: ret %c = add nsw i32 %a, %b %d = icmp ne i32 %c, 0 - br i1 %d, label %L1, label %L2 + br i1 %d, label %L1, label %L2, !consistent !10 L1: store i32 0, ptr %ptr, align 4 ret void @@ -32,9 +40,18 @@ define void @test_add_cbz_multiple_use(i32 %a, i32 %b, ptr %ptr) { ; CHECK-NEXT: csel w8, w9, w8, ne ; CHECK-NEXT: str w8, [x2] ; CHECK-NEXT: ret +; +; HBC-LABEL: test_add_cbz_multiple_use: +; HBC: // %bb.0: // %common.ret +; HBC-NEXT: adds w8, w0, w1 +; HBC-NEXT: mov w9, #10 // =0xa +; HBC-NEXT: csel w8, w9, w8, ne +; HBC-NEXT: str w8, [x2] +; HBC-NEXT: ret %c = add nsw i32 %a, %b %d = icmp ne i32 %c, 0 - br i1 %d, label %L1, label %L2 + br i1 %d, label %L1, label %L2, !consistent !10 + L1: store i32 10, ptr %ptr, align 4 ret void @@ -50,9 +67,16 @@ define void @test_add_cbz_64(i64 %a, i64 %b, ptr %ptr) { ; CHECK-NEXT: cset w8, eq ; CHECK-NEXT: str x8, [x2] ; CHECK-NEXT: ret +; +; HBC-LABEL: test_add_cbz_64: +; HBC: // %bb.0: // %common.ret +; HBC-NEXT: cmn x0, x1 +; HBC-NEXT: cset w8, eq +; HBC-NEXT: str x8, [x2] +; HBC-NEXT: ret %c = add nsw i64 %a, %b %d = icmp ne i64 %c, 0 - br i1 %d, label %L1, label %L2 + br i1 %d, label %L1, label %L2, !consistent !10 L1: store i64 0, ptr %ptr, align 4 ret void @@ -68,9 +92,16 @@ define void @test_and_cbz(i32 %a, ptr %ptr) { ; CHECK-NEXT: cset w8, eq ; CHECK-NEXT: str w8, [x1] ; CHECK-NEXT: ret +; +; HBC-LABEL: test_and_cbz: +; HBC: // %bb.0: // %common.ret +; HBC-NEXT: tst w0, #0x6 +; HBC-NEXT: cset w8, eq +; HBC-NEXT: str w8, [x1] +; HBC-NEXT: ret %c = and i32 %a, 6 %d = icmp ne i32 %c, 0 - br i1 %d, label %L1, label %L2 + br i1 %d, label %L1, label %L2, !consistent !10 L1: store i32 0, ptr %ptr, align 4 ret void @@ -86,9 +117,16 @@ define void @test_bic_cbnz(i32 %a, i32 %b, ptr %ptr) { ; CHECK-NEXT: cset w8, ne ; CHECK-NEXT: str w8, [x2] ; CHECK-NEXT: ret +; +; HBC-LABEL: test_bic_cbnz: +; HBC: // %bb.0: // %common.ret +; HBC-NEXT: bics wzr, w1, w0 +; HBC-NEXT: cset 
w8, ne +; HBC-NEXT: str w8, [x2] +; HBC-NEXT: ret %c = and i32 %a, %b %d = icmp eq i32 %c, %b - br i1 %d, label %L1, label %L2 + br i1 %d, label %L1, label %L2, !consistent !10 L1: store i32 0, ptr %ptr, align 4 ret void @@ -106,10 +144,19 @@ define void @test_add_tbz(i32 %a, i32 %b, ptr %ptr) { ; CHECK-NEXT: str w8, [x2] ; CHECK-NEXT: .LBB5_2: // %L2 ; CHECK-NEXT: ret +; +; HBC-LABEL: test_add_tbz: +; HBC: // %bb.0: // %entry +; HBC-NEXT: adds w8, w0, w1 +; HBC-NEXT: bc.pl .LBB5_2 +; HBC-NEXT: // %bb.1: // %L1 +; HBC-NEXT: str w8, [x2] +; HBC-NEXT: .LBB5_2: // %L2 +; HBC-NEXT: ret entry: %add = add nsw i32 %a, %b %cmp36 = icmp sge i32 %add, 0 - br i1 %cmp36, label %L2, label %L1 + br i1 %cmp36, label %L2, label %L1, !consistent !10 L1: store i32 %add, ptr %ptr, align 8 br label %L2 @@ -126,10 +173,19 @@ define void @test_subs_tbz(i32 %a, i32 %b, ptr %ptr) { ; CHECK-NEXT: str w8, [x2] ; CHECK-NEXT: .LBB6_2: // %L2 ; CHECK-NEXT: ret +; +; HBC-LABEL: test_subs_tbz: +; HBC: // %bb.0: // %entry +; HBC-NEXT: subs w8, w0, w1 +; HBC-NEXT: bc.pl .LBB6_2 +; HBC-NEXT: // %bb.1: // %L1 +; HBC-NEXT: str w8, [x2] +; HBC-NEXT: .LBB6_2: // %L2 +; HBC-NEXT: ret entry: %sub = sub nsw i32 %a, %b %cmp36 = icmp sge i32 %sub, 0 - br i1 %cmp36, label %L2, label %L1 + br i1 %cmp36, label %L2, label %L1, !consistent !10 L1: store i32 %sub, ptr %ptr, align 8 br label %L2 @@ -146,10 +202,19 @@ define void @test_add_tbnz(i32 %a, i32 %b, ptr %ptr) { ; CHECK-NEXT: str w8, [x2] ; CHECK-NEXT: .LBB7_2: // %L2 ; CHECK-NEXT: ret +; +; HBC-LABEL: test_add_tbnz: +; HBC: // %bb.0: // %entry +; HBC-NEXT: adds w8, w0, w1 +; HBC-NEXT: bc.mi .LBB7_2 +; HBC-NEXT: // %bb.1: // %L1 +; HBC-NEXT: str w8, [x2] +; HBC-NEXT: .LBB7_2: // %L2 +; HBC-NEXT: ret entry: %add = add nsw i32 %a, %b %cmp36 = icmp slt i32 %add, 0 - br i1 %cmp36, label %L2, label %L1 + br i1 %cmp36, label %L2, label %L1, !consistent !10 L1: store i32 %add, ptr %ptr, align 8 br label %L2 @@ -166,10 +231,19 @@ define void @test_subs_tbnz(i32 
%a, i32 %b, ptr %ptr) { ; CHECK-NEXT: str w8, [x2] ; CHECK-NEXT: .LBB8_2: // %L2 ; CHECK-NEXT: ret +; +; HBC-LABEL: test_subs_tbnz: +; HBC: // %bb.0: // %entry +; HBC-NEXT: subs w8, w0, w1 +; HBC-NEXT: bc.mi .LBB8_2 +; HBC-NEXT: // %bb.1: // %L1 +; HBC-NEXT: str w8, [x2] +; HBC-NEXT: .LBB8_2: // %L2 +; HBC-NEXT: ret entry: %sub = sub nsw i32 %a, %b %cmp36 = icmp slt i32 %sub, 0 - br i1 %cmp36, label %L2, label %L1 + br i1 %cmp36, label %L2, label %L1, !consistent !10 L1: store i32 %sub, ptr %ptr, align 8 br label %L2 @@ -202,11 +276,32 @@ define void @test_call_clobber(i32 %unused, i32 %a) uwtable { ; CHECK-NEXT: .LBB9_2: // %if.then ; CHECK-NEXT: .cfi_restore_state ; CHECK-NEXT: bl foo +; +; HBC-LABEL: test_call_clobber: +; HBC: // %bb.0: // %entry +; HBC-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill +; HBC-NEXT: .cfi_def_cfa_offset 16 +; HBC-NEXT: .cfi_offset w19, -8 +; HBC-NEXT: .cfi_offset w30, -16 +; HBC-NEXT: .cfi_remember_state +; HBC-NEXT: and w19, w1, #0x6 +; HBC-NEXT: mov w0, w19 +; HBC-NEXT: bl bar +; HBC-NEXT: cbnz w19, .LBB9_2 +; HBC-NEXT: // %bb.1: // %if.end +; HBC-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; HBC-NEXT: .cfi_def_cfa_offset 0 +; HBC-NEXT: .cfi_restore w19 +; HBC-NEXT: .cfi_restore w30 +; HBC-NEXT: ret +; HBC-NEXT: .LBB9_2: // %if.then +; HBC-NEXT: .cfi_restore_state +; HBC-NEXT: bl foo entry: %c = and i32 %a, 6 call void @bar(i32 %c) %tobool = icmp eq i32 %c, 0 - br i1 %tobool, label %if.end, label %if.then + br i1 %tobool, label %if.end, label %if.then, !consistent !10 if.then: tail call void @foo() @@ -215,3 +310,5 @@ if.then: if.end: ret void } + +!10 = !{i1 true} diff --git a/llvm/test/CodeGen/AArch64/tbl-loops.ll b/llvm/test/CodeGen/AArch64/tbl-loops.ll index b63d540fb8e02..05d10dfb1a322 100644 --- a/llvm/test/CodeGen/AArch64/tbl-loops.ll +++ b/llvm/test/CodeGen/AArch64/tbl-loops.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64 < %s 
| FileCheck %s +; RUN: llc -mtriple=aarch64 -mattr=+hbc -O3 < %s | FileCheck %s -check-prefix=HBC define void @loop1(ptr noalias nocapture noundef writeonly %dst, ptr nocapture noundef readonly %data, i32 noundef %width) { ; CHECK-LABEL: loop1: @@ -68,16 +69,83 @@ define void @loop1(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n ; CHECK-NEXT: b.ne .LBB0_7 ; CHECK-NEXT: .LBB0_8: // %for.cond.cleanup ; CHECK-NEXT: ret +; +; HBC-LABEL: loop1: +; HBC: // %bb.0: // %entry +; HBC-NEXT: subs w8, w2, #1 +; HBC-NEXT: bc.lt .LBB0_8 +; HBC-NEXT: // %bb.1: // %for.body.preheader +; HBC-NEXT: cmp w8, #6 +; HBC-NEXT: bc.hi .LBB0_3 +; HBC-NEXT: // %bb.2: +; HBC-NEXT: mov w10, wzr +; HBC-NEXT: mov x8, x1 +; HBC-NEXT: mov x9, x0 +; HBC-NEXT: b .LBB0_6 +; HBC-NEXT: .LBB0_3: // %vector.ph +; HBC-NEXT: add x11, x8, #1 +; HBC-NEXT: mov w8, #1132396544 // =0x437f0000 +; HBC-NEXT: add x12, x0, #4 +; HBC-NEXT: and x10, x11, #0x1fffffff8 +; HBC-NEXT: dup v0.4s, w8 +; HBC-NEXT: add x13, x1, #16 +; HBC-NEXT: add x8, x1, x10, lsl #2 +; HBC-NEXT: add x9, x0, x10 +; HBC-NEXT: mov x14, x10 +; HBC-NEXT: .LBB0_4: // %vector.body +; HBC-NEXT: // =>This Inner Loop Header: Depth=1 +; HBC-NEXT: ldp q1, q2, [x13, #-16] +; HBC-NEXT: subs x14, x14, #8 +; HBC-NEXT: add x13, x13, #32 +; HBC-NEXT: fcmgt v3.4s, v1.4s, v0.4s +; HBC-NEXT: fcmgt v4.4s, v2.4s, v0.4s +; HBC-NEXT: fcmlt v5.4s, v1.4s, #0.0 +; HBC-NEXT: fcmlt v6.4s, v2.4s, #0.0 +; HBC-NEXT: bit v1.16b, v0.16b, v3.16b +; HBC-NEXT: bit v2.16b, v0.16b, v4.16b +; HBC-NEXT: bic v1.16b, v1.16b, v5.16b +; HBC-NEXT: bic v2.16b, v2.16b, v6.16b +; HBC-NEXT: fcvtzs v1.4s, v1.4s +; HBC-NEXT: fcvtzs v2.4s, v2.4s +; HBC-NEXT: xtn v1.4h, v1.4s +; HBC-NEXT: xtn v2.4h, v2.4s +; HBC-NEXT: xtn v1.8b, v1.8h +; HBC-NEXT: xtn v2.8b, v2.8h +; HBC-NEXT: mov v1.s[1], v2.s[0] +; HBC-NEXT: stur d1, [x12, #-4] +; HBC-NEXT: add x12, x12, #8 +; HBC-NEXT: bc.ne .LBB0_4 +; HBC-NEXT: // %bb.5: // %middle.block +; HBC-NEXT: cmp x11, x10 +; HBC-NEXT: bc.eq .LBB0_8 +; 
HBC-NEXT: .LBB0_6: // %for.body.preheader1 +; HBC-NEXT: movi d0, #0000000000000000 +; HBC-NEXT: sub w10, w2, w10 +; HBC-NEXT: mov w11, #1132396544 // =0x437f0000 +; HBC-NEXT: .LBB0_7: // %for.body +; HBC-NEXT: // =>This Inner Loop Header: Depth=1 +; HBC-NEXT: fmov s2, w11 +; HBC-NEXT: ldr s1, [x8], #4 +; HBC-NEXT: fcmp s1, s2 +; HBC-NEXT: fcsel s2, s2, s1, gt +; HBC-NEXT: fcmp s1, #0.0 +; HBC-NEXT: fcsel s1, s0, s2, mi +; HBC-NEXT: subs w10, w10, #1 +; HBC-NEXT: fcvtzs w12, s1 +; HBC-NEXT: strb w12, [x9], #1 +; HBC-NEXT: bc.ne .LBB0_7 +; HBC-NEXT: .LBB0_8: // %for.cond.cleanup +; HBC-NEXT: ret entry: %cmp9 = icmp sgt i32 %width, 0 - br i1 %cmp9, label %for.body.preheader, label %for.cond.cleanup + br i1 %cmp9, label %for.body.preheader, label %for.cond.cleanup, !consistent !10 for.body.preheader: ; preds = %entry %0 = add i32 %width, -1 %1 = zext i32 %0 to i64 %2 = add nuw nsw i64 %1, 1 %min.iters.check = icmp ult i32 %0, 7 - br i1 %min.iters.check, label %for.body.preheader21, label %vector.ph + br i1 %min.iters.check, label %for.body.preheader21, label %vector.ph, !consistent !10 vector.ph: ; preds = %for.body.preheader %n.vec = and i64 %2, 8589934584 @@ -108,11 +176,11 @@ vector.body: ; preds = %vector.body, %vecto store <4 x i8> %13, ptr %14, align 1 %index.next = add nuw i64 %index, 8 %15 = icmp eq i64 %index.next, %n.vec - br i1 %15, label %middle.block, label %vector.body + br i1 %15, label %middle.block, label %vector.body, !consistent !10 middle.block: ; preds = %vector.body %cmp.n = icmp eq i64 %2, %n.vec - br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader21 + br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader21, !consistent !10 for.body.preheader21: ; preds = %for.body.preheader, %middle.block %i.012.ph = phi i32 [ 0, %for.body.preheader ], [ %ind.end, %middle.block ] @@ -138,7 +206,7 @@ for.body: ; preds = %for.body.preheader2 %add.ptr2 = getelementptr inbounds i8, ptr %dst.addr.010, i64 1 %inc = add nuw nsw i32 %i.012, 1 
%exitcond.not = icmp eq i32 %inc, %width - br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !consistent !10 } define void @loop2(ptr noalias nocapture noundef writeonly %dst, ptr nocapture noundef readonly %data, i32 noundef %width) { @@ -219,16 +287,94 @@ define void @loop2(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n ; CHECK-NEXT: cmp x11, x10 ; CHECK-NEXT: b.ne .LBB1_5 ; CHECK-NEXT: b .LBB1_7 +; +; HBC-LABEL: loop2: +; HBC: // %bb.0: // %entry +; HBC-NEXT: subs w8, w2, #1 +; HBC-NEXT: bc.lt .LBB1_7 +; HBC-NEXT: // %bb.1: // %for.body.preheader +; HBC-NEXT: cmp w8, #2 +; HBC-NEXT: bc.ls .LBB1_4 +; HBC-NEXT: // %bb.2: // %vector.memcheck +; HBC-NEXT: ubfiz x9, x8, #1, #32 +; HBC-NEXT: add x9, x9, #2 +; HBC-NEXT: add x10, x1, x9, lsl #2 +; HBC-NEXT: cmp x10, x0 +; HBC-NEXT: bc.ls .LBB1_8 +; HBC-NEXT: // %bb.3: // %vector.memcheck +; HBC-NEXT: add x9, x0, x9 +; HBC-NEXT: cmp x9, x1 +; HBC-NEXT: b.ls .LBB1_8 +; HBC-NEXT: .LBB1_4: +; HBC-NEXT: mov w10, wzr +; HBC-NEXT: mov x8, x1 +; HBC-NEXT: mov x9, x0 +; HBC-NEXT: .LBB1_5: // %for.body.preheader1 +; HBC-NEXT: movi d0, #0000000000000000 +; HBC-NEXT: sub w10, w2, w10 +; HBC-NEXT: mov w11, #1132396544 // =0x437f0000 +; HBC-NEXT: .LBB1_6: // %for.body +; HBC-NEXT: // =>This Inner Loop Header: Depth=1 +; HBC-NEXT: ldp s1, s3, [x8], #8 +; HBC-NEXT: fmov s2, w11 +; HBC-NEXT: fcmp s1, s2 +; HBC-NEXT: fcsel s4, s2, s1, gt +; HBC-NEXT: fcmp s1, #0.0 +; HBC-NEXT: fcsel s1, s0, s4, mi +; HBC-NEXT: fcmp s3, s2 +; HBC-NEXT: fcsel s2, s2, s3, gt +; HBC-NEXT: fcmp s3, #0.0 +; HBC-NEXT: fcvtzs w12, s1 +; HBC-NEXT: fcsel s2, s0, s2, mi +; HBC-NEXT: subs w10, w10, #1 +; HBC-NEXT: strb w12, [x9] +; HBC-NEXT: fcvtzs w13, s2 +; HBC-NEXT: strb w13, [x9, #1] +; HBC-NEXT: add x9, x9, #2 +; HBC-NEXT: bc.ne .LBB1_6 +; HBC-NEXT: .LBB1_7: // %for.cond.cleanup +; HBC-NEXT: ret +; HBC-NEXT: .LBB1_8: // %vector.ph +; HBC-NEXT: add x11, x8, #1 +; 
HBC-NEXT: mov w8, #1132396544 // =0x437f0000 +; HBC-NEXT: and x10, x11, #0x1fffffffc +; HBC-NEXT: dup v0.4s, w8 +; HBC-NEXT: add x8, x1, x10, lsl #3 +; HBC-NEXT: add x9, x0, x10, lsl #1 +; HBC-NEXT: mov x12, x10 +; HBC-NEXT: .LBB1_9: // %vector.body +; HBC-NEXT: // =>This Inner Loop Header: Depth=1 +; HBC-NEXT: ld2 { v1.4s, v2.4s }, [x1], #32 +; HBC-NEXT: subs x12, x12, #4 +; HBC-NEXT: fcmgt v3.4s, v1.4s, v0.4s +; HBC-NEXT: fcmgt v4.4s, v2.4s, v0.4s +; HBC-NEXT: fcmlt v5.4s, v1.4s, #0.0 +; HBC-NEXT: bsl v3.16b, v0.16b, v1.16b +; HBC-NEXT: bsl v4.16b, v0.16b, v2.16b +; HBC-NEXT: fcmlt v1.4s, v2.4s, #0.0 +; HBC-NEXT: bic v2.16b, v3.16b, v5.16b +; HBC-NEXT: bic v1.16b, v4.16b, v1.16b +; HBC-NEXT: fcvtzs v2.4s, v2.4s +; HBC-NEXT: fcvtzs v1.4s, v1.4s +; HBC-NEXT: xtn v2.4h, v2.4s +; HBC-NEXT: xtn v1.4h, v1.4s +; HBC-NEXT: trn1 v1.8b, v2.8b, v1.8b +; HBC-NEXT: str d1, [x0], #8 +; HBC-NEXT: bc.ne .LBB1_9 +; HBC-NEXT: // %bb.10: // %middle.block +; HBC-NEXT: cmp x11, x10 +; HBC-NEXT: bc.ne .LBB1_5 +; HBC-NEXT: b .LBB1_7 entry: %cmp19 = icmp sgt i32 %width, 0 - br i1 %cmp19, label %for.body.preheader, label %for.cond.cleanup + br i1 %cmp19, label %for.body.preheader, label %for.cond.cleanup, !consistent !10 for.body.preheader: ; preds = %entry %0 = add i32 %width, -1 %1 = zext i32 %0 to i64 %2 = add nuw nsw i64 %1, 1 %min.iters.check = icmp ult i32 %0, 3 - br i1 %min.iters.check, label %for.body.preheader35, label %vector.memcheck + br i1 %min.iters.check, label %for.body.preheader35, label %vector.memcheck, !consistent !10 vector.memcheck: ; preds = %for.body.preheader %3 = add i32 %width, -1 @@ -240,7 +386,7 @@ vector.memcheck: ; preds = %for.body.preheader %bound0 = icmp ugt ptr %scevgep24, %dst %bound1 = icmp ugt ptr %scevgep, %data %found.conflict = and i1 %bound0, %bound1 - br i1 %found.conflict, label %for.body.preheader35, label %vector.ph + br i1 %found.conflict, label %for.body.preheader35, label %vector.ph, !consistent !10 vector.ph: ; preds = %vector.memcheck 
%n.vec = and i64 %2, 8589934588 @@ -274,11 +420,11 @@ vector.body: ; preds = %vector.body, %vecto store <8 x i8> %interleaved.vec, ptr %21, align 1 %index.next = add nuw i64 %index, 4 %22 = icmp eq i64 %index.next, %n.vec - br i1 %22, label %middle.block, label %vector.body + br i1 %22, label %middle.block, label %vector.body, !consistent !10 middle.block: ; preds = %vector.body %cmp.n = icmp eq i64 %2, %n.vec - br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader35 + br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader35, !consistent !10 for.body.preheader35: ; preds = %vector.memcheck, %for.body.preheader, %middle.block %i.022.ph = phi i32 [ 0, %vector.memcheck ], [ 0, %for.body.preheader ], [ %ind.end, %middle.block ] @@ -313,7 +459,7 @@ for.body: ; preds = %for.body.preheader3 %add.ptr6 = getelementptr inbounds i8, ptr %dst.addr.020, i64 2 %inc = add nuw nsw i32 %i.022, 1 %exitcond.not = icmp eq i32 %inc, %width - br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !consistent !10 } define void @loop3(ptr noalias nocapture noundef writeonly %dst, ptr nocapture noundef readonly %data, i32 noundef %width) { @@ -411,16 +557,111 @@ define void @loop3(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n ; CHECK-NEXT: b.ne .LBB2_8 ; CHECK-NEXT: .LBB2_9: // %for.cond.cleanup ; CHECK-NEXT: ret +; +; HBC-LABEL: loop3: +; HBC: // %bb.0: // %entry +; HBC-NEXT: subs w8, w2, #1 +; HBC-NEXT: bc.lt .LBB2_9 +; HBC-NEXT: // %bb.1: // %for.body.preheader +; HBC-NEXT: cmp w8, #2 +; HBC-NEXT: bc.ls .LBB2_6 +; HBC-NEXT: // %bb.2: // %vector.memcheck +; HBC-NEXT: add x9, x8, w8, uxtw #1 +; HBC-NEXT: add x9, x9, #3 +; HBC-NEXT: add x10, x1, x9, lsl #2 +; HBC-NEXT: add x9, x0, x9 +; HBC-NEXT: cmp x10, x0 +; HBC-NEXT: ccmp x9, x1, #0, hi +; HBC-NEXT: b.hi .LBB2_6 +; HBC-NEXT: // %bb.3: // %vector.ph +; HBC-NEXT: add x11, x8, #1 +; HBC-NEXT: mov w8, #1132396544 // =0x437f0000 +; 
HBC-NEXT: adrp x12, .LCPI2_0 +; HBC-NEXT: and x10, x11, #0x1fffffffc +; HBC-NEXT: dup v0.4s, w8 +; HBC-NEXT: ldr q1, [x12, :lo12:.LCPI2_0] +; HBC-NEXT: add x9, x10, x10, lsl #1 +; HBC-NEXT: mov x12, x10 +; HBC-NEXT: add x8, x1, x9, lsl #2 +; HBC-NEXT: add x9, x0, x9 +; HBC-NEXT: .LBB2_4: // %vector.body +; HBC-NEXT: // =>This Inner Loop Header: Depth=1 +; HBC-NEXT: ld3 { v2.4s, v3.4s, v4.4s }, [x1], #48 +; HBC-NEXT: add x13, x0, #8 +; HBC-NEXT: subs x12, x12, #4 +; HBC-NEXT: fcmgt v5.4s, v2.4s, v0.4s +; HBC-NEXT: fcmgt v6.4s, v3.4s, v0.4s +; HBC-NEXT: fcmgt v7.4s, v4.4s, v0.4s +; HBC-NEXT: fcmlt v16.4s, v2.4s, #0.0 +; HBC-NEXT: fcmlt v17.4s, v3.4s, #0.0 +; HBC-NEXT: bsl v5.16b, v0.16b, v2.16b +; HBC-NEXT: bsl v6.16b, v0.16b, v3.16b +; HBC-NEXT: bsl v7.16b, v0.16b, v4.16b +; HBC-NEXT: fcmlt v2.4s, v4.4s, #0.0 +; HBC-NEXT: bic v3.16b, v5.16b, v16.16b +; HBC-NEXT: bic v4.16b, v6.16b, v17.16b +; HBC-NEXT: bic v2.16b, v7.16b, v2.16b +; HBC-NEXT: fcvtzs v3.4s, v3.4s +; HBC-NEXT: fcvtzs v4.4s, v4.4s +; HBC-NEXT: fcvtzs v2.4s, v2.4s +; HBC-NEXT: xtn v5.4h, v3.4s +; HBC-NEXT: xtn v6.4h, v4.4s +; HBC-NEXT: xtn v7.4h, v2.4s +; HBC-NEXT: tbl v2.16b, { v5.16b, v6.16b, v7.16b }, v1.16b +; HBC-NEXT: st1 { v2.s }[2], [x13] +; HBC-NEXT: str d2, [x0], #12 +; HBC-NEXT: bc.ne .LBB2_4 +; HBC-NEXT: // %bb.5: // %middle.block +; HBC-NEXT: cmp x11, x10 +; HBC-NEXT: bc.ne .LBB2_7 +; HBC-NEXT: b .LBB2_9 +; HBC-NEXT: .LBB2_6: +; HBC-NEXT: mov w10, wzr +; HBC-NEXT: mov x8, x1 +; HBC-NEXT: mov x9, x0 +; HBC-NEXT: .LBB2_7: // %for.body.preheader1 +; HBC-NEXT: movi d0, #0000000000000000 +; HBC-NEXT: sub w10, w2, w10 +; HBC-NEXT: mov w11, #1132396544 // =0x437f0000 +; HBC-NEXT: .LBB2_8: // %for.body +; HBC-NEXT: // =>This Inner Loop Header: Depth=1 +; HBC-NEXT: ldp s1, s3, [x8] +; HBC-NEXT: fmov s2, w11 +; HBC-NEXT: fcmp s1, s2 +; HBC-NEXT: fcsel s4, s2, s1, gt +; HBC-NEXT: fcmp s1, #0.0 +; HBC-NEXT: fcsel s1, s0, s4, mi +; HBC-NEXT: fcmp s3, s2 +; HBC-NEXT: fcsel s4, s2, s3, gt +; HBC-NEXT: fcmp 
s3, #0.0 +; HBC-NEXT: ldr s3, [x8, #8] +; HBC-NEXT: fcvtzs w12, s1 +; HBC-NEXT: add x8, x8, #12 +; HBC-NEXT: fcsel s4, s0, s4, mi +; HBC-NEXT: fcmp s3, s2 +; HBC-NEXT: strb w12, [x9] +; HBC-NEXT: fcsel s2, s2, s3, gt +; HBC-NEXT: fcmp s3, #0.0 +; HBC-NEXT: fcvtzs w13, s4 +; HBC-NEXT: fcsel s2, s0, s2, mi +; HBC-NEXT: subs w10, w10, #1 +; HBC-NEXT: strb w13, [x9, #1] +; HBC-NEXT: fcvtzs w14, s2 +; HBC-NEXT: strb w14, [x9, #2] +; HBC-NEXT: add x9, x9, #3 +; HBC-NEXT: bc.ne .LBB2_8 +; HBC-NEXT: .LBB2_9: // %for.cond.cleanup +; HBC-NEXT: ret entry: %cmp29 = icmp sgt i32 %width, 0 - br i1 %cmp29, label %for.body.preheader, label %for.cond.cleanup + br i1 %cmp29, label %for.body.preheader, label %for.cond.cleanup, !consistent !10 for.body.preheader: ; preds = %entry %0 = add i32 %width, -1 %1 = zext i32 %0 to i64 %2 = add nuw nsw i64 %1, 1 %min.iters.check = icmp ult i32 %0, 3 - br i1 %min.iters.check, label %for.body.preheader46, label %vector.memcheck + br i1 %min.iters.check, label %for.body.preheader46, label %vector.memcheck, !consistent !10 vector.memcheck: ; preds = %for.body.preheader %3 = add i32 %width, -1 @@ -432,7 +673,7 @@ vector.memcheck: ; preds = %for.body.preheader %bound0 = icmp ugt ptr %scevgep34, %dst %bound1 = icmp ugt ptr %scevgep, %data %found.conflict = and i1 %bound0, %bound1 - br i1 %found.conflict, label %for.body.preheader46, label %vector.ph + br i1 %found.conflict, label %for.body.preheader46, label %vector.ph, !consistent !10 vector.ph: ; preds = %vector.memcheck %n.vec = and i64 %2, 8589934588 @@ -474,11 +715,11 @@ vector.body: ; preds = %vector.body, %vecto store <12 x i8> %interleaved.vec, ptr %26, align 1 %index.next = add nuw i64 %index, 4 %29 = icmp eq i64 %index.next, %n.vec - br i1 %29, label %middle.block, label %vector.body + br i1 %29, label %middle.block, label %vector.body, !consistent !10 middle.block: ; preds = %vector.body %cmp.n = icmp eq i64 %2, %n.vec - br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader46 + 
br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader46, !consistent !10 for.body.preheader46: ; preds = %vector.memcheck, %for.body.preheader, %middle.block %i.032.ph = phi i32 [ 0, %vector.memcheck ], [ 0, %for.body.preheader ], [ %ind.end, %middle.block ] @@ -522,7 +763,7 @@ for.body: ; preds = %for.body.preheader4 %add.ptr10 = getelementptr inbounds i8, ptr %dst.addr.030, i64 3 %inc = add nuw nsw i32 %i.032, 1 %exitcond.not = icmp eq i32 %inc, %width - br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !consistent !10 } define void @loop4(ptr noalias nocapture noundef writeonly %dst, ptr nocapture noundef readonly %data, i32 noundef %width) { @@ -631,16 +872,122 @@ define void @loop4(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n ; CHECK-NEXT: cmp x11, x10 ; CHECK-NEXT: b.ne .LBB3_5 ; CHECK-NEXT: b .LBB3_7 +; +; HBC-LABEL: loop4: +; HBC: // %bb.0: // %entry +; HBC-NEXT: subs w8, w2, #1 +; HBC-NEXT: bc.lt .LBB3_7 +; HBC-NEXT: // %bb.1: // %for.body.preheader +; HBC-NEXT: cmp w8, #2 +; HBC-NEXT: bc.ls .LBB3_4 +; HBC-NEXT: // %bb.2: // %vector.memcheck +; HBC-NEXT: ubfiz x9, x8, #2, #32 +; HBC-NEXT: add x9, x9, #4 +; HBC-NEXT: add x10, x1, x9, lsl #2 +; HBC-NEXT: cmp x10, x0 +; HBC-NEXT: bc.ls .LBB3_8 +; HBC-NEXT: // %bb.3: // %vector.memcheck +; HBC-NEXT: add x9, x0, x9 +; HBC-NEXT: cmp x9, x1 +; HBC-NEXT: b.ls .LBB3_8 +; HBC-NEXT: .LBB3_4: +; HBC-NEXT: mov w10, wzr +; HBC-NEXT: mov x8, x1 +; HBC-NEXT: mov x9, x0 +; HBC-NEXT: .LBB3_5: // %for.body.preheader1 +; HBC-NEXT: movi d0, #0000000000000000 +; HBC-NEXT: sub w10, w2, w10 +; HBC-NEXT: mov w11, #1132396544 // =0x437f0000 +; HBC-NEXT: .LBB3_6: // %for.body +; HBC-NEXT: // =>This Inner Loop Header: Depth=1 +; HBC-NEXT: ldp s1, s3, [x8] +; HBC-NEXT: fmov s2, w11 +; HBC-NEXT: fcmp s1, s2 +; HBC-NEXT: fcsel s4, s2, s1, gt +; HBC-NEXT: fcmp s1, #0.0 +; HBC-NEXT: fcsel s1, s0, s4, mi +; HBC-NEXT: fcmp s3, s2 +; 
HBC-NEXT: fcsel s4, s2, s3, gt +; HBC-NEXT: fcmp s3, #0.0 +; HBC-NEXT: ldp s3, s5, [x8, #8] +; HBC-NEXT: fcvtzs w12, s1 +; HBC-NEXT: add x8, x8, #16 +; HBC-NEXT: fcsel s4, s0, s4, mi +; HBC-NEXT: fcmp s3, s2 +; HBC-NEXT: strb w12, [x9] +; HBC-NEXT: fcsel s6, s2, s3, gt +; HBC-NEXT: fcmp s3, #0.0 +; HBC-NEXT: fcvtzs w13, s4 +; HBC-NEXT: fcsel s3, s0, s6, mi +; HBC-NEXT: fcmp s5, s2 +; HBC-NEXT: strb w13, [x9, #1] +; HBC-NEXT: fcsel s2, s2, s5, gt +; HBC-NEXT: fcmp s5, #0.0 +; HBC-NEXT: fcvtzs w14, s3 +; HBC-NEXT: fcsel s2, s0, s2, mi +; HBC-NEXT: subs w10, w10, #1 +; HBC-NEXT: strb w14, [x9, #2] +; HBC-NEXT: fcvtzs w15, s2 +; HBC-NEXT: strb w15, [x9, #3] +; HBC-NEXT: add x9, x9, #4 +; HBC-NEXT: bc.ne .LBB3_6 +; HBC-NEXT: .LBB3_7: // %for.cond.cleanup +; HBC-NEXT: ret +; HBC-NEXT: .LBB3_8: // %vector.ph +; HBC-NEXT: add x11, x8, #1 +; HBC-NEXT: mov w8, #1132396544 // =0x437f0000 +; HBC-NEXT: adrp x12, .LCPI3_0 +; HBC-NEXT: and x10, x11, #0x1fffffffc +; HBC-NEXT: dup v0.4s, w8 +; HBC-NEXT: ldr q1, [x12, :lo12:.LCPI3_0] +; HBC-NEXT: add x8, x1, x10, lsl #4 +; HBC-NEXT: add x9, x0, x10, lsl #2 +; HBC-NEXT: mov x12, x10 +; HBC-NEXT: .LBB3_9: // %vector.body +; HBC-NEXT: // =>This Inner Loop Header: Depth=1 +; HBC-NEXT: ld4 { v2.4s, v3.4s, v4.4s, v5.4s }, [x1], #64 +; HBC-NEXT: subs x12, x12, #4 +; HBC-NEXT: fcmgt v6.4s, v2.4s, v0.4s +; HBC-NEXT: fcmgt v7.4s, v3.4s, v0.4s +; HBC-NEXT: fcmgt v16.4s, v4.4s, v0.4s +; HBC-NEXT: fcmgt v17.4s, v5.4s, v0.4s +; HBC-NEXT: fcmlt v18.4s, v2.4s, #0.0 +; HBC-NEXT: fcmlt v19.4s, v3.4s, #0.0 +; HBC-NEXT: fcmlt v20.4s, v4.4s, #0.0 +; HBC-NEXT: bsl v6.16b, v0.16b, v2.16b +; HBC-NEXT: bsl v7.16b, v0.16b, v3.16b +; HBC-NEXT: bsl v16.16b, v0.16b, v4.16b +; HBC-NEXT: bsl v17.16b, v0.16b, v5.16b +; HBC-NEXT: fcmlt v2.4s, v5.4s, #0.0 +; HBC-NEXT: bic v3.16b, v6.16b, v18.16b +; HBC-NEXT: bic v4.16b, v7.16b, v19.16b +; HBC-NEXT: bic v5.16b, v16.16b, v20.16b +; HBC-NEXT: bic v2.16b, v17.16b, v2.16b +; HBC-NEXT: fcvtzs v3.4s, v3.4s +; HBC-NEXT: 
fcvtzs v4.4s, v4.4s +; HBC-NEXT: fcvtzs v5.4s, v5.4s +; HBC-NEXT: fcvtzs v2.4s, v2.4s +; HBC-NEXT: xtn v16.4h, v3.4s +; HBC-NEXT: xtn v17.4h, v4.4s +; HBC-NEXT: xtn v18.4h, v5.4s +; HBC-NEXT: xtn v19.4h, v2.4s +; HBC-NEXT: tbl v2.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b +; HBC-NEXT: str q2, [x0], #16 +; HBC-NEXT: bc.ne .LBB3_9 +; HBC-NEXT: // %bb.10: // %middle.block +; HBC-NEXT: cmp x11, x10 +; HBC-NEXT: bc.ne .LBB3_5 +; HBC-NEXT: b .LBB3_7 entry: %cmp39 = icmp sgt i32 %width, 0 - br i1 %cmp39, label %for.body.preheader, label %for.cond.cleanup + br i1 %cmp39, label %for.body.preheader, label %for.cond.cleanup, !consistent !10 for.body.preheader: ; preds = %entry %0 = add i32 %width, -1 %1 = zext i32 %0 to i64 %2 = add nuw nsw i64 %1, 1 %min.iters.check = icmp ult i32 %0, 3 - br i1 %min.iters.check, label %for.body.preheader57, label %vector.memcheck + br i1 %min.iters.check, label %for.body.preheader57, label %vector.memcheck, !consistent !10 vector.memcheck: ; preds = %for.body.preheader %3 = add i32 %width, -1 @@ -652,7 +999,7 @@ vector.memcheck: ; preds = %for.body.preheader %bound0 = icmp ugt ptr %scevgep44, %dst %bound1 = icmp ugt ptr %scevgep, %data %found.conflict = and i1 %bound0, %bound1 - br i1 %found.conflict, label %for.body.preheader57, label %vector.ph + br i1 %found.conflict, label %for.body.preheader57, label %vector.ph, !consistent !10 vector.ph: ; preds = %vector.memcheck %n.vec = and i64 %2, 8589934588 @@ -700,11 +1047,11 @@ vector.body: ; preds = %vector.body, %vecto store <16 x i8> %interleaved.vec, ptr %31, align 1 %index.next = add nuw i64 %index, 4 %34 = icmp eq i64 %index.next, %n.vec - br i1 %34, label %middle.block, label %vector.body + br i1 %34, label %middle.block, label %vector.body, !consistent !10 middle.block: ; preds = %vector.body %cmp.n = icmp eq i64 %2, %n.vec - br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader57 + br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader57, !consistent !10 
for.body.preheader57: ; preds = %vector.memcheck, %for.body.preheader, %middle.block %i.042.ph = phi i32 [ 0, %vector.memcheck ], [ 0, %for.body.preheader ], [ %ind.end, %middle.block ] @@ -757,5 +1104,7 @@ for.body: ; preds = %for.body.preheader5 %add.ptr14 = getelementptr inbounds i8, ptr %dst.addr.040, i64 4 %inc = add nuw nsw i32 %i.042, 1 %exitcond.not = icmp eq i32 %inc, %width - br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !consistent !10 } + +!10 = !{i1 true}