From 375575418f0ebbed871711fb8b47e984f59d339f Mon Sep 17 00:00:00 2001 From: Pavel Iliin Date: Tue, 11 Jul 2023 10:07:02 +0100 Subject: [PATCH] [RFC] Introducing `__builtin_consistent` to generate AArch64 BC.cond instructions The patch adds a new `__builtin_consistent` builtin which, when used with control flow conditions, provides a hint to the compiler that a branch or switch is very unlikely to change direction. The hint is used to generate efficient conditional branch instructions on targets which support them. Currently it is `BC.cond` on AArch64 when FEAT_HBC is enabled. --- .../Target/AArch64/AArch64MCPlusBuilder.cpp | 5 +- clang/docs/LanguageExtensions.rst | 29 ++ clang/include/clang/Basic/Builtins.def | 5 +- clang/lib/Analysis/CalledOnceCheck.cpp | 1 + clang/lib/CodeGen/CGBuiltin.cpp | 10 +- clang/lib/CodeGen/CGStmt.cpp | 17 +- clang/lib/CodeGen/CodeGenFunction.cpp | 18 +- .../Checkers/BuiltinFunctionChecker.cpp | 3 +- clang/test/CodeGen/builtin-consistent.c | 37 ++ llvm/include/llvm/CodeGen/MachineInstr.h | 1 + llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 8 +- llvm/include/llvm/CodeGen/TargetInstrInfo.h | 7 +- llvm/include/llvm/IR/FixedMetadataKinds.def | 1 + llvm/include/llvm/IR/IRBuilder.h | 26 +- llvm/include/llvm/IR/MDBuilder.h | 3 + llvm/lib/CodeGen/BranchFolding.cpp | 45 +- llvm/lib/CodeGen/MIRPrinter.cpp | 2 + llvm/lib/CodeGen/MachineBasicBlock.cpp | 25 +- llvm/lib/CodeGen/MachineInstr.cpp | 5 + llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 34 +- .../lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 3 + llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 2 +- .../SelectionDAG/SelectionDAGBuilder.cpp | 57 ++- .../SelectionDAG/SelectionDAGBuilder.h | 12 +- .../SelectionDAG/SelectionDAGDumper.cpp | 3 + llvm/lib/IR/IRBuilder.cpp | 3 +- llvm/lib/IR/MDBuilder.cpp | 5 + .../Target/AArch64/AArch64CondBrTuning.cpp | 6 +- .../AArch64/AArch64ConditionOptimizer.cpp | 2 +- llvm/lib/Target/AArch64/AArch64FastISel.cpp | 41 +- .../Target/AArch64/AArch64ISelLowering.cpp | 50 ++- 
llvm/lib/Target/AArch64/AArch64ISelLowering.h | 1 + .../lib/Target/AArch64/AArch64InstrFormats.td | 4 +- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 45 +- llvm/lib/Target/AArch64/AArch64InstrInfo.h | 14 +- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 6 +- .../lib/Target/AArch64/AArch64MacroFusion.cpp | 3 +- .../AArch64RedundantCopyElimination.cpp | 2 +- .../GISel/AArch64InstructionSelector.cpp | 27 +- llvm/lib/Target/AMDGPU/R600InstrInfo.cpp | 8 +- llvm/lib/Target/AMDGPU/R600InstrInfo.h | 8 +- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 8 +- llvm/lib/Target/AMDGPU/SIInstrInfo.h | 8 +- llvm/lib/Target/ARC/ARCInstrInfo.cpp | 7 +- llvm/lib/Target/ARC/ARCInstrInfo.h | 8 +- llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 7 +- llvm/lib/Target/ARM/ARMBaseInstrInfo.h | 8 +- llvm/lib/Target/AVR/AVRInstrInfo.cpp | 7 +- llvm/lib/Target/AVR/AVRInstrInfo.h | 8 +- llvm/lib/Target/BPF/BPFInstrInfo.cpp | 8 +- llvm/lib/Target/BPF/BPFInstrInfo.h | 9 +- llvm/lib/Target/CSKY/CSKYInstrInfo.cpp | 13 +- llvm/lib/Target/CSKY/CSKYInstrInfo.h | 8 +- llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp | 9 +- llvm/lib/Target/Hexagon/HexagonInstrInfo.h | 8 +- llvm/lib/Target/Lanai/LanaiInstrInfo.cpp | 8 +- llvm/lib/Target/Lanai/LanaiInstrInfo.h | 10 +- .../Target/LoongArch/LoongArchInstrInfo.cpp | 12 +- .../lib/Target/LoongArch/LoongArchInstrInfo.h | 8 +- llvm/lib/Target/M68k/M68kInstrInfo.cpp | 13 +- llvm/lib/Target/M68k/M68kInstrInfo.h | 8 +- llvm/lib/Target/MSP430/MSP430InstrInfo.cpp | 7 +- llvm/lib/Target/MSP430/MSP430InstrInfo.h | 8 +- llvm/lib/Target/Mips/MipsInstrInfo.cpp | 8 +- llvm/lib/Target/Mips/MipsInstrInfo.h | 8 +- llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp | 8 +- llvm/lib/Target/NVPTX/NVPTXInstrInfo.h | 8 +- llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 8 +- llvm/lib/Target/PowerPC/PPCInstrInfo.h | 8 +- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 13 +- llvm/lib/Target/RISCV/RISCVInstrInfo.h | 8 +- llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp | 13 +- llvm/lib/Target/SPIRV/SPIRVInstrInfo.h | 8 +- 
llvm/lib/Target/Sparc/SparcInstrInfo.cpp | 8 +- llvm/lib/Target/Sparc/SparcInstrInfo.h | 8 +- llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp | 7 +- llvm/lib/Target/SystemZ/SystemZInstrInfo.h | 8 +- llvm/lib/Target/VE/VEInstrInfo.cpp | 7 +- llvm/lib/Target/VE/VEInstrInfo.h | 8 +- .../WebAssembly/WebAssemblyInstrInfo.cpp | 12 +- .../Target/WebAssembly/WebAssemblyInstrInfo.h | 8 +- llvm/lib/Target/X86/X86InstrInfo.cpp | 8 +- llvm/lib/Target/X86/X86InstrInfo.h | 8 +- llvm/lib/Target/XCore/XCoreInstrInfo.cpp | 8 +- llvm/lib/Target/XCore/XCoreInstrInfo.h | 8 +- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 10 +- llvm/test/CodeGen/AArch64/cond-br-tuning.ll | 117 +++++- llvm/test/CodeGen/AArch64/tbl-loops.ll | 395 +++++++++++++++++- 88 files changed, 1100 insertions(+), 384 deletions(-) create mode 100644 clang/test/CodeGen/builtin-consistent.c diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp index b852b9fbc9c52..725e0e2adb821 100644 --- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp @@ -962,7 +962,8 @@ class AArch64MCPlusBuilder : public MCPlusBuilder { if (isTB(Inst) || isCB(Inst)) { Inst.setOpcode(getInvertedBranchOpcode(Inst.getOpcode())); assert(Inst.getOpcode() != 0 && "Invalid branch instruction"); - } else if (Inst.getOpcode() == AArch64::Bcc) { + } else if (Inst.getOpcode() == AArch64::Bcc || + Inst.getOpcode() == AArch64::BCcc) { Inst.getOperand(0).setImm(AArch64CC::getInvertedCondCode( static_cast(Inst.getOperand(0).getImm()))); assert(Inst.getOperand(0).getImm() != AArch64CC::AL && @@ -991,6 +992,8 @@ class AArch64MCPlusBuilder : public MCPlusBuilder { case AArch64::B: return 28; case AArch64::BL: return 28; case AArch64::Bcc: return 21; + case AArch64::BCcc: + return 21; } } diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 294210c6ac140..8f68f144691cc 100644 --- a/clang/docs/LanguageExtensions.rst +++ 
b/clang/docs/LanguageExtensions.rst @@ -3102,6 +3102,35 @@ flow conditions such as in ``if`` and ``switch`` statements. Query for this feature with ``__has_builtin(__builtin_unpredictable)``. +``__builtin_consistent`` +------------------------ + +``__builtin_consistent`` is used to indicate that the value of an expression is +very likely to be consistent, e.g. a branch with the expression as its condition +will behave very consistently and is very unlikely to change direction. + +**Syntax**: + +.. code-block:: c++ + + __builtin_consistent(long long) + +**Example of use**: + +.. code-block:: c++ + + if (__builtin_consistent(x > 0)) { + foo(); + } + +**Description**: + +The ``__builtin_consistent()`` builtin used with control flow conditions will +provide information about consistent branch behavior, which can be used to +generate more efficient conditional branch instructions if the target supports +them (like AArch64 FEAT_HBC ``BC.cond``). + +Query for this feature with ``__has_builtin(__builtin_consistent)``. 
``__builtin_expect`` -------------------- diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def index ec39e926889b9..948964c90dc76 100644 --- a/clang/include/clang/Basic/Builtins.def +++ b/clang/include/clang/Basic/Builtins.def @@ -667,8 +667,9 @@ BUILTIN(__builtin___printf_chk, "iicC*R.", "Fp:1:") BUILTIN(__builtin___vfprintf_chk, "iP*RicC*Ra", "FP:2:") BUILTIN(__builtin___vprintf_chk, "iicC*Ra", "FP:1:") -BUILTIN(__builtin_unpredictable, "LiLi" , "nc") -BUILTIN(__builtin_expect, "LiLiLi" , "ncE") +BUILTIN(__builtin_unpredictable, "LiLi", "nc") +BUILTIN(__builtin_consistent, "LiLi", "nc") +BUILTIN(__builtin_expect, "LiLiLi", "ncE") BUILTIN(__builtin_expect_with_probability, "LiLiLid", "ncE") BUILTIN(__builtin_prefetch, "vvC*.", "nc") BUILTIN(__builtin_readcyclecounter, "ULLi", "n") diff --git a/clang/lib/Analysis/CalledOnceCheck.cpp b/clang/lib/Analysis/CalledOnceCheck.cpp index 5b4fc24b6f0e2..d1725f6151dba 100644 --- a/clang/lib/Analysis/CalledOnceCheck.cpp +++ b/clang/lib/Analysis/CalledOnceCheck.cpp @@ -365,6 +365,7 @@ class DeclRefFinder } case Builtin::BI__builtin_unpredictable: + case Builtin::BI__builtin_consistent: return Visit(CE->getArg(0)); default: diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 09309a3937fb6..63ccd42f9e313 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -3137,10 +3137,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, "cast"); return RValue::get(Result); } - case Builtin::BI__builtin_unpredictable: { - // Always return the argument of __builtin_unpredictable. LLVM does not - // handle this builtin. Metadata for this builtin should be added directly - // to instructions such as branches or switches that use it. + case Builtin::BI__builtin_unpredictable: + case Builtin::BI__builtin_consistent: { + // Always return the argument of __builtin_unpredictable and + // __builtin_consistent. 
LLVM does not handle these builtins. Metadata for + // these builtins should be added directly to instructions such as branches + // or switches that use it. return RValue::get(EmitScalarExpr(E->getArg(0))); } case Builtin::BI__builtin_expect: { diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index c719df1bfa050..2aa4b8253c7af 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -2059,16 +2059,21 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) { EmitBlock(SwitchExit.getBlock(), true); incrementProfileCounter(&S); - // If the switch has a condition wrapped by __builtin_unpredictable, - // create metadata that specifies that the switch is unpredictable. - // Don't bother if not optimizing because that metadata would not be used. + // If the switch has a condition wrapped by __builtin_unpredictable or + // __builtin_consistent, create metadata that specifies that the switch is + // unpredictable or consistent correspondingly. Don't bother if not optimizing + // because that metadata would not be used. 
auto *Call = dyn_cast(S.getCond()); if (Call && CGM.getCodeGenOpts().OptimizationLevel != 0) { auto *FD = dyn_cast_or_null(Call->getCalleeDecl()); - if (FD && FD->getBuiltinID() == Builtin::BI__builtin_unpredictable) { + if (FD) { llvm::MDBuilder MDHelper(getLLVMContext()); - SwitchInsn->setMetadata(llvm::LLVMContext::MD_unpredictable, - MDHelper.createUnpredictable()); + if (FD->getBuiltinID() == Builtin::BI__builtin_unpredictable) + SwitchInsn->setMetadata(llvm::LLVMContext::MD_unpredictable, + MDHelper.createUnpredictable()); + if (FD->getBuiltinID() == Builtin::BI__builtin_consistent) + SwitchInsn->setMetadata(llvm::LLVMContext::MD_consistent, + MDHelper.createConsistent()); } } diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 0f2b9055b88eb..a372913f8732c 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -1908,16 +1908,21 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, llvm::MDNode *Weights = nullptr; llvm::MDNode *Unpredictable = nullptr; + llvm::MDNode *Consistent = nullptr; - // If the branch has a condition wrapped by __builtin_unpredictable, - // create metadata that specifies that the branch is unpredictable. - // Don't bother if not optimizing because that metadata would not be used. + // If the branch has a condition wrapped by __builtin_unpredictable or + // __builtin_consistent, create metadata that specifies that the branch is + // unpredictable or consistent correspondingly. Don't bother if not optimizing + // because that metadata would not be used. 
auto *Call = dyn_cast(Cond->IgnoreImpCasts()); if (Call && CGM.getCodeGenOpts().OptimizationLevel != 0) { auto *FD = dyn_cast_or_null(Call->getCalleeDecl()); - if (FD && FD->getBuiltinID() == Builtin::BI__builtin_unpredictable) { + if (FD) { llvm::MDBuilder MDHelper(getLLVMContext()); - Unpredictable = MDHelper.createUnpredictable(); + if (FD->getBuiltinID() == Builtin::BI__builtin_unpredictable) + Unpredictable = MDHelper.createUnpredictable(); + if (FD->getBuiltinID() == Builtin::BI__builtin_consistent) + Consistent = MDHelper.createConsistent(); } } @@ -1932,7 +1937,8 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, Weights = createProfileWeights(TrueCount, CurrentCount - TrueCount); } - Builder.CreateCondBr(CondV, TrueBlock, FalseBlock, Weights, Unpredictable); + Builder.CreateCondBr(CondV, TrueBlock, FalseBlock, Weights, Unpredictable, + Consistent); } /// ErrorUnsupported - Print out an error that codegen doesn't support the diff --git a/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp index 4a56156de4b27..3de5c6676a732 100644 --- a/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp @@ -63,12 +63,13 @@ bool BuiltinFunctionChecker::evalCall(const CallEvent &Call, } case Builtin::BI__builtin_unpredictable: + case Builtin::BI__builtin_consistent: case Builtin::BI__builtin_expect: case Builtin::BI__builtin_expect_with_probability: case Builtin::BI__builtin_assume_aligned: case Builtin::BI__builtin_addressof: case Builtin::BI__builtin_function_start: { - // For __builtin_unpredictable, __builtin_expect, + // For __builtin_unpredictable, __builtin_consistent, __builtin_expect, // __builtin_expect_with_probability and __builtin_assume_aligned, // just return the value of the subexpression. 
// __builtin_addressof is going from a reference to a pointer, but those diff --git a/clang/test/CodeGen/builtin-consistent.c b/clang/test/CodeGen/builtin-consistent.c new file mode 100644 index 0000000000000..3f1be494acf14 --- /dev/null +++ b/clang/test/CodeGen/builtin-consistent.c @@ -0,0 +1,37 @@ +// RUN: %clang_cc1 -triple aarch64-unknown-unknown -emit-llvm -disable-llvm-passes -o - %s -O1 | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-unknown-unknown -emit-llvm -o - %s -O0 | FileCheck %s --check-prefix=CHECK_O0 + +void f(void); +void g(void); +void consistent_branch(int x) { +// CHECK-LABEL: define{{.*}} void @consistent_branch( +// CHECK-NOT: builtin_consistent +// CHECK: !consistent [[METADATA:.+]] +// CHECK_O0-NOT: builtin_consistent +// CHECK_O0-NOT: !consistent + if (__builtin_consistent(x > 0)) + f(); + + if (x || __builtin_consistent(x != 0)) + g(); +} + +int consistent_switch(int x) { +// CHECK-LABEL: @consistent_switch( +// CHECK-NOT: builtin_consistent +// CHECK: !consistent [[METADATA:.+]] +// CHECK_O0-NOT: builtin_consistent +// CHECK_O0-NOT: !consistent + switch(__builtin_consistent(x)) { + default: + return x; + case 0: + case 1: + case 2: + return 1; + case 3: + return x-1; + }; +} +// CHECK: [[METADATA]] = !{i1 true} + diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h index bd72ac23fc9c0..1fa27026d80f8 100644 --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -114,6 +114,7 @@ class MachineInstr // this instruction. Unpredictable = 1 << 16, // Instruction with unpredictable condition. NoConvergent = 1 << 17, // Call does not require convergence guarantees. + Consistent = 1 << 18, // Instruction condition behaves consistently. 
}; private: diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index 4df56aac4aa17..58680867953f8 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -398,6 +398,8 @@ struct SDNodeFlags { bool NoFPExcept : 1; // Instructions with attached 'unpredictable' metadata on IR level. bool Unpredictable : 1; + // Instructions with attached 'consistent' metadata on IR level. + bool Consistent : 1; public: /// Default constructor turns off all optimization flags. @@ -405,7 +407,8 @@ struct SDNodeFlags { : NoUnsignedWrap(false), NoSignedWrap(false), Exact(false), NonNeg(false), NoNaNs(false), NoInfs(false), NoSignedZeros(false), AllowReciprocal(false), AllowContract(false), ApproximateFuncs(false), - AllowReassociation(false), NoFPExcept(false), Unpredictable(false) {} + AllowReassociation(false), NoFPExcept(false), Unpredictable(false), + Consistent(false) {} /// Propagate the fast-math-flags from an IR FPMathOperator. void copyFMF(const FPMathOperator &FPMO) { @@ -432,6 +435,7 @@ struct SDNodeFlags { void setAllowReassociation(bool b) { AllowReassociation = b; } void setNoFPExcept(bool b) { NoFPExcept = b; } void setUnpredictable(bool b) { Unpredictable = b; } + void setConsistent(bool b) { Consistent = b; } // These are accessors for each flag. bool hasNoUnsignedWrap() const { return NoUnsignedWrap; } @@ -447,6 +451,7 @@ struct SDNodeFlags { bool hasAllowReassociation() const { return AllowReassociation; } bool hasNoFPExcept() const { return NoFPExcept; } bool hasUnpredictable() const { return Unpredictable; } + bool hasConsistent() const { return Consistent; } /// Clear any flags in this flag set that aren't also set in Flags. All /// flags will be cleared if Flags are undefined. 
@@ -464,6 +469,7 @@ struct SDNodeFlags { AllowReassociation &= Flags.AllowReassociation; NoFPExcept &= Flags.NoFPExcept; Unpredictable &= Flags.Unpredictable; + Consistent &= Flags.Consistent; } }; diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 8e7499ac626a7..e893baa90c14d 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -698,7 +698,8 @@ class TargetInstrInfo : public MCInstrInfo { /// If \p BytesRemoved is non-null, report the change in code size from the /// removed instructions. virtual unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const { + int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const { llvm_unreachable("Target didn't implement TargetInstrInfo::removeBranch!"); } @@ -718,8 +719,8 @@ class TargetInstrInfo : public MCInstrInfo { virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const { + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const { llvm_unreachable("Target didn't implement TargetInstrInfo::insertBranch!"); } diff --git a/llvm/include/llvm/IR/FixedMetadataKinds.def b/llvm/include/llvm/IR/FixedMetadataKinds.def index b375d0f091206..6dd0627c028bd 100644 --- a/llvm/include/llvm/IR/FixedMetadataKinds.def +++ b/llvm/include/llvm/IR/FixedMetadataKinds.def @@ -51,3 +51,4 @@ LLVM_FIXED_MD_KIND(MD_kcfi_type, "kcfi_type", 36) LLVM_FIXED_MD_KIND(MD_pcsections, "pcsections", 37) LLVM_FIXED_MD_KIND(MD_DIAssignID, "DIAssignID", 38) LLVM_FIXED_MD_KIND(MD_coro_outside_frame, "coro.outside.frame", 39) +LLVM_FIXED_MD_KIND(MD_consistent, "consistent", 40) diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index e3c4e76f90a4c..0cd6a73408457 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ 
-1064,15 +1064,18 @@ class IRBuilderBase { //===--------------------------------------------------------------------===// private: - /// Helper to add branch weight and unpredictable metadata onto an - /// instruction. + /// Helper to add branch weight, unpredictable and consistent metadata onto + /// an instruction. /// \returns The annotated instruction. template - InstTy *addBranchMetadata(InstTy *I, MDNode *Weights, MDNode *Unpredictable) { + InstTy *addBranchMetadata(InstTy *I, MDNode *Weights, MDNode *Unpred, + MDNode *Consist) { if (Weights) I->setMetadata(LLVMContext::MD_prof, Weights); - if (Unpredictable) - I->setMetadata(LLVMContext::MD_unpredictable, Unpredictable); + if (Unpred) + I->setMetadata(LLVMContext::MD_unpredictable, Unpred); + if (Consist) + I->setMetadata(LLVMContext::MD_consistent, Consist); return I; } @@ -1110,9 +1113,10 @@ class IRBuilderBase { /// instruction. BranchInst *CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights = nullptr, - MDNode *Unpredictable = nullptr) { + MDNode *Unpredictable = nullptr, + MDNode *Consistent = nullptr) { return Insert(addBranchMetadata(BranchInst::Create(True, False, Cond), - BranchWeights, Unpredictable)); + BranchWeights, Unpredictable, Consistent)); } /// Create a conditional 'br Cond, TrueDest, FalseDest' @@ -1121,7 +1125,8 @@ class IRBuilderBase { Instruction *MDSrc) { BranchInst *Br = BranchInst::Create(True, False, Cond); if (MDSrc) { - unsigned WL[4] = {LLVMContext::MD_prof, LLVMContext::MD_unpredictable, + unsigned WL[5] = {LLVMContext::MD_prof, LLVMContext::MD_unpredictable, + LLVMContext::MD_consistent, LLVMContext::MD_make_implicit, LLVMContext::MD_dbg}; Br->copyMetadata(*MDSrc, WL); } @@ -1133,9 +1138,10 @@ class IRBuilderBase { /// allocation). 
SwitchInst *CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases = 10, MDNode *BranchWeights = nullptr, - MDNode *Unpredictable = nullptr) { + MDNode *Unpredictable = nullptr, + MDNode *Consistent = nullptr) { return Insert(addBranchMetadata(SwitchInst::Create(V, Dest, NumCases), - BranchWeights, Unpredictable)); + BranchWeights, Unpredictable, Consistent)); } /// Create an indirect branch instruction with the specified address diff --git a/llvm/include/llvm/IR/MDBuilder.h b/llvm/include/llvm/IR/MDBuilder.h index 39165453de16b..f3ec99568cfa4 100644 --- a/llvm/include/llvm/IR/MDBuilder.h +++ b/llvm/include/llvm/IR/MDBuilder.h @@ -67,6 +67,9 @@ class MDBuilder { /// Return metadata specifying that a branch or switch is unpredictable. MDNode *createUnpredictable(); + /// Return metadata specifying that a branch or switch behaves consistently. + MDNode *createConsistent(); + /// Return metadata containing the entry \p Count for a function, a boolean /// \Synthetic indicating whether the counts were synthetized, and the /// GUIDs stored in \p Imports that need to be imported for sample PGO, to diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp index 0801296cab49f..e8fca44a816d9 100644 --- a/llvm/lib/CodeGen/BranchFolding.cpp +++ b/llvm/lib/CodeGen/BranchFolding.cpp @@ -465,8 +465,10 @@ static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB, MachineBasicBlock *NextBB = &*I; if (TBB == NextBB && !Cond.empty() && !FBB) { if (!TII->reverseBranchCondition(Cond)) { - TII->removeBranch(*CurMBB); - TII->insertBranch(*CurMBB, SuccBB, nullptr, Cond, dl); + bool IsConsistent = false; + TII->removeBranch(*CurMBB, nullptr, &IsConsistent); + TII->insertBranch(*CurMBB, SuccBB, nullptr, Cond, dl, nullptr, + IsConsistent); return; } } @@ -1116,12 +1118,13 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // Remove the unconditional branch at the end, if any. 
if (TBB && (Cond.empty() || FBB)) { + bool IsConsistent = false; DebugLoc dl = PBB->findBranchDebugLoc(); - TII->removeBranch(*PBB); + TII->removeBranch(*PBB, nullptr, &IsConsistent); if (!Cond.empty()) // reinsert conditional branch only, for now - TII->insertBranch(*PBB, (TBB == IBB) ? FBB : TBB, nullptr, - NewCond, dl); + TII->insertBranch(*PBB, (TBB == IBB) ? FBB : TBB, nullptr, NewCond, + dl, nullptr, IsConsistent); } MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(*PBB), PBB)); @@ -1443,9 +1446,11 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { // If the prior block branches somewhere else on the condition and here if // the condition is false, remove the uncond second branch. if (PriorFBB == MBB) { + bool IsConsistent = false; DebugLoc dl = getBranchDebugLoc(PrevBB); - TII->removeBranch(PrevBB); - TII->insertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl); + TII->removeBranch(PrevBB, nullptr, &IsConsistent); + TII->insertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl, nullptr, + IsConsistent); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1457,9 +1462,11 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { if (PriorTBB == MBB) { SmallVector NewPriorCond(PriorCond); if (!TII->reverseBranchCondition(NewPriorCond)) { + bool IsConsistent = false; DebugLoc dl = getBranchDebugLoc(PrevBB); - TII->removeBranch(PrevBB); - TII->insertBranch(PrevBB, PriorFBB, nullptr, NewPriorCond, dl); + TII->removeBranch(PrevBB, nullptr, &IsConsistent); + TII->insertBranch(PrevBB, PriorFBB, nullptr, NewPriorCond, dl, nullptr, + IsConsistent); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1495,9 +1502,11 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { LLVM_DEBUG(dbgs() << "\nMoving MBB: " << *MBB << "To make fallthrough to: " << *PriorTBB << "\n"); + bool IsConsistent = false; DebugLoc dl = getBranchDebugLoc(PrevBB); - TII->removeBranch(PrevBB); - TII->insertBranch(PrevBB, MBB, nullptr, NewPriorCond, dl); 
+ TII->removeBranch(PrevBB, nullptr, &IsConsistent); + TII->insertBranch(PrevBB, MBB, nullptr, NewPriorCond, dl, nullptr, + IsConsistent); // Move this block to the end of the function. MBB->moveAfter(&MF.back()); @@ -1558,9 +1567,11 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) { SmallVector NewCond(CurCond); if (!TII->reverseBranchCondition(NewCond)) { + bool IsConsistent = false; DebugLoc dl = getBranchDebugLoc(*MBB); - TII->removeBranch(*MBB); - TII->insertBranch(*MBB, CurFBB, CurTBB, NewCond, dl); + TII->removeBranch(*MBB, nullptr, &IsConsistent); + TII->insertBranch(*MBB, CurFBB, CurTBB, NewCond, dl, nullptr, + IsConsistent); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1606,9 +1617,11 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { assert(!PriorFBB && "Machine CFG out of date!"); PriorFBB = MBB; } + bool IsConsistent = false; DebugLoc pdl = getBranchDebugLoc(PrevBB); - TII->removeBranch(PrevBB); - TII->insertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, pdl); + TII->removeBranch(PrevBB, nullptr, &IsConsistent); + TII->insertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, pdl, + nullptr, IsConsistent); } // Iterate through all the predecessors, revectoring each in-turn. @@ -1654,7 +1667,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { } // Add the branch back if the block is more than just an uncond branch. 
- TII->insertBranch(*MBB, CurTBB, nullptr, CurCond, dl); + TII->insertBranch(*MBB, CurTBB, nullptr, CurCond, dl, nullptr, 0); } } diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp index fee237104022e..75db4141e486f 100644 --- a/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/llvm/lib/CodeGen/MIRPrinter.cpp @@ -805,6 +805,8 @@ void MIPrinter::print(const MachineInstr &MI) { OS << "unpredictable "; if (MI.getFlag(MachineInstr::NoConvergent)) OS << "noconvergent "; + if (MI.getFlag(MachineInstr::Consistent)) + OS << "consistent "; OS << TII->getName(MI.getOpcode()); if (I < E) diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index d9e22685faf5f..03cc56313d54d 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -719,23 +719,25 @@ void MachineBasicBlock::updateTerminator( // If the unconditional successor block is not the current layout // successor, insert a branch to jump to it. if (!isLayoutSuccessor(PreviousLayoutSuccessor)) - TII->insertBranch(*this, PreviousLayoutSuccessor, nullptr, Cond, DL); + TII->insertBranch(*this, PreviousLayoutSuccessor, nullptr, Cond, DL, + nullptr, 0); } return; } if (FBB) { + bool IsConsistent = false; // The block has a non-fallthrough conditional branch. If one of its // successors is its layout successor, rewrite it to a fallthrough // conditional branch. 
if (isLayoutSuccessor(TBB)) { if (TII->reverseBranchCondition(Cond)) return; - TII->removeBranch(*this); - TII->insertBranch(*this, FBB, nullptr, Cond, DL); + TII->removeBranch(*this, nullptr, &IsConsistent); + TII->insertBranch(*this, FBB, nullptr, Cond, DL, nullptr, IsConsistent); } else if (isLayoutSuccessor(FBB)) { - TII->removeBranch(*this); - TII->insertBranch(*this, TBB, nullptr, Cond, DL); + TII->removeBranch(*this, nullptr, &IsConsistent); + TII->insertBranch(*this, TBB, nullptr, Cond, DL, nullptr, IsConsistent); } return; } @@ -757,6 +759,7 @@ void MachineBasicBlock::updateTerminator( return; } + bool IsConsistent = false; // The block has a fallthrough conditional branch. if (isLayoutSuccessor(TBB)) { if (TII->reverseBranchCondition(Cond)) { @@ -765,11 +768,13 @@ void MachineBasicBlock::updateTerminator( TII->insertBranch(*this, PreviousLayoutSuccessor, nullptr, Cond, DL); return; } - TII->removeBranch(*this); - TII->insertBranch(*this, PreviousLayoutSuccessor, nullptr, Cond, DL); + TII->removeBranch(*this, nullptr, &IsConsistent); + TII->insertBranch(*this, PreviousLayoutSuccessor, nullptr, Cond, DL, + nullptr, IsConsistent); } else if (!isLayoutSuccessor(PreviousLayoutSuccessor)) { - TII->removeBranch(*this); - TII->insertBranch(*this, TBB, PreviousLayoutSuccessor, Cond, DL); + TII->removeBranch(*this, nullptr, &IsConsistent); + TII->insertBranch(*this, TBB, PreviousLayoutSuccessor, Cond, DL, nullptr, + IsConsistent); } } @@ -1218,7 +1223,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge( SlotIndexUpdateDelegate SlotUpdater(*MF, Indexes); SmallVector Cond; const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo(); - TII->insertBranch(*NMBB, Succ, nullptr, Cond, DL); + TII->insertBranch(*NMBB, Succ, nullptr, Cond, DL, nullptr, 0); } // Fix PHI nodes in Succ so they refer to NMBB instead of this. 
diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index 9e7b4df2576fe..520a31ff8e1be 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -586,6 +586,9 @@ uint32_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) { if (I.getMetadata(LLVMContext::MD_unpredictable)) MIFlags |= MachineInstr::MIFlag::Unpredictable; + if (I.getMetadata(LLVMContext::MD_consistent)) + MIFlags |= MachineInstr::MIFlag::Consistent; + return MIFlags; } @@ -1693,6 +1696,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << "nofpexcept "; if (getFlag(MachineInstr::NoMerge)) OS << "nomerge "; + if (getFlag(MachineInstr::Consistent)) + OS << "consistent "; // Print the opcode name. if (TII) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index a867d88f76c0c..3dec1f636a6f1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -17702,6 +17702,9 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { } SDValue DAGCombiner::visitBRCOND(SDNode *N) { + unsigned BrOpcode = N->getOpcode(); + SDNodeFlags Flags; + Flags.setConsistent(N->getFlags().hasConsistent()); SDValue Chain = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); @@ -17709,8 +17712,8 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are // nondeterministic jumps). 
if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) { - return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, - N1->getOperand(0), N2); + return DAG.getNode(BrOpcode, SDLoc(N), MVT::Other, Chain, N1->getOperand(0), + N2, Flags); } // Variant of the previous fold where there is a SETCC in between: @@ -17758,8 +17761,9 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { if (Updated) return DAG.getNode( - ISD::BRCOND, SDLoc(N), MVT::Other, Chain, - DAG.getSetCC(SDLoc(N1), N1->getValueType(0), S0, S1, Cond), N2); + BrOpcode, SDLoc(N), MVT::Other, Chain, + DAG.getSetCC(SDLoc(N1), N1->getValueType(0), S0, S1, Cond), N2, + Flags); } // If N is a constant we could fold this into a fallthrough or unconditional @@ -17773,9 +17777,9 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { if (N1.getOpcode() == ISD::SETCC && TLI.isOperationLegalOrCustom(ISD::BR_CC, N1.getOperand(0).getValueType())) { - return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, - Chain, N1.getOperand(2), - N1.getOperand(0), N1.getOperand(1), N2); + SDValue Ops[] = {Chain, N1.getOperand(2), N1.getOperand(0), + N1.getOperand(1), N2}; + return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, Ops, Flags); } if (N1.hasOneUse()) { @@ -17783,8 +17787,8 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes. 
HandleSDNode ChainHandle(Chain); if (SDValue NewN1 = rebuildSetCC(N1)) - return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, - ChainHandle.getValue(), NewN1, N2); + return DAG.getNode(BrOpcode, SDLoc(N), MVT::Other, ChainHandle.getValue(), + NewN1, N2, Flags); } return SDValue(); @@ -17906,11 +17910,13 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) { if (Simp.getNode()) AddToWorklist(Simp.getNode()); // fold to a simpler setcc - if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC) - return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, - N->getOperand(0), Simp.getOperand(2), - Simp.getOperand(0), Simp.getOperand(1), - N->getOperand(4)); + if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC) { + SDNodeFlags Flags; + Flags.setConsistent(N->getFlags().hasConsistent()); + SDValue Ops[] = {N->getOperand(0), Simp.getOperand(2), Simp.getOperand(0), + Simp.getOperand(1), N->getOperand(4)}; + return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::Other, Ops, Flags); + } return SDValue(); } diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index a27febe15db83..406fae0f22aaf 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -1083,6 +1083,9 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, MI->setFlag(MachineInstr::MIFlag::Unpredictable); } + if (Node->getFlags().hasConsistent()) + MIB.getInstr()->setFlag(MachineInstr::MIFlag::Consistent); + // Emit all of the actual operands of this instruction, adding them to the // instruction as appropriate. bool HasOptPRefs = NumDefs > NumResults; diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index ad5a4506efbd8..433204f4bc188 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1061,7 +1061,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { : Opc == ISD::SETCCCARRY ? 
3 : (Opc == ISD::SETCC || Opc == ISD::VP_SETCC) ? 2 : 1; - unsigned CompareOperand = Opc == ISD::BR_CC ? 2 + unsigned CompareOperand = (Opc == ISD::BR_CC) ? 2 : Opc == ISD::STRICT_FSETCC ? 1 : Opc == ISD::STRICT_FSETCCS ? 1 : 0; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index aab0d5c5a348b..ad8ae961a57fd 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2548,7 +2548,8 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { } // Emit the branch for this block. - visitSwitchCase(SL->SwitchCases[0], BrMBB); + visitSwitchCase(SL->SwitchCases[0], BrMBB, + I.hasMetadata(LLVMContext::MD_consistent)); SL->SwitchCases.erase(SL->SwitchCases.begin()); return; } @@ -2568,13 +2569,14 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // Use visitSwitchCase to actually insert the fast branch sequence for this // cond branch. - visitSwitchCase(CB, BrMBB); + visitSwitchCase(CB, BrMBB, I.hasMetadata(LLVMContext::MD_consistent)); } /// visitSwitchCase - Emits the necessary code to represent a single node in /// the binary search tree resulting from lowering a switch instruction. 
void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, - MachineBasicBlock *SwitchBB) { + MachineBasicBlock *SwitchBB, + bool IsConsistent) { SDValue Cond; SDValue CondLHS = getValue(CB.CmpLHS); SDLoc dl = CB.DL; @@ -2652,9 +2654,10 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True); } - SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, - MVT::Other, getControlRoot(), Cond, - DAG.getBasicBlock(CB.TrueBB)); + SDNodeFlags Flags; + Flags.setConsistent(IsConsistent); + SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, MVT::Other, getControlRoot(), + Cond, DAG.getBasicBlock(CB.TrueBB), Flags); setValue(CurInst, BrCond); @@ -2887,7 +2890,8 @@ SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) { /// visitBitTestHeader - This function emits necessary code to produce value /// suitable for "bit tests" void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, - MachineBasicBlock *SwitchBB) { + MachineBasicBlock *SwitchBB, + bool IsConsistent) { SDLoc dl = getCurSDLoc(); // Subtract the minimum value. @@ -2935,9 +2939,10 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, RangeSub.getValueType()), RangeSub, DAG.getConstant(B.Range, dl, RangeSub.getValueType()), ISD::SETUGT); - + SDNodeFlags Flags; + Flags.setConsistent(IsConsistent); Root = DAG.getNode(ISD::BRCOND, dl, MVT::Other, Root, RangeCmp, - DAG.getBasicBlock(B.Default)); + DAG.getBasicBlock(B.Default), Flags); } // Avoid emitting unnecessary branches to the next block. @@ -3404,6 +3409,9 @@ void SelectionDAGBuilder::visitSelect(const User &I) { Flags.setUnpredictable( cast<SelectInst>(I).getMetadata(LLVMContext::MD_unpredictable)); + Flags.setConsistent( + cast<SelectInst>(I).getMetadata(LLVMContext::MD_consistent)); + // Min/max matching is only viable if all output VTs are the same.
if (all_equal(ValueVTs)) { EVT VT = ValueVTs[0]; @@ -11357,9 +11365,12 @@ void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) { HasTailCall = true; } -void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, +void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, + const SwitchInst &SI, MachineBasicBlock *SwitchMBB, MachineBasicBlock *DefaultMBB) { + const Value *Cond = SI.getCondition(); + bool IsConsistent = SI.getMetadata(LLVMContext::MD_consistent); MachineFunction *CurMF = FuncInfo.MF; MachineBasicBlock *NextMBB = nullptr; MachineFunction::iterator BBI(W.MBB); @@ -11412,9 +11423,11 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, addSuccessorWithProb(SwitchMBB, DefaultMBB); // Insert the true branch. + SDNodeFlags Flags; + Flags.setConsistent(IsConsistent); SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other, getControlRoot(), Cond, - DAG.getBasicBlock(Small.MBB)); + DAG.getBasicBlock(Small.MBB), Flags); // Insert the false branch. BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond, DAG.getBasicBlock(DefaultMBB)); @@ -11571,7 +11584,7 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, // If we're in the right place, emit the bit test header right now. 
if (CurMBB == SwitchMBB) { - visitBitTestHeader(*BTB, SwitchMBB); + visitBitTestHeader(*BTB, SwitchMBB, IsConsistent); BTB->Emitted = true; } break; @@ -11602,7 +11615,7 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, getCurSDLoc(), I->Prob, UnhandledProbs); if (CurMBB == SwitchMBB) - visitSwitchCase(CB, SwitchMBB); + visitSwitchCase(CB, SwitchMBB, IsConsistent); else SL->SwitchCases.push_back(CB); @@ -11627,7 +11640,7 @@ unsigned SelectionDAGBuilder::caseClusterRank(const CaseCluster &CC, void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, const SwitchWorkListItem &W, - Value *Cond, + const SwitchInst &SI, MachineBasicBlock *SwitchMBB) { assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) && "Clusters not sorted?"); @@ -11729,7 +11742,7 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, WorkList.push_back( {LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultProb / 2}); // Put Cond in a virtual register to make it available from the new blocks. - ExportFromCurrentBlock(Cond); + ExportFromCurrentBlock(SI.getCondition()); } // Similarly, we will branch to the RHS if Value >= Pivot. If RHS is a @@ -11745,15 +11758,15 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, WorkList.push_back( {RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultProb / 2}); // Put Cond in a virtual register to make it available from the new blocks. - ExportFromCurrentBlock(Cond); + ExportFromCurrentBlock(SI.getCondition()); } // Create the CaseBlock record that will be used to lower the branch. 
- CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB, - getCurSDLoc(), LeftProb, RightProb); + CaseBlock CB(ISD::SETLT, SI.getCondition(), Pivot, nullptr, LeftMBB, RightMBB, + W.MBB, getCurSDLoc(), LeftProb, RightProb); if (W.MBB == SwitchMBB) - visitSwitchCase(CB, SwitchMBB); + visitSwitchCase(CB, SwitchMBB, SI.getMetadata(LLVMContext::MD_consistent)); else SL->SwitchCases.push_back(CB); } @@ -11815,7 +11828,7 @@ MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster( auto PeeledCaseIt = Clusters.begin() + PeeledCaseIndex; SwitchWorkListItem W = {SwitchMBB, PeeledCaseIt, PeeledCaseIt, nullptr, nullptr, TopCaseProb.getCompl()}; - lowerWorkItem(W, SI.getCondition(), SwitchMBB, PeeledSwitchMBB); + lowerWorkItem(W, SI, SwitchMBB, PeeledSwitchMBB); Clusters.erase(PeeledCaseIt); for (CaseCluster &CC : Clusters) { @@ -11908,11 +11921,11 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { if (NumClusters > 3 && TM.getOptLevel() != CodeGenOptLevel::None && !DefaultMBB->getParent()->getFunction().hasMinSize()) { // For optimized builds, lower large range as a balanced binary tree. - splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB); + splitWorkItem(WorkList, W, SI, SwitchMBB); continue; } - lowerWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB); + lowerWorkItem(W, SI, SwitchMBB, DefaultMBB); } } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index a97884f0efb9a..a426c1c871fc9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -222,11 +222,11 @@ class SelectionDAGBuilder { /// Emit comparison and split W into two subtrees. void splitWorkItem(SwitchCG::SwitchWorkList &WorkList, - const SwitchCG::SwitchWorkListItem &W, Value *Cond, - MachineBasicBlock *SwitchMBB); + const SwitchCG::SwitchWorkListItem &W, + const SwitchInst &SI, MachineBasicBlock *SwitchMBB); /// Lower W. 
- void lowerWorkItem(SwitchCG::SwitchWorkListItem W, Value *Cond, + void lowerWorkItem(SwitchCG::SwitchWorkListItem W, const SwitchInst &SI, MachineBasicBlock *SwitchMBB, MachineBasicBlock *DefaultMBB); @@ -525,12 +525,14 @@ class SelectionDAGBuilder { BranchProbability Prob = BranchProbability::getUnknown()); public: - void visitSwitchCase(SwitchCG::CaseBlock &CB, MachineBasicBlock *SwitchBB); + void visitSwitchCase(SwitchCG::CaseBlock &CB, MachineBasicBlock *SwitchBB, + bool IsConsistent = false); void visitSPDescriptorParent(StackProtectorDescriptor &SPD, MachineBasicBlock *ParentBB); void visitSPDescriptorFailure(StackProtectorDescriptor &SPD); void visitBitTestHeader(SwitchCG::BitTestBlock &B, - MachineBasicBlock *SwitchBB); + MachineBasicBlock *SwitchBB, + bool IsConsistent = false); void visitBitTestCase(SwitchCG::BitTestBlock &BB, MachineBasicBlock *NextMBB, BranchProbability BranchProbToNext, unsigned Reg, SwitchCG::BitTestCase &B, MachineBasicBlock *SwitchBB); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 78cc60084068a..ad4e4850bdc75 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -624,6 +624,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (getFlags().hasNoFPExcept()) OS << " nofpexcept"; + if (getFlags().hasConsistent()) + OS << " consistent"; + if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) { if (!MN->memoperands_empty()) { OS << "<"; diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp index b321d8b325fe0..9ad1d24f79a69 100644 --- a/llvm/lib/IR/IRBuilder.cpp +++ b/llvm/lib/IR/IRBuilder.cpp @@ -1125,7 +1125,8 @@ Value *IRBuilderBase::CreateSelect(Value *C, Value *True, Value *False, if (MDFrom) { MDNode *Prof = MDFrom->getMetadata(LLVMContext::MD_prof); MDNode *Unpred = MDFrom->getMetadata(LLVMContext::MD_unpredictable); - Sel =
addBranchMetadata(Sel, Prof, Unpred); + MDNode *Consist = MDFrom->getMetadata(LLVMContext::MD_consistent); + Sel = addBranchMetadata(Sel, Prof, Unpred, Consist); } if (isa<FPMathOperator>(Sel)) setFPAttrs(Sel, nullptr /* MDNode* */, FMF); diff --git a/llvm/lib/IR/MDBuilder.cpp b/llvm/lib/IR/MDBuilder.cpp index 2490b3012bdc2..ec0e2ec93d30e 100644 --- a/llvm/lib/IR/MDBuilder.cpp +++ b/llvm/lib/IR/MDBuilder.cpp @@ -56,6 +56,11 @@ MDNode *MDBuilder::createUnpredictable() { return MDNode::get(Context, std::nullopt); } +MDNode *MDBuilder::createConsistent() { + return MDNode::get( + Context, createConstant(ConstantInt::get(Type::getInt1Ty(Context), 1))); +} + MDNode *MDBuilder::createFunctionEntryCount( uint64_t Count, bool Synthetic, const DenseSet<GlobalValue::GUID> *Imports) { diff --git a/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp b/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp index da72e35a248eb..10f7f6e9923dd 100644 --- a/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp +++ b/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp @@ -132,7 +132,11 @@ MachineInstr *AArch64CondBrTuning::convertToCondBr(MachineInstr &MI) { CC = AArch64CC::MI; break; } - return BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::Bcc)) + return BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), + MI.getFlag(MachineInstr::MIFlag::Consistent) && + TII->getSubtarget().hasHBC() + ?
TII->get(AArch64::BCcc) + : TII->get(AArch64::Bcc)) .addImm(CC) .addMBB(TargetMBB); } diff --git a/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp index 1c20e24e41d7e..c412c8d76aed0 100644 --- a/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp +++ b/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp @@ -149,7 +149,7 @@ MachineInstr *AArch64ConditionOptimizer::findSuitableCompare( if (Term == MBB->end()) return nullptr; - if (Term->getOpcode() != AArch64::Bcc) + if (Term->getOpcode() != AArch64::Bcc && Term->getOpcode() != AArch64::BCcc) return nullptr; // Since we may modify cmp of this MBB, make sure NZCV does not live out. diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp index 9b8162ce8dd4d..76033c746c280 100644 --- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp +++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp @@ -2449,16 +2449,30 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { // Emit the extra branch for FCMP_UEQ and FCMP_ONE. if (ExtraCC != AArch64CC::AL) { + if (BI->getMetadata(LLVMContext::MD_consistent) && Subtarget->hasHBC()) + // For branches with consistent metadata emit conditional branches + // with a hint that it will behave very consistently if target + // supports HBC + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, + TII.get(AArch64::BCcc)) + .addImm(ExtraCC) + .addMBB(TBB); + else + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc)) + .addImm(ExtraCC) + .addMBB(TBB); + } + // Emit the branch. + if (BI->getMetadata(LLVMContext::MD_consistent) && Subtarget->hasHBC()) { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BCcc)) + .addImm(CC) + .addMBB(TBB); + } else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc)) - .addImm(ExtraCC) + .addImm(CC) .addMBB(TBB); } - // Emit the branch. 
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc)) - .addImm(CC) - .addMBB(TBB); - finishCondBranch(BI->getParent(), TBB, FBB); return true; } @@ -2485,10 +2499,17 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { if (!CondReg) return false; - // Emit the branch. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc)) - .addImm(CC) - .addMBB(TBB); + if (BI->getMetadata(LLVMContext::MD_consistent) && Subtarget->hasHBC()) { + // Emit conditional branch with a consistent behaviour hint + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BCcc)) + .addImm(CC) + .addMBB(TBB); + } else { + // Emit the branch. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc)) + .addImm(CC) + .addMBB(TBB); + } finishCondBranch(BI->getParent(), TBB, FBB); return true; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 3bff2845b7a13..945328d0e7e38 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2346,6 +2346,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::LOADgot) MAKE_CASE(AArch64ISD::RET_GLUE) MAKE_CASE(AArch64ISD::BRCOND) + MAKE_CASE(AArch64ISD::BRCCOND) MAKE_CASE(AArch64ISD::CSEL) MAKE_CASE(AArch64ISD::CSINV) MAKE_CASE(AArch64ISD::CSNEG) @@ -8604,6 +8605,11 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue RHS = Op.getOperand(3); SDValue Dest = Op.getOperand(4); SDLoc dl(Op); + SDNodeFlags Flags; + bool IsConsistent = Op.getNode()->getFlags().hasConsistent(); + Flags.setConsistent(IsConsistent); + unsigned BRCondOpc = IsConsistent && Subtarget->hasHBC() ? 
AArch64ISD::BRCCOND + : AArch64ISD::BRCOND; MachineFunction &MF = DAG.getMachineFunction(); // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions @@ -8643,8 +8649,8 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { OFCC = getInvertedCondCode(OFCC); SDValue CCVal = DAG.getConstant(OFCC, dl, MVT::i32); - return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal, - Overflow); + SDValue Ops[] = {Chain, Dest, CCVal, Overflow}; + return DAG.getNode(BRCondOpc, dl, MVT::Other, Ops, Flags); } if (LHS.getValueType().isInteger()) { @@ -8665,12 +8671,13 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { isPowerOf2_64(LHS.getConstantOperandVal(1))) { SDValue Test = LHS.getOperand(0); uint64_t Mask = LHS.getConstantOperandVal(1); - return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test, - DAG.getConstant(Log2_64(Mask), dl, MVT::i64), - Dest); + SDValue Ops[] = {Chain, Test, + DAG.getConstant(Log2_64(Mask), dl, MVT::i64), Dest}; + return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Ops, Flags); } - return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest); + return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest, + Flags); } else if (CC == ISD::SETNE) { // See if we can use a TBZ to fold in an AND as well. // TBZ has a smaller branch displacement than CBZ. 
If the offset is @@ -8681,20 +8688,22 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { isPowerOf2_64(LHS.getConstantOperandVal(1))) { SDValue Test = LHS.getOperand(0); uint64_t Mask = LHS.getConstantOperandVal(1); - return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test, - DAG.getConstant(Log2_64(Mask), dl, MVT::i64), - Dest); + SDValue Ops[] = {Chain, Test, + DAG.getConstant(Log2_64(Mask), dl, MVT::i64), Dest}; + return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Ops, Flags); } - return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest); + return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest, + Flags); } else if (CC == ISD::SETLT && LHS.getOpcode() != ISD::AND) { // Don't combine AND since emitComparison converts the AND to an ANDS // (a.k.a. TST) and the test in the test bit and branch instruction // becomes redundant. This would also increase register pressure. uint64_t SignBitPos; std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS); - return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS, - DAG.getConstant(SignBitPos, dl, MVT::i64), Dest); + SDValue Ops[] = {Chain, LHS, DAG.getConstant(SignBitPos, dl, MVT::i64), + Dest}; + return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Ops, Flags); } } if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT && @@ -8704,14 +8713,15 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { // becomes redundant. This would also increase register pressure. 
uint64_t SignBitPos; std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS); - return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS, - DAG.getConstant(SignBitPos, dl, MVT::i64), Dest); + SDValue Ops[] = {Chain, LHS, DAG.getConstant(SignBitPos, dl, MVT::i64), + Dest}; + return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Ops, Flags); } SDValue CCVal; SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl); - return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal, - Cmp); + SDValue Ops[] = {Chain, Dest, CCVal, Cmp}; + return DAG.getNode(BRCondOpc, dl, MVT::Other, Ops, Flags); } assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::bf16 || @@ -8723,12 +8733,12 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { AArch64CC::CondCode CC1, CC2; changeFPCCToAArch64CC(CC, CC1, CC2); SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32); - SDValue BR1 = - DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp); + SDValue BR1Ops[] = {Chain, Dest, CC1Val, Cmp}; + SDValue BR1 = DAG.getNode(BRCondOpc, dl, MVT::Other, BR1Ops, Flags); if (CC2 != AArch64CC::AL) { SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32); - return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val, - Cmp); + SDValue Ops[] = {BR1, Dest, CC2Val, Cmp}; + return DAG.getNode(BRCondOpc, dl, MVT::Other, Ops, Flags); } return BR1; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 7332a95615a4d..a6968587a2f96 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -72,6 +72,7 @@ enum NodeType : unsigned { // Offset Table, TLS record). RET_GLUE, // Return with a glue operand. Operand 0 is the chain operand. BRCOND, // Conditional branch instruction; "b.cond". + BRCCOND, // Hinted Conditional Branch "BC.cond". CSEL, CSINV, // Conditional select invert. CSNEG, // Conditional select negate. 
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index f88f5a240a1fd..a3015a31b83e7 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -2089,10 +2089,10 @@ def am_brcond : Operand { let OperandType = "OPERAND_PCREL"; } -class BranchCond +class BranchCond : I<(outs), (ins ccode:$cond, am_brcond:$target), mnemonic, ".$cond\t$target", "", - [(AArch64brcond bb:$target, imm:$cond, NZCV)]>, Sched<[WriteBr]> { + [(node bb:$target, imm:$cond, NZCV)]>, Sched<[WriteBr]> { let isBranch = 1; let isTerminator = 1; let Uses = [NZCV]; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 6fdf5363bae29..7a24b11fc2b8d 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -180,6 +180,7 @@ static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, default: llvm_unreachable("Unknown branch instruction?"); case AArch64::Bcc: + case AArch64::BCcc: Target = LastInst->getOperand(1).getMBB(); Cond.push_back(LastInst->getOperand(0)); break; @@ -221,6 +222,7 @@ static unsigned getBranchDisplacementBits(unsigned Opc) { case AArch64::CBZX: return CBZDisplacementBits; case AArch64::Bcc: + case AArch64::BCcc: return BCCDisplacementBits; } } @@ -250,6 +252,7 @@ AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const { case AArch64::CBZX: case AArch64::CBNZX: case AArch64::Bcc: + case AArch64::BCcc: return MI.getOperand(1).getMBB(); } } @@ -535,7 +538,10 @@ bool AArch64InstrInfo::reverseBranchCondition( } unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { + int *BytesRemoved, + bool *IsConsistent) const { + if (IsConsistent) + *IsConsistent = false; MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); if (I == MBB.end()) return 0; @@ -544,6 +550,10 @@ unsigned 
AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB, !isCondBranchOpcode(I->getOpcode())) return 0; + if (I->getOpcode() == AArch64::BCcc) + if (IsConsistent) + *IsConsistent = true; + // Remove the branch. I->eraseFromParent(); @@ -561,6 +571,10 @@ unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB, return 1; } + if (I->getOpcode() == AArch64::BCcc) + if (IsConsistent) + *IsConsistent = true; + // Remove the branch. I->eraseFromParent(); if (BytesRemoved) @@ -569,12 +583,16 @@ unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB, return 2; } -void AArch64InstrInfo::instantiateCondBranch( - MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB, - ArrayRef Cond) const { +void AArch64InstrInfo::instantiateCondBranch(MachineBasicBlock &MBB, + const DebugLoc &DL, + MachineBasicBlock *TBB, + ArrayRef Cond, + bool IsConsistent) const { if (Cond[0].getImm() != -1) { // Regular Bcc - BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB); + BuildMI(&MBB, DL, get(IsConsistent ? AArch64::BCcc : AArch64::Bcc)) + .addImm(Cond[0].getImm()) + .addMBB(TBB); } else { // Folded compare-and-branch // Note that we use addOperand instead of addReg to keep the flags. @@ -586,9 +604,12 @@ void AArch64InstrInfo::instantiateCondBranch( } } -unsigned AArch64InstrInfo::insertBranch( - MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - ArrayRef Cond, const DebugLoc &DL, int *BytesAdded) const { +unsigned AArch64InstrInfo::insertBranch(MachineBasicBlock &MBB, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + ArrayRef Cond, + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { // Shouldn't be a fall through. assert(TBB && "insertBranch must not be told to insert a fallthrough"); @@ -596,7 +617,7 @@ unsigned AArch64InstrInfo::insertBranch( if (Cond.empty()) // Unconditional branch? 
BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB); else - instantiateCondBranch(MBB, DL, TBB, Cond); + instantiateCondBranch(MBB, DL, TBB, Cond, IsConsistent); if (BytesAdded) *BytesAdded = 4; @@ -605,7 +626,7 @@ unsigned AArch64InstrInfo::insertBranch( } // Two-way conditional branch. - instantiateCondBranch(MBB, DL, TBB, Cond); + instantiateCondBranch(MBB, DL, TBB, Cond, IsConsistent); BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB); if (BytesAdded) @@ -1618,7 +1639,8 @@ findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) { default: return -1; - case AArch64::Bcc: { + case AArch64::Bcc: + case AArch64::BCcc: { int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV); assert(Idx >= 2); return Idx - 2; @@ -7873,6 +7895,7 @@ bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const { default: llvm_unreachable("Unknown branch instruction?"); case AArch64::Bcc: + case AArch64::BCcc: return false; case AArch64::CBZW: case AArch64::CBZX: diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index a934103c90cbf..0a513aa5254d3 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -45,6 +45,8 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo { /// always be able to get register info as well (through this method). 
const AArch64RegisterInfo &getRegisterInfo() const { return RI; } + const AArch64Subtarget &getSubtarget() const { return Subtarget; } + unsigned getInstSizeInBytes(const MachineInstr &MI) const override; bool isAsCheapAsAMove(const MachineInstr &MI) const override; @@ -238,12 +240,12 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo { bool analyzeBranchPredicate(MachineBasicBlock &MBB, MachineBranchPredicate &MBP, bool AllowModify) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; bool reverseBranchCondition(SmallVectorImpl &Cond) const override; bool canInsertSelect(const MachineBasicBlock &, ArrayRef Cond, @@ -403,7 +405,8 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo { void instantiateCondBranch(MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB, - ArrayRef Cond) const; + ArrayRef Cond, + bool consistent) const; bool substituteCmpToZero(MachineInstr &CmpInstr, unsigned SrcReg, const MachineRegisterInfo &MRI) const; bool removeCmpToZeroOrOne(MachineInstr &CmpInstr, unsigned SrcReg, @@ -510,6 +513,7 @@ static inline bool isUncondBranchOpcode(int Opc) { return Opc == AArch64::B; } static inline bool isCondBranchOpcode(int Opc) { switch (Opc) { case AArch64::Bcc: + case AArch64::BCcc: case AArch64::CBZW: case AArch64::CBZX: case AArch64::CBNZW: diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 290c79f7bacdb..198ae55aea218 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ 
b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -637,6 +637,8 @@ def AArch64call_rvmarker: SDNode<"AArch64ISD::CALL_RVMARKER", def AArch64brcond : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond, [SDNPHasChain]>; +def AArch64brccond : SDNode<"AArch64ISD::BRCCOND", SDT_AArch64Brcond, + [SDNPHasChain]>; def AArch64cbz : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz, [SDNPHasChain]>; def AArch64cbnz : SDNode<"AArch64ISD::CBNZ", SDT_AArch64cbz, @@ -2782,12 +2784,12 @@ def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym), //===----------------------------------------------------------------------===// // Conditional branch (immediate) instruction. //===----------------------------------------------------------------------===// -def Bcc : BranchCond<0, "b">; +def Bcc : BranchCond<0, "b", AArch64brcond>; // Armv8.8-A variant form which hints to the branch predictor that // this branch is very likely to go the same way nearly all the time // (even though it is not known at compile time _which_ way that is). -def BCcc : BranchCond<1, "bc">, Requires<[HasHBC]>; +def BCcc : BranchCond<1, "bc", AArch64brccond>, Requires<[HasHBC]>; //===----------------------------------------------------------------------===// // Compare-and-branch instructions. diff --git a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp index 05d60872bf51a..a0b96f74a0289 100644 --- a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp +++ b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp @@ -21,7 +21,8 @@ using namespace llvm; /// CMN, CMP, TST followed by Bcc static bool isArithmeticBccPair(const MachineInstr *FirstMI, const MachineInstr &SecondMI, bool CmpOnly) { - if (SecondMI.getOpcode() != AArch64::Bcc) + if (SecondMI.getOpcode() != AArch64::Bcc && + SecondMI.getOpcode() != AArch64::BCcc) return false; // Assume the 1st instr to be a wildcard if it is unspecified. 
diff --git a/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp b/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp index 1494312886a40..0624a0b6b0264 100644 --- a/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp +++ b/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp @@ -137,7 +137,7 @@ bool AArch64RedundantCopyElimination::knownRegValInBlock( } // Otherwise, must be a conditional branch. - if (Opc != AArch64::Bcc) + if (Opc != AArch64::Bcc && Opc != AArch64::BCcc) return false; // Must be an equality check (i.e., == or !=). diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index bdaae4dd724d5..796f75d040c47 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -1675,9 +1675,16 @@ bool AArch64InstructionSelector::selectCompareBranchFedByFCmp( AArch64CC::CondCode CC1, CC2; changeFCMPPredToAArch64CC(static_cast(Pred), CC1, CC2); MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); - MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB); - if (CC2 != AArch64CC::AL) - MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB); + if (I.getFlag(MachineInstr::MIFlag::Consistent) && STI.hasHBC()) + MIB.buildInstr(AArch64::BCcc, {}, {}).addImm(CC1).addMBB(DestMBB); + else + MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB); + if (CC2 != AArch64CC::AL) { + if (I.getFlag(MachineInstr::MIFlag::Consistent) && STI.hasHBC()) + MIB.buildInstr(AArch64::BCcc, {}, {}).addImm(CC2).addMBB(DestMBB); + else + MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB); + } I.eraseFromParent(); return true; } @@ -1790,7 +1797,10 @@ bool AArch64InstructionSelector::selectCompareBranchFedByICmp( emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB); const AArch64CC::CondCode CC = changeICMPPredToAArch64CC( 
static_cast(PredOp.getPredicate())); - MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB); + if (I.getFlag(MachineInstr::MIFlag::Consistent) && STI.hasHBC()) + MIB.buildInstr(AArch64::BCcc, {}, {}).addImm(CC).addMBB(DestMBB); + else + MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB); I.eraseFromParent(); return true; } @@ -1821,9 +1831,12 @@ bool AArch64InstructionSelector::selectCompareBranch( auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1); constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); - auto Bcc = MIB.buildInstr(AArch64::Bcc) - .addImm(AArch64CC::NE) - .addMBB(I.getOperand(1).getMBB()); + auto Bcc = + MIB.buildInstr(I.getFlag(MachineInstr::MIFlag::Consistent) && STI.hasHBC() + ? AArch64::BCcc + : AArch64::Bcc) + .addImm(AArch64CC::NE) + .addMBB(I.getOperand(1).getMBB()); I.eraseFromParent(); return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI); } diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp index 7f874b245b8f4..cf9e601a73072 100644 --- a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp @@ -730,8 +730,8 @@ unsigned R600InstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded) const { + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { assert(TBB && "insertBranch must not be told to insert a fallthrough"); assert(!BytesAdded && "code size not handled"); @@ -773,8 +773,8 @@ unsigned R600InstrInfo::insertBranch(MachineBasicBlock &MBB, } } -unsigned R600InstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { +unsigned R600InstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved, + bool *IsConsistent) const { assert(!BytesRemoved && "code size not handled"); // Note : we leave PRED* instructions there. 
diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.h b/llvm/lib/Target/AMDGPU/R600InstrInfo.h index f720e4656348c..9d9acb576bfea 100644 --- a/llvm/lib/Target/AMDGPU/R600InstrInfo.h +++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.h @@ -170,11 +170,11 @@ class R600InstrInfo final : public R600GenInstrInfo { unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; bool isPredicated(const MachineInstr &MI) const override; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 027b695c3bb1a..a000b567c88c0 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3036,8 +3036,8 @@ bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, return analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify); } -unsigned SIInstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { +unsigned SIInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved, + bool *IsConsistent) const { unsigned Count = 0; unsigned RemovedSize = 0; for (MachineInstr &MI : llvm::make_early_inc_range(MBB.terminators())) { @@ -3066,8 +3066,8 @@ unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded) const { + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { if (!FBB && Cond.empty()) { BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH)) .addMBB(TBB); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 
29f549fc29a3c..aefd6f4fadfc6 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -338,13 +338,13 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { SmallVectorImpl &Cond, bool AllowModify = false) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; bool reverseBranchCondition( SmallVectorImpl &Cond) const override; diff --git a/llvm/lib/Target/ARC/ARCInstrInfo.cpp b/llvm/lib/Target/ARC/ARCInstrInfo.cpp index fe78a98837cf9..bceb525927f3c 100644 --- a/llvm/lib/Target/ARC/ARCInstrInfo.cpp +++ b/llvm/lib/Target/ARC/ARCInstrInfo.cpp @@ -251,8 +251,8 @@ bool ARCInstrInfo::analyzeBranch(MachineBasicBlock &MBB, return false; } -unsigned ARCInstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { +unsigned ARCInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved, + bool *IsConsistent) const { assert(!BytesRemoved && "Code size not handled"); MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); if (I == MBB.end()) @@ -370,7 +370,8 @@ unsigned ARCInstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, int *BytesAdded) const { + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { assert(!BytesAdded && "Code size not handled."); // Shouldn't be a fall through. 
diff --git a/llvm/lib/Target/ARC/ARCInstrInfo.h b/llvm/lib/Target/ARC/ARCInstrInfo.h index c55c9535ec296..031fbcd5d0487 100644 --- a/llvm/lib/Target/ARC/ARCInstrInfo.h +++ b/llvm/lib/Target/ARC/ARCInstrInfo.h @@ -57,11 +57,11 @@ class ARCInstrInfo : public ARCGenInstrInfo { unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &, - int *BytesAdded = nullptr) const override; + const DebugLoc &, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &, MCRegister DestReg, MCRegister SrcReg, diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 4c78379ccf5c4..d249b165dfc1f 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -469,7 +469,8 @@ bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB, } unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { + int *BytesRemoved, + bool *IsConsistent) const { assert(!BytesRemoved && "code size not handled"); MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); @@ -499,8 +500,8 @@ unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded) const { + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { assert(!BytesAdded && "code size not handled"); ARMFunctionInfo *AFI = MBB.getParent()->getInfo(); int BOpc = !AFI->isThumbFunction() diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index 5efcc1a0d9fc0..9fc6b12b52ce8 100644 --- 
a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -143,12 +143,12 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo { MachineBasicBlock *&FBB, SmallVectorImpl &Cond, bool AllowModify = false) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; bool reverseBranchCondition(SmallVectorImpl &Cond) const override; diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.cpp b/llvm/lib/Target/AVR/AVRInstrInfo.cpp index 2640ad9e36267..3829706dd8e50 100644 --- a/llvm/lib/Target/AVR/AVRInstrInfo.cpp +++ b/llvm/lib/Target/AVR/AVRInstrInfo.cpp @@ -398,7 +398,8 @@ unsigned AVRInstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, int *BytesAdded) const { + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { if (BytesAdded) *BytesAdded = 0; @@ -435,8 +436,8 @@ unsigned AVRInstrInfo::insertBranch(MachineBasicBlock &MBB, return Count; } -unsigned AVRInstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { +unsigned AVRInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved, + bool *IsConsistent) const { if (BytesRemoved) *BytesRemoved = 0; diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.h b/llvm/lib/Target/AVR/AVRInstrInfo.h index 290177f5eec66..209b244b786aa 100644 --- a/llvm/lib/Target/AVR/AVRInstrInfo.h +++ b/llvm/lib/Target/AVR/AVRInstrInfo.h @@ -99,10 +99,10 @@ class AVRInstrInfo : public AVRGenInstrInfo { bool AllowModify = false) const override; unsigned 
insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; bool reverseBranchCondition(SmallVectorImpl &Cond) const override; diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.cpp b/llvm/lib/Target/BPF/BPFInstrInfo.cpp index 2209f1f1462b4..bc852e61d033f 100644 --- a/llvm/lib/Target/BPF/BPFInstrInfo.cpp +++ b/llvm/lib/Target/BPF/BPFInstrInfo.cpp @@ -221,8 +221,8 @@ unsigned BPFInstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded) const { + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { assert(!BytesAdded && "code size not handled"); // Shouldn't be a fall through. 
@@ -238,8 +238,8 @@ unsigned BPFInstrInfo::insertBranch(MachineBasicBlock &MBB, llvm_unreachable("Unexpected conditional branch"); } -unsigned BPFInstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { +unsigned BPFInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved, + bool *IsConsistent) const { assert(!BytesRemoved && "code size not handled"); MachineBasicBlock::iterator I = MBB.end(); diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.h b/llvm/lib/Target/BPF/BPFInstrInfo.h index 354aca1bd2f93..d9e40493d267d 100644 --- a/llvm/lib/Target/BPF/BPFInstrInfo.h +++ b/llvm/lib/Target/BPF/BPFInstrInfo.h @@ -52,12 +52,13 @@ class BPFInstrInfo : public BPFGenInstrInfo { SmallVectorImpl &Cond, bool AllowModify) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; + private: void expandMEMCPY(MachineBasicBlock::iterator) const; diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp b/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp index e5581bcdc3975..3b2f7bc722bc1 100644 --- a/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp +++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp @@ -110,8 +110,8 @@ bool CSKYInstrInfo::analyzeBranch(MachineBasicBlock &MBB, return true; } -unsigned CSKYInstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { +unsigned CSKYInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved, + bool *IsConsistent) const { if (BytesRemoved) *BytesRemoved = 0; MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); @@ -151,9 +151,12 @@ CSKYInstrInfo::getBranchDestBlock(const 
MachineInstr &MI) const { return MI.getOperand(NumOp - 1).getMBB(); } -unsigned CSKYInstrInfo::insertBranch( - MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - ArrayRef Cond, const DebugLoc &DL, int *BytesAdded) const { +unsigned CSKYInstrInfo::insertBranch(MachineBasicBlock &MBB, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + ArrayRef Cond, + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { if (BytesAdded) *BytesAdded = 0; diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.h b/llvm/lib/Target/CSKY/CSKYInstrInfo.h index dbb69a7a87980..5a44544b77841 100644 --- a/llvm/lib/Target/CSKY/CSKYInstrInfo.h +++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.h @@ -59,16 +59,16 @@ class CSKYInstrInfo : public CSKYGenInstrInfo { unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl &Cond, bool AllowModify = false) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; bool reverseBranchCondition(SmallVectorImpl &Cond) const override; diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index 6f0210763bc5f..a4bc553e82d85 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -603,7 +603,8 @@ bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB, } unsigned HexagonInstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { + int *BytesRemoved, + bool *IsConsistent) const { assert(!BytesRemoved && "code size not 
handled"); LLVM_DEBUG(dbgs() << "\nRemoving branches out of " << printMBBReference(MBB)); @@ -629,9 +630,9 @@ unsigned HexagonInstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded) const { - unsigned BOpc = Hexagon::J2_jump; + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { + unsigned BOpc = Hexagon::J2_jump; unsigned BccOpc = Hexagon::J2_jumpt; assert(validateBranchCond(Cond) && "Invalid branching condition"); assert(TBB && "insertBranch must not be told to insert a fallthrough"); diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h index 0bc0877f6e706..b08cc389d2fca 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h @@ -111,8 +111,8 @@ class HexagonInstrInfo : public HexagonGenInstrInfo { /// Remove the branching code at the end of the specific MBB. /// This is only invoked in cases where analyzeBranch returns success. It /// returns the number of instructions that were removed. - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; /// Insert branch code into the end of the specified MachineBasicBlock. /// The operands to this method are the same as those @@ -126,8 +126,8 @@ class HexagonInstrInfo : public HexagonGenInstrInfo { /// merging needs to be disabled. unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; /// Analyze loop L, which must be a single-basic-block loop, and if the /// conditions can be understood enough produce a PipelinerLoopInfo object. 
diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp index aa7e8846406dd..02e1fe3d5d576 100644 --- a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp +++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp @@ -657,8 +657,8 @@ unsigned LanaiInstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TrueBlock, MachineBasicBlock *FalseBlock, ArrayRef Condition, - const DebugLoc &DL, - int *BytesAdded) const { + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { // Shouldn't be a fall through. assert(TrueBlock && "insertBranch must not be told to insert a fallthrough"); assert(!BytesAdded && "code size not handled"); @@ -685,8 +685,8 @@ unsigned LanaiInstrInfo::insertBranch(MachineBasicBlock &MBB, return 2; } -unsigned LanaiInstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { +unsigned LanaiInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved, + bool *IsConsistent) const { assert(!BytesRemoved && "code size not handled"); MachineBasicBlock::iterator Instruction = MBB.end(); diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.h b/llvm/lib/Target/Lanai/LanaiInstrInfo.h index 62f6240c6e468..54a4e396fc70a 100644 --- a/llvm/lib/Target/Lanai/LanaiInstrInfo.h +++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.h @@ -89,8 +89,8 @@ class LanaiInstrInfo : public LanaiGenInstrInfo { SmallVectorImpl &Condition, bool AllowModify) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; // For a comparison instruction, return the source registers in SrcReg and // SrcReg2 if having two register operands, and the value it compares against @@ -138,9 +138,9 @@ class LanaiInstrInfo : public LanaiGenInstrInfo { unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TrueBlock, MachineBasicBlock *FalseBlock, - ArrayRef Condition, 
- const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + ArrayRef Condition, const DebugLoc &DL, + int *BytesAdded = nullptr, + bool IsConsistent = false) const override; }; static inline bool isSPLSOpcode(unsigned Opcode) { diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp index ddd1c9943fac0..fd193c9680fba 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp @@ -325,7 +325,8 @@ bool LoongArchInstrInfo::isBranchOffsetInRange(unsigned BranchOp, } unsigned LoongArchInstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { + int *BytesRemoved, + bool *IsConsistent) const { if (BytesRemoved) *BytesRemoved = 0; MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); @@ -357,9 +358,12 @@ unsigned LoongArchInstrInfo::removeBranch(MachineBasicBlock &MBB, // Inserts a branch into the end of the specific MachineBasicBlock, returning // the number of instructions inserted. 
-unsigned LoongArchInstrInfo::insertBranch( - MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - ArrayRef Cond, const DebugLoc &DL, int *BytesAdded) const { +unsigned LoongArchInstrInfo::insertBranch(MachineBasicBlock &MBB, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + ArrayRef Cond, + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { if (BytesAdded) *BytesAdded = 0; diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h index 4b145d0baa417..2d198447cb4a6 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h @@ -62,13 +62,13 @@ class LoongArchInstrInfo : public LoongArchGenInstrInfo { bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &dl, - int *BytesAdded = nullptr) const override; + const DebugLoc &dl, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.cpp b/llvm/lib/Target/M68k/M68kInstrInfo.cpp index d56fef9e9029a..aad64fc72c411 100644 --- a/llvm/lib/Target/M68k/M68kInstrInfo.cpp +++ b/llvm/lib/Target/M68k/M68kInstrInfo.cpp @@ -254,8 +254,8 @@ bool M68kInstrInfo::analyzeBranch(MachineBasicBlock &MBB, return AnalyzeBranchImpl(MBB, TBB, FBB, Cond, AllowModify); } -unsigned M68kInstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { +unsigned M68kInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved, + bool *IsConsistent) const { assert(!BytesRemoved && "code 
size not handled"); MachineBasicBlock::iterator I = MBB.end(); @@ -277,9 +277,12 @@ unsigned M68kInstrInfo::removeBranch(MachineBasicBlock &MBB, return Count; } -unsigned M68kInstrInfo::insertBranch( - MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - ArrayRef Cond, const DebugLoc &DL, int *BytesAdded) const { +unsigned M68kInstrInfo::insertBranch(MachineBasicBlock &MBB, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + ArrayRef Cond, + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { // Shouldn't be a fall through. assert(TBB && "InsertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 1 || Cond.size() == 0) && diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.h b/llvm/lib/Target/M68k/M68kInstrInfo.h index 577967f2fdfc9..afa2faf71de6a 100644 --- a/llvm/lib/Target/M68k/M68kInstrInfo.h +++ b/llvm/lib/Target/M68k/M68kInstrInfo.h @@ -261,13 +261,13 @@ class M68kInstrInfo : public M68kGenInstrInfo { SmallVectorImpl &Cond, bool AllowModify) const; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, diff --git a/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp b/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp index 7405716516643..fb983245c6039 100644 --- a/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -104,7 +104,8 @@ void MSP430InstrInfo::copyPhysReg(MachineBasicBlock &MBB, } unsigned 
MSP430InstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { + int *BytesRemoved, + bool *IsConsistent) const { assert(!BytesRemoved && "code size not handled"); MachineBasicBlock::iterator I = MBB.end(); @@ -254,8 +255,8 @@ unsigned MSP430InstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded) const { + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { // Shouldn't be a fall through. assert(TBB && "insertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 1 || Cond.size() == 0) && diff --git a/llvm/lib/Target/MSP430/MSP430InstrInfo.h b/llvm/lib/Target/MSP430/MSP430InstrInfo.h index b8d015a21cd15..397e74da63b22 100644 --- a/llvm/lib/Target/MSP430/MSP430InstrInfo.h +++ b/llvm/lib/Target/MSP430/MSP430InstrInfo.h @@ -61,12 +61,12 @@ class MSP430InstrInfo : public MSP430GenInstrInfo { SmallVectorImpl &Cond, bool AllowModify) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; int64_t getFramePoppedByCallee(const MachineInstr &I) const { assert(isFrameInstr(I) && "Not a frame instruction"); diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.cpp b/llvm/lib/Target/Mips/MipsInstrInfo.cpp index 392cc15d7943a..99addbd15d7b1 100644 --- a/llvm/lib/Target/Mips/MipsInstrInfo.cpp +++ b/llvm/lib/Target/Mips/MipsInstrInfo.cpp @@ -135,8 +135,8 @@ unsigned MipsInstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc 
&DL, - int *BytesAdded) const { + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { // Shouldn't be a fall through. assert(TBB && "insertBranch must not be told to insert a fallthrough"); assert(!BytesAdded && "code size not handled"); @@ -165,8 +165,8 @@ unsigned MipsInstrInfo::insertBranch(MachineBasicBlock &MBB, return 1; } -unsigned MipsInstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { +unsigned MipsInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved, + bool *IsConsistent) const { assert(!BytesRemoved && "code size not handled"); MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend(); diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.h b/llvm/lib/Target/Mips/MipsInstrInfo.h index dc4b9d99b39d2..c193ba2d560f2 100644 --- a/llvm/lib/Target/Mips/MipsInstrInfo.h +++ b/llvm/lib/Target/Mips/MipsInstrInfo.h @@ -65,13 +65,13 @@ class MipsInstrInfo : public MipsGenInstrInfo { SmallVectorImpl &Cond, bool AllowModify) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; bool reverseBranchCondition(SmallVectorImpl &Cond) const override; diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp index b0d792b5ee3fe..e1690c6306280 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -146,8 +146,8 @@ bool NVPTXInstrInfo::analyzeBranch(MachineBasicBlock &MBB, return true; } -unsigned NVPTXInstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { +unsigned 
NVPTXInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved, + bool *IsConsistent) const { assert(!BytesRemoved && "code size not handled"); MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin()) @@ -176,8 +176,8 @@ unsigned NVPTXInstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded) const { + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { assert(!BytesAdded && "code size not handled"); // Shouldn't be a fall through. diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h index cd068a0939300..69a6d919c6214 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h @@ -60,12 +60,12 @@ class NVPTXInstrInfo : public NVPTXGenInstrInfo { MachineBasicBlock *&FBB, SmallVectorImpl &Cond, bool AllowModify) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; }; } // namespace llvm diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index e1bb2f72657f8..4f70c6d32fd23 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -1440,8 +1440,8 @@ bool PPCInstrInfo::analyzeBranch(MachineBasicBlock &MBB, return true; } -unsigned PPCInstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { +unsigned PPCInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved, + bool *IsConsistent) const { assert(!BytesRemoved && 
"code size not handled"); MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); @@ -1476,8 +1476,8 @@ unsigned PPCInstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded) const { + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { // Shouldn't be a fall through. assert(TBB && "insertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 2 || Cond.size() == 0) && diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h index 1f59e994d9cb1..50e476258388d 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -396,12 +396,12 @@ class PPCInstrInfo : public PPCGenInstrInfo { MachineBasicBlock *&FBB, SmallVectorImpl &Cond, bool AllowModify) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; // Select analysis. 
bool canInsertSelect(const MachineBasicBlock &, ArrayRef Cond, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 9271f807a8483..c8a0db51b270f 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -1018,8 +1018,8 @@ bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB, return true; } -unsigned RISCVInstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { +unsigned RISCVInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved, + bool *IsConsistent) const { if (BytesRemoved) *BytesRemoved = 0; MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); @@ -1052,9 +1052,12 @@ unsigned RISCVInstrInfo::removeBranch(MachineBasicBlock &MBB, // Inserts a branch into the end of the specific MachineBasicBlock, returning // the number of instructions inserted. -unsigned RISCVInstrInfo::insertBranch( - MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - ArrayRef Cond, const DebugLoc &DL, int *BytesAdded) const { +unsigned RISCVInstrInfo::insertBranch(MachineBasicBlock &MBB, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + ArrayRef Cond, + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { if (BytesAdded) *BytesAdded = 0; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h index b33d8c2856159..96b14fb662d08 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -108,16 +108,16 @@ class RISCVInstrInfo : public RISCVGenInstrInfo { unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &dl, - int *BytesAdded = nullptr) const override; + const DebugLoc &dl, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc 
&DL, int64_t BrOffset, RegScavenger *RS) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; bool reverseBranchCondition(SmallVectorImpl &Cond) const override; diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp index 42317453a2370..5c59295ef21a7 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp @@ -200,8 +200,8 @@ bool SPIRVInstrInfo::analyzeBranch(MachineBasicBlock &MBB, // returns the number of instructions that were removed. // If \p BytesRemoved is non-null, report the change in code size from the // removed instructions. -unsigned SPIRVInstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { +unsigned SPIRVInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved, + bool *IsConsistent) const { report_fatal_error("Branch removal not supported, as MBB info not propagated" " to OpPhi instructions. Try using -O0 instead."); } @@ -219,9 +219,12 @@ unsigned SPIRVInstrInfo::removeBranch(MachineBasicBlock &MBB, // // The CFG information in MBB.Predecessors and MBB.Successors must be valid // before calling this function. -unsigned SPIRVInstrInfo::insertBranch( - MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - ArrayRef Cond, const DebugLoc &DL, int *BytesAdded) const { +unsigned SPIRVInstrInfo::insertBranch(MachineBasicBlock &MBB, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + ArrayRef Cond, + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { report_fatal_error("Branch insertion not supported, as MBB info not " "propagated to OpPhi instructions. 
Try using " "-O0 instead."); diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h index c01e30e109bd5..706846716ef93 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h +++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h @@ -41,13 +41,13 @@ class SPIRVInstrInfo : public SPIRVGenInstrInfo { SmallVectorImpl &Cond, bool AllowModify = false) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc) const override; diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/llvm/lib/Target/Sparc/SparcInstrInfo.cpp index 90662cd87dcf1..db2236dca290c 100644 --- a/llvm/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/llvm/lib/Target/Sparc/SparcInstrInfo.cpp @@ -328,8 +328,8 @@ unsigned SparcInstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded) const { + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { assert(TBB && "insertBranch must not be told to insert a fallthrough"); assert((Cond.size() <= 3) && "Sparc branch conditions should have at most three components!"); @@ -364,8 +364,8 @@ unsigned SparcInstrInfo::insertBranch(MachineBasicBlock &MBB, return 2; } -unsigned SparcInstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { +unsigned SparcInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved, + bool *IsConsistent) const { 
MachineBasicBlock::iterator I = MBB.end(); unsigned Count = 0; int Removed = 0; diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.h b/llvm/lib/Target/Sparc/SparcInstrInfo.h index 7056d6babe17b..470199b3c2093 100644 --- a/llvm/lib/Target/Sparc/SparcInstrInfo.h +++ b/llvm/lib/Target/Sparc/SparcInstrInfo.h @@ -71,13 +71,13 @@ class SparcInstrInfo : public SparcGenInstrInfo { SmallVectorImpl &Cond, bool AllowModify = false) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; bool reverseBranchCondition(SmallVectorImpl &Cond) const override; diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp index ac8c395f9064f..8377471ae85d8 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -447,7 +447,8 @@ bool SystemZInstrInfo::analyzeBranch(MachineBasicBlock &MBB, } unsigned SystemZInstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { + int *BytesRemoved, + bool *IsConsistent) const { assert(!BytesRemoved && "code size not handled"); // Most of the code and comments here are boilerplate. @@ -482,8 +483,8 @@ unsigned SystemZInstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded) const { + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { // In this function we output 32-bit branches, which should always // have enough range. 
They can be shortened and relaxed by later code // in the pipeline, if desired. diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h index bb883ea464d37..a2c8ecf344c5a 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h @@ -238,12 +238,12 @@ class SystemZInstrInfo : public SystemZGenInstrInfo { MachineBasicBlock *&FBB, SmallVectorImpl &Cond, bool AllowModify) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &Mask, int64_t &Value) const override; diff --git a/llvm/lib/Target/VE/VEInstrInfo.cpp b/llvm/lib/Target/VE/VEInstrInfo.cpp index ebb9e21389c37..620e8ec0e7e4b 100644 --- a/llvm/lib/Target/VE/VEInstrInfo.cpp +++ b/llvm/lib/Target/VE/VEInstrInfo.cpp @@ -227,7 +227,8 @@ unsigned VEInstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, int *BytesAdded) const { + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { assert(TBB && "insertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 3 || Cond.size() == 0) && "VE branch conditions should have three component!"); @@ -288,8 +289,8 @@ unsigned VEInstrInfo::insertBranch(MachineBasicBlock &MBB, return 2; } -unsigned VEInstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { +unsigned VEInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved, + 
bool *IsConsistent) const { assert(!BytesRemoved && "code size not handled"); MachineBasicBlock::iterator I = MBB.end(); diff --git a/llvm/lib/Target/VE/VEInstrInfo.h b/llvm/lib/Target/VE/VEInstrInfo.h index 4fe56f24116f8..4b68534178c50 100644 --- a/llvm/lib/Target/VE/VEInstrInfo.h +++ b/llvm/lib/Target/VE/VEInstrInfo.h @@ -67,13 +67,13 @@ class VEInstrInfo : public VEGenInstrInfo { SmallVectorImpl &Cond, bool AllowModify = false) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; bool reverseBranchCondition(SmallVectorImpl &Cond) const override; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp index 32a4accd040eb..6f4369a45b27d 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp @@ -135,7 +135,8 @@ bool WebAssemblyInstrInfo::analyzeBranch(MachineBasicBlock &MBB, } unsigned WebAssemblyInstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { + int *BytesRemoved, + bool *IsConsistent) const { assert(!BytesRemoved && "code size not handled"); MachineBasicBlock::instr_iterator I = MBB.instr_end(); @@ -156,9 +157,12 @@ unsigned WebAssemblyInstrInfo::removeBranch(MachineBasicBlock &MBB, return Count; } -unsigned WebAssemblyInstrInfo::insertBranch( - MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - ArrayRef Cond, const DebugLoc &DL, int *BytesAdded) const { +unsigned WebAssemblyInstrInfo::insertBranch(MachineBasicBlock &MBB, + 
MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + ArrayRef Cond, + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { assert(!BytesAdded && "code size not handled"); if (Cond.empty()) { diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h index c1e1a790c60e2..7b6362b9dd36f 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h @@ -56,12 +56,12 @@ class WebAssemblyInstrInfo final : public WebAssemblyGenInstrInfo { MachineBasicBlock *&FBB, SmallVectorImpl &Cond, bool AllowModify = false) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; bool reverseBranchCondition(SmallVectorImpl &Cond) const override; diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 56e3ac79b5957..fd8fe2bbaed51 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -3373,8 +3373,8 @@ bool X86InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB, return true; } -unsigned X86InstrInfo::removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved) const { +unsigned X86InstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved, + bool *IsConsistent) const { assert(!BytesRemoved && "code size not handled"); MachineBasicBlock::iterator I = MBB.end(); @@ -3400,8 +3400,8 @@ unsigned X86InstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int 
*BytesAdded) const { + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { // Shouldn't be a fall through. assert(TBB && "insertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 1 || Cond.size() == 0) && diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index b0a2d2b890743..c27713a95bc43 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -360,12 +360,12 @@ class X86InstrInfo final : public X86GenInstrInfo { TargetInstrInfo::MachineBranchPredicate &MBP, bool AllowModify = false) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; bool canInsertSelect(const MachineBasicBlock &, ArrayRef Cond, Register, Register, Register, int &, int &, int &) const override; diff --git a/llvm/lib/Target/XCore/XCoreInstrInfo.cpp b/llvm/lib/Target/XCore/XCoreInstrInfo.cpp index d8a8e2cddf154..dbfffb711f2bf 100644 --- a/llvm/lib/Target/XCore/XCoreInstrInfo.cpp +++ b/llvm/lib/Target/XCore/XCoreInstrInfo.cpp @@ -272,8 +272,8 @@ unsigned XCoreInstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded) const { + const DebugLoc &DL, int *BytesAdded, + bool IsConsistent) const { // Shouldn't be a fall through. 
assert(TBB && "insertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 2 || Cond.size() == 0) && @@ -302,8 +302,8 @@ unsigned XCoreInstrInfo::insertBranch(MachineBasicBlock &MBB, return 2; } -unsigned -XCoreInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved) const { +unsigned XCoreInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved, + bool *IsConsistent) const { assert(!BytesRemoved && "code size not handled"); MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); diff --git a/llvm/lib/Target/XCore/XCoreInstrInfo.h b/llvm/lib/Target/XCore/XCoreInstrInfo.h index 9bf7e2dcccb7d..ece4bfa0f23d9 100644 --- a/llvm/lib/Target/XCore/XCoreInstrInfo.h +++ b/llvm/lib/Target/XCore/XCoreInstrInfo.h @@ -56,11 +56,11 @@ class XCoreInstrInfo : public XCoreGenInstrInfo { unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded = nullptr) const override; + const DebugLoc &DL, int *BytesAdded = nullptr, + bool IsConsistent = false) const override; - unsigned removeBranch(MachineBasicBlock &MBB, - int *BytesRemoved = nullptr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr, + bool *IsConsistent = nullptr) const override; void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 6009558efca06..29a41d012849e 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -5401,7 +5401,6 @@ bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI, !isa(SI->getDefaultDest()->getFirstNonPHIOrDbg()); auto *BB = SI->getParent(); - // Partition the cases into two sets with different destinations. BasicBlock *DestA = HasDefault ? 
SI->getDefaultDest() : nullptr; BasicBlock *DestB = nullptr; @@ -5465,7 +5464,9 @@ bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI, Cmp = ConstantInt::getTrue(SI->getContext()); else Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch"); - BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest); + BranchInst *NewBI = + Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest, nullptr, nullptr, + SI->getMetadata(LLVMContext::MD_consistent)); // Update weight for the newly-created conditional branch. if (hasBranchWeightMD(*SI)) { @@ -6675,8 +6676,9 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, } else { Value *Cmp = Builder.CreateICmpULT( TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize)); - RangeCheckBranch = - Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest()); + RangeCheckBranch = Builder.CreateCondBr( + Cmp, LookupBB, SI->getDefaultDest(), nullptr, nullptr, + SI->getMetadata(LLVMContext::MD_consistent)); if (DTU) Updates.push_back({DominatorTree::Insert, BB, LookupBB}); } diff --git a/llvm/test/CodeGen/AArch64/cond-br-tuning.ll b/llvm/test/CodeGen/AArch64/cond-br-tuning.ll index dc00c41892ba8..08beaa492d689 100644 --- a/llvm/test/CodeGen/AArch64/cond-br-tuning.ll +++ b/llvm/test/CodeGen/AArch64/cond-br-tuning.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -debugify-and-strip-all-safe < %s -O3 -mtriple=aarch64-eabi -verify-machineinstrs | FileCheck %s +; RUN: llc -debugify-and-strip-all-safe < %s -O3 -mtriple=aarch64-eabi -mattr=+hbc -verify-machineinstrs | FileCheck %s -check-prefix=HBC target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64-linaro-linux-gnueabi" @@ -13,9 +14,16 @@ define void @test_add_cbz(i32 %a, i32 %b, ptr %ptr) { ; CHECK-NEXT: cset w8, eq ; CHECK-NEXT: str w8, [x2] ; CHECK-NEXT: ret +; +; HBC-LABEL: test_add_cbz: +; HBC: // %bb.0: // %common.ret +; HBC-NEXT: cmn w0, w1 +; 
HBC-NEXT: cset w8, eq +; HBC-NEXT: str w8, [x2] +; HBC-NEXT: ret %c = add nsw i32 %a, %b %d = icmp ne i32 %c, 0 - br i1 %d, label %L1, label %L2 + br i1 %d, label %L1, label %L2, !consistent !10 L1: store i32 0, ptr %ptr, align 4 ret void @@ -32,9 +40,18 @@ define void @test_add_cbz_multiple_use(i32 %a, i32 %b, ptr %ptr) { ; CHECK-NEXT: csel w8, w9, w8, ne ; CHECK-NEXT: str w8, [x2] ; CHECK-NEXT: ret +; +; HBC-LABEL: test_add_cbz_multiple_use: +; HBC: // %bb.0: // %common.ret +; HBC-NEXT: adds w8, w0, w1 +; HBC-NEXT: mov w9, #10 // =0xa +; HBC-NEXT: csel w8, w9, w8, ne +; HBC-NEXT: str w8, [x2] +; HBC-NEXT: ret %c = add nsw i32 %a, %b %d = icmp ne i32 %c, 0 - br i1 %d, label %L1, label %L2 + br i1 %d, label %L1, label %L2, !consistent !10 + L1: store i32 10, ptr %ptr, align 4 ret void @@ -50,9 +67,16 @@ define void @test_add_cbz_64(i64 %a, i64 %b, ptr %ptr) { ; CHECK-NEXT: cset w8, eq ; CHECK-NEXT: str x8, [x2] ; CHECK-NEXT: ret +; +; HBC-LABEL: test_add_cbz_64: +; HBC: // %bb.0: // %common.ret +; HBC-NEXT: cmn x0, x1 +; HBC-NEXT: cset w8, eq +; HBC-NEXT: str x8, [x2] +; HBC-NEXT: ret %c = add nsw i64 %a, %b %d = icmp ne i64 %c, 0 - br i1 %d, label %L1, label %L2 + br i1 %d, label %L1, label %L2, !consistent !10 L1: store i64 0, ptr %ptr, align 4 ret void @@ -68,9 +92,16 @@ define void @test_and_cbz(i32 %a, ptr %ptr) { ; CHECK-NEXT: cset w8, eq ; CHECK-NEXT: str w8, [x1] ; CHECK-NEXT: ret +; +; HBC-LABEL: test_and_cbz: +; HBC: // %bb.0: // %common.ret +; HBC-NEXT: tst w0, #0x6 +; HBC-NEXT: cset w8, eq +; HBC-NEXT: str w8, [x1] +; HBC-NEXT: ret %c = and i32 %a, 6 %d = icmp ne i32 %c, 0 - br i1 %d, label %L1, label %L2 + br i1 %d, label %L1, label %L2, !consistent !10 L1: store i32 0, ptr %ptr, align 4 ret void @@ -86,9 +117,16 @@ define void @test_bic_cbnz(i32 %a, i32 %b, ptr %ptr) { ; CHECK-NEXT: cset w8, ne ; CHECK-NEXT: str w8, [x2] ; CHECK-NEXT: ret +; +; HBC-LABEL: test_bic_cbnz: +; HBC: // %bb.0: // %common.ret +; HBC-NEXT: bics wzr, w1, w0 +; HBC-NEXT: cset 
w8, ne +; HBC-NEXT: str w8, [x2] +; HBC-NEXT: ret %c = and i32 %a, %b %d = icmp eq i32 %c, %b - br i1 %d, label %L1, label %L2 + br i1 %d, label %L1, label %L2, !consistent !10 L1: store i32 0, ptr %ptr, align 4 ret void @@ -106,10 +144,19 @@ define void @test_add_tbz(i32 %a, i32 %b, ptr %ptr) { ; CHECK-NEXT: str w8, [x2] ; CHECK-NEXT: .LBB5_2: // %L2 ; CHECK-NEXT: ret +; +; HBC-LABEL: test_add_tbz: +; HBC: // %bb.0: // %entry +; HBC-NEXT: adds w8, w0, w1 +; HBC-NEXT: bc.pl .LBB5_2 +; HBC-NEXT: // %bb.1: // %L1 +; HBC-NEXT: str w8, [x2] +; HBC-NEXT: .LBB5_2: // %L2 +; HBC-NEXT: ret entry: %add = add nsw i32 %a, %b %cmp36 = icmp sge i32 %add, 0 - br i1 %cmp36, label %L2, label %L1 + br i1 %cmp36, label %L2, label %L1, !consistent !10 L1: store i32 %add, ptr %ptr, align 8 br label %L2 @@ -126,10 +173,19 @@ define void @test_subs_tbz(i32 %a, i32 %b, ptr %ptr) { ; CHECK-NEXT: str w8, [x2] ; CHECK-NEXT: .LBB6_2: // %L2 ; CHECK-NEXT: ret +; +; HBC-LABEL: test_subs_tbz: +; HBC: // %bb.0: // %entry +; HBC-NEXT: subs w8, w0, w1 +; HBC-NEXT: bc.pl .LBB6_2 +; HBC-NEXT: // %bb.1: // %L1 +; HBC-NEXT: str w8, [x2] +; HBC-NEXT: .LBB6_2: // %L2 +; HBC-NEXT: ret entry: %sub = sub nsw i32 %a, %b %cmp36 = icmp sge i32 %sub, 0 - br i1 %cmp36, label %L2, label %L1 + br i1 %cmp36, label %L2, label %L1, !consistent !10 L1: store i32 %sub, ptr %ptr, align 8 br label %L2 @@ -146,10 +202,19 @@ define void @test_add_tbnz(i32 %a, i32 %b, ptr %ptr) { ; CHECK-NEXT: str w8, [x2] ; CHECK-NEXT: .LBB7_2: // %L2 ; CHECK-NEXT: ret +; +; HBC-LABEL: test_add_tbnz: +; HBC: // %bb.0: // %entry +; HBC-NEXT: adds w8, w0, w1 +; HBC-NEXT: bc.mi .LBB7_2 +; HBC-NEXT: // %bb.1: // %L1 +; HBC-NEXT: str w8, [x2] +; HBC-NEXT: .LBB7_2: // %L2 +; HBC-NEXT: ret entry: %add = add nsw i32 %a, %b %cmp36 = icmp slt i32 %add, 0 - br i1 %cmp36, label %L2, label %L1 + br i1 %cmp36, label %L2, label %L1, !consistent !10 L1: store i32 %add, ptr %ptr, align 8 br label %L2 @@ -166,10 +231,19 @@ define void @test_subs_tbnz(i32 
%a, i32 %b, ptr %ptr) { ; CHECK-NEXT: str w8, [x2] ; CHECK-NEXT: .LBB8_2: // %L2 ; CHECK-NEXT: ret +; +; HBC-LABEL: test_subs_tbnz: +; HBC: // %bb.0: // %entry +; HBC-NEXT: subs w8, w0, w1 +; HBC-NEXT: bc.mi .LBB8_2 +; HBC-NEXT: // %bb.1: // %L1 +; HBC-NEXT: str w8, [x2] +; HBC-NEXT: .LBB8_2: // %L2 +; HBC-NEXT: ret entry: %sub = sub nsw i32 %a, %b %cmp36 = icmp slt i32 %sub, 0 - br i1 %cmp36, label %L2, label %L1 + br i1 %cmp36, label %L2, label %L1, !consistent !10 L1: store i32 %sub, ptr %ptr, align 8 br label %L2 @@ -202,11 +276,32 @@ define void @test_call_clobber(i32 %unused, i32 %a) uwtable { ; CHECK-NEXT: .LBB9_2: // %if.then ; CHECK-NEXT: .cfi_restore_state ; CHECK-NEXT: bl foo +; +; HBC-LABEL: test_call_clobber: +; HBC: // %bb.0: // %entry +; HBC-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill +; HBC-NEXT: .cfi_def_cfa_offset 16 +; HBC-NEXT: .cfi_offset w19, -8 +; HBC-NEXT: .cfi_offset w30, -16 +; HBC-NEXT: .cfi_remember_state +; HBC-NEXT: and w19, w1, #0x6 +; HBC-NEXT: mov w0, w19 +; HBC-NEXT: bl bar +; HBC-NEXT: cbnz w19, .LBB9_2 +; HBC-NEXT: // %bb.1: // %if.end +; HBC-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; HBC-NEXT: .cfi_def_cfa_offset 0 +; HBC-NEXT: .cfi_restore w19 +; HBC-NEXT: .cfi_restore w30 +; HBC-NEXT: ret +; HBC-NEXT: .LBB9_2: // %if.then +; HBC-NEXT: .cfi_restore_state +; HBC-NEXT: bl foo entry: %c = and i32 %a, 6 call void @bar(i32 %c) %tobool = icmp eq i32 %c, 0 - br i1 %tobool, label %if.end, label %if.then + br i1 %tobool, label %if.end, label %if.then, !consistent !10 if.then: tail call void @foo() @@ -215,3 +310,5 @@ if.then: if.end: ret void } + +!10 = !{i1 true} diff --git a/llvm/test/CodeGen/AArch64/tbl-loops.ll b/llvm/test/CodeGen/AArch64/tbl-loops.ll index b63d540fb8e02..05d10dfb1a322 100644 --- a/llvm/test/CodeGen/AArch64/tbl-loops.ll +++ b/llvm/test/CodeGen/AArch64/tbl-loops.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64 < %s 
| FileCheck %s +; RUN: llc -mtriple=aarch64 -mattr=+hbc -O3 < %s | FileCheck %s -check-prefix=HBC define void @loop1(ptr noalias nocapture noundef writeonly %dst, ptr nocapture noundef readonly %data, i32 noundef %width) { ; CHECK-LABEL: loop1: @@ -68,16 +69,83 @@ define void @loop1(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n ; CHECK-NEXT: b.ne .LBB0_7 ; CHECK-NEXT: .LBB0_8: // %for.cond.cleanup ; CHECK-NEXT: ret +; +; HBC-LABEL: loop1: +; HBC: // %bb.0: // %entry +; HBC-NEXT: subs w8, w2, #1 +; HBC-NEXT: bc.lt .LBB0_8 +; HBC-NEXT: // %bb.1: // %for.body.preheader +; HBC-NEXT: cmp w8, #6 +; HBC-NEXT: bc.hi .LBB0_3 +; HBC-NEXT: // %bb.2: +; HBC-NEXT: mov w10, wzr +; HBC-NEXT: mov x8, x1 +; HBC-NEXT: mov x9, x0 +; HBC-NEXT: b .LBB0_6 +; HBC-NEXT: .LBB0_3: // %vector.ph +; HBC-NEXT: add x11, x8, #1 +; HBC-NEXT: mov w8, #1132396544 // =0x437f0000 +; HBC-NEXT: add x12, x0, #4 +; HBC-NEXT: and x10, x11, #0x1fffffff8 +; HBC-NEXT: dup v0.4s, w8 +; HBC-NEXT: add x13, x1, #16 +; HBC-NEXT: add x8, x1, x10, lsl #2 +; HBC-NEXT: add x9, x0, x10 +; HBC-NEXT: mov x14, x10 +; HBC-NEXT: .LBB0_4: // %vector.body +; HBC-NEXT: // =>This Inner Loop Header: Depth=1 +; HBC-NEXT: ldp q1, q2, [x13, #-16] +; HBC-NEXT: subs x14, x14, #8 +; HBC-NEXT: add x13, x13, #32 +; HBC-NEXT: fcmgt v3.4s, v1.4s, v0.4s +; HBC-NEXT: fcmgt v4.4s, v2.4s, v0.4s +; HBC-NEXT: fcmlt v5.4s, v1.4s, #0.0 +; HBC-NEXT: fcmlt v6.4s, v2.4s, #0.0 +; HBC-NEXT: bit v1.16b, v0.16b, v3.16b +; HBC-NEXT: bit v2.16b, v0.16b, v4.16b +; HBC-NEXT: bic v1.16b, v1.16b, v5.16b +; HBC-NEXT: bic v2.16b, v2.16b, v6.16b +; HBC-NEXT: fcvtzs v1.4s, v1.4s +; HBC-NEXT: fcvtzs v2.4s, v2.4s +; HBC-NEXT: xtn v1.4h, v1.4s +; HBC-NEXT: xtn v2.4h, v2.4s +; HBC-NEXT: xtn v1.8b, v1.8h +; HBC-NEXT: xtn v2.8b, v2.8h +; HBC-NEXT: mov v1.s[1], v2.s[0] +; HBC-NEXT: stur d1, [x12, #-4] +; HBC-NEXT: add x12, x12, #8 +; HBC-NEXT: bc.ne .LBB0_4 +; HBC-NEXT: // %bb.5: // %middle.block +; HBC-NEXT: cmp x11, x10 +; HBC-NEXT: bc.eq .LBB0_8 +; 
HBC-NEXT: .LBB0_6: // %for.body.preheader1 +; HBC-NEXT: movi d0, #0000000000000000 +; HBC-NEXT: sub w10, w2, w10 +; HBC-NEXT: mov w11, #1132396544 // =0x437f0000 +; HBC-NEXT: .LBB0_7: // %for.body +; HBC-NEXT: // =>This Inner Loop Header: Depth=1 +; HBC-NEXT: fmov s2, w11 +; HBC-NEXT: ldr s1, [x8], #4 +; HBC-NEXT: fcmp s1, s2 +; HBC-NEXT: fcsel s2, s2, s1, gt +; HBC-NEXT: fcmp s1, #0.0 +; HBC-NEXT: fcsel s1, s0, s2, mi +; HBC-NEXT: subs w10, w10, #1 +; HBC-NEXT: fcvtzs w12, s1 +; HBC-NEXT: strb w12, [x9], #1 +; HBC-NEXT: bc.ne .LBB0_7 +; HBC-NEXT: .LBB0_8: // %for.cond.cleanup +; HBC-NEXT: ret entry: %cmp9 = icmp sgt i32 %width, 0 - br i1 %cmp9, label %for.body.preheader, label %for.cond.cleanup + br i1 %cmp9, label %for.body.preheader, label %for.cond.cleanup, !consistent !10 for.body.preheader: ; preds = %entry %0 = add i32 %width, -1 %1 = zext i32 %0 to i64 %2 = add nuw nsw i64 %1, 1 %min.iters.check = icmp ult i32 %0, 7 - br i1 %min.iters.check, label %for.body.preheader21, label %vector.ph + br i1 %min.iters.check, label %for.body.preheader21, label %vector.ph, !consistent !10 vector.ph: ; preds = %for.body.preheader %n.vec = and i64 %2, 8589934584 @@ -108,11 +176,11 @@ vector.body: ; preds = %vector.body, %vecto store <4 x i8> %13, ptr %14, align 1 %index.next = add nuw i64 %index, 8 %15 = icmp eq i64 %index.next, %n.vec - br i1 %15, label %middle.block, label %vector.body + br i1 %15, label %middle.block, label %vector.body, !consistent !10 middle.block: ; preds = %vector.body %cmp.n = icmp eq i64 %2, %n.vec - br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader21 + br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader21, !consistent !10 for.body.preheader21: ; preds = %for.body.preheader, %middle.block %i.012.ph = phi i32 [ 0, %for.body.preheader ], [ %ind.end, %middle.block ] @@ -138,7 +206,7 @@ for.body: ; preds = %for.body.preheader2 %add.ptr2 = getelementptr inbounds i8, ptr %dst.addr.010, i64 1 %inc = add nuw nsw i32 %i.012, 1 
%exitcond.not = icmp eq i32 %inc, %width - br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !consistent !10 } define void @loop2(ptr noalias nocapture noundef writeonly %dst, ptr nocapture noundef readonly %data, i32 noundef %width) { @@ -219,16 +287,94 @@ define void @loop2(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n ; CHECK-NEXT: cmp x11, x10 ; CHECK-NEXT: b.ne .LBB1_5 ; CHECK-NEXT: b .LBB1_7 +; +; HBC-LABEL: loop2: +; HBC: // %bb.0: // %entry +; HBC-NEXT: subs w8, w2, #1 +; HBC-NEXT: bc.lt .LBB1_7 +; HBC-NEXT: // %bb.1: // %for.body.preheader +; HBC-NEXT: cmp w8, #2 +; HBC-NEXT: bc.ls .LBB1_4 +; HBC-NEXT: // %bb.2: // %vector.memcheck +; HBC-NEXT: ubfiz x9, x8, #1, #32 +; HBC-NEXT: add x9, x9, #2 +; HBC-NEXT: add x10, x1, x9, lsl #2 +; HBC-NEXT: cmp x10, x0 +; HBC-NEXT: bc.ls .LBB1_8 +; HBC-NEXT: // %bb.3: // %vector.memcheck +; HBC-NEXT: add x9, x0, x9 +; HBC-NEXT: cmp x9, x1 +; HBC-NEXT: b.ls .LBB1_8 +; HBC-NEXT: .LBB1_4: +; HBC-NEXT: mov w10, wzr +; HBC-NEXT: mov x8, x1 +; HBC-NEXT: mov x9, x0 +; HBC-NEXT: .LBB1_5: // %for.body.preheader1 +; HBC-NEXT: movi d0, #0000000000000000 +; HBC-NEXT: sub w10, w2, w10 +; HBC-NEXT: mov w11, #1132396544 // =0x437f0000 +; HBC-NEXT: .LBB1_6: // %for.body +; HBC-NEXT: // =>This Inner Loop Header: Depth=1 +; HBC-NEXT: ldp s1, s3, [x8], #8 +; HBC-NEXT: fmov s2, w11 +; HBC-NEXT: fcmp s1, s2 +; HBC-NEXT: fcsel s4, s2, s1, gt +; HBC-NEXT: fcmp s1, #0.0 +; HBC-NEXT: fcsel s1, s0, s4, mi +; HBC-NEXT: fcmp s3, s2 +; HBC-NEXT: fcsel s2, s2, s3, gt +; HBC-NEXT: fcmp s3, #0.0 +; HBC-NEXT: fcvtzs w12, s1 +; HBC-NEXT: fcsel s2, s0, s2, mi +; HBC-NEXT: subs w10, w10, #1 +; HBC-NEXT: strb w12, [x9] +; HBC-NEXT: fcvtzs w13, s2 +; HBC-NEXT: strb w13, [x9, #1] +; HBC-NEXT: add x9, x9, #2 +; HBC-NEXT: bc.ne .LBB1_6 +; HBC-NEXT: .LBB1_7: // %for.cond.cleanup +; HBC-NEXT: ret +; HBC-NEXT: .LBB1_8: // %vector.ph +; HBC-NEXT: add x11, x8, #1 +; 
HBC-NEXT: mov w8, #1132396544 // =0x437f0000 +; HBC-NEXT: and x10, x11, #0x1fffffffc +; HBC-NEXT: dup v0.4s, w8 +; HBC-NEXT: add x8, x1, x10, lsl #3 +; HBC-NEXT: add x9, x0, x10, lsl #1 +; HBC-NEXT: mov x12, x10 +; HBC-NEXT: .LBB1_9: // %vector.body +; HBC-NEXT: // =>This Inner Loop Header: Depth=1 +; HBC-NEXT: ld2 { v1.4s, v2.4s }, [x1], #32 +; HBC-NEXT: subs x12, x12, #4 +; HBC-NEXT: fcmgt v3.4s, v1.4s, v0.4s +; HBC-NEXT: fcmgt v4.4s, v2.4s, v0.4s +; HBC-NEXT: fcmlt v5.4s, v1.4s, #0.0 +; HBC-NEXT: bsl v3.16b, v0.16b, v1.16b +; HBC-NEXT: bsl v4.16b, v0.16b, v2.16b +; HBC-NEXT: fcmlt v1.4s, v2.4s, #0.0 +; HBC-NEXT: bic v2.16b, v3.16b, v5.16b +; HBC-NEXT: bic v1.16b, v4.16b, v1.16b +; HBC-NEXT: fcvtzs v2.4s, v2.4s +; HBC-NEXT: fcvtzs v1.4s, v1.4s +; HBC-NEXT: xtn v2.4h, v2.4s +; HBC-NEXT: xtn v1.4h, v1.4s +; HBC-NEXT: trn1 v1.8b, v2.8b, v1.8b +; HBC-NEXT: str d1, [x0], #8 +; HBC-NEXT: bc.ne .LBB1_9 +; HBC-NEXT: // %bb.10: // %middle.block +; HBC-NEXT: cmp x11, x10 +; HBC-NEXT: bc.ne .LBB1_5 +; HBC-NEXT: b .LBB1_7 entry: %cmp19 = icmp sgt i32 %width, 0 - br i1 %cmp19, label %for.body.preheader, label %for.cond.cleanup + br i1 %cmp19, label %for.body.preheader, label %for.cond.cleanup, !consistent !10 for.body.preheader: ; preds = %entry %0 = add i32 %width, -1 %1 = zext i32 %0 to i64 %2 = add nuw nsw i64 %1, 1 %min.iters.check = icmp ult i32 %0, 3 - br i1 %min.iters.check, label %for.body.preheader35, label %vector.memcheck + br i1 %min.iters.check, label %for.body.preheader35, label %vector.memcheck, !consistent !10 vector.memcheck: ; preds = %for.body.preheader %3 = add i32 %width, -1 @@ -240,7 +386,7 @@ vector.memcheck: ; preds = %for.body.preheader %bound0 = icmp ugt ptr %scevgep24, %dst %bound1 = icmp ugt ptr %scevgep, %data %found.conflict = and i1 %bound0, %bound1 - br i1 %found.conflict, label %for.body.preheader35, label %vector.ph + br i1 %found.conflict, label %for.body.preheader35, label %vector.ph, !consistent !10 vector.ph: ; preds = %vector.memcheck 
%n.vec = and i64 %2, 8589934588 @@ -274,11 +420,11 @@ vector.body: ; preds = %vector.body, %vecto store <8 x i8> %interleaved.vec, ptr %21, align 1 %index.next = add nuw i64 %index, 4 %22 = icmp eq i64 %index.next, %n.vec - br i1 %22, label %middle.block, label %vector.body + br i1 %22, label %middle.block, label %vector.body, !consistent !10 middle.block: ; preds = %vector.body %cmp.n = icmp eq i64 %2, %n.vec - br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader35 + br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader35, !consistent !10 for.body.preheader35: ; preds = %vector.memcheck, %for.body.preheader, %middle.block %i.022.ph = phi i32 [ 0, %vector.memcheck ], [ 0, %for.body.preheader ], [ %ind.end, %middle.block ] @@ -313,7 +459,7 @@ for.body: ; preds = %for.body.preheader3 %add.ptr6 = getelementptr inbounds i8, ptr %dst.addr.020, i64 2 %inc = add nuw nsw i32 %i.022, 1 %exitcond.not = icmp eq i32 %inc, %width - br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !consistent !10 } define void @loop3(ptr noalias nocapture noundef writeonly %dst, ptr nocapture noundef readonly %data, i32 noundef %width) { @@ -411,16 +557,111 @@ define void @loop3(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n ; CHECK-NEXT: b.ne .LBB2_8 ; CHECK-NEXT: .LBB2_9: // %for.cond.cleanup ; CHECK-NEXT: ret +; +; HBC-LABEL: loop3: +; HBC: // %bb.0: // %entry +; HBC-NEXT: subs w8, w2, #1 +; HBC-NEXT: bc.lt .LBB2_9 +; HBC-NEXT: // %bb.1: // %for.body.preheader +; HBC-NEXT: cmp w8, #2 +; HBC-NEXT: bc.ls .LBB2_6 +; HBC-NEXT: // %bb.2: // %vector.memcheck +; HBC-NEXT: add x9, x8, w8, uxtw #1 +; HBC-NEXT: add x9, x9, #3 +; HBC-NEXT: add x10, x1, x9, lsl #2 +; HBC-NEXT: add x9, x0, x9 +; HBC-NEXT: cmp x10, x0 +; HBC-NEXT: ccmp x9, x1, #0, hi +; HBC-NEXT: b.hi .LBB2_6 +; HBC-NEXT: // %bb.3: // %vector.ph +; HBC-NEXT: add x11, x8, #1 +; HBC-NEXT: mov w8, #1132396544 // =0x437f0000 +; 
HBC-NEXT: adrp x12, .LCPI2_0 +; HBC-NEXT: and x10, x11, #0x1fffffffc +; HBC-NEXT: dup v0.4s, w8 +; HBC-NEXT: ldr q1, [x12, :lo12:.LCPI2_0] +; HBC-NEXT: add x9, x10, x10, lsl #1 +; HBC-NEXT: mov x12, x10 +; HBC-NEXT: add x8, x1, x9, lsl #2 +; HBC-NEXT: add x9, x0, x9 +; HBC-NEXT: .LBB2_4: // %vector.body +; HBC-NEXT: // =>This Inner Loop Header: Depth=1 +; HBC-NEXT: ld3 { v2.4s, v3.4s, v4.4s }, [x1], #48 +; HBC-NEXT: add x13, x0, #8 +; HBC-NEXT: subs x12, x12, #4 +; HBC-NEXT: fcmgt v5.4s, v2.4s, v0.4s +; HBC-NEXT: fcmgt v6.4s, v3.4s, v0.4s +; HBC-NEXT: fcmgt v7.4s, v4.4s, v0.4s +; HBC-NEXT: fcmlt v16.4s, v2.4s, #0.0 +; HBC-NEXT: fcmlt v17.4s, v3.4s, #0.0 +; HBC-NEXT: bsl v5.16b, v0.16b, v2.16b +; HBC-NEXT: bsl v6.16b, v0.16b, v3.16b +; HBC-NEXT: bsl v7.16b, v0.16b, v4.16b +; HBC-NEXT: fcmlt v2.4s, v4.4s, #0.0 +; HBC-NEXT: bic v3.16b, v5.16b, v16.16b +; HBC-NEXT: bic v4.16b, v6.16b, v17.16b +; HBC-NEXT: bic v2.16b, v7.16b, v2.16b +; HBC-NEXT: fcvtzs v3.4s, v3.4s +; HBC-NEXT: fcvtzs v4.4s, v4.4s +; HBC-NEXT: fcvtzs v2.4s, v2.4s +; HBC-NEXT: xtn v5.4h, v3.4s +; HBC-NEXT: xtn v6.4h, v4.4s +; HBC-NEXT: xtn v7.4h, v2.4s +; HBC-NEXT: tbl v2.16b, { v5.16b, v6.16b, v7.16b }, v1.16b +; HBC-NEXT: st1 { v2.s }[2], [x13] +; HBC-NEXT: str d2, [x0], #12 +; HBC-NEXT: bc.ne .LBB2_4 +; HBC-NEXT: // %bb.5: // %middle.block +; HBC-NEXT: cmp x11, x10 +; HBC-NEXT: bc.ne .LBB2_7 +; HBC-NEXT: b .LBB2_9 +; HBC-NEXT: .LBB2_6: +; HBC-NEXT: mov w10, wzr +; HBC-NEXT: mov x8, x1 +; HBC-NEXT: mov x9, x0 +; HBC-NEXT: .LBB2_7: // %for.body.preheader1 +; HBC-NEXT: movi d0, #0000000000000000 +; HBC-NEXT: sub w10, w2, w10 +; HBC-NEXT: mov w11, #1132396544 // =0x437f0000 +; HBC-NEXT: .LBB2_8: // %for.body +; HBC-NEXT: // =>This Inner Loop Header: Depth=1 +; HBC-NEXT: ldp s1, s3, [x8] +; HBC-NEXT: fmov s2, w11 +; HBC-NEXT: fcmp s1, s2 +; HBC-NEXT: fcsel s4, s2, s1, gt +; HBC-NEXT: fcmp s1, #0.0 +; HBC-NEXT: fcsel s1, s0, s4, mi +; HBC-NEXT: fcmp s3, s2 +; HBC-NEXT: fcsel s4, s2, s3, gt +; HBC-NEXT: fcmp 
s3, #0.0 +; HBC-NEXT: ldr s3, [x8, #8] +; HBC-NEXT: fcvtzs w12, s1 +; HBC-NEXT: add x8, x8, #12 +; HBC-NEXT: fcsel s4, s0, s4, mi +; HBC-NEXT: fcmp s3, s2 +; HBC-NEXT: strb w12, [x9] +; HBC-NEXT: fcsel s2, s2, s3, gt +; HBC-NEXT: fcmp s3, #0.0 +; HBC-NEXT: fcvtzs w13, s4 +; HBC-NEXT: fcsel s2, s0, s2, mi +; HBC-NEXT: subs w10, w10, #1 +; HBC-NEXT: strb w13, [x9, #1] +; HBC-NEXT: fcvtzs w14, s2 +; HBC-NEXT: strb w14, [x9, #2] +; HBC-NEXT: add x9, x9, #3 +; HBC-NEXT: bc.ne .LBB2_8 +; HBC-NEXT: .LBB2_9: // %for.cond.cleanup +; HBC-NEXT: ret entry: %cmp29 = icmp sgt i32 %width, 0 - br i1 %cmp29, label %for.body.preheader, label %for.cond.cleanup + br i1 %cmp29, label %for.body.preheader, label %for.cond.cleanup, !consistent !10 for.body.preheader: ; preds = %entry %0 = add i32 %width, -1 %1 = zext i32 %0 to i64 %2 = add nuw nsw i64 %1, 1 %min.iters.check = icmp ult i32 %0, 3 - br i1 %min.iters.check, label %for.body.preheader46, label %vector.memcheck + br i1 %min.iters.check, label %for.body.preheader46, label %vector.memcheck, !consistent !10 vector.memcheck: ; preds = %for.body.preheader %3 = add i32 %width, -1 @@ -432,7 +673,7 @@ vector.memcheck: ; preds = %for.body.preheader %bound0 = icmp ugt ptr %scevgep34, %dst %bound1 = icmp ugt ptr %scevgep, %data %found.conflict = and i1 %bound0, %bound1 - br i1 %found.conflict, label %for.body.preheader46, label %vector.ph + br i1 %found.conflict, label %for.body.preheader46, label %vector.ph, !consistent !10 vector.ph: ; preds = %vector.memcheck %n.vec = and i64 %2, 8589934588 @@ -474,11 +715,11 @@ vector.body: ; preds = %vector.body, %vecto store <12 x i8> %interleaved.vec, ptr %26, align 1 %index.next = add nuw i64 %index, 4 %29 = icmp eq i64 %index.next, %n.vec - br i1 %29, label %middle.block, label %vector.body + br i1 %29, label %middle.block, label %vector.body, !consistent !10 middle.block: ; preds = %vector.body %cmp.n = icmp eq i64 %2, %n.vec - br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader46 + 
br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader46, !consistent !10 for.body.preheader46: ; preds = %vector.memcheck, %for.body.preheader, %middle.block %i.032.ph = phi i32 [ 0, %vector.memcheck ], [ 0, %for.body.preheader ], [ %ind.end, %middle.block ] @@ -522,7 +763,7 @@ for.body: ; preds = %for.body.preheader4 %add.ptr10 = getelementptr inbounds i8, ptr %dst.addr.030, i64 3 %inc = add nuw nsw i32 %i.032, 1 %exitcond.not = icmp eq i32 %inc, %width - br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !consistent !10 } define void @loop4(ptr noalias nocapture noundef writeonly %dst, ptr nocapture noundef readonly %data, i32 noundef %width) { @@ -631,16 +872,122 @@ define void @loop4(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n ; CHECK-NEXT: cmp x11, x10 ; CHECK-NEXT: b.ne .LBB3_5 ; CHECK-NEXT: b .LBB3_7 +; +; HBC-LABEL: loop4: +; HBC: // %bb.0: // %entry +; HBC-NEXT: subs w8, w2, #1 +; HBC-NEXT: bc.lt .LBB3_7 +; HBC-NEXT: // %bb.1: // %for.body.preheader +; HBC-NEXT: cmp w8, #2 +; HBC-NEXT: bc.ls .LBB3_4 +; HBC-NEXT: // %bb.2: // %vector.memcheck +; HBC-NEXT: ubfiz x9, x8, #2, #32 +; HBC-NEXT: add x9, x9, #4 +; HBC-NEXT: add x10, x1, x9, lsl #2 +; HBC-NEXT: cmp x10, x0 +; HBC-NEXT: bc.ls .LBB3_8 +; HBC-NEXT: // %bb.3: // %vector.memcheck +; HBC-NEXT: add x9, x0, x9 +; HBC-NEXT: cmp x9, x1 +; HBC-NEXT: b.ls .LBB3_8 +; HBC-NEXT: .LBB3_4: +; HBC-NEXT: mov w10, wzr +; HBC-NEXT: mov x8, x1 +; HBC-NEXT: mov x9, x0 +; HBC-NEXT: .LBB3_5: // %for.body.preheader1 +; HBC-NEXT: movi d0, #0000000000000000 +; HBC-NEXT: sub w10, w2, w10 +; HBC-NEXT: mov w11, #1132396544 // =0x437f0000 +; HBC-NEXT: .LBB3_6: // %for.body +; HBC-NEXT: // =>This Inner Loop Header: Depth=1 +; HBC-NEXT: ldp s1, s3, [x8] +; HBC-NEXT: fmov s2, w11 +; HBC-NEXT: fcmp s1, s2 +; HBC-NEXT: fcsel s4, s2, s1, gt +; HBC-NEXT: fcmp s1, #0.0 +; HBC-NEXT: fcsel s1, s0, s4, mi +; HBC-NEXT: fcmp s3, s2 +; 
HBC-NEXT: fcsel s4, s2, s3, gt +; HBC-NEXT: fcmp s3, #0.0 +; HBC-NEXT: ldp s3, s5, [x8, #8] +; HBC-NEXT: fcvtzs w12, s1 +; HBC-NEXT: add x8, x8, #16 +; HBC-NEXT: fcsel s4, s0, s4, mi +; HBC-NEXT: fcmp s3, s2 +; HBC-NEXT: strb w12, [x9] +; HBC-NEXT: fcsel s6, s2, s3, gt +; HBC-NEXT: fcmp s3, #0.0 +; HBC-NEXT: fcvtzs w13, s4 +; HBC-NEXT: fcsel s3, s0, s6, mi +; HBC-NEXT: fcmp s5, s2 +; HBC-NEXT: strb w13, [x9, #1] +; HBC-NEXT: fcsel s2, s2, s5, gt +; HBC-NEXT: fcmp s5, #0.0 +; HBC-NEXT: fcvtzs w14, s3 +; HBC-NEXT: fcsel s2, s0, s2, mi +; HBC-NEXT: subs w10, w10, #1 +; HBC-NEXT: strb w14, [x9, #2] +; HBC-NEXT: fcvtzs w15, s2 +; HBC-NEXT: strb w15, [x9, #3] +; HBC-NEXT: add x9, x9, #4 +; HBC-NEXT: bc.ne .LBB3_6 +; HBC-NEXT: .LBB3_7: // %for.cond.cleanup +; HBC-NEXT: ret +; HBC-NEXT: .LBB3_8: // %vector.ph +; HBC-NEXT: add x11, x8, #1 +; HBC-NEXT: mov w8, #1132396544 // =0x437f0000 +; HBC-NEXT: adrp x12, .LCPI3_0 +; HBC-NEXT: and x10, x11, #0x1fffffffc +; HBC-NEXT: dup v0.4s, w8 +; HBC-NEXT: ldr q1, [x12, :lo12:.LCPI3_0] +; HBC-NEXT: add x8, x1, x10, lsl #4 +; HBC-NEXT: add x9, x0, x10, lsl #2 +; HBC-NEXT: mov x12, x10 +; HBC-NEXT: .LBB3_9: // %vector.body +; HBC-NEXT: // =>This Inner Loop Header: Depth=1 +; HBC-NEXT: ld4 { v2.4s, v3.4s, v4.4s, v5.4s }, [x1], #64 +; HBC-NEXT: subs x12, x12, #4 +; HBC-NEXT: fcmgt v6.4s, v2.4s, v0.4s +; HBC-NEXT: fcmgt v7.4s, v3.4s, v0.4s +; HBC-NEXT: fcmgt v16.4s, v4.4s, v0.4s +; HBC-NEXT: fcmgt v17.4s, v5.4s, v0.4s +; HBC-NEXT: fcmlt v18.4s, v2.4s, #0.0 +; HBC-NEXT: fcmlt v19.4s, v3.4s, #0.0 +; HBC-NEXT: fcmlt v20.4s, v4.4s, #0.0 +; HBC-NEXT: bsl v6.16b, v0.16b, v2.16b +; HBC-NEXT: bsl v7.16b, v0.16b, v3.16b +; HBC-NEXT: bsl v16.16b, v0.16b, v4.16b +; HBC-NEXT: bsl v17.16b, v0.16b, v5.16b +; HBC-NEXT: fcmlt v2.4s, v5.4s, #0.0 +; HBC-NEXT: bic v3.16b, v6.16b, v18.16b +; HBC-NEXT: bic v4.16b, v7.16b, v19.16b +; HBC-NEXT: bic v5.16b, v16.16b, v20.16b +; HBC-NEXT: bic v2.16b, v17.16b, v2.16b +; HBC-NEXT: fcvtzs v3.4s, v3.4s +; HBC-NEXT: 
fcvtzs v4.4s, v4.4s +; HBC-NEXT: fcvtzs v5.4s, v5.4s +; HBC-NEXT: fcvtzs v2.4s, v2.4s +; HBC-NEXT: xtn v16.4h, v3.4s +; HBC-NEXT: xtn v17.4h, v4.4s +; HBC-NEXT: xtn v18.4h, v5.4s +; HBC-NEXT: xtn v19.4h, v2.4s +; HBC-NEXT: tbl v2.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b +; HBC-NEXT: str q2, [x0], #16 +; HBC-NEXT: bc.ne .LBB3_9 +; HBC-NEXT: // %bb.10: // %middle.block +; HBC-NEXT: cmp x11, x10 +; HBC-NEXT: bc.ne .LBB3_5 +; HBC-NEXT: b .LBB3_7 entry: %cmp39 = icmp sgt i32 %width, 0 - br i1 %cmp39, label %for.body.preheader, label %for.cond.cleanup + br i1 %cmp39, label %for.body.preheader, label %for.cond.cleanup, !consistent !10 for.body.preheader: ; preds = %entry %0 = add i32 %width, -1 %1 = zext i32 %0 to i64 %2 = add nuw nsw i64 %1, 1 %min.iters.check = icmp ult i32 %0, 3 - br i1 %min.iters.check, label %for.body.preheader57, label %vector.memcheck + br i1 %min.iters.check, label %for.body.preheader57, label %vector.memcheck, !consistent !10 vector.memcheck: ; preds = %for.body.preheader %3 = add i32 %width, -1 @@ -652,7 +999,7 @@ vector.memcheck: ; preds = %for.body.preheader %bound0 = icmp ugt ptr %scevgep44, %dst %bound1 = icmp ugt ptr %scevgep, %data %found.conflict = and i1 %bound0, %bound1 - br i1 %found.conflict, label %for.body.preheader57, label %vector.ph + br i1 %found.conflict, label %for.body.preheader57, label %vector.ph, !consistent !10 vector.ph: ; preds = %vector.memcheck %n.vec = and i64 %2, 8589934588 @@ -700,11 +1047,11 @@ vector.body: ; preds = %vector.body, %vecto store <16 x i8> %interleaved.vec, ptr %31, align 1 %index.next = add nuw i64 %index, 4 %34 = icmp eq i64 %index.next, %n.vec - br i1 %34, label %middle.block, label %vector.body + br i1 %34, label %middle.block, label %vector.body, !consistent !10 middle.block: ; preds = %vector.body %cmp.n = icmp eq i64 %2, %n.vec - br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader57 + br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader57, !consistent !10 
for.body.preheader57: ; preds = %vector.memcheck, %for.body.preheader, %middle.block %i.042.ph = phi i32 [ 0, %vector.memcheck ], [ 0, %for.body.preheader ], [ %ind.end, %middle.block ] @@ -757,5 +1104,7 @@ for.body: ; preds = %for.body.preheader5 %add.ptr14 = getelementptr inbounds i8, ptr %dst.addr.040, i64 4 %inc = add nuw nsw i32 %i.042, 1 %exitcond.not = icmp eq i32 %inc, %width - br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !consistent !10 } + +!10 = !{i1 true}