19
19
#include " llvm/Analysis/BlockFrequencyInfo.h"
20
20
#include " llvm/Analysis/CFG.h"
21
21
#include " llvm/Analysis/CodeMetrics.h"
22
+ #include " llvm/Analysis/DomTreeUpdater.h"
22
23
#include " llvm/Analysis/GuardUtils.h"
23
24
#include " llvm/Analysis/LoopAnalysisManager.h"
24
25
#include " llvm/Analysis/LoopInfo.h"
@@ -73,6 +74,7 @@ using namespace llvm::PatternMatch;
73
74
74
75
STATISTIC (NumBranches, " Number of branches unswitched" );
75
76
STATISTIC (NumSwitches, " Number of switches unswitched" );
77
+ STATISTIC (NumSelects, " Number of selects turned into branches for unswitching" );
76
78
STATISTIC (NumGuards, " Number of guards turned into branches for unswitching" );
77
79
STATISTIC (NumTrivial, " Number of unswitches that are trivial" );
78
80
STATISTIC (
@@ -2079,7 +2081,7 @@ static void unswitchNontrivialInvariants(
2079
2081
AssumptionCache &AC,
2080
2082
function_ref<void (bool , bool , ArrayRef<Loop *>)> UnswitchCB,
2081
2083
ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
2082
- function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
2084
+ function_ref<void(Loop &, StringRef)> DestroyLoopCB, bool InsertFreeze ) {
2083
2085
auto *ParentBB = TI.getParent ();
2084
2086
BranchInst *BI = dyn_cast<BranchInst>(&TI);
2085
2087
SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
@@ -2181,25 +2183,6 @@ static void unswitchNontrivialInvariants(
2181
2183
SE->forgetBlockAndLoopDispositions ();
2182
2184
}
2183
2185
2184
- bool InsertFreeze = false ;
2185
- if (FreezeLoopUnswitchCond) {
2186
- ICFLoopSafetyInfo SafetyInfo;
2187
- SafetyInfo.computeLoopSafetyInfo (&L);
2188
- InsertFreeze = !SafetyInfo.isGuaranteedToExecute (TI, &DT, &L);
2189
- }
2190
-
2191
- // Perform the isGuaranteedNotToBeUndefOrPoison() query before the transform,
2192
- // otherwise the branch instruction will have been moved outside the loop
2193
- // already, and may imply that a poison condition is always UB.
2194
- Value *FullUnswitchCond = nullptr ;
2195
- if (FullUnswitch) {
2196
- FullUnswitchCond =
2197
- BI ? skipTrivialSelect (BI->getCondition ()) : SI->getCondition ();
2198
- if (InsertFreeze)
2199
- InsertFreeze = !isGuaranteedNotToBeUndefOrPoison (
2200
- FullUnswitchCond, &AC, L.getLoopPreheader ()->getTerminator (), &DT);
2201
- }
2202
-
2203
2186
// If the edge from this terminator to a successor dominates that successor,
2204
2187
// store a map from each block in its dominator subtree to it. This lets us
2205
2188
// tell when cloning for a particular successor if a block is dominated by
@@ -2274,10 +2257,11 @@ static void unswitchNontrivialInvariants(
2274
2257
BasicBlock *ClonedPH = ClonedPHs.begin ()->second ;
2275
2258
BI->setSuccessor (ClonedSucc, ClonedPH);
2276
2259
BI->setSuccessor (1 - ClonedSucc, LoopPH);
2260
+ Value *Cond = skipTrivialSelect (BI->getCondition ());
2277
2261
if (InsertFreeze)
2278
- FullUnswitchCond = new FreezeInst (
2279
- FullUnswitchCond, FullUnswitchCond ->getName () + " .fr" , BI);
2280
- BI->setCondition (FullUnswitchCond );
2262
+ Cond = new FreezeInst (
2263
+ Cond, Cond ->getName () + " .fr" , BI);
2264
+ BI->setCondition (Cond );
2281
2265
DTUpdates.push_back ({DominatorTree::Insert, SplitBB, ClonedPH});
2282
2266
} else {
2283
2267
assert (SI && " Must either be a branch or switch!" );
@@ -2294,7 +2278,7 @@ static void unswitchNontrivialInvariants(
2294
2278
2295
2279
if (InsertFreeze)
2296
2280
SI->setCondition (new FreezeInst (
2297
- FullUnswitchCond, FullUnswitchCond ->getName () + " .fr" , SI));
2281
+ SI-> getCondition (), SI-> getCondition () ->getName () + " .fr" , SI));
2298
2282
2299
2283
// We need to use the set to populate domtree updates as even when there
2300
2284
// are multiple cases pointing at the same successor we only want to
@@ -2593,6 +2577,58 @@ static InstructionCost computeDomSubtreeCost(
2593
2577
return Cost;
2594
2578
}
2595
2579
2580
+ // / Turns a select instruction into implicit control flow branch,
2581
+ // / making the following replacement:
2582
+ // /
2583
+ // / head:
2584
+ // / --code before select--
2585
+ // / select %cond, %trueval, %falseval
2586
+ // / --code after select--
2587
+ // /
2588
+ // / into
2589
+ // /
2590
+ // / head:
2591
+ // / --code before select--
2592
+ // / br i1 %cond, label %then, label %tail
2593
+ // /
2594
+ // / then:
2595
+ // / br %tail
2596
+ // /
2597
+ // / tail:
2598
+ // / phi [ %trueval, %then ], [ %falseval, %head]
2599
+ // / unreachable
2600
+ // /
2601
+ // / It also makes all relevant DT and LI updates, so that all structures are in
2602
+ // / valid state after this transform.
2603
+ static BranchInst *turnSelectIntoBranch (SelectInst *SI, DominatorTree &DT,
2604
+ LoopInfo &LI, MemorySSAUpdater *MSSAU,
2605
+ AssumptionCache *AC) {
2606
+ LLVM_DEBUG (dbgs () << " Turning " << *SI << " into a branch.\n " );
2607
+ BasicBlock *HeadBB = SI->getParent ();
2608
+
2609
+ DomTreeUpdater DTU =
2610
+ DomTreeUpdater (DT, DomTreeUpdater::UpdateStrategy::Eager);
2611
+ SplitBlockAndInsertIfThen (SI->getCondition (), SI, false ,
2612
+ SI->getMetadata (LLVMContext::MD_prof), &DTU, &LI);
2613
+ auto *CondBr = cast<BranchInst>(HeadBB->getTerminator ());
2614
+ BasicBlock *ThenBB = CondBr->getSuccessor (0 ),
2615
+ *TailBB = CondBr->getSuccessor (1 );
2616
+ if (MSSAU)
2617
+ MSSAU->moveAllAfterSpliceBlocks (HeadBB, TailBB, SI);
2618
+
2619
+ PHINode *Phi = PHINode::Create (SI->getType (), 2 , " unswitched.select" , SI);
2620
+ Phi->addIncoming (SI->getTrueValue (), ThenBB);
2621
+ Phi->addIncoming (SI->getFalseValue (), HeadBB);
2622
+ SI->replaceAllUsesWith (Phi);
2623
+ SI->eraseFromParent ();
2624
+
2625
+ if (MSSAU && VerifyMemorySSA)
2626
+ MSSAU->getMemorySSA ()->verifyMemorySSA ();
2627
+
2628
+ ++NumSelects;
2629
+ return CondBr;
2630
+ }
2631
+
2596
2632
// / Turns a llvm.experimental.guard intrinsic into implicit control flow branch,
2597
2633
// / making the following replacement:
2598
2634
// /
@@ -2700,9 +2736,10 @@ static int CalculateUnswitchCostMultiplier(
2700
2736
const BasicBlock *CondBlock = TI.getParent ();
2701
2737
if (DT.dominates (CondBlock, Latch) &&
2702
2738
(isGuard (&TI) ||
2703
- llvm::count_if (successors (&TI), [&L](const BasicBlock *SuccBB) {
2704
- return L.contains (SuccBB);
2705
- }) <= 1 )) {
2739
+ (TI.isTerminator () &&
2740
+ llvm::count_if (successors (&TI), [&L](const BasicBlock *SuccBB) {
2741
+ return L.contains (SuccBB);
2742
+ }) <= 1 ))) {
2706
2743
NumCostMultiplierSkipped++;
2707
2744
return 1 ;
2708
2745
}
@@ -2711,12 +2748,17 @@ static int CalculateUnswitchCostMultiplier(
2711
2748
int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector ().size ()
2712
2749
: std::distance (LI.begin (), LI.end ()));
2713
2750
// Count amount of clones that all the candidates might cause during
2714
- // unswitching. Branch/guard counts as 1, switch counts as log2 of its cases.
2751
+ // unswitching. Branch/guard/select counts as 1, switch counts as log2 of its
2752
+ // cases.
2715
2753
int UnswitchedClones = 0 ;
2716
2754
for (auto Candidate : UnswitchCandidates) {
2717
2755
const Instruction *CI = Candidate.TI ;
2718
2756
const BasicBlock *CondBlock = CI->getParent ();
2719
2757
bool SkipExitingSuccessors = DT.dominates (CondBlock, Latch);
2758
+ if (isa<SelectInst>(CI)) {
2759
+ UnswitchedClones++;
2760
+ continue ;
2761
+ }
2720
2762
if (isGuard (CI)) {
2721
2763
if (!SkipExitingSuccessors)
2722
2764
UnswitchedClones++;
@@ -2779,15 +2821,20 @@ static bool collectUnswitchCandidates(
2779
2821
if (LI.getLoopFor (BB) != &L)
2780
2822
continue ;
2781
2823
2782
- if (CollectGuards)
2783
- for (auto &I : *BB)
2784
- if (isGuard (&I)) {
2785
- auto *Cond =
2786
- skipTrivialSelect (cast<IntrinsicInst>(&I)->getArgOperand (0 ));
2787
- // TODO: Support AND, OR conditions and partial unswitching.
2788
- if (!isa<Constant>(Cond) && L.isLoopInvariant (Cond))
2789
- UnswitchCandidates.push_back ({&I, {Cond}});
2790
- }
2824
+ for (auto &I : *BB) {
2825
+ if (auto *SI = dyn_cast<SelectInst>(&I)) {
2826
+ auto *Cond = SI->getCondition ();
2827
+ // restrict to simple boolean selects
2828
+ if (!isa<Constant>(Cond) && L.isLoopInvariant (Cond) && Cond->getType ()->isIntegerTy (1 ))
2829
+ UnswitchCandidates.push_back ({&I, {Cond}});
2830
+ } else if (CollectGuards && isGuard (&I)) {
2831
+ auto *Cond =
2832
+ skipTrivialSelect (cast<IntrinsicInst>(&I)->getArgOperand (0 ));
2833
+ // TODO: Support AND, OR conditions and partial unswitching.
2834
+ if (!isa<Constant>(Cond) && L.isLoopInvariant (Cond))
2835
+ UnswitchCandidates.push_back ({&I, {Cond}});
2836
+ }
2837
+ }
2791
2838
2792
2839
if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator ())) {
2793
2840
// We can only consider fully loop-invariant switch conditions as we need
@@ -2992,7 +3039,8 @@ static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(
2992
3039
// loop. This is computing the new cost of unswitching a condition.
2993
3040
// Note that guards always have 2 unique successors that are implicit and
2994
3041
// will be materialized if we decide to unswitch it.
2995
- int SuccessorsCount = isGuard (&TI) ? 2 : Visited.size ();
3042
+ int SuccessorsCount =
3043
+ isGuard (&TI) || isa<SelectInst>(TI) ? 2 : Visited.size ();
2996
3044
assert (SuccessorsCount > 1 &&
2997
3045
" Cannot unswitch a condition without multiple distinct successors!" );
2998
3046
return (LoopCost - Cost) * (SuccessorsCount - 1 );
@@ -3033,6 +3081,32 @@ static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(
3033
3081
return *Best;
3034
3082
}
3035
3083
3084
+ // Insert a freeze on an unswitched branch if all is true:
3085
+ // 1. freeze-loop-unswitch-cond option is true
3086
+ // 2. The branch may not execute in the loop pre-transformation. If a branch may
3087
+ // not execute and could cause UB, it would always cause UB if it is hoisted outside
3088
+ // of the loop. Insert a freeze to prevent this case.
3089
+ // 3. The branch condition may be poison or undef
3090
+ static bool shouldInsertFreeze (Loop &L, Instruction &TI, DominatorTree &DT,
3091
+ AssumptionCache &AC) {
3092
+ assert (isa<BranchInst>(TI) || isa<SwitchInst>(TI));
3093
+ if (!FreezeLoopUnswitchCond)
3094
+ return false ;
3095
+
3096
+ ICFLoopSafetyInfo SafetyInfo;
3097
+ SafetyInfo.computeLoopSafetyInfo (&L);
3098
+ if (SafetyInfo.isGuaranteedToExecute (TI, &DT, &L))
3099
+ return false ;
3100
+
3101
+ Value *Cond;
3102
+ if (BranchInst *BI = dyn_cast<BranchInst>(&TI))
3103
+ Cond = skipTrivialSelect (BI->getCondition ());
3104
+ else
3105
+ Cond = skipTrivialSelect (cast<SwitchInst>(&TI)->getCondition ());
3106
+ return !isGuaranteedNotToBeUndefOrPoison (
3107
+ Cond, &AC, L.getLoopPreheader ()->getTerminator (), &DT);
3108
+ }
3109
+
3036
3110
static bool unswitchBestCondition (
3037
3111
Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
3038
3112
AAResults &AA, TargetTransformInfo &TTI,
@@ -3068,15 +3142,28 @@ static bool unswitchBestCondition(
3068
3142
if (Best.TI != PartialIVCondBranch)
3069
3143
PartialIVInfo.InstToDuplicate .clear ();
3070
3144
3071
- // If the best candidate is a guard, turn it into a branch.
3072
- if (isGuard (Best.TI ))
3073
- Best.TI =
3074
- turnGuardIntoBranch (cast<IntrinsicInst>(Best.TI ), L, DT, LI, MSSAU);
3145
+ bool InsertFreeze;
3146
+ if (auto *SI = dyn_cast<SelectInst>(Best.TI )) {
3147
+ // If the best candidate is a select, turn it into a branch. A select
3148
+ // whose condition is poison merely yields poison, whereas branching on
3149
+ // poison is immediate UB. Freeze the select condition (unless it is known
3150
+ // not to be undef or poison) so the new branch cannot introduce UB.
3151
+ InsertFreeze = !isGuaranteedNotToBeUndefOrPoison (
3152
+ SI->getCondition (), &AC, L.getLoopPreheader ()->getTerminator (), &DT);
3153
+ Best.TI = turnSelectIntoBranch (SI, DT, LI, MSSAU, &AC);
3154
+ } else {
3155
+ // If the best candidate is a guard, turn it into a branch.
3156
+ if (isGuard (Best.TI ))
3157
+ Best.TI =
3158
+ turnGuardIntoBranch (cast<IntrinsicInst>(Best.TI ), L, DT, LI, MSSAU);
3159
+ InsertFreeze = shouldInsertFreeze (L, *Best.TI , DT, AC);
3160
+ }
3075
3161
3076
3162
LLVM_DEBUG (dbgs () << " Unswitching non-trivial (cost = " << Best.Cost
3077
3163
<< " ) terminator: " << *Best.TI << " \n " );
3078
3164
unswitchNontrivialInvariants (L, *Best.TI , Best.Invariants , PartialIVInfo, DT,
3079
- LI, AC, UnswitchCB, SE, MSSAU, DestroyLoopCB);
3165
+ LI, AC, UnswitchCB, SE, MSSAU, DestroyLoopCB,
3166
+ InsertFreeze);
3080
3167
return true ;
3081
3168
}
3082
3169
0 commit comments