Skip to content

Commit c2b24cd

Browse files
caojoshuavchuravy
authored andcommitted
[SimpleLoopUnswitch][reland 2] unswitch selects
The old LoopUnswitch pass unswitched selects, but the changes were never ported to the new SimpleLoopUnswitch. We unswitch by turning: ``` S = select %cond, %a, %b ``` into: ``` head: br %cond, label %then, label %tail then: br label %tail tail: S = phi [ %a, %then ], [ %b, %head ] ``` Unswitch selects are always nontrivial, since the successors do not exit the loop and the loop body always needs to be cloned. Unswitch selects always need to freeze the conditional if the conditional could be poison or undef. Selects don't propagate poison/undef, and branches on poison/undef causes UB. Reland 1 - Fix the insertion of freeze instructions. The original implementation inserts a dead freeze instruction that is not used by the unswitched branch. Reland 2 - Include https://reviews.llvm.org/D149560 in the same patch, which was originally reverted along with this patch. The patch prevents unswitching of selects with a vector conditional. This could have been caught in SimpleLoopUnswitch/crash.ll if it included tests for nontrivial unswitching. This reland also adds a run for the test file with nontrivial unswitching. Reviewed By: nikic, kachkov98, vitalybuka Differential Revision: https://reviews.llvm.org/D138526 (cherry picked from commit 5cfb9aa)
1 parent d165308 commit c2b24cd

File tree

4 files changed

+164
-58
lines changed

4 files changed

+164
-58
lines changed

llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp

+130-43
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "llvm/Analysis/BlockFrequencyInfo.h"
2020
#include "llvm/Analysis/CFG.h"
2121
#include "llvm/Analysis/CodeMetrics.h"
22+
#include "llvm/Analysis/DomTreeUpdater.h"
2223
#include "llvm/Analysis/GuardUtils.h"
2324
#include "llvm/Analysis/LoopAnalysisManager.h"
2425
#include "llvm/Analysis/LoopInfo.h"
@@ -73,6 +74,7 @@ using namespace llvm::PatternMatch;
7374

7475
STATISTIC(NumBranches, "Number of branches unswitched");
7576
STATISTIC(NumSwitches, "Number of switches unswitched");
77+
STATISTIC(NumSelects, "Number of selects turned into branches for unswitching");
7678
STATISTIC(NumGuards, "Number of guards turned into branches for unswitching");
7779
STATISTIC(NumTrivial, "Number of unswitches that are trivial");
7880
STATISTIC(
@@ -2079,7 +2081,7 @@ static void unswitchNontrivialInvariants(
20792081
AssumptionCache &AC,
20802082
function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
20812083
ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
2082-
function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
2084+
function_ref<void(Loop &, StringRef)> DestroyLoopCB, bool InsertFreeze) {
20832085
auto *ParentBB = TI.getParent();
20842086
BranchInst *BI = dyn_cast<BranchInst>(&TI);
20852087
SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
@@ -2181,25 +2183,6 @@ static void unswitchNontrivialInvariants(
21812183
SE->forgetBlockAndLoopDispositions();
21822184
}
21832185

2184-
bool InsertFreeze = false;
2185-
if (FreezeLoopUnswitchCond) {
2186-
ICFLoopSafetyInfo SafetyInfo;
2187-
SafetyInfo.computeLoopSafetyInfo(&L);
2188-
InsertFreeze = !SafetyInfo.isGuaranteedToExecute(TI, &DT, &L);
2189-
}
2190-
2191-
// Perform the isGuaranteedNotToBeUndefOrPoison() query before the transform,
2192-
// otherwise the branch instruction will have been moved outside the loop
2193-
// already, and may imply that a poison condition is always UB.
2194-
Value *FullUnswitchCond = nullptr;
2195-
if (FullUnswitch) {
2196-
FullUnswitchCond =
2197-
BI ? skipTrivialSelect(BI->getCondition()) : SI->getCondition();
2198-
if (InsertFreeze)
2199-
InsertFreeze = !isGuaranteedNotToBeUndefOrPoison(
2200-
FullUnswitchCond, &AC, L.getLoopPreheader()->getTerminator(), &DT);
2201-
}
2202-
22032186
// If the edge from this terminator to a successor dominates that successor,
22042187
// store a map from each block in its dominator subtree to it. This lets us
22052188
// tell when cloning for a particular successor if a block is dominated by
@@ -2274,10 +2257,11 @@ static void unswitchNontrivialInvariants(
22742257
BasicBlock *ClonedPH = ClonedPHs.begin()->second;
22752258
BI->setSuccessor(ClonedSucc, ClonedPH);
22762259
BI->setSuccessor(1 - ClonedSucc, LoopPH);
2260+
Value *Cond = skipTrivialSelect(BI->getCondition());
22772261
if (InsertFreeze)
2278-
FullUnswitchCond = new FreezeInst(
2279-
FullUnswitchCond, FullUnswitchCond->getName() + ".fr", BI);
2280-
BI->setCondition(FullUnswitchCond);
2262+
Cond = new FreezeInst(
2263+
Cond, Cond->getName() + ".fr", BI);
2264+
BI->setCondition(Cond);
22812265
DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
22822266
} else {
22832267
assert(SI && "Must either be a branch or switch!");
@@ -2294,7 +2278,7 @@ static void unswitchNontrivialInvariants(
22942278

22952279
if (InsertFreeze)
22962280
SI->setCondition(new FreezeInst(
2297-
FullUnswitchCond, FullUnswitchCond->getName() + ".fr", SI));
2281+
SI->getCondition(), SI->getCondition()->getName() + ".fr", SI));
22982282

22992283
// We need to use the set to populate domtree updates as even when there
23002284
// are multiple cases pointing at the same successor we only want to
@@ -2593,6 +2577,58 @@ static InstructionCost computeDomSubtreeCost(
25932577
return Cost;
25942578
}
25952579

2580+
/// Turns a select instruction into implicit control flow branch,
2581+
/// making the following replacement:
2582+
///
2583+
/// head:
2584+
/// --code before select--
2585+
/// select %cond, %trueval, %falseval
2586+
/// --code after select--
2587+
///
2588+
/// into
2589+
///
2590+
/// head:
2591+
/// --code before select--
2592+
/// br i1 %cond, label %then, label %tail
2593+
///
2594+
/// then:
2595+
/// br %tail
2596+
///
2597+
/// tail:
2598+
/// phi [ %trueval, %then ], [ %falseval, %head]
2599+
/// unreachable
2600+
///
2601+
/// It also makes all relevant DT and LI updates, so that all structures are in
2602+
/// valid state after this transform.
2603+
static BranchInst *turnSelectIntoBranch(SelectInst *SI, DominatorTree &DT,
2604+
LoopInfo &LI, MemorySSAUpdater *MSSAU,
2605+
AssumptionCache *AC) {
2606+
LLVM_DEBUG(dbgs() << "Turning " << *SI << " into a branch.\n");
2607+
BasicBlock *HeadBB = SI->getParent();
2608+
2609+
DomTreeUpdater DTU =
2610+
DomTreeUpdater(DT, DomTreeUpdater::UpdateStrategy::Eager);
2611+
SplitBlockAndInsertIfThen(SI->getCondition(), SI, false,
2612+
SI->getMetadata(LLVMContext::MD_prof), &DTU, &LI);
2613+
auto *CondBr = cast<BranchInst>(HeadBB->getTerminator());
2614+
BasicBlock *ThenBB = CondBr->getSuccessor(0),
2615+
*TailBB = CondBr->getSuccessor(1);
2616+
if (MSSAU)
2617+
MSSAU->moveAllAfterSpliceBlocks(HeadBB, TailBB, SI);
2618+
2619+
PHINode *Phi = PHINode::Create(SI->getType(), 2, "unswitched.select", SI);
2620+
Phi->addIncoming(SI->getTrueValue(), ThenBB);
2621+
Phi->addIncoming(SI->getFalseValue(), HeadBB);
2622+
SI->replaceAllUsesWith(Phi);
2623+
SI->eraseFromParent();
2624+
2625+
if (MSSAU && VerifyMemorySSA)
2626+
MSSAU->getMemorySSA()->verifyMemorySSA();
2627+
2628+
++NumSelects;
2629+
return CondBr;
2630+
}
2631+
25962632
/// Turns a llvm.experimental.guard intrinsic into implicit control flow branch,
25972633
/// making the following replacement:
25982634
///
@@ -2700,9 +2736,10 @@ static int CalculateUnswitchCostMultiplier(
27002736
const BasicBlock *CondBlock = TI.getParent();
27012737
if (DT.dominates(CondBlock, Latch) &&
27022738
(isGuard(&TI) ||
2703-
llvm::count_if(successors(&TI), [&L](const BasicBlock *SuccBB) {
2704-
return L.contains(SuccBB);
2705-
}) <= 1)) {
2739+
(TI.isTerminator() &&
2740+
llvm::count_if(successors(&TI), [&L](const BasicBlock *SuccBB) {
2741+
return L.contains(SuccBB);
2742+
}) <= 1))) {
27062743
NumCostMultiplierSkipped++;
27072744
return 1;
27082745
}
@@ -2711,12 +2748,17 @@ static int CalculateUnswitchCostMultiplier(
27112748
int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
27122749
: std::distance(LI.begin(), LI.end()));
27132750
// Count amount of clones that all the candidates might cause during
2714-
// unswitching. Branch/guard counts as 1, switch counts as log2 of its cases.
2751+
// unswitching. Branch/guard/select counts as 1, switch counts as log2 of its
2752+
// cases.
27152753
int UnswitchedClones = 0;
27162754
for (auto Candidate : UnswitchCandidates) {
27172755
const Instruction *CI = Candidate.TI;
27182756
const BasicBlock *CondBlock = CI->getParent();
27192757
bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch);
2758+
if (isa<SelectInst>(CI)) {
2759+
UnswitchedClones++;
2760+
continue;
2761+
}
27202762
if (isGuard(CI)) {
27212763
if (!SkipExitingSuccessors)
27222764
UnswitchedClones++;
@@ -2779,15 +2821,20 @@ static bool collectUnswitchCandidates(
27792821
if (LI.getLoopFor(BB) != &L)
27802822
continue;
27812823

2782-
if (CollectGuards)
2783-
for (auto &I : *BB)
2784-
if (isGuard(&I)) {
2785-
auto *Cond =
2786-
skipTrivialSelect(cast<IntrinsicInst>(&I)->getArgOperand(0));
2787-
// TODO: Support AND, OR conditions and partial unswitching.
2788-
if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
2789-
UnswitchCandidates.push_back({&I, {Cond}});
2790-
}
2824+
for (auto &I : *BB) {
2825+
if (auto *SI = dyn_cast<SelectInst>(&I)) {
2826+
auto *Cond = SI->getCondition();
2827+
// restrict to simple boolean selects
2828+
if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond) && Cond->getType()->isIntegerTy(1))
2829+
UnswitchCandidates.push_back({&I, {Cond}});
2830+
} else if (CollectGuards && isGuard(&I)) {
2831+
auto *Cond =
2832+
skipTrivialSelect(cast<IntrinsicInst>(&I)->getArgOperand(0));
2833+
// TODO: Support AND, OR conditions and partial unswitching.
2834+
if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
2835+
UnswitchCandidates.push_back({&I, {Cond}});
2836+
}
2837+
}
27912838

27922839
if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
27932840
// We can only consider fully loop-invariant switch conditions as we need
@@ -2992,7 +3039,8 @@ static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(
29923039
// loop. This is computing the new cost of unswitching a condition.
29933040
// Note that guards always have 2 unique successors that are implicit and
29943041
// will be materialized if we decide to unswitch it.
2995-
int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size();
3042+
int SuccessorsCount =
3043+
isGuard(&TI) || isa<SelectInst>(TI) ? 2 : Visited.size();
29963044
assert(SuccessorsCount > 1 &&
29973045
"Cannot unswitch a condition without multiple distinct successors!");
29983046
return (LoopCost - Cost) * (SuccessorsCount - 1);
@@ -3033,6 +3081,32 @@ static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(
30333081
return *Best;
30343082
}
30353083

3084+
// Insert a freeze on an unswitched branch if all is true:
3085+
// 1. freeze-loop-unswitch-cond option is true
3086+
// 2. The branch may not execute in the loop pre-transformation. If a branch may
3087+
// not execute and could cause UB, it would always cause UB if it is hoisted outside
3088+
// of the loop. Insert a freeze to prevent this case.
3089+
// 3. The branch condition may be poison or undef
3090+
static bool shouldInsertFreeze(Loop &L, Instruction &TI, DominatorTree &DT,
3091+
AssumptionCache &AC) {
3092+
assert(isa<BranchInst>(TI) || isa<SwitchInst>(TI));
3093+
if (!FreezeLoopUnswitchCond)
3094+
return false;
3095+
3096+
ICFLoopSafetyInfo SafetyInfo;
3097+
SafetyInfo.computeLoopSafetyInfo(&L);
3098+
if (SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
3099+
return false;
3100+
3101+
Value *Cond;
3102+
if (BranchInst *BI = dyn_cast<BranchInst>(&TI))
3103+
Cond = skipTrivialSelect(BI->getCondition());
3104+
else
3105+
Cond = skipTrivialSelect(cast<SwitchInst>(&TI)->getCondition());
3106+
return !isGuaranteedNotToBeUndefOrPoison(
3107+
Cond, &AC, L.getLoopPreheader()->getTerminator(), &DT);
3108+
}
3109+
30363110
static bool unswitchBestCondition(
30373111
Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
30383112
AAResults &AA, TargetTransformInfo &TTI,
@@ -3068,15 +3142,28 @@ static bool unswitchBestCondition(
30683142
if (Best.TI != PartialIVCondBranch)
30693143
PartialIVInfo.InstToDuplicate.clear();
30703144

3071-
// If the best candidate is a guard, turn it into a branch.
3072-
if (isGuard(Best.TI))
3073-
Best.TI =
3074-
turnGuardIntoBranch(cast<IntrinsicInst>(Best.TI), L, DT, LI, MSSAU);
3145+
bool InsertFreeze;
3146+
if (auto *SI = dyn_cast<SelectInst>(Best.TI)) {
3147+
// If the best candidate is a select, turn it into a branch. Select
3148+
// instructions with a poison conditional do not propagate poison, but
3149+
// branching on poison causes UB. Insert a freeze on the select
3150+
// conditional to prevent UB after turning the select into a branch.
3151+
InsertFreeze = !isGuaranteedNotToBeUndefOrPoison(
3152+
SI->getCondition(), &AC, L.getLoopPreheader()->getTerminator(), &DT);
3153+
Best.TI = turnSelectIntoBranch(SI, DT, LI, MSSAU, &AC);
3154+
} else {
3155+
// If the best candidate is a guard, turn it into a branch.
3156+
if (isGuard(Best.TI))
3157+
Best.TI =
3158+
turnGuardIntoBranch(cast<IntrinsicInst>(Best.TI), L, DT, LI, MSSAU);
3159+
InsertFreeze = shouldInsertFreeze(L, *Best.TI, DT, AC);
3160+
}
30753161

30763162
LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = " << Best.Cost
30773163
<< ") terminator: " << *Best.TI << "\n");
30783164
unswitchNontrivialInvariants(L, *Best.TI, Best.Invariants, PartialIVInfo, DT,
3079-
LI, AC, UnswitchCB, SE, MSSAU, DestroyLoopCB);
3165+
LI, AC, UnswitchCB, SE, MSSAU, DestroyLoopCB,
3166+
InsertFreeze);
30803167
return true;
30813168
}
30823169

llvm/test/Transforms/SimpleLoopUnswitch/crash.ll

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
; RUN: opt < %s -passes=simple-loop-unswitch -verify-memoryssa -disable-output
2+
; RUN: opt < %s -passes='simple-loop-unswitch<nontrivial>' -verify-memoryssa -disable-output
23

34
define void @test1(ptr %S2) {
45
entry:

llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-freeze.ll

+12-7
Original file line numberDiff line numberDiff line change
@@ -2332,21 +2332,26 @@ exit:
23322332
define i32 @test_partial_unswitch_all_conds_guaranteed_non_poison(i1 noundef %c.1, i1 noundef %c.2) {
23332333
; CHECK-LABEL: @test_partial_unswitch_all_conds_guaranteed_non_poison(
23342334
; CHECK-NEXT: entry:
2335-
; CHECK-NEXT: [[TMP0:%.*]] = and i1 [[C_1:%.*]], [[C_2:%.*]]
2336-
; CHECK-NEXT: br i1 [[TMP0]], label [[ENTRY_SPLIT:%.*]], label [[ENTRY_SPLIT_US:%.*]]
2335+
; CHECK-NEXT: br i1 [[C_1:%.*]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]]
23372336
; CHECK: entry.split.us:
23382337
; CHECK-NEXT: br label [[LOOP_US:%.*]]
23392338
; CHECK: loop.us:
2340-
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @a()
2341-
; CHECK-NEXT: br label [[EXIT_SPLIT_US:%.*]]
2339+
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @a()
2340+
; CHECK-NEXT: br label [[TMP1:%.*]]
2341+
; CHECK: 1:
2342+
; CHECK-NEXT: br label [[TMP2:%.*]]
2343+
; CHECK: 2:
2344+
; CHECK-NEXT: [[UNSWITCHED_SELECT_US:%.*]] = phi i1 [ [[C_2:%.*]], [[TMP1]] ]
2345+
; CHECK-NEXT: br i1 [[UNSWITCHED_SELECT_US]], label [[LOOP_US]], label [[EXIT_SPLIT_US:%.*]]
23422346
; CHECK: exit.split.us:
23432347
; CHECK-NEXT: br label [[EXIT:%.*]]
23442348
; CHECK: entry.split:
23452349
; CHECK-NEXT: br label [[LOOP:%.*]]
23462350
; CHECK: loop:
2347-
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @a()
2348-
; CHECK-NEXT: [[SEL:%.*]] = select i1 true, i1 true, i1 false
2349-
; CHECK-NEXT: br i1 [[SEL]], label [[LOOP]], label [[EXIT_SPLIT:%.*]]
2351+
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @a()
2352+
; CHECK-NEXT: br label [[TMP4:%.*]]
2353+
; CHECK: 4:
2354+
; CHECK-NEXT: br i1 false, label [[LOOP]], label [[EXIT_SPLIT:%.*]]
23502355
; CHECK: exit.split:
23512356
; CHECK-NEXT: br label [[EXIT]]
23522357
; CHECK: exit:

llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-trivial-select.ll

+21-8
Original file line numberDiff line numberDiff line change
@@ -88,21 +88,34 @@ define i32 @unswitch_trivial_select_cmp_outside(i32 %x) {
8888
; CHECK-LABEL: @unswitch_trivial_select_cmp_outside(
8989
; CHECK-NEXT: entry:
9090
; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[X:%.*]], 100
91-
; CHECK-NEXT: br i1 [[C]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]]
91+
; CHECK-NEXT: [[C_FR:%.*]] = freeze i1 [[C]]
92+
; CHECK-NEXT: br i1 [[C_FR]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]]
9293
; CHECK: entry.split.us:
9394
; CHECK-NEXT: br label [[LOOP_US:%.*]]
9495
; CHECK: loop.us:
95-
; CHECK-NEXT: [[P_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ 35, [[LOOP_US]] ]
96-
; CHECK-NEXT: br label [[LOOP_US]]
96+
; CHECK-NEXT: [[P_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ 35, [[TMP1:%.*]] ]
97+
; CHECK-NEXT: br label [[TMP0:%.*]]
98+
; CHECK: 0:
99+
; CHECK-NEXT: br label [[TMP1]]
100+
; CHECK: 1:
101+
; CHECK-NEXT: [[UNSWITCHED_SELECT_US:%.*]] = phi i1 [ true, [[TMP0]] ]
102+
; CHECK-NEXT: br i1 [[UNSWITCHED_SELECT_US]], label [[LOOP_US]], label [[EXIT_SPLIT_US:%.*]]
103+
; CHECK: exit.split.us:
104+
; CHECK-NEXT: [[LCSSA_US:%.*]] = phi i32 [ [[P_US]], [[TMP1]] ]
105+
; CHECK-NEXT: br label [[EXIT:%.*]]
97106
; CHECK: entry.split:
98107
; CHECK-NEXT: br label [[LOOP:%.*]]
99108
; CHECK: loop:
100-
; CHECK-NEXT: [[P:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ]
101-
; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 false, i1 true, i1 false
102-
; CHECK-NEXT: br label [[EXIT:%.*]]
109+
; CHECK-NEXT: [[P:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ 35, [[TMP2:%.*]] ]
110+
; CHECK-NEXT: br label [[TMP2]]
111+
; CHECK: 2:
112+
; CHECK-NEXT: br i1 false, label [[LOOP]], label [[EXIT_SPLIT:%.*]]
113+
; CHECK: exit.split:
114+
; CHECK-NEXT: [[LCSSA:%.*]] = phi i32 [ [[P]], [[TMP2]] ]
115+
; CHECK-NEXT: br label [[EXIT]]
103116
; CHECK: exit:
104-
; CHECK-NEXT: [[LCSSA:%.*]] = phi i32 [ [[P]], [[LOOP]] ]
105-
; CHECK-NEXT: ret i32 [[LCSSA]]
117+
; CHECK-NEXT: [[DOTUS_PHI:%.*]] = phi i32 [ [[LCSSA]], [[EXIT_SPLIT]] ], [ [[LCSSA_US]], [[EXIT_SPLIT_US]] ]
118+
; CHECK-NEXT: ret i32 [[DOTUS_PHI]]
106119
;
107120
entry:
108121
%c = icmp ult i32 %x, 100

0 commit comments

Comments
 (0)