Index: lib/Analysis/ValueTracking.cpp =================================================================== --- lib/Analysis/ValueTracking.cpp +++ lib/Analysis/ValueTracking.cpp @@ -4132,6 +4132,12 @@ if (ICmpInst::isTrueWhenEqual(Pred) && LHS == RHS) return true; + if (Constant *CLHS = dyn_cast<Constant>(LHS)) + if (Constant *CRHS = dyn_cast<Constant>(RHS)) { + Constant *Res = ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, DL); + return Res->isOneValue(); + } + switch (Pred) { default: return false; @@ -4207,7 +4213,15 @@ DT) && isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS, DL, Depth, AC, CxtI, DT); + + case CmpInst::ICMP_UGT: + case CmpInst::ICMP_UGE: + return isTruePredicate(CmpInst::ICMP_UGE, BLHS, ALHS, DL, Depth, AC, CxtI, + DT) && + isTruePredicate(CmpInst::ICMP_UGE, ARHS, BRHS, DL, Depth, AC, CxtI, + DT); } + } bool llvm::isImpliedCondition(Value *LHS, Value *RHS, const DataLayout &DL, Index: lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- lib/Transforms/Utils/SimplifyCFG.cpp +++ lib/Transforms/Utils/SimplifyCFG.cpp @@ -95,6 +95,16 @@ cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions")); +static cl::opt<unsigned> SpeculativeFlattenBias( + "speculative-flatten-bias", cl::Hidden, cl::init(100), + cl::desc("Control how biased a branch needs to be to be considered rarely" + " failing for speculative flattening (default = 100)")); + +static cl::opt<unsigned> SpeculativeFlattenThreshold( + "speculative-flatten-threshold", cl::Hidden, cl::init(10), + cl::desc("Control how much speculation happens due to speculative" + " flattening (default = 10)")); + STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumLinearMaps, "Number of switch instructions turned into linear mapping"); STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables"); @@ -2665,6 +2675,130 @@ return Changed; } +/// If we have a series of tests leading to a 
frequently executed fallthrough +/// path and we can test all the conditions at once, we can execute a single +/// test on the fast path and figure out which condition failed on the slow +/// path. This transformation considers a pair of branches at a time since +/// recursively considering branches pairwise will cause an entire chain to +/// collapse. This transformation is code size neutral, but makes it +/// dynamically more expensive to fail either check. As such, we only want to +/// do this if both checks are expected to essentially never fail. +/// The key motivating examples are cases like: +/// br (5 > Length), in_bounds, fail1 +/// in_bounds: +/// br (6 > Length), in_bounds2, fail2 +/// in_bounds2: +/// ... +/// +/// We can rewrite this as: +/// br (6 > Length), in_bounds2, dispatch +/// in_bounds2: +/// ... +/// dispatch: +/// br (5 > Length), fail2, fail1 +/// +/// TODO: we could consider duplicating some (non-speculatable) instructions +/// from BI->getParent() down both paths +static bool SpeculativelyFlattenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, + const DataLayout &DL) { + auto *PredBB = PBI->getParent(); + auto *BB = BI->getParent(); + + /// Is the failing path of this branch taken rarely if at all? + auto isRarelyUntaken = [](BranchInst *BI) { + uint64_t ProbTrue; + uint64_t ProbFalse; + if (!ExtractBranchMetadata(BI, ProbTrue, ProbFalse)) + return false; + return ProbTrue > ProbFalse * SpeculativeFlattenBias; + }; + + if (PBI->getSuccessor(0) != BB || + !isRarelyUntaken(PBI) || !isRarelyUntaken(BI) || + !isImpliedCondition(BI->getCondition(), PBI->getCondition(), DL)) + return false; + + // TODO: The following code performs a similar, but slightly distinct + // transformation to that done by SpeculativelyExecuteBB. We should consider + // combining them at some point. + + // Can we speculate everything in the given block (except for the terminator + // instruction) at the instruction boundary before 'At'? 
+ unsigned SpeculationCost = 0; + for (Instruction &I : *BB) { + if (isa<TerminatorInst>(I)) break; + if (!isSafeToSpeculativelyExecute(&I, PBI)) + return false; + SpeculationCost++; + // Only flatten relatively small BBs to avoid making the bad case terrible + if (SpeculationCost > SpeculativeFlattenThreshold || isa<CallInst>(I)) + return false; + } + + // We need to check whether any of the instructions we may have examined when + // deciding the BI implies PBI have flags which might be control dependent on + // PBI. This can happen if (for instance), PBI ensures no overflow in the + // computation leading to BI's condition. We can't use the overflow bits to + // prove the original condition checking for overflow. This only considers + // the instructions within BB which actually contribute to BI's condition. + SmallVector<Instruction *, 16> Worklist; + SmallSet<Instruction *, 16> Visited; + Value *BICond = BI->getCondition(); + if (auto *CondI = dyn_cast<Instruction>(BICond)) + if (CondI->getParent() == BB) + Worklist.push_back(CondI); + while (!Worklist.empty()) { + Instruction *I = Worklist.pop_back_val(); + assert(I->getParent() == BB && "broken invariant"); + + if (!Visited.insert(I).second) + // already visited + continue; + + if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I)) + if (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) + // TODO: can we prove the flags are valid before PBI? + return false; + + for (Value *Op : I->operands()) + if (auto *OpI = dyn_cast<Instruction>(Op)) + if (OpI->getParent() == BB) + Worklist.push_back(OpI); + } + + + DEBUG(dbgs() << "Outlining slow path comparison: " + << *PBI->getCondition() << " implied by " + << *BI->getCondition() << "\n"); + // See the example in the function comment. 
+ Value *WhichCond = PBI->getCondition(); + auto *Success = BI->getSuccessor(0); + auto *FailPBI = PBI->getSuccessor(1); + auto *FailBI = BI->getSuccessor(1); + // Have PBI branch directly to the fast path using BI's condition, branch + // to this BI's block for the slow path dispatch + PBI->setSuccessor(0, Success); + PBI->setSuccessor(1, BB); + PBI->setCondition(BI->getCondition()); + // Rewrite BI to distinguish between the two failing cases + BI->setSuccessor(0, FailBI); + BI->setSuccessor(1, FailPBI); + BI->setCondition(WhichCond); + // Move all of the instructions from BI->getParent other than + // the terminator into the fastpath. This requires speculating them past + // the original PBI branch, but we checked for that legality above. + // TODO: This doesn't handle dependent loads. We could duplicate those + // down both paths, but that involves further code growth. We need to + // figure out a good cost model here. + PredBB->getInstList().splice(BasicBlock::iterator(PBI), BB->getInstList(), + BB->begin(), std::prev(BB->end())); + + // To be conservatively correct, drop all metadata on the rewritten + // branches. TODO: update metadata + PBI->dropUnknownNonDebugMetadata(); + BI->dropUnknownNonDebugMetadata(); + return true; +} /// If we have a conditional branch as a predecessor of another block, /// this function tries to simplify it. We know /// that PBI and BI are both conditional branches, and BI is in one of the @@ -2729,6 +2863,8 @@ isImpliedCondition(PBI->getCondition(), BI->getCondition(), DL) && PBI->getSuccessor(0) != PBI->getSuccessor(1) && BB->getSinglePredecessor()) { + DEBUG(dbgs() << "Branch (" << *BI << ") implied by predecessor (" + << *PBI << ")\n"); // Turn this into a branch on constant. auto *OldCond = BI->getCondition(); BI->setCondition(ConstantInt::getTrue(BB->getContext())); @@ -2736,6 +2872,10 @@ return true; // Nuke the branch on constant. 
} + // If BI could imply PBI, try to remove one check from the fast-path + if (SpeculativelyFlattenCondBranchToCondBranch(PBI, BI, DL)) + return true; + // If both branches are conditional and both contain stores to the same // address, remove the stores from the conditionals and create a conditional // merged store at the end. Index: test/Transforms/InstSimplify/implies.ll =================================================================== --- test/Transforms/InstSimplify/implies.ll +++ test/Transforms/InstSimplify/implies.ll @@ -215,3 +215,50 @@ %res = icmp sge i1 %var30, %var29 ret i1 %res } + +define i1 @test16(i32 %length.i, i32 %i) { +; CHECK-LABEL: @test16 +; CHECK: ret i1 true + %var29 = icmp ugt i32 %length.i, 5 + %var30 = icmp ugt i32 %length.i, 6 + %res = icmp ule i1 %var30, %var29 + ret i1 %res +} + +define i1 @test17(i32 %length.i, i32 %i) { +; CHECK-LABEL: @test17 +; CHECK: ret i1 true + %var29 = icmp ugt i32 %length.i, 5 + %var30 = icmp ugt i32 %length.i, 5 + %res = icmp ule i1 %var30, %var29 + ret i1 %res +} + +; negative test (smaller range) +define i1 @test18(i32 %length.i, i32 %i) { +; CHECK-LABEL: @test18 +; CHECK: ret i1 %res + %var29 = icmp ugt i32 %length.i, 5 + %var30 = icmp ugt i32 %length.i, 4 + %res = icmp ule i1 %var30, %var29 + ret i1 %res +} + +; negative test +define i1 @test19(i32 %length.i, i32 %i) { +; CHECK-LABEL: @test19 +; CHECK: ret i1 %res + %var29 = icmp ugt i32 5, %length.i + %var30 = icmp ugt i32 6, %length.i + %res = icmp ule i1 %var30, %var29 + ret i1 %res +} + +define i1 @test20(i32 %length.i, i32 %i) { +; CHECK-LABEL: @test20 +; CHECK: ret i1 true + %var29 = icmp ugt i32 5, %length.i + %var30 = icmp ugt i32 4, %length.i + %res = icmp ule i1 %var30, %var29 + ret i1 %res +} Index: test/Transforms/SimplifyCFG/fast-fallthrough.ll =================================================================== --- test/Transforms/SimplifyCFG/fast-fallthrough.ll +++ test/Transforms/SimplifyCFG/fast-fallthrough.ll @@ -0,0 +1,113 @@ +; RUN: 
opt -S %s -simplifycfg | FileCheck %s + +define void @test(i32 %length.i, i32 %i) { +; CHECK-LABEL: @test + %iplus1 = add nsw i32 %i, 1 + %var29 = icmp ugt i32 %length.i, 5 + %var30 = icmp ugt i32 %length.i, 6 +; CHECK: br i1 %var30, label %in_bounds, label %next + br i1 %var29, label %next, label %out_of_bounds, !prof !{!"branch_weights", i32 1000, i32 0} + +next: +; CHECK-LABEL: next: +; CHECK: br i1 %var29, label %out_of_bounds2, label %out_of_bounds + br i1 %var30, label %in_bounds, label %out_of_bounds2, !prof !{!"branch_weights", i32 1000, i32 0} + +in_bounds: + ret void + +out_of_bounds: + call void @foo(i64 0) + unreachable + +out_of_bounds2: + call void @foo(i64 1) + unreachable +} + +define void @test2(i32 %length.i, i32 %i) { +; CHECK-LABEL: @test2 + %iplus1 = add nsw i32 %i, 1 + %var29 = icmp slt i32 %i, %length.i + %var30 = icmp slt i32 %iplus1, %length.i +; CHECK: br i1 %var30, label %in_bounds, label %next + br i1 %var29, label %next, label %out_of_bounds, !prof !{!"branch_weights", i32 1000, i32 0} + +next: +; CHECK-LABEL: next: +; CHECK: br i1 %var29, label %out_of_bounds2, label %out_of_bounds + br i1 %var30, label %in_bounds, label %out_of_bounds2, !prof !{!"branch_weights", i32 1000, i32 0} + +in_bounds: + ret void + +out_of_bounds: + call void @foo(i64 0) + unreachable + +out_of_bounds2: + call void @foo(i64 1) + unreachable +} + +; This is a negative test. The nsw on the add might be control +; dependent on the first check, so we can't hoist it above. 
+define void @test3(i32 %length.i, i32 %i) { +; CHECK-LABEL: @test3 + %var29 = icmp slt i32 %i, %length.i +; CHECK: br i1 %var29, label %next, label %out_of_bounds + br i1 %var29, label %next, label %out_of_bounds, !prof !{!"branch_weights", i32 1000, i32 0} + +next: +; CHECK-LABEL: next: +; CHECK: br i1 %var30, label %in_bounds, label %out_of_bounds2 + %iplus1 = add nsw i32 %i, 1 + %var30 = icmp slt i32 %iplus1, %length.i + br i1 %var30, label %in_bounds, label %out_of_bounds2, !prof !{!"branch_weights", i32 1000, i32 0} + +in_bounds: + ret void + +out_of_bounds: + call void @foo(i64 0) + unreachable + +out_of_bounds2: + call void @foo(i64 1) + unreachable +} + +; As written, this one can't trigger today. It would require us to duplicate +; the %val1 load down two paths and that's not implemented yet. +define i64 @test4(i32 %length.i, i32 %i, i64* %base) { +; CHECK-LABEL: @test4 + %var29 = icmp slt i32 %i, %length.i +; CHECK: br i1 %var29, label %next, label %out_of_bounds + br i1 %var29, label %next, label %out_of_bounds, !prof !{!"branch_weights", i32 1000, i32 0} + +next: +; CHECK-LABEL: next: + %addr1 = getelementptr i64, i64* %base, i32 %i + %val1 = load i64, i64* %addr1 + %iplus1 = add nsw i32 %i, 1 + %var30 = icmp slt i32 %iplus1, %length.i +; CHECK: br i1 %var30, label %in_bounds, label %out_of_bounds2 + br i1 %var30, label %in_bounds, label %out_of_bounds2, !prof !{!"branch_weights", i32 1000, i32 0} + +in_bounds: + %addr2 = getelementptr i64, i64* %base, i32 %iplus1 + %val2 = load i64, i64* %addr2 + %res = sub i64 %val1, %val2 + ret i64 %res + +out_of_bounds: + call void @foo(i64 0) + unreachable + +out_of_bounds2: + call void @foo(i64 %val1) + unreachable +} + +declare void @foo(i64) +