diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -271,7 +271,9 @@ bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, IRBuilder<> &Builder); - bool HoistThenElseCodeToIf(BranchInst *BI, bool EqTermsOnly); + bool hoistCommonCodeSuccessors(BasicBlock *BB, bool EqTermsOnly); + bool hoistThenElseIdenticalTerminatorToIf(BranchInst *BI, Instruction *I1, + Instruction *I2); bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB); bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond, BasicBlock *TrueBB, BasicBlock *FalseBB, @@ -1408,8 +1410,9 @@ } // If we would need to insert a select that uses the value of this invoke -// (comments in HoistThenElseCodeToIf explain why we would need to do this), we -// can't hoist the invoke, as there is nowhere to put the select in this case. +// (comments in hoistCommonCodeSuccessors explain why we would need to do this), +// we can't hoist the invoke, as there is nowhere to put the select in this +// case. static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, Instruction *I1, Instruction *I2) { for (BasicBlock *Succ : successors(BB1)) { @@ -1424,9 +1427,9 @@ return true; } -// Get interesting characteristics of instructions that `HoistThenElseCodeToIf` -// didn't hoist. They restrict what kind of instructions can be reordered -// across. +// Get interesting characteristics of instructions that +// `hoistCommonCodeSuccessors` didn't hoist. They restrict what kind of +// instructions can be reordered across. enum SkipFlags { SkipReadMem = 1, SkipSideEffect = 2, @@ -1484,7 +1487,7 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false); -/// Helper function for HoistThenElseCodeToIf. Return true if identical +/// Helper function for hoistCommonCodeSuccessors. Return true if identical /// instructions \p I1 and \p I2 can and should be hoisted. static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, const TargetTransformInfo &TTI) { @@ -1515,153 +1518,208 @@ return true; } -/// Given a conditional branch that goes to BB1 and BB2, hoist any common code -/// in the two blocks up into the branch block. The caller of this function -/// guarantees that BI's block dominates BB1 and BB2. If EqTermsOnly is given, -/// only perform hoisting in case both blocks only contain a terminator. In that -/// case, only the original BI will be replaced and selects for PHIs are added. -bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI, bool EqTermsOnly) { +/// Hoist any common code in the successor blocks up into the block. This +/// function guarantees that BB dominates all successors. If EqTermsOnly is +/// given, only perform hoisting in case both blocks only contain a terminator. +/// In that case, only the original BI will be replaced and selects for PHIs are +/// added. +bool SimplifyCFGOpt::hoistCommonCodeSuccessors(BasicBlock *BB, + bool EqTermsOnly) { // This does very trivial matching, with limited scanning, to find identical - // instructions in the two blocks. In particular, we don't want to get into - // O(M*N) situations here where M and N are the sizes of BB1 and BB2. As + // instructions in the two blocks. In particular, we don't want to get into + // O(∏N) situations here where N is the sizes of these successors. As // such, we currently just scan for obviously identical instructions in an // identical order, possibly separated by the same number of non-identical // instructions. - BasicBlock *BB1 = BI->getSuccessor(0); // The true destination. - BasicBlock *BB2 = BI->getSuccessor(1); // The false destination + unsigned int SuccSize = succ_size(BB); + if (SuccSize < 2) + return false; // If either of the blocks has it's address taken, then we can't do this fold, // because the code we'd hoist would no longer run when we jump into the block // by it's address. - if (BB1->hasAddressTaken() || BB2->hasAddressTaken()) - return false; - - BasicBlock::iterator BB1_Itr = BB1->begin(); - BasicBlock::iterator BB2_Itr = BB2->begin(); - - Instruction *I1 = &*BB1_Itr++, *I2 = &*BB2_Itr++; - // Skip debug info if it is not identical. - DbgInfoIntrinsic *DBI1 = dyn_cast(I1); - DbgInfoIntrinsic *DBI2 = dyn_cast(I2); - if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) { - while (isa(I1)) - I1 = &*BB1_Itr++; - while (isa(I2)) - I2 = &*BB2_Itr++; + for (auto *BB : successors(BB)) { + if (BB->hasAddressTaken() || !BB->getSinglePredecessor()) { + return false; + } } - if (isa(I1)) - return false; - - BasicBlock *BIParent = BI->getParent(); - bool Changed = false; + auto *TerminatorInst = BB->getTerminator(); - auto _ = make_scope_exit([&]() { - if (Changed) - ++NumHoistCommonCode; - }); + // The second of pair record any skipped instuctions that may read memory, + // write memory or have side effects, or have implicit control flow. + using SuccIterPair = std::pair; + // This flag affects all successors. + unsigned SkipFlagsBB = 0; + SmallVector SuccIters; + for (auto *Succ : successors(BB)) { + BasicBlock::iterator SuccItr = Succ->begin(); + if (isa(*SuccItr)) + return false; + SuccIters.push_back(SuccIterPair(SuccItr, 0)); + } // Check if only hoisting terminators is allowed. This does not add new // instructions to the hoist location. if (EqTermsOnly) { // Skip any debug intrinsics, as they are free to hoist. - auto *I1NonDbg = &*skipDebugIntrinsics(I1->getIterator()); - auto *I2NonDbg = &*skipDebugIntrinsics(I2->getIterator()); - if (!I1NonDbg->isIdenticalToWhenDefined(I2NonDbg)) - return false; - if (!I1NonDbg->isTerminator()) - return false; + for (auto &SuccIter : SuccIters) { + auto *INonDbg = &*skipDebugIntrinsics(SuccIter.first); + if (!INonDbg->isTerminator()) + return false; + } // Now we know that we only need to hoist debug intrinsics and the // terminator. Let the loop below handle those 2 cases. } + bool Changed = false; + // Count how many instructions were not hoisted so far. There's a limit on how // many instructions we skip, serving as a compilation time control as well as // preventing excessive increase of life ranges. unsigned NumSkipped = 0; - // Record any skipped instuctions that may read memory, write memory or have - // side effects, or have implicit control flow. - unsigned SkipFlagsBB1 = 0; - unsigned SkipFlagsBB2 = 0; - for (;;) { + // If we find an unreachable instruction, we can still hoist the instruction + // that doesn't has any side effect. + if (SuccIters.size() > 2) + for (auto *Iter = SuccIters.begin(); Iter != SuccIters.end();) { + if (isa(*Iter->first)) { + SkipFlagsBB |= SkipSideEffect; + Iter = SuccIters.erase(Iter); + } else { + ++Iter; + } + } + + if (SuccIters.size() < 2) + return Changed; + + auto *SuccIterBegin = SuccIters.begin(); + auto &BB1Itr = *SuccIterBegin++; + + Instruction *I1 = &*BB1Itr.first; + unsigned SkipFlagsBB1 = BB1Itr.second | SkipFlagsBB; + auto *BB1 = I1->getParent(); + + // Skip debug info if it is not identical. + for (auto &SuccIter : iterator_range(SuccIterBegin, SuccIters.end())) { + Instruction *I2 = &*SuccIter.first; + if (!I1->isIdenticalToWhenDefined(I2)) { + while (isa(I1)) + I1 = &*++BB1Itr.first; + while (isa(I2)) + I2 = &*++SuccIter.first; + } + } + // If we are hoisting the terminator instruction, don't move one (making a // broken BB), instead clone it, and remove BI. - if (I1->isTerminator() || I2->isTerminator()) { + bool HasTerminator = I1->isTerminator(); + if (!HasTerminator) + for (auto &SuccIter : iterator_range(SuccIterBegin, SuccIters.end())) { + Instruction *I2 = &*SuccIter.first; + if (I2->isTerminator()) { + HasTerminator = true; + break; + } + } + if (HasTerminator) { + auto *BI = dyn_cast(TerminatorInst); + // TODO: Handles the switch instruction. + if (!BI) + return Changed; + Instruction *I2 = &*SuccIterBegin->first; // If any instructions remain in the block, we cannot hoist terminators. if (NumSkipped || !I1->isIdenticalToWhenDefined(I2)) return Changed; - goto HoistTerminator; + return hoistThenElseIdenticalTerminatorToIf(BI, I1, I2) || Changed; } - if (I1->isIdenticalToWhenDefined(I2) && - // Even if the instructions are identical, it may not be safe to hoist - // them if we have skipped over instructions with side effects or their - // operands weren't hoisted. - isSafeToHoistInstr(I1, SkipFlagsBB1) && - isSafeToHoistInstr(I2, SkipFlagsBB2) && - shouldHoistCommonInstructions(I1, I2, TTI)) { - if (isa(I1) || isa(I2)) { - assert(isa(I1) && isa(I2)); + bool AllInstsAreIdentical = isSafeToHoistInstr(I1, SkipFlagsBB1); + if (AllInstsAreIdentical) + for (auto &SuccIter : iterator_range(SuccIterBegin, SuccIters.end())) { + Instruction *I2 = &*SuccIter.first; + unsigned SkipFlagsBB2 = SuccIter.second | SkipFlagsBB; + if (!I1->isIdenticalToWhenDefined(I2) || + // Even if the instructions are identical, it may not be safe to + // hoist them if we have skipped over instructions with side effects + // or their operands weren't hoisted. + !isSafeToHoistInstr(I2, SkipFlagsBB2) || + !shouldHoistCommonInstructions(I1, I2, TTI)) { + AllInstsAreIdentical = false; + break; + } + } + + if (AllInstsAreIdentical) { + BB1Itr.first++; + if (isa(I1)) { // The debug location is an integral part of a debug info intrinsic // and can't be separated from it or replaced. Instead of attempting // to merge locations, simply hoist both copies of the intrinsic. - BIParent->splice(BI->getIterator(), BB1, I1->getIterator()); - BIParent->splice(BI->getIterator(), BB2, I2->getIterator()); + BB->splice(TerminatorInst->getIterator(), BB1, I1->getIterator()); + for (auto &SuccIter : iterator_range(SuccIterBegin, SuccIters.end())) { + auto *I2 = &*SuccIter.first++; + assert(isa(I2)); + BB->splice(TerminatorInst->getIterator(), I2->getParent(), + I2->getIterator()); + } } else { // For a normal instruction, we just move one to right before the // branch, then replace all uses of the other with the first. Finally, // we remove the now redundant second instruction. - BIParent->splice(BI->getIterator(), BB1, I1->getIterator()); - if (!I2->use_empty()) - I2->replaceAllUsesWith(I1); - I1->andIRFlags(I2); - combineMetadataForCSE(I1, I2, true); - - // I1 and I2 are being combined into a single instruction. Its debug - // location is the merged locations of the original instructions. - I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc()); - - I2->eraseFromParent(); + BB->splice(TerminatorInst->getIterator(), BB1, I1->getIterator()); + for (auto &SuccIter : iterator_range(SuccIterBegin, SuccIters.end())) { + Instruction *I2 = &*SuccIter.first++; + assert(I2 != I1); + if (!I2->use_empty()) + I2->replaceAllUsesWith(I1); + I1->andIRFlags(I2); + combineMetadataForCSE(I1, I2, true); + + // I1 and I2 are being combined into a single instruction. Its debug + // location is the merged locations of the original instructions. + I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc()); + I2->eraseFromParent(); + } } Changed = true; - ++NumHoistCommonInstrs; + if (NumHoistCommonInstrs == 0) + NumHoistCommonCode += SuccIters.size(); + NumHoistCommonInstrs += SuccIters.size(); } else { if (NumSkipped >= HoistCommonSkipLimit) return Changed; // We are about to skip over a pair of non-identical instructions. Record // if any have characteristics that would prevent reordering instructions // across them. - SkipFlagsBB1 |= skippedInstrFlags(I1); - SkipFlagsBB2 |= skippedInstrFlags(I2); + for (auto &SuccIter : SuccIters) { + Instruction *I = &*SuccIter.first++; + SuccIter.second |= skippedInstrFlags(I); + } ++NumSkipped; } - - I1 = &*BB1_Itr++; - I2 = &*BB2_Itr++; - // Skip debug info if it is not identical. - DbgInfoIntrinsic *DBI1 = dyn_cast(I1); - DbgInfoIntrinsic *DBI2 = dyn_cast(I2); - if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) { - while (isa(I1)) - I1 = &*BB1_Itr++; - while (isa(I2)) - I2 = &*BB2_Itr++; - } } +} - return Changed; - -HoistTerminator: +bool SimplifyCFGOpt::hoistThenElseIdenticalTerminatorToIf(BranchInst *BI, + Instruction *I1, + Instruction *I2) { + bool Changed = false; + BasicBlock *BIParent = BI->getParent(); + BasicBlock *BB1 = I1->getParent(); + BasicBlock *BB2 = I2->getParent(); + assert(I1->isIdenticalToWhenDefined(I2)); + assert(I1->isTerminator() && I2->isTerminator()); // It may not be possible to hoist an invoke. // FIXME: Can we define a safety predicate for CallBr? if (isa(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)) - return Changed; + return false; // TODO: callbr hoisting currently disabled pending further study. if (isa(I1)) - return Changed; + return false; for (BasicBlock *Succ : successors(BB1)) { for (PHINode &PN : Succ->phis()) { @@ -1674,7 +1732,7 @@ // eliminate undefined control flow then converting it to a select. if (passingValueIsAlwaysUndefined(BB1V, &PN) || passingValueIsAlwaysUndefined(BB2V, &PN)) - return Changed; + return false; } } @@ -2765,8 +2823,8 @@ Value *OrigV = PN.getIncomingValueForBlock(BB); Value *ThenV = PN.getIncomingValueForBlock(ThenBB); - // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf. - // Skip PHIs which are trivial. + // FIXME: Try to remove some of the duplication with + // hoistCommonCodeSuccessors. Skip PHIs which are trivial. if (ThenV == OrigV) continue; @@ -6794,6 +6852,10 @@ if (ReduceSwitchRange(SI, Builder, DL, TTI)) return requestResimplify(); + if (HoistCommon && + hoistCommonCodeSuccessors(SI->getParent(), !Options.HoistCommonInsts)) + return requestResimplify(); + return false; } @@ -7058,7 +7120,10 @@ // can hoist it up to the branching block. if (BI->getSuccessor(0)->getSinglePredecessor()) { if (BI->getSuccessor(1)->getSinglePredecessor()) { - if (HoistCommon && HoistThenElseCodeToIf(BI, !Options.HoistCommonInsts)) + if (HoistCommon && + hoistCommonCodeSuccessors(BI->getParent(), !Options.HoistCommonInsts)) + // if (HoistCommon && hoistCommonCodeSuccessors(BI, + // !Options.HoistCommonInsts)) return requestResimplify(); } else { // If Successor #1 has multiple preds, we may be able to conditionally diff --git a/llvm/test/CodeGen/AArch64/patchable-function-entry-bti.ll b/llvm/test/CodeGen/AArch64/patchable-function-entry-bti.ll --- a/llvm/test/CodeGen/AArch64/patchable-function-entry-bti.ll +++ b/llvm/test/CodeGen/AArch64/patchable-function-entry-bti.ll @@ -70,7 +70,7 @@ i64 4, label %sw.bb4 ] sw.bb0: - call void asm sideeffect "", ""() + call void asm sideeffect "nop", ""() ret void sw.bb1: call void asm sideeffect "", ""() diff --git a/llvm/test/Transforms/SimplifyCFG/hoist-common-code-with-unreachable.ll b/llvm/test/Transforms/SimplifyCFG/hoist-common-code-with-unreachable.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/hoist-common-code-with-unreachable.ll @@ -0,0 +1,68 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes='simplifycfg' -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s + +define i1 @common_instr_with_unreachable(i64 %a, i64 %b, i64 %c) { +; CHECK-LABEL: @common_instr_with_unreachable( +; CHECK-NEXT: start: +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[B:%.*]], [[C:%.*]] +; CHECK-NEXT: ret i1 [[TMP0]] +; +start: + switch i64 %a, label %unreachable [ + i64 0, label %bb0 + i64 1, label %bb1 + i64 2, label %bb2 + ] + +unreachable: + unreachable + +bb0: ; preds = %start + %0 = icmp eq i64 %b, %c + br label %exit + +bb1: ; preds = %start + %1 = icmp eq i64 %b, %c + br label %exit + +bb2: ; preds = %start + %2 = icmp eq i64 %b, %c + br label %exit + +exit: ; preds = %bb2, %bb1, %bb0 + %result = phi i1 [ %0, %bb0 ], [ %1, %bb1 ], [ %2, %bb2 ] + ret i1 %result +} + +define i1 @common_instr_with_unreachable_2(i64 %a, i64 %b, i64 %c) { +; CHECK-LABEL: @common_instr_with_unreachable_2( +; CHECK-NEXT: start: +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[B:%.*]], [[C:%.*]] +; CHECK-NEXT: ret i1 [[TMP0]] +; +start: + switch i64 %a, label %bb1 [ + i64 0, label %bb0 + i64 1, label %unreachable + i64 2, label %bb2 + ] + +unreachable: + unreachable + +bb0: ; preds = %start + %0 = icmp eq i64 %b, %c + br label %exit + +bb1: ; preds = %start + %1 = icmp eq i64 %b, %c + br label %exit + +bb2: ; preds = %start + %2 = icmp eq i64 %b, %c + br label %exit + +exit: ; preds = %bb2, %bb1, %bb0 + %result = phi i1 [ %0, %bb0 ], [ %1, %bb1 ], [ %2, %bb2 ] + ret i1 %result +} diff --git a/llvm/test/Transforms/SimplifyCFG/hoist-common-code.ll b/llvm/test/Transforms/SimplifyCFG/hoist-common-code.ll --- a/llvm/test/Transforms/SimplifyCFG/hoist-common-code.ll +++ b/llvm/test/Transforms/SimplifyCFG/hoist-common-code.ll @@ -24,3 +24,74 @@ ret void } +define i1 @common_instr_on_switch(i64 %a, i64 %b, i64 %c) unnamed_addr { +; CHECK-LABEL: @common_instr_on_switch( +; CHECK-NEXT: start: +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[B:%.*]], [[C:%.*]] +; CHECK-NEXT: ret i1 [[TMP0]] +; +start: + switch i64 %a, label %bb0 [ + i64 1, label %bb1 + i64 2, label %bb2 + ] + +bb0: ; preds = %start + %0 = icmp eq i64 %b, %c + br label %exit + +bb1: ; preds = %start + %1 = icmp eq i64 %b, %c + br label %exit + +bb2: ; preds = %start + %2 = icmp eq i64 %b, %c + br label %exit + +exit: ; preds = %bb2, %bb1, %bb0 + %result = phi i1 [ %0, %bb0 ], [ %1, %bb1 ], [ %2, %bb2 ] + ret i1 %result +} + +define i1 @partial_common_instr_on_switch(i64 %a, i64 %b, i64 %c) unnamed_addr { +; CHECK-LABEL: @partial_common_instr_on_switch( +; CHECK-NEXT: start: +; CHECK-NEXT: switch i64 [[A:%.*]], label [[BB0:%.*]] [ +; CHECK-NEXT: i64 1, label [[BB1:%.*]] +; CHECK-NEXT: i64 2, label [[BB2:%.*]] +; CHECK-NEXT: ] +; CHECK: bb0: +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[B:%.*]], [[C:%.*]] +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[B]], [[C]] +; CHECK-NEXT: br label [[EXIT]] +; CHECK: bb2: +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[B]], [[C]] +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[RESULT:%.*]] = phi i1 [ [[TMP0]], [[BB0]] ], [ [[TMP1]], [[BB1]] ], [ [[TMP2]], [[BB2]] ] +; CHECK-NEXT: ret i1 [[RESULT]] +; +start: + switch i64 %a, label %bb0 [ + i64 1, label %bb1 + i64 2, label %bb2 + ] + +bb0: ; preds = %start + %0 = icmp eq i64 %b, %c + br label %exit + +bb1: ; preds = %start + %1 = icmp ne i64 %b, %c + br label %exit + +bb2: ; preds = %start + %2 = icmp eq i64 %b, %c + br label %exit + +exit: ; preds = %bb2, %bb1, %bb0 + %result = phi i1 [ %0, %bb0 ], [ %1, %bb1 ], [ %2, %bb2 ] + ret i1 %result +}