Index: lib/CodeGen/MachineBlockPlacement.cpp =================================================================== --- lib/CodeGen/MachineBlockPlacement.cpp +++ lib/CodeGen/MachineBlockPlacement.cpp @@ -171,6 +171,14 @@ /// out in-order within the function. SmallVector Blocks; +#ifndef NDEBUG + /// The set of basic blocks that have terminators that cannot be fully + /// analyzed. These basic blocks cannot be re-ordered safely by + /// MachineBlockPlacement, and we must preserve physical layout of these + /// blocks and their successors through the pass. + SmallPtrSet BlocksWithUnanalyzableExits; +#endif + /// \brief A handle to the function-wide basic block to block chain mapping. /// /// This is retained in each block chain to simplify the computation of child @@ -200,6 +208,17 @@ /// \brief End of blocks within the chain. iterator end() { return Blocks.end(); } +#ifndef NDEBUG + /// Return true if \p MBB is required to have an analyzable block exit by the + /// end of the transform. The blocks for which this returns true may have + /// been rearranged in ways that require us to adjust their terminator + /// sequence (for correctness), and we can't do that if the block terminator + /// sequence is not analyzable. + bool isAnalyzableBlockExitRequired(MachineBasicBlock *MBB) const { + return !BlocksWithUnanalyzableExits.count(MBB); + } +#endif + bool remove(MachineBasicBlock* BB) { for(iterator i = begin(); i != end(); ++i) { if (*i == BB) { @@ -216,9 +235,31 @@ /// a contiguous sequence of basic blocks, updating the edge list, and /// updating the block -> chain mapping. It does not free or tear down the /// old chain, but the old chain's block list is no longer valid. - void merge(MachineBasicBlock *BB, BlockChain *Chain) { + /// + /// If \p UnanalyzableEdge is true then the edge from the last basic block of + /// this chain to \p BB is marked as "unanalyzable" (that is, + /// MachineBlockPlacement cannot modify it safely). The converse is also + /// true: if \p Unanalyzable is false then the said edge is expected to be + /// analyzable and modifiable by MachineBlockPlacement. This information is + /// used to issue asserts. + void merge(MachineBasicBlock *BB, BlockChain *Chain, + bool UnanalyzableEdge = false) { assert(BB); assert(!Blocks.empty()); + (void)UnanalyzableEdge; + +#ifndef NDEBUG + assert((!UnanalyzableEdge || !Chain) && + "Merge non-analyzable chains not supported!"); + + if (UnanalyzableEdge) + BlocksWithUnanalyzableExits.insert(Blocks.back()); + + if (Chain) { + auto &Other = Chain->BlocksWithUnanalyzableExits; + BlocksWithUnanalyzableExits.insert(Other.begin(), Other.end()); + } +#endif // Fast path in case we don't have a chain already. if (!Chain) { @@ -1588,7 +1629,7 @@ DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: " << getBlockName(BB) << " -> " << getBlockName(NextBB) << "\n"); - Chain->merge(NextBB, nullptr); + Chain->merge(NextBB, nullptr, /*UnanalyzableEdge=*/true); FI = NextFI; BB = NextBB; } @@ -1661,6 +1702,19 @@ Cond.clear(); MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch. +#ifndef NDEBUG + if (FunctionChain.isAnalyzableBlockExitRequired(PrevBB)) { + // Given the exact block placement we chose, we may actually not _need_ to + // be able to edit PrevBB's terminator sequence, but not being _able_ to + // do that at this point is a bug. + assert((!TII->analyzeBranch(*PrevBB, TBB, FBB, Cond) || + !PrevBB->canFallThrough()) && + "Precondition violated!"); + Cond.clear(); + TBB = FBB = nullptr; + } +#endif + // The "PrevBB" is not yet updated to reflect current code layout, so, // o. it may fall-through to a block without explicit "goto" instruction // before layout, and no longer fall-through it after layout; or Index: lib/CodeGen/TailDuplicator.cpp =================================================================== --- lib/CodeGen/TailDuplicator.cpp +++ lib/CodeGen/TailDuplicator.cpp @@ -550,8 +550,8 @@ // blocks with unanalyzable fallthrough get layed out contiguously. MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; SmallVector PredCond; - if (TII->analyzeBranch(TailBB, PredTBB, PredFBB, PredCond, true) - && TailBB.canFallThrough()) + if (TII->analyzeBranch(TailBB, PredTBB, PredFBB, PredCond) && + TailBB.canFallThrough()) return false; // If the target has hardware branch prediction that can handle indirect @@ -660,7 +660,7 @@ MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; SmallVector PredCond; - if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) + if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond)) return false; if (!PredCond.empty()) @@ -687,7 +687,7 @@ MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; SmallVector PredCond; - if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) + if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond)) continue; Changed = true; @@ -750,7 +750,7 @@ MachineBasicBlock *PredTBB, *PredFBB; SmallVector PredCond; - if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) + if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond)) return false; if (!PredCond.empty()) return false; @@ -833,7 +833,7 @@ // Simplify MachineBasicBlock *PredTBB, *PredFBB; SmallVector PredCond; - TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true); + TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond); NumTailDupAdded += TailBB->size() - 1; // subtract one for removed branch @@ -861,7 +861,7 @@ if (PrevBB->succ_size() == 1 && // Layout preds are not always CFG preds. Check. *PrevBB->succ_begin() == TailBB && - !TII->analyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) && + !TII->analyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond) && PriorCond.empty() && (!PriorTBB || PriorTBB == TailBB) && TailBB->pred_size() == 1 && Index: test/CodeGen/X86/block-placement.mir =================================================================== --- /dev/null +++ test/CodeGen/X86/block-placement.mir @@ -0,0 +1,87 @@ +# RUN: llc -O3 -run-pass=block-placement -o - %s | FileCheck %s + +--- | + ; ModuleID = 'test.ll' + source_filename = "test.ll" + target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + + declare void @stub(i32*) + + define i32 @f(i32* %ptr, i1 %cond) { + entry: + br i1 %cond, label %left, label %right + + left: ; preds = %entry + %is_null = icmp eq i32* %ptr, null + br i1 %is_null, label %null, label %not_null, !prof !0, !make.implicit !1 + + not_null: ; preds = %left + %val = load i32, i32* %ptr + ret i32 %val + + null: ; preds = %left + call void @stub(i32* %ptr) + unreachable + + right: ; preds = %entry + call void @stub(i32* null) + unreachable + } + + ; Function Attrs: nounwind + declare void @llvm.stackprotector(i8*, i8**) #0 + + attributes #0 = { nounwind } + + !0 = !{!"branch_weights", i32 1048575, i32 1} + !1 = !{} + +... +--- +# CHECK: name: f +name: f +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '%rdi' } + - { reg: '%esi' } + +# CHECK: %eax = FAULTING_LOAD_OP %bb.3.null, 1684, killed %rdi, 1, _, 0, _ :: (load 4 from %ir.ptr) +# CHECK-NEXT: JMP_1 %bb.2.not_null +# CHECK: bb.3.null: +# CHECK: bb.4.right: +# CHECK: bb.2.not_null: + +body: | + bb.0.entry: + successors: %bb.1.left(0x7ffff800), %bb.3.right(0x00000800) + liveins: %esi, %rdi + + frame-setup PUSH64r undef %rax, implicit-def %rsp, implicit %rsp + CFI_INSTRUCTION def_cfa_offset 16 + TEST8ri %sil, 1, implicit-def %eflags, implicit killed %esi + JE_1 %bb.3.right, implicit killed %eflags + + bb.1.left: + successors: %bb.2.null(0x7ffff800), %bb.4.not_null(0x00000800) + liveins: %rdi + + %eax = FAULTING_LOAD_OP %bb.2.null, 1684, killed %rdi, 1, _, 0, _ :: (load 4 from %ir.ptr) + JMP_1 %bb.4.not_null + + bb.4.not_null: + liveins: %rdi, %eax + + %rcx = POP64r implicit-def %rsp, implicit %rsp + RETQ %eax + + bb.2.null: + liveins: %rdi + + CALL64pcrel32 @stub, csr_64, implicit %rsp, implicit %rdi, implicit-def %rsp + + bb.3.right: + dead %edi = XOR32rr undef %edi, undef %edi, implicit-def dead %eflags, implicit-def %rdi + CALL64pcrel32 @stub, csr_64, implicit %rsp, implicit %rdi, implicit-def %rsp + +...