Index: llvm/lib/CodeGen/MachineSink.cpp =================================================================== --- llvm/lib/CodeGen/MachineSink.cpp +++ llvm/lib/CodeGen/MachineSink.cpp @@ -1142,31 +1142,37 @@ bool SawStore = false; bool HasAliasedStore = false; - DenseSet HandledBlocks; - DenseSet HandledDomBlocks; - // Go through all reachable blocks from From. - for (MachineBasicBlock *BB : depth_first(From)) { - // We insert the instruction at the start of block To, so no need to worry - // about stores inside To. - // Store in block From should be already considered when just enter function - // SinkInstruction. - if (BB == To || BB == From) + SmallPtrSet HandledDomBlocks; + // Go through all block reverse-reachable from To. + SmallVector Worklist; + append_range(Worklist, To->predecessors()); + while (!Worklist.empty()) { + MachineBasicBlock *BB = Worklist.pop_back_val(); + if (!HandledDomBlocks.insert(BB).second) continue; - // We already handle this BB in previous iteration. - if (HandledBlocks.count(BB)) + // Don't walk past the original position of the instruction. + if (BB == From) continue; - HandledBlocks.insert(BB); - // To post dominates BB, it must be a path from block From. - if (PDT->dominates(To, BB)) { - if (!HandledDomBlocks.count(BB)) - HandledDomBlocks.insert(BB); + // If this BB is too big or the block number in straight line between From + // and To is too big, stop searching to save compiling time. + if (BB->size() > SinkLoadInstsPerBlockThreshold || + HandledDomBlocks.size() > SinkLoadBlocksThreshold) { + for (auto *DomBB : HandledDomBlocks) { + if (DomBB != BB && DT->dominates(DomBB, BB)) + HasStoreCache[std::make_pair(DomBB, To)] = true; + else if(DomBB != BB && DT->dominates(BB, DomBB)) + HasStoreCache[std::make_pair(From, DomBB)] = true; + } + HasStoreCache[BlockPair] = true; + return true; + } - // If this BB is too big or the block number in straight line between From - // and To is too big, stop searching to save compiling time. - if (BB->size() > SinkLoadInstsPerBlockThreshold || - HandledDomBlocks.size() > SinkLoadBlocksThreshold) { + for (MachineInstr &I : *BB) { + // Treat as alias conservatively for a call or an ordered memory + // operation. + if (I.isCall() || I.hasOrderedMemoryRef()) { for (auto *DomBB : HandledDomBlocks) { if (DomBB != BB && DT->dominates(DomBB, BB)) HasStoreCache[std::make_pair(DomBB, To)] = true; @@ -1177,32 +1183,19 @@ return true; } - for (MachineInstr &I : *BB) { - // Treat as alias conservatively for a call or an ordered memory - // operation. - if (I.isCall() || I.hasOrderedMemoryRef()) { - for (auto *DomBB : HandledDomBlocks) { - if (DomBB != BB && DT->dominates(DomBB, BB)) - HasStoreCache[std::make_pair(DomBB, To)] = true; - else if(DomBB != BB && DT->dominates(BB, DomBB)) - HasStoreCache[std::make_pair(From, DomBB)] = true; - } - HasStoreCache[BlockPair] = true; - return true; - } - - if (I.mayStore()) { - SawStore = true; - // We still have chance to sink MI if all stores between are not - // aliased to MI. - // Cache all store instructions, so that we don't need to go through - // all From reachable blocks for next load instruction. - if (I.mayAlias(AA, MI, false)) - HasAliasedStore = true; - StoreInstrCache[BlockPair].push_back(&I); - } + if (I.mayStore()) { + SawStore = true; + // We still have chance to sink MI if all stores between are not + // aliased to MI. + // Cache all store instructions, so that we don't need to go through + // all From reachable blocks for next load instruction. + if (I.mayAlias(AA, MI, false)) + HasAliasedStore = true; + StoreInstrCache[BlockPair].push_back(&I); } } + + append_range(Worklist, BB->predecessors()); } // If there is no store at all, cache the result. if (!SawStore) Index: llvm/test/CodeGen/X86/pr53990-incorrect-machine-sink.ll =================================================================== --- llvm/test/CodeGen/X86/pr53990-incorrect-machine-sink.ll +++ llvm/test/CodeGen/X86/pr53990-incorrect-machine-sink.ll @@ -7,18 +7,15 @@ ; CHECK-LABEL: test: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rbp -; CHECK-NEXT: pushq %r15 ; CHECK-NEXT: pushq %r14 ; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: pushq %rax ; CHECK-NEXT: movq %rdx, %rbx -; CHECK-NEXT: movq %rsi, %r14 -; CHECK-NEXT: movl %edi, %r15d +; CHECK-NEXT: movl %edi, %r14d +; CHECK-NEXT: movq (%rsi), %rbp ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: jmpq *.LJTI0_0(,%rax,8) ; CHECK-NEXT: .LBB0_1: # %split.3 -; CHECK-NEXT: movq (%r14), %rbp -; CHECK-NEXT: testb $1, %r15b +; CHECK-NEXT: testb $1, %r14b ; CHECK-NEXT: je .LBB0_3 ; CHECK-NEXT: # %bb.2: # %clobber ; CHECK-NEXT: callq clobber@PLT