diff --git a/llvm/lib/CodeGen/PHIEliminationUtils.h b/llvm/include/llvm/CodeGen/PHIEliminationUtils.h rename from llvm/lib/CodeGen/PHIEliminationUtils.h rename to llvm/include/llvm/CodeGen/PHIEliminationUtils.h --- a/llvm/lib/CodeGen/PHIEliminationUtils.h +++ b/llvm/include/llvm/CodeGen/PHIEliminationUtils.h @@ -12,13 +12,13 @@ #include "llvm/CodeGen/MachineBasicBlock.h" namespace llvm { - /// findPHICopyInsertPoint - Find a safe place in MBB to insert a copy from - /// SrcReg when following the CFG edge to SuccMBB. This needs to be after - /// any def of SrcReg, but before any subsequent point where control flow - /// might jump out of the basic block. - MachineBasicBlock::iterator - findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB, - unsigned SrcReg); -} +/// findPHICopyInsertPoint - Find a safe place in MBB to insert a copy from +/// SrcReg when following the CFG edge to SuccMBB. This needs to be after +/// any def of SrcReg, but before any subsequent point where control flow +/// might jump out of the basic block. 
+MachineBasicBlock::iterator findPHICopyInsertPoint(MachineBasicBlock *MBB, + MachineBasicBlock *SuccMBB, + unsigned SrcReg); +} // namespace llvm #endif diff --git a/llvm/include/llvm/CodeGen/TailDuplicator.h b/llvm/include/llvm/CodeGen/TailDuplicator.h --- a/llvm/include/llvm/CodeGen/TailDuplicator.h +++ b/llvm/include/llvm/CodeGen/TailDuplicator.h @@ -123,9 +123,10 @@ SmallVectorImpl &TDBBs, SmallVectorImpl &Copies, SmallVectorImpl *CandidatePtr); - void appendCopies(MachineBasicBlock *MBB, - SmallVectorImpl> &CopyInfos, - SmallVectorImpl &Copies); + void + appendCopies(MachineBasicBlock *MBB, MachineBasicBlock *Succ, + SmallVectorImpl> &CopyInfos, + SmallVectorImpl &Copies); void removeDeadBlock( MachineBasicBlock *MBB, diff --git a/llvm/lib/CodeGen/PHIElimination.cpp b/llvm/lib/CodeGen/PHIElimination.cpp --- a/llvm/lib/CodeGen/PHIElimination.cpp +++ b/llvm/lib/CodeGen/PHIElimination.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#include "PHIEliminationUtils.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" @@ -29,6 +28,7 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PHIEliminationUtils.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" diff --git a/llvm/lib/CodeGen/PHIEliminationUtils.cpp b/llvm/lib/CodeGen/PHIEliminationUtils.cpp --- a/llvm/lib/CodeGen/PHIEliminationUtils.cpp +++ b/llvm/lib/CodeGen/PHIEliminationUtils.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "PHIEliminationUtils.h" +#include "llvm/CodeGen/PHIEliminationUtils.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" diff --git 
a/llvm/lib/CodeGen/RenameIndependentSubregs.cpp b/llvm/lib/CodeGen/RenameIndependentSubregs.cpp --- a/llvm/lib/CodeGen/RenameIndependentSubregs.cpp +++ b/llvm/lib/CodeGen/RenameIndependentSubregs.cpp @@ -27,12 +27,12 @@ //===----------------------------------------------------------------------===// #include "LiveRangeUtils.h" -#include "PHIEliminationUtils.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PHIEliminationUtils.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/InitializePasses.h" diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp --- a/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/llvm/lib/CodeGen/TailDuplicator.cpp @@ -21,15 +21,16 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineSizeOpts.h" #include "llvm/CodeGen/MachineSSAUpdater.h" +#include "llvm/CodeGen/MachineSizeOpts.h" +#include "llvm/CodeGen/PHIEliminationUtils.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -627,14 +628,6 @@ if (PreRegAlloc && MI.isCall()) return false; - // TailDuplicator::appendCopies will erroneously place COPYs after - // INLINEASM_BR instructions after 4b0aa5724fea, which demonstrates the same - // bug that was fixed in f7a53d82c090. 
- // FIXME: Use findPHICopyInsertPoint() to find the correct insertion point - // for the COPY when replacing PHIs. - if (MI.getOpcode() == TargetOpcode::INLINEASM_BR) - return false; - if (MI.isBundle()) InstrCount += MI.getBundleSize(); else if (!MI.isPHI() && !MI.isMetaInstruction()) @@ -884,7 +877,7 @@ duplicateInstruction(MI, TailBB, PredBB, LocalVRMap, UsedByPhi); } } - appendCopies(PredBB, CopyInfos, Copies); + appendCopies(PredBB, TailBB, CopyInfos, Copies); NumTailDupAdded += TailBB->size() - 1; // subtract one for removed branch @@ -948,7 +941,7 @@ duplicateInstruction(MI, TailBB, PrevBB, LocalVRMap, UsedByPhi); MI->eraseFromParent(); } - appendCopies(PrevBB, CopyInfos, Copies); + appendCopies(PrevBB, TailBB, CopyInfos, Copies); } else { TII->removeBranch(*PrevBB); // No PHIs to worry about, just splice the instructions over. @@ -1006,7 +999,7 @@ MachineInstr *MI = &*I++; processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false); } - appendCopies(PredBB, CopyInfos, Copies); + appendCopies(PredBB, TailBB, CopyInfos, Copies); } return Changed; @@ -1014,12 +1007,14 @@ /// At the end of the block \p MBB generate COPY instructions between registers /// described by \p CopyInfos. Append resulting instructions to \p Copies. 
-void TailDuplicator::appendCopies(MachineBasicBlock *MBB, - SmallVectorImpl> &CopyInfos, - SmallVectorImpl &Copies) { - MachineBasicBlock::iterator Loc = MBB->getFirstTerminator(); +void TailDuplicator::appendCopies( + MachineBasicBlock *MBB, MachineBasicBlock *Succ, + SmallVectorImpl> &CopyInfos, + SmallVectorImpl &Copies) { const MCInstrDesc &CopyD = TII->get(TargetOpcode::COPY); for (auto &CI : CopyInfos) { + MachineBasicBlock::iterator Loc = + findPHICopyInsertPoint(MBB, Succ, CI.second.Reg); auto C = BuildMI(*MBB, Loc, DebugLoc(), CopyD, CI.first) .addReg(CI.second.Reg, 0, CI.second.SubReg); Copies.push_back(C); diff --git a/llvm/test/CodeGen/X86/tail-dup-asm-goto.ll b/llvm/test/CodeGen/X86/tail-dup-asm-goto.ll --- a/llvm/test/CodeGen/X86/tail-dup-asm-goto.ll +++ b/llvm/test/CodeGen/X86/tail-dup-asm-goto.ll @@ -1,8 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -mtriple=x86_64-linux -stop-after=early-tailduplication < %s | FileCheck %s -; Ensure that we don't duplicate a block with an "INLINEASM_BR" instruction -; during code gen. 
declare void @foo() define i8* @test1(i8** %arg1, i8* %arg2) { @@ -17,23 +15,25 @@ ; CHECK: JCC_1 %bb.2, 4, implicit $eflags ; CHECK: JMP_1 %bb.1 ; CHECK: bb.1.bb100: - ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: successors: %bb.5(0x80000000), %bb.4(0x00000000) ; CHECK: MOV64mi32 [[COPY1]], 1, $noreg, 0, $noreg, 0 :: (store 8 into %ir.arg1) - ; CHECK: JMP_1 %bb.3 + ; CHECK: INLINEASM_BR &"#$0 $1 $2", 9 /* sideeffect mayload attdialect */, 13 /* imm */, 42, 13 /* imm */, 0, 13 /* imm */, blockaddress(@test1, %ir-block.bb17.i.i.i), 12 /* clobber */, implicit-def early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def early-clobber $eflags + ; CHECK: [[COPY2:%[0-9]+]]:gr64 = COPY [[MOV64rm]] + ; CHECK: JMP_1 %bb.5 ; CHECK: bb.2.bb106: - ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: successors: %bb.5(0x80000000), %bb.4(0x00000000) ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK: CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp ; CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - ; CHECK: bb.3.bb110: - ; CHECK: successors: %bb.5(0x80000000), %bb.4(0x00000000) - ; CHECK: [[PHI:%[0-9]+]]:gr64 = PHI [[COPY]], %bb.2, [[MOV64rm]], %bb.1 ; CHECK: INLINEASM_BR &"#$0 $1 $2", 9 /* sideeffect mayload attdialect */, 13 /* imm */, 42, 13 /* imm */, 0, 13 /* imm */, blockaddress(@test1, %ir-block.bb17.i.i.i), 12 /* clobber */, implicit-def early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def early-clobber $eflags + ; CHECK: [[COPY3:%[0-9]+]]:gr64 = COPY [[COPY]] ; CHECK: JMP_1 %bb.5 ; CHECK: bb.4.bb17.i.i.i (address-taken): ; CHECK: successors: %bb.5(0x80000000) + ; CHECK: [[PHI:%[0-9]+]]:gr64 = PHI [[COPY2]], %bb.1, [[COPY3]], %bb.2 ; CHECK: 
bb.5.kmem_cache_has_cpu_partial.exit: - ; CHECK: $rax = COPY [[PHI]] + ; CHECK: [[PHI1:%[0-9]+]]:gr64 = PHI [[PHI]], %bb.4, [[COPY2]], %bb.1, [[COPY3]], %bb.2 + ; CHECK: $rax = COPY [[PHI1]] ; CHECK: RET 0, $rax bb: %i28.i = load i8*, i8** %arg1, align 8