diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -1125,7 +1125,8 @@ } bool isCopy() const { - return getOpcode() == TargetOpcode::COPY; + return getOpcode() == TargetOpcode::COPY || + getOpcode() == TargetOpcode::TCOPY; } bool isFullCopy() const { diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -72,7 +72,7 @@ /// virtual registers have been created for all the instructions, and it's /// only needed in cases where the register classes implied by the /// instructions are insufficient. It is emitted as a COPY MachineInstr. - HANDLE_TARGET_OPCODE(COPY_TO_REGCLASS) +HANDLE_TARGET_OPCODE(COPY_TO_REGCLASS) /// DBG_VALUE - a mapping of the llvm.dbg.value intrinsic HANDLE_TARGET_OPCODE(DBG_VALUE) @@ -91,11 +91,17 @@ /// e.g. v1027 = REG_SEQUENCE v1024, 3, v1025, 4, v1026, 5 /// After register coalescing references of v1024 should be replace with /// v1027:3, v1025 with v1027:4, etc. - HANDLE_TARGET_OPCODE(REG_SEQUENCE) +HANDLE_TARGET_OPCODE(REG_SEQUENCE) /// COPY - Target-independent register copy. This instruction can also be /// used to copy between subregisters of virtual registers. - HANDLE_TARGET_OPCODE(COPY) +HANDLE_TARGET_OPCODE(COPY) + +/// TCOPY - This instruction is the terminator version of COPY. The purpose +/// is to allow copies from terminators to be properly represented (e.g. an +/// INLINEASM_BR that defines a physical register) without having +/// to introduce "live-ins" for physical registers before register allocation. +HANDLE_TARGET_OPCODE(TCOPY) /// BUNDLE - This instruction represents an instruction bundle. 
Instructions /// which immediately follow a BUNDLE instruction which are marked with diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td --- a/llvm/include/llvm/Target/Target.td +++ b/llvm/include/llvm/Target/Target.td @@ -1114,6 +1114,14 @@ let isAsCheapAsAMove = 1; let hasNoSchedulingInfo = 0; } +def TCOPY : StandardPseudoInstruction { + let OutOperandList = (outs unknown:$dst); + let InOperandList = (ins unknown:$src); + let AsmString = ""; + let hasSideEffects = 0; + let isAsCheapAsAMove = 1; + let isTerminator = 1; +} def BUNDLE : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins variable_ops); diff --git a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp --- a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -133,7 +133,6 @@ } bool ExpandPostRA::LowerCopy(MachineInstr *MI) { - if (MI->allDefsAreDead()) { LLVM_DEBUG(dbgs() << "dead copy: " << *MI); MI->setDesc(TII->get(TargetOpcode::KILL)); @@ -188,13 +187,12 @@ bool MadeChange = false; - for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end(); - mbbi != mbbe; ++mbbi) { - for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end(); - mi != me;) { - MachineInstr &MI = *mi; + for (auto &MBB : MF) { + for (MachineBasicBlock::iterator MII = MBB.begin(), ME = MBB.end(); + MII != ME;) { + MachineInstr &MI = *MII; // Advance iterator here because MI may be erased. - ++mi; + ++MII; // Only expand pseudos. if (!MI.isPseudo()) @@ -212,6 +210,7 @@ MadeChange |= LowerSubregToReg(&MI); break; case TargetOpcode::COPY: + case TargetOpcode::TCOPY: MadeChange |= LowerCopy(&MI); break; case TargetOpcode::DBG_VALUE: diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -842,8 +842,8 @@ /// Sink an instruction and its associated debug instructions. 
static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo, MachineBasicBlock::iterator InsertPos, + const TargetInstrInfo *TII, SmallVectorImpl<MachineInstr *> &DbgValuesToSink) { - // If we cannot find a location to use (merge with), then we erase the debug // location to prevent debug-info driven tools from potentially reporting // wrong location information. @@ -858,15 +858,17 @@ SuccToSinkTo.splice(InsertPos, ParentBlock, MI, ++MachineBasicBlock::iterator(MI)); + // The copy no longer needs to be a terminator, so convert it to a normal + // COPY. + if (MI.getOpcode() == TargetOpcode::TCOPY) + MI.setDesc(TII->get(TargetOpcode::COPY)); + // Sink a copy of debug users to the insert position. Mark the original // DBG_VALUE location as 'undef', indicating that any earlier variable // location should be terminated as we've optimised away the value at this // point. - for (SmallVectorImpl<MachineInstr *>::iterator DBI = DbgValuesToSink.begin(), - DBE = DbgValuesToSink.end(); - DBI != DBE; ++DBI) { - MachineInstr *DbgMI = *DBI; - MachineInstr *NewDbgMI = DbgMI->getMF()->CloneMachineInstr(*DBI); + for (auto *DbgMI : DbgValuesToSink) { + MachineInstr *NewDbgMI = DbgMI->getMF()->CloneMachineInstr(DbgMI); SuccToSinkTo.insert(InsertPos, NewDbgMI); if (!attemptDebugCopyProp(MI, *DbgMI)) @@ -891,6 +893,11 @@ if (MI.isConvergent()) return false; + // Sink TCOPY instructions after register allocation to avoid mucking with + // live-ins. + if (MI.getOpcode() == TargetOpcode::TCOPY) + return false; + // Don't break implicit null checks. This is a performance heuristic, and not // required for correctness. 
if (SinkingPreventsImplicitNullCheck(MI, TII, TRI)) @@ -1017,7 +1024,7 @@ if (MI.getMF()->getFunction().getSubprogram() && MI.isCopy()) SalvageUnsunkDebugUsersOfCopy(MI, SuccToSinkTo); - performSink(MI, *SuccToSinkTo, InsertPos, DbgUsersToSink); + performSink(MI, *SuccToSinkTo, InsertPos, TII, DbgUsersToSink); // Conservatively, clear any kill flags, since it's possible that they are no // longer correct. @@ -1380,12 +1387,13 @@ // block. clearKillFlags(MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI); MachineBasicBlock::iterator InsertPos = SuccBB->getFirstNonPHI(); - performSink(*MI, *SuccBB, InsertPos, DbgValsToSink); + performSink(*MI, *SuccBB, InsertPos, TII, DbgValsToSink); updateLiveIn(MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy); Changed = true; ++NumPostRACopySink; } + return Changed; } diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -832,15 +832,23 @@ lastIndex = idx; } - // Ensure non-terminators don't follow terminators. - // Ignore predicated terminators formed by if conversion. + // Ensure non-terminators don't follow terminators. Ignore predicated + // terminators formed by if conversion. + // // FIXME: If conversion shouldn't need to violate this rule. if (MI->isTerminator() && !TII->isPredicated(*MI)) { if (!FirstTerminator) FirstTerminator = MI; } else if (FirstTerminator && !MI->isDebugEntryValue()) { - report("Non-terminator instruction after the first terminator", MI); - errs() << "First terminator was:\t" << *FirstTerminator; + // Ignore stack dumps after a terminator at -O0. These are most likely from + // a TCOPY. 
+ if (TM->getOptLevel() == CodeGenOpt::None && MI->mayStore()) { + if (!FirstTerminator) + FirstTerminator = MI; + } else { + report("Non-terminator instruction after the first terminator", MI); + errs() << "First terminator was:\t" << *FirstTerminator; + } } } diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -174,8 +174,13 @@ } else { // Create the reg, emit the copy. VRBase = MRI->createVirtualRegister(DstRC); - BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), - VRBase).addReg(SrcReg); + const MachineBasicBlock::iterator Term = MBB->getFirstTerminator(); + unsigned TgtOpc = + Term != MBB->end() && Term->getOpcode() == TargetOpcode::INLINEASM_BR + ? TargetOpcode::TCOPY + : TargetOpcode::COPY; + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TgtOpc), VRBase) + .addReg(SrcReg); } SDValue Op(Node, ResNo); diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -1030,44 +1030,34 @@ // Split after an INLINEASM_BR block with outputs. This allows us to keep the // copy to/from register instructions from being between two terminator // instructions, which causes the machine instruction verifier agita. - auto TI = llvm::find_if(*BB, [](const MachineInstr &MI){ - return MI.getOpcode() == TargetOpcode::INLINEASM_BR; + auto Iter = llvm::find_if(*BB, [](MachineInstr &MI) { + MachineBasicBlock::iterator Next(MI); + Next = std::next(Next); + return Next != MI.getParent()->end() && + (MI.getOpcode() == TargetOpcode::TCOPY || + MI.getOpcode() == TargetOpcode::INLINEASM_BR) && + Next->getOpcode() == TargetOpcode::COPY; }); - auto SplicePt = TI != BB->end() ? 
std::next(TI) : BB->end(); - if (TI != BB->end() && SplicePt != BB->end() && - TI->getOpcode() == TargetOpcode::INLINEASM_BR && - SplicePt->getOpcode() == TargetOpcode::COPY) { - MachineBasicBlock *FallThrough = BB->getFallThrough(); - if (!FallThrough) + if (Iter != BB->end()) { + MachineBasicBlock *DefaultTarget = BB->getFallThrough(); + if (!DefaultTarget) for (const MachineOperand &MO : BB->back().operands()) if (MO.isMBB()) { - FallThrough = MO.getMBB(); + DefaultTarget = MO.getMBB(); break; } - assert(FallThrough && "Cannot find default dest block for callbr!"); + assert(DefaultTarget && "Cannot find default dest block for callbr!"); MachineBasicBlock *CopyBB = MF.CreateMachineBasicBlock(BB->getBasicBlock()); MachineFunction::iterator BBI(*BB); MF.insert(++BBI, CopyBB); - CopyBB->splice(CopyBB->begin(), BB, SplicePt, BB->end()); + CopyBB->splice(CopyBB->begin(), BB, std::next(Iter), BB->end()); CopyBB->setInlineAsmBrDefaultTarget(); - CopyBB->addSuccessor(FallThrough, BranchProbability::getOne()); + CopyBB->addSuccessor(DefaultTarget, BranchProbability::getOne()); BB->addSuccessor(CopyBB, BranchProbability::getOne()); - // Mark all physical registers defined in the original block as being live - // on entry to the copy block. - for (const auto &MI : *CopyBB) - for (const MachineOperand &MO : MI.operands()) - if (MO.isReg()) { - Register reg = MO.getReg(); - if (Register::isPhysicalRegister(reg)) { - CopyBB->addLiveIn(reg); - break; - } - } - // Bit of a hack: The copy block we created here exists only because we want // the CFG to work with the current system. 
However, the successors to the // block with the INLINEASM_BR instruction expect values to come from *that* diff --git a/llvm/test/CodeGen/AArch64/callbr-asm-label.ll b/llvm/test/CodeGen/AArch64/callbr-asm-label.ll --- a/llvm/test/CodeGen/AArch64/callbr-asm-label.ll +++ b/llvm/test/CodeGen/AArch64/callbr-asm-label.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-linux-gnu -O0 | FileCheck %s --check-prefix=CHECK-O0 @X = common local_unnamed_addr global i32 0, align 4 @@ -9,6 +10,13 @@ ; CHECK-LABEL: .Ltmp0: ; CHECK-LABEL: .LBB0_1: // %l_yes ; CHECK-LABEL: .LBB0_2: // %cleanup + +; CHECK-O0-LABEL: test1: +; CHECK-O0: .word b +; CHECK-O0-NEXT: .word .Ltmp1 +; CHECK-O0-LABEL: .Ltmp1: +; CHECK-O0-LABEL: .LBB0_1: // %l_yes +; CHECK-O0-LABEL: .LBB0_2: // %cleanup entry: callbr void asm sideeffect "1:\0A\09.word b, ${0:l}\0A\09", "X"(i8* blockaddress(@test1, %l_yes)) to label %cleanup [label %l_yes] @@ -22,7 +30,8 @@ } define void @test2() { -; CHECK-LABEL: test2: +; CHECK-LABEL: test2: +; CHECK-O0-LABEL: test2: entry: %0 = load i32, i32* @X, align 4 %and = and i32 %0, 1 @@ -34,6 +43,11 @@ ; CHECK-NEXT: .word .Ltmp2 ; CHECK-LABEL: .Ltmp2: ; CHECK-NEXT: .LBB1_3: // %if.end6 + +; CHECK-O0: .word b +; CHECK-O0-NEXT: .word .Ltmp3 +; CHECK-O0-LABEL: .Ltmp3: +; CHECK-O0-NEXT: .LBB1_3: // %if.end6 callbr void asm sideeffect "1:\0A\09.word b, ${0:l}\0A\09", "X"(i8* blockaddress(@test2, %if.end6)) to label %if.then4 [label %if.end6] @@ -50,6 +64,9 @@ if.then9: ; CHECK-LABEL: .Ltmp4: ; CHECK-NEXT: .LBB1_5: // %l_yes + +; CHECK-O0-LABEL: .Ltmp5: +; CHECK-O0-NEXT: .LBB1_6: // %l_yes callbr void asm sideeffect "", "X"(i8* blockaddress(@test2, %l_yes)) to label %if.end10 [label %l_yes] diff --git a/llvm/test/CodeGen/SystemZ/asm-20.ll b/llvm/test/CodeGen/SystemZ/asm-20.ll --- a/llvm/test/CodeGen/SystemZ/asm-20.ll +++ b/llvm/test/CodeGen/SystemZ/asm-20.ll @@ -1,6 +1,7 @@ ; Test that asm goto can be compiled. 
; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -O0 define i32 @c() { entry: diff --git a/llvm/test/CodeGen/X86/callbr-asm-label-addr.ll b/llvm/test/CodeGen/X86/callbr-asm-label-addr.ll --- a/llvm/test/CodeGen/X86/callbr-asm-label-addr.ll +++ b/llvm/test/CodeGen/X86/callbr-asm-label-addr.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -O0 | FileCheck %s --check-prefix=CHECK-O0 define i32 @test1(i32 %x) { ; CHECK-LABEL: test1: @@ -9,6 +10,16 @@ ; CHECK-NEXT: callq foo ; CHECK-LABEL: .Ltmp0: ; CHECK-NEXT: # %bb.2: # %baz + +; CHECK-O0-LABEL: test1: +; CHECK-O0: .quad .Ltmp0 +; CHECK-O0-NEXT: .quad .Ltmp1 +; CHECK-O0-LABEL: .Ltmp1: +; CHECK-O0-LABEL: .LBB0_2: # %bar +; CHECK-O0-NEXT: movl +; CHECK-O0-NEXT: callq foo +; CHECK-O0-LABEL: .Ltmp0: +; CHECK-O0-NEXT: # %bb.3: # %baz entry: callbr void asm sideeffect ".quad ${0:l}\0A\09.quad ${1:l}", "i,X,~{dirflag},~{fpsr},~{flags}"(i8* blockaddress(@test1, %baz), i8* blockaddress(@test1, %bar)) to label %asm.fallthrough [label %bar]