diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -170,9 +170,6 @@ // Indicate that this basic block ends a section. bool IsEndSection = false; - /// Default target of the callbr of a basic block. - bool InlineAsmBrDefaultTarget = false; - /// List of indirect targets of the callbr of a basic block. SmallPtrSet InlineAsmBrIndirectTargets; @@ -484,22 +481,8 @@ InlineAsmBrIndirectTargets.insert(Tgt); } - /// Transfers indirect targets to INLINEASM_BR's copy block. - void transferInlineAsmBrIndirectTargets(MachineBasicBlock *CopyBB) { - for (auto *Target : InlineAsmBrIndirectTargets) - CopyBB->addInlineAsmBrIndirectTarget(Target); - return InlineAsmBrIndirectTargets.clear(); - } - - /// Returns true if this is the default dest of an INLINEASM_BR. - bool isInlineAsmBrDefaultTarget() const { - return InlineAsmBrDefaultTarget; - } - - /// Indicates if this is the default deft of an INLINEASM_BR. - void setInlineAsmBrDefaultTarget() { - InlineAsmBrDefaultTarget = true; - } + /// Returns the default destination of an INLINEASM_BR instruction. + MachineBasicBlock *getInlineAsmBrDefaultTarget(); /// Returns true if it is legal to hoist instructions into this block. bool isLegalToHoistInto() const; diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -1128,7 +1128,8 @@ } bool isCopy() const { - return getOpcode() == TargetOpcode::COPY; + return getOpcode() == TargetOpcode::COPY || + getOpcode() == TargetOpcode::TCOPY; } bool isFullCopy() const { diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -72,7 +72,7 @@ /// virtual registers have been created for all the instructions, and it's /// only needed in cases where the register classes implied by the /// instructions are insufficient. It is emitted as a COPY MachineInstr. - HANDLE_TARGET_OPCODE(COPY_TO_REGCLASS) +HANDLE_TARGET_OPCODE(COPY_TO_REGCLASS) /// DBG_VALUE - a mapping of the llvm.dbg.value intrinsic HANDLE_TARGET_OPCODE(DBG_VALUE) @@ -91,11 +91,17 @@ /// e.g. v1027 = REG_SEQUENCE v1024, 3, v1025, 4, v1026, 5 /// After register coalescing references of v1024 should be replace with /// v1027:3, v1025 with v1027:4, etc. - HANDLE_TARGET_OPCODE(REG_SEQUENCE) +HANDLE_TARGET_OPCODE(REG_SEQUENCE) /// COPY - Target-independent register copy. This instruction can also be /// used to copy between subregisters of virtual registers. - HANDLE_TARGET_OPCODE(COPY) +HANDLE_TARGET_OPCODE(COPY) + +/// TCOPY - This instruction is the terminator version of COPY. The purpose +/// is to allow copies from terminators to be properly represented (e.g. an +/// INLINEASM_BR that defines a physical register) without having +/// to introduce "live-ins" for physical registers before register allocation. +HANDLE_TARGET_OPCODE(TCOPY) /// BUNDLE - This instruction represents an instruction bundle. Instructions /// which immediately follow a BUNDLE instruction which are marked with diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td --- a/llvm/include/llvm/Target/Target.td +++ b/llvm/include/llvm/Target/Target.td @@ -1121,6 +1121,14 @@ let isAsCheapAsAMove = 1; let hasNoSchedulingInfo = 0; } +def TCOPY : StandardPseudoInstruction { + let OutOperandList = (outs unknown:$dst); + let InOperandList = (ins unknown:$src); + let AsmString = ""; + let hasSideEffects = 0; + let isAsCheapAsAMove = 1; + let isTerminator = 1; +} def BUNDLE : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins variable_ops); diff --git a/llvm/lib/CodeGen/DetectDeadLanes.cpp b/llvm/lib/CodeGen/DetectDeadLanes.cpp --- a/llvm/lib/CodeGen/DetectDeadLanes.cpp +++ b/llvm/lib/CodeGen/DetectDeadLanes.cpp @@ -140,6 +140,7 @@ // are not lowered to a COPY. switch (MI.getOpcode()) { case TargetOpcode::COPY: + case TargetOpcode::TCOPY: case TargetOpcode::PHI: case TargetOpcode::INSERT_SUBREG: case TargetOpcode::REG_SEQUENCE: @@ -235,6 +236,7 @@ switch (MI.getOpcode()) { case TargetOpcode::COPY: + case TargetOpcode::TCOPY: case TargetOpcode::PHI: return UsedLanes; case TargetOpcode::REG_SEQUENCE: { @@ -337,6 +339,7 @@ break; } case TargetOpcode::COPY: + case TargetOpcode::TCOPY: case TargetOpcode::PHI: break; default: diff --git a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp --- a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -33,6 +33,8 @@ const TargetRegisterInfo *TRI; const TargetInstrInfo *TII; + MachineBasicBlock *TCopyDestBlock; + public: static char ID; // Pass identification, replacement for typeid ExpandPostRA() : MachineFunctionPass(ID) {} @@ -133,7 +135,6 @@ } bool ExpandPostRA::LowerCopy(MachineInstr *MI) { - if (MI->allDefsAreDead()) { LLVM_DEBUG(dbgs() << "dead copy: " << *MI); MI->setDesc(TII->get(TargetOpcode::KILL)); @@ -163,8 +164,14 @@ } LLVM_DEBUG(dbgs() << "real copy: " << *MI); - TII->copyPhysReg(*MI->getParent(), MI, MI->getDebugLoc(), - DstMO.getReg(), SrcMO.getReg(), SrcMO.isKill()); + MachineBasicBlock *CopyBlock = MI->getParent(); + MachineBasicBlock::iterator MII(MI); + if (MI->getOpcode() == TargetOpcode::TCOPY) { + CopyBlock = TCopyDestBlock; + MII = TCopyDestBlock->getFirstTerminator(); + } + TII->copyPhysReg(*CopyBlock, MII, MI->getDebugLoc(), DstMO.getReg(), + SrcMO.getReg(), SrcMO.isKill()); if (MI->getNumOperands() > 2) TransferImplicitOperands(MI); @@ -188,13 +195,11 @@ bool MadeChange = false; - for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end(); - mbbi != mbbe; ++mbbi) { - for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end(); - mi != me;) { - MachineInstr &MI = *mi; + for (auto &MBB : MF) { + for (auto MII = MBB.begin(), ME = MBB.end(); MII != ME;) { + MachineInstr &MI = *MII; // Advance iterator here because MI may be erased. - ++mi; + ++MII; // Only expand pseudos. if (!MI.isPseudo()) @@ -208,10 +213,22 @@ // Expand standard pseudos. switch (MI.getOpcode()) { + case TargetOpcode::INLINEASM_BR: { + MachineBasicBlock::iterator Next(MI); + Next = detail::next_or_end(Next, MBB.end()); + if (Next == MBB.end() || Next->getOpcode() != TargetOpcode::TCOPY) + break; + + // Find the destination for any TCOPY instructions to sink into. + TCopyDestBlock = MBB.getInlineAsmBrDefaultTarget(); + assert(TCopyDestBlock && "Cannot find default dest block for callbr!"); + break; + } case TargetOpcode::SUBREG_TO_REG: MadeChange |= LowerSubregToReg(&MI); break; case TargetOpcode::COPY: + case TargetOpcode::TCOPY: MadeChange |= LowerCopy(&MI); break; case TargetOpcode::DBG_VALUE: diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -73,7 +73,7 @@ return false; } bool CombinerHelper::matchCombineCopy(MachineInstr &MI) { - if (MI.getOpcode() != TargetOpcode::COPY) + if (!MI.isCopy()) return false; Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); diff --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp --- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp @@ -157,6 +157,7 @@ Depth); break; case TargetOpcode::COPY: + case TargetOpcode::TCOPY: case TargetOpcode::G_PHI: case TargetOpcode::PHI: { Known.One = APInt::getAllOnesValue(BitWidth); @@ -191,7 +192,7 @@ MRI.getType(SrcReg).isValid()) { // For COPYs we don't do anything, don't increase the depth. computeKnownBitsImpl(SrcReg, Known2, DemandedElts, - Depth + (Opcode != TargetOpcode::COPY)); + Depth + !MI.isCopy()); Known.One &= Known2.One; Known.Zero &= Known2.Zero; // If we reach a point where we don't know anything @@ -435,7 +436,8 @@ unsigned FirstAnswer = 1; switch (Opcode) { - case TargetOpcode::COPY: { + case TargetOpcode::COPY: + case TargetOpcode::TCOPY: { MachineOperand &Src = MI.getOperand(1); if (Src.getReg().isVirtual() && Src.getSubReg() == 0 && MRI.getType(Src.getReg()).isValid()) { diff --git a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp --- a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp +++ b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp @@ -165,7 +165,7 @@ ReachedBegin = true; else --MII; - if (MI.getOpcode() != TargetOpcode::COPY) + if (!MI.isCopy()) continue; Register SrcReg = MI.getOperand(1).getReg(); Register DstReg = MI.getOperand(0).getReg(); diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -1007,6 +1007,7 @@ break; } case TargetOpcode::COPY: + case TargetOpcode::TCOPY: assert(DstOps.size() == 1 && "Invalid Dst"); // If the caller wants to add a subreg source it has to be done separately // so we may not have any SrcOps at this point yet. diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -291,6 +291,7 @@ VReg = MI->getOperand(1).getReg(); break; case TargetOpcode::COPY: + case TargetOpcode::TCOPY: VReg = MI->getOperand(1).getReg(); if (Register::isPhysicalRegister(VReg)) return None; @@ -352,7 +353,7 @@ auto DstTy = MRI.getType(DefMI->getOperand(0).getReg()); if (!DstTy.isValid()) return None; - while (DefMI->getOpcode() == TargetOpcode::COPY) { + while (DefMI->isCopy()) { Register SrcReg = DefMI->getOperand(1).getReg(); auto SrcTy = MRI.getType(SrcReg); if (!SrcTy.isValid() || SrcTy != DstTy) diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -271,6 +271,31 @@ return false; } +MachineBasicBlock *MachineBasicBlock::getInlineAsmBrDefaultTarget() { + if (llvm::none_of(terminators(), [](const MachineInstr &Term) { + return Term.getOpcode() == TargetOpcode::INLINEASM_BR; + })) + return nullptr; + + MachineBasicBlock *DefaultTarget = nullptr; + for (auto Succ : successors()) + if (!isInlineAsmBrIndirectTarget(Succ)) { + DefaultTarget = Succ; + break; + } + if (!DefaultTarget) { + const auto &Br = back(); + if (Br.isUnconditionalBranch()) { + for (const MachineOperand &MO : Br.operands()) + if (MO.isMBB()) { + DefaultTarget = MO.getMBB(); + break; + } + } + } + return DefaultTarget; +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void MachineBasicBlock::dump() const { print(dbgs()); diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -1192,7 +1192,7 @@ return false; } - if (isPosition() || isDebugInstr() || isTerminator() || + if (isPosition() || isDebugInstr() || (isTerminator() && !isCopy()) || mayRaiseFPException() || hasUnmodeledSideEffects()) return false; diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -838,6 +838,7 @@ /// Sink an instruction and its associated debug instructions. static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo, MachineBasicBlock::iterator InsertPos, + const TargetInstrInfo *TII, SmallVectorImpl &DbgValuesToSink) { // If we cannot find a location to use (merge with), then we erase the debug @@ -854,15 +855,17 @@ SuccToSinkTo.splice(InsertPos, ParentBlock, MI, ++MachineBasicBlock::iterator(MI)); + // The copy no longer needs to be a terminator, so convert it to a normal + // COPY. + if (MI.getOpcode() == TargetOpcode::TCOPY) + MI.setDesc(TII->get(TargetOpcode::COPY)); + // Sink a copy of debug users to the insert position. Mark the original // DBG_VALUE location as 'undef', indicating that any earlier variable // location should be terminated as we've optimised away the value at this // point. - for (SmallVectorImpl::iterator DBI = DbgValuesToSink.begin(), - DBE = DbgValuesToSink.end(); - DBI != DBE; ++DBI) { - MachineInstr *DbgMI = *DBI; - MachineInstr *NewDbgMI = DbgMI->getMF()->CloneMachineInstr(*DBI); + for (auto *DbgMI : DbgValuesToSink) { + MachineInstr *NewDbgMI = DbgMI->getMF()->CloneMachineInstr(DbgMI); SuccToSinkTo.insert(InsertPos, NewDbgMI); if (!attemptDebugCopyProp(MI, *DbgMI)) @@ -887,6 +890,11 @@ if (MI.isConvergent()) return false; + // Sink TCOPY instructions after register allocation to avoid mucking with + // live-ins. + if (MI.getOpcode() == TargetOpcode::TCOPY) + return false; + // Don't break implicit null checks. This is a performance heuristic, and not // required for correctness. if (SinkingPreventsImplicitNullCheck(MI, TII, TRI)) @@ -1013,7 +1021,7 @@ if (MI.getMF()->getFunction().getSubprogram() && MI.isCopy()) SalvageUnsunkDebugUsersOfCopy(MI, SuccToSinkTo); - performSink(MI, *SuccToSinkTo, InsertPos, DbgUsersToSink); + performSink(MI, *SuccToSinkTo, InsertPos, TII, DbgUsersToSink); // Conservatively, clear any kill flags, since it's possible that they are no // longer correct. @@ -1376,7 +1384,7 @@ // block. clearKillFlags(MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI); MachineBasicBlock::iterator InsertPos = SuccBB->getFirstNonPHI(); - performSink(*MI, *SuccBB, InsertPos, DbgValsToSink); + performSink(*MI, *SuccBB, InsertPos, TII, DbgValsToSink); updateLiveIn(MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy); Changed = true; diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -590,7 +590,6 @@ // it is an entry block or landing pad. for (const auto &LI : MBB->liveins()) { if (isAllocatable(LI.PhysReg) && !MBB->isEHPad() && - !MBB->isInlineAsmBrDefaultTarget() && MBB->getIterator() != MBB->getParent()->begin()) { report("MBB has allocatable live-in, but isn't entry or landing-pad.", MBB); report_context(LI.PhysReg); @@ -1503,6 +1502,19 @@ // Verify properties of various specific instruction types switch (MI->getOpcode()) { + case TargetOpcode::TCOPY: { + MachineBasicBlock::const_iterator MII(MI), MIE = MI->getParent()->end(); + for (; MII != MIE; ++MII) { + if (MII->getOpcode() != TargetOpcode::COPY) + continue; + report("TCOPY and COPY instructions are intermixed", &*MII); + errs() << "- TCOPY instruction: "; + if (Indexes && Indexes->hasIndex(*MI)) + errs() << Indexes->getInstructionIndex(*MI) << '\t'; + MI->print(errs(), /*SkipOpers=*/true); + } + LLVM_FALLTHROUGH; + } case TargetOpcode::COPY: { if (foundErrors) break; diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp --- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -1094,6 +1094,7 @@ default: return nullptr; case TargetOpcode::COPY: + case TargetOpcode::TCOPY: return new CopyRewriter(MI); case TargetOpcode::INSERT_SUBREG: return new InsertSubregRewriter(MI); diff --git a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp --- a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp +++ b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp @@ -512,7 +512,7 @@ static bool mayHaveSideEffects(MachineInstr &MI) { return MI.mayLoadOrStore() || MI.mayRaiseFPException() || - MI.hasUnmodeledSideEffects() || MI.isTerminator() || + MI.hasUnmodeledSideEffects() || (MI.isTerminator() && !MI.isCopy()) || MI.isCall() || MI.isBarrier() || MI.isBranch() || MI.isReturn(); } diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -82,6 +82,7 @@ /// Everything we know about a live virtual register. struct LiveReg { + MachineInstr *OrigMI = nullptr; ///< Instr the LiveReg comes from. MachineInstr *LastUse = nullptr; ///< Last instr to use reg. Register VirtReg; ///< Virtual register number. MCPhysReg PhysReg = 0; ///< Currently held here. @@ -443,7 +444,8 @@ continue; if (OnlyLiveOut && !mayLiveOut(LR.VirtReg)) continue; - spillVirtReg(MI, LR); + spillVirtReg(LR.OrigMI->getOpcode() == TargetOpcode::TCOPY ? LR.OrigMI : MI, + LR); } LiveVirtRegs.clear(); } @@ -798,6 +800,7 @@ addKillFlag(*LRI); } assert(LRI->PhysReg && "Register not assigned"); + LRI->OrigMI = &MI; LRI->LastUse = &MI; LRI->LastOpNum = OpNum; LRI->Dirty = true; @@ -844,6 +847,7 @@ MO.setIsDead(false); } assert(LRI->PhysReg && "Register not assigned"); + LRI->OrigMI = &MI; LRI->LastUse = &MI; LRI->LastOpNum = OpNum; markRegUsedInInstr(LRI->PhysReg); diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -175,8 +175,19 @@ } else { // Create the reg, emit the copy. VRBase = MRI->createVirtualRegister(DstRC); - BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), - VRBase).addReg(SrcReg); + + // FIXME: The predicate to determine whether an instruction is a COPY or + // TCOPY should be generic. At this time though the criteria isn't + // well-known except for INLINEASM_BR instructions. + unsigned TgtOpc = + llvm::any_of(MBB->terminators(), + [](const MachineInstr &Term) { + return Term.getOpcode() == TargetOpcode::INLINEASM_BR; + }) + ? TargetOpcode::TCOPY + : TargetOpcode::COPY; + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TgtOpc), VRBase) + .addReg(SrcReg); } SDValue Op(Node, ResNo); @@ -1010,8 +1021,15 @@ if (SrcReg == DestReg) // Coalesced away the copy? Ignore. break; - BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), - DestReg).addReg(SrcReg); + unsigned TgtOpc = + llvm::any_of(MBB->terminators(), + [](const MachineInstr &Term) { + return Term.getOpcode() == TargetOpcode::INLINEASM_BR; + }) + ? TargetOpcode::TCOPY + : TargetOpcode::COPY; + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TgtOpc), DestReg) + .addReg(SrcReg); break; } case ISD::CopyFromReg: { diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -1028,53 +1028,35 @@ } } - // Split after an INLINEASM_BR block with outputs. This allows us to keep the - // copy to/from register instructions from being between two terminator - // instructions, which causes the machine instruction verifier agita. - auto TI = llvm::find_if(*BB, [](const MachineInstr &MI){ - return MI.getOpcode() == TargetOpcode::INLINEASM_BR; + // Split after an INLINEASM_BR block with outputs. This gives us a place to + // store output values. + auto InlineAsmBr = llvm::find_if(BB->terminators(), [](MachineInstr &term) { + return term.getOpcode() == TargetOpcode::INLINEASM_BR; }); - auto SplicePt = TI != BB->end() ? std::next(TI) : BB->end(); - if (TI != BB->end() && SplicePt != BB->end() && - TI->getOpcode() == TargetOpcode::INLINEASM_BR && - SplicePt->getOpcode() == TargetOpcode::COPY) { - MachineBasicBlock *FallThrough = BB->getFallThrough(); - if (!FallThrough) - for (const MachineOperand &MO : BB->back().operands()) - if (MO.isMBB()) { - FallThrough = MO.getMBB(); - break; - } - assert(FallThrough && "Cannot find default dest block for callbr!"); + auto TermIter = detail::next_or_end(InlineAsmBr, BB->end()); + if (InlineAsmBr != BB->end() && TermIter != BB->end() && + TermIter->getOpcode() == TargetOpcode::TCOPY) { + do { + ++TermIter; + } while (TermIter != BB->end() && + TermIter->getOpcode() == TargetOpcode::TCOPY); + + MachineBasicBlock *DefaultTarget = BB->getInlineAsmBrDefaultTarget(); + assert(DefaultTarget && "Cannot find default dest block for callbr!"); MachineBasicBlock *CopyBB = MF.CreateMachineBasicBlock(BB->getBasicBlock()); MachineFunction::iterator BBI(*BB); MF.insert(++BBI, CopyBB); + if (TermIter != BB->end()) + CopyBB->splice(CopyBB->begin(), BB, TermIter, BB->end()); - CopyBB->splice(CopyBB->begin(), BB, SplicePt, BB->end()); - CopyBB->setInlineAsmBrDefaultTarget(); - - CopyBB->addSuccessor(FallThrough, BranchProbability::getOne()); - BB->removeSuccessor(FallThrough); + CopyBB->addSuccessor(DefaultTarget, BranchProbability::getOne()); + BB->removeSuccessor(DefaultTarget); BB->addSuccessor(CopyBB, BranchProbability::getOne()); - // Mark all physical registers defined in the original block as being live - // on entry to the copy block. - for (const auto &MI : *CopyBB) - for (const MachineOperand &MO : MI.operands()) - if (MO.isReg()) { - Register reg = MO.getReg(); - if (Register::isPhysicalRegister(reg)) { - CopyBB->addLiveIn(reg); - break; - } - } - CopyBB->normalizeSuccProbs(); BB->normalizeSuccProbs(); - BB->transferInlineAsmBrIndirectTargets(CopyBB); - InsertPos = CopyBB->end(); return CopyBB; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2865,7 +2865,6 @@ // Retrieve successors. MachineBasicBlock *Return = FuncInfo.MBBMap[I.getDefaultDest()]; - Return->setInlineAsmBrDefaultTarget(); // Update successor info. addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne()); diff --git a/llvm/lib/Target/AArch64/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/AArch64CallLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64CallLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64CallLowering.cpp @@ -652,7 +652,7 @@ // registers. Note that getDefIgnoringCopies does not ignore copies from // physical registers. MachineInstr *RegDef = getDefIgnoringCopies(OutInfo.Regs[0], MRI); - if (!RegDef || RegDef->getOpcode() != TargetOpcode::COPY) { + if (!RegDef || !RegDef->isCopy()) { LLVM_DEBUG( dbgs() << "... Parameter was not copied into a VReg, cannot tail call.\n"); diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp --- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp +++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp @@ -4547,7 +4547,7 @@ // have emitted a zero-extending load, but we need a sign-extending load. bool IsZExt = isa(I); const auto *LoadMI = MI; - if (LoadMI->getOpcode() == TargetOpcode::COPY && + if (LoadMI->isCopy() && LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) { Register LoadReg = MI->getOperand(1).getReg(); LoadMI = MRI.getUniqueVRegDef(LoadReg); @@ -4571,8 +4571,7 @@ .addImm(AArch64::sub_32); Reg = Reg64; } else { - assert((MI->getOpcode() == TargetOpcode::COPY && - MI->getOperand(1).getSubReg() == AArch64::sub_32) && + assert((MI->isCopy() && MI->getOperand(1).getSubReg() == AArch64::sub_32) && "Expected copy instruction"); Reg = MI->getOperand(1).getReg(); MachineBasicBlock::iterator I(MI); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -719,9 +719,8 @@ } if (Subtarget.hasZeroCycleZeroingGP()) { - if (Opcode == TargetOpcode::COPY && - (MI.getOperand(1).getReg() == AArch64::WZR || - MI.getOperand(1).getReg() == AArch64::XZR)) + if (MI.isCopy() && (MI.getOperand(1).getReg() == AArch64::WZR || + MI.getOperand(1).getReg() == AArch64::XZR)) return true; } @@ -1616,6 +1615,7 @@ case AArch64::ANDXri: return MI.getOperand(1).getReg() == AArch64::XZR; case TargetOpcode::COPY: + case TargetOpcode::TCOPY: return MI.getOperand(1).getReg() == AArch64::WZR; } return false; @@ -1627,7 +1627,8 @@ switch (MI.getOpcode()) { default: break; - case TargetOpcode::COPY: { + case TargetOpcode::COPY: + case TargetOpcode::TCOPY: { // GPR32 copies will by lowered to ORRXrs Register DstReg = MI.getOperand(0).getReg(); return (AArch64::GPR32RegClass.contains(DstReg) || @@ -1657,7 +1658,8 @@ switch (MI.getOpcode()) { default: break; - case TargetOpcode::COPY: { + case TargetOpcode::COPY: + case TargetOpcode::TCOPY: { // FPR64 copies will by lowered to ORR.16b Register DstReg = MI.getOperand(0).getReg(); return (AArch64::FPR64RegClass.contains(DstReg) || diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp --- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -3873,11 +3873,11 @@ // We can skip over G_TRUNC since the condition is 1-bit. // Truncating/extending can have no impact on the value. unsigned Opc = CondDef->getOpcode(); - if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC) + if (!CondDef->isCopy() && Opc != TargetOpcode::G_TRUNC) break; // Can't see past copies from physregs. - if (Opc == TargetOpcode::COPY && + if (CondDef->isCopy() && Register::isPhysicalRegister(CondDef->getOperand(1).getReg())) return false; @@ -5495,6 +5495,7 @@ default: return true; case TargetOpcode::COPY: + case TargetOpcode::TCOPY: case TargetOpcode::G_BITCAST: case TargetOpcode::G_TRUNC: case TargetOpcode::G_PHI: diff --git a/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp @@ -467,15 +467,13 @@ bool AArch64RegisterBankInfo::hasFPConstraints( const MachineInstr &MI, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const { - unsigned Op = MI.getOpcode(); - // Do we have an explicit floating point instruction? - if (isPreISelGenericFloatingPointOpcode(Op)) + if (isPreISelGenericFloatingPointOpcode(MI.getOpcode())) return true; // No. Check if we have a copy-like instruction. If we do, then we could // still be fed by floating point instructions. - if (Op != TargetOpcode::COPY && !MI.isPHI()) + if (!MI.isCopy() && !MI.isPHI()) return false; // MI is copy-like. Return true if it outputs an FPR. @@ -518,7 +516,7 @@ // Try the default logic for non-generic instructions that are either copies // or already have some operands assigned to banks. - if ((Opc != TargetOpcode::COPY && !isPreISelGenericOpcode(Opc)) || + if ((!MI.isCopy() && !isPreISelGenericOpcode(Opc)) || Opc == TargetOpcode::G_PHI) { const RegisterBankInfo::InstructionMapping &Mapping = getInstrMappingImpl(MI); @@ -569,7 +567,8 @@ &ValMappings[Shift64Imm], 3); return getSameKindOfOperandsMapping(MI); } - case TargetOpcode::COPY: { + case TargetOpcode::COPY: + case TargetOpcode::TCOPY: { Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); // Check if one of the register is not a generic register. diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -3820,8 +3820,7 @@ // Move the COPY of the input reg to the beginning, so that we can use it. for (auto I = BB->begin(); I != &MI; I++) { - if (I->getOpcode() != TargetOpcode::COPY || - I->getOperand(0).getReg() != InputReg) + if (!I->isCopy() || I->getOperand(0).getReg() != InputReg) continue; if (I == FirstMI) { diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -802,7 +802,7 @@ else SubIdx = SubIndices[SubIndices.size() - Idx - 1]; - if (Opcode == TargetOpcode::COPY) { + if (Opcode == TargetOpcode::COPY || Opcode == TargetOpcode::TCOPY) { copyPhysReg(MBB, MI, DL, RI.getSubReg(DestReg, SubIdx), RI.getSubReg(SrcReg, SubIdx), KillSrc); continue; diff --git a/llvm/lib/Target/Hexagon/BitTracker.cpp b/llvm/lib/Target/Hexagon/BitTracker.cpp --- a/llvm/lib/Target/Hexagon/BitTracker.cpp +++ b/llvm/lib/Target/Hexagon/BitTracker.cpp @@ -739,7 +739,8 @@ break; } - case TargetOpcode::COPY: { + case TargetOpcode::COPY: + case TargetOpcode::TCOPY: { // COPY can transfer a smaller register into a wider one. // If that is the case, fill the remaining high bits with 0. RegisterRef RD = MI.getOperand(0); diff --git a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp --- a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp +++ b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp @@ -1308,7 +1308,7 @@ NextI = std::next(I); MachineInstr *MI = &*I; - if (MI->getOpcode() == TargetOpcode::COPY) + if (MI->isCopy()) continue; if (MI->isPHI() || MI->hasUnmodeledSideEffects() || MI->isInlineAsm()) continue; @@ -1652,6 +1652,7 @@ bool CopyPropagation::isCopyReg(unsigned Opc, bool NoConv) { switch (Opc) { case TargetOpcode::COPY: + case TargetOpcode::TCOPY: case TargetOpcode::REG_SEQUENCE: case Hexagon::A4_combineir: case Hexagon::A4_combineri: @@ -1675,6 +1676,7 @@ switch (Opc) { case TargetOpcode::COPY: + case TargetOpcode::TCOPY: case Hexagon::A2_tfr: case Hexagon::A2_tfrp: { BitTracker::RegisterRef RS = MI.getOperand(1); @@ -2713,8 +2715,7 @@ Defs.clear(); HBS::getInstrDefs(*MI, Defs); - unsigned Opc = MI->getOpcode(); - if (Opc == TargetOpcode::COPY || Opc == TargetOpcode::REG_SEQUENCE) + if (MI->isCopy() || MI->getOpcode() == TargetOpcode::REG_SEQUENCE) continue; if (MI->mayStore()) { @@ -2987,6 +2988,7 @@ unsigned Opc = MI->getOpcode(); switch (Opc) { case TargetOpcode::COPY: + case TargetOpcode::TCOPY: case Hexagon::S2_lsr_i_r: case Hexagon::S2_asr_i_r: case Hexagon::S2_asl_i_r: diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp --- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -2091,6 +2091,7 @@ switch (Opc) { case TargetOpcode::COPY: + case TargetOpcode::TCOPY: Changed |= expandCopy(B, I, MRI, HII, NewRegs); break; case Hexagon::STriw_pred: diff --git a/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp b/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp --- a/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp +++ b/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp @@ -212,6 +212,7 @@ switch (Opc) { case Hexagon::C2_tfrpr: case TargetOpcode::COPY: + case TargetOpcode::TCOPY: if (isPredReg(MI->getOperand(1).getReg())) { RegisterSubReg RD = MI->getOperand(0); if (Register::isVirtualRegister(RD.R)) @@ -255,7 +256,7 @@ MachineInstr *DefI = MRI->getVRegDef(Reg.R); assert(DefI); unsigned Opc = DefI->getOpcode(); - if (Opc == Hexagon::C2_tfrpr || Opc == TargetOpcode::COPY) { + if (Opc == Hexagon::C2_tfrpr || DefI->isCopy()) { assert(DefI->getOperand(0).isDef() && DefI->getOperand(1).isUse()); RegisterSubReg PR = DefI->getOperand(1); G2P.insert(std::make_pair(Reg, PR)); @@ -331,7 +332,8 @@ return false; unsigned DefOpc = DefI->getOpcode(); switch (DefOpc) { - case TargetOpcode::COPY: { + case TargetOpcode::COPY: + case TargetOpcode::TCOPY: { const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass; if (MRI->getRegClass(PR.R) != PredRC) return false; @@ -468,7 +470,7 @@ for (MachineBasicBlock &MBB : MF) { for (MachineInstr &MI : MBB) { - if (MI.getOpcode() != TargetOpcode::COPY) + if (!MI.isCopy()) continue; RegisterSubReg DR = MI.getOperand(0); RegisterSubReg SR = MI.getOperand(1); diff --git a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp --- a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -1516,6 +1516,7 @@ unsigned DOpc = DI->getOpcode(); switch (DOpc) { case TargetOpcode::COPY: + case TargetOpcode::TCOPY: case Hexagon::A2_tfrsi: case Hexagon::A2_tfrpi: case Hexagon::CONST32: diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp --- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp @@ -1019,9 +1019,10 @@ } // for (Node : Results) assert(Node.Ty != MVT::Other); - SDNode *ResN = (Node.Opc == TargetOpcode::COPY) - ? Ops.front().getNode() - : DAG.getMachineNode(Node.Opc, dl, Node.Ty, Ops); + SDNode *ResN = + (Node.Opc == TargetOpcode::COPY || Node.Opc == TargetOpcode::TCOPY) + ? Ops.front().getNode() + : DAG.getMachineNode(Node.Opc, dl, Node.Ty, Ops); Output.push_back(SDValue(ResN, 0)); } diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -1031,7 +1031,8 @@ }; switch (Opc) { - case TargetOpcode::COPY: { + case TargetOpcode::COPY: + case TargetOpcode::TCOPY: { MachineOperand &MD = MI.getOperand(0); MachineOperand &MS = MI.getOperand(1); MachineBasicBlock::iterator MBBI = MI.getIterator(); @@ -2359,6 +2360,7 @@ case TargetOpcode::REG_SEQUENCE: case TargetOpcode::IMPLICIT_DEF: case TargetOpcode::COPY: + case TargetOpcode::TCOPY: case TargetOpcode::INLINEASM: case TargetOpcode::PHI: return false; diff --git a/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp --- a/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp +++ b/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp @@ -111,6 +111,7 @@ case TargetOpcode::REG_SEQUENCE: case TargetOpcode::IMPLICIT_DEF: case TargetOpcode::COPY: + case TargetOpcode::TCOPY: case TargetOpcode::INLINEASM: case TargetOpcode::INLINEASM_BR: break; @@ -167,6 +168,7 @@ case TargetOpcode::CFI_INSTRUCTION: case TargetOpcode::EH_LABEL: case TargetOpcode::COPY: + case TargetOpcode::TCOPY: case TargetOpcode::INLINEASM: case TargetOpcode::INLINEASM_BR: break; diff --git a/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp b/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp --- a/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp +++ b/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp @@ -219,8 +219,7 @@ // PHI can be anything after RA. // COPY can remateriaze things in between feeder, compare and nvj. if (MII->getOpcode() == TargetOpcode::KILL || - MII->getOpcode() == TargetOpcode::PHI || - MII->getOpcode() == TargetOpcode::COPY) + MII->getOpcode() == TargetOpcode::PHI || MII->isCopy()) return false; // The following pseudo Hexagon instructions sets "use" and "def" @@ -293,7 +292,7 @@ MachineRegisterInfo &MRI = MF.getRegInfo(); if (secondReg && !Register::isPhysicalRegister(cmpOp2)) { MachineInstr *def = MRI.getVRegDef(cmpOp2); - if (def->getOpcode() == TargetOpcode::COPY) + if (def->isCopy()) return false; } } diff --git a/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp b/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp --- a/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp +++ b/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp @@ -172,6 +172,7 @@ case TargetOpcode::PHI: case TargetOpcode::COPY: + case TargetOpcode::TCOPY: break; case Hexagon::L2_loadrd_io: @@ -322,6 +323,7 @@ return 0; return 10; case TargetOpcode::COPY: + case TargetOpcode::TCOPY: if (MI->getOperand(1).getSubReg() != 0) return 10; return 0; @@ -1002,7 +1004,8 @@ switch (Opc) { case TargetOpcode::PHI: - case TargetOpcode::COPY: { + case TargetOpcode::COPY: + case TargetOpcode::TCOPY: { Register DstR = MI->getOperand(0).getReg(); if (MRI->getRegClass(DstR) == DoubleRC) { createHalfInstr(Opc, MI, PairMap, isub_lo); diff --git a/llvm/lib/Target/Hexagon/RDFCopy.cpp b/llvm/lib/Target/Hexagon/RDFCopy.cpp --- a/llvm/lib/Target/Hexagon/RDFCopy.cpp +++ b/llvm/lib/Target/Hexagon/RDFCopy.cpp @@ -40,7 +40,8 @@ bool CopyPropagation::interpretAsCopy(const MachineInstr *MI, EqualityMap &EM) { unsigned Opc = MI->getOpcode(); switch (Opc) { - case TargetOpcode::COPY: { + case TargetOpcode::COPY: + case TargetOpcode::TCOPY: { const MachineOperand &Dst = MI->getOperand(0); const MachineOperand &Src = MI->getOperand(1); RegisterRef DstR = DFG.makeRegRef(Dst.getReg(), Dst.getSubReg()); diff --git a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp --- a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp +++ b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp @@ -185,7 +185,7 @@ for (MachineInstr &UseMI : MRI.use_instructions(Reg)) { MachineInstr *NonCopyInstr = skipCopiesOutgoing(&UseMI); // Copy with many uses. - if (NonCopyInstr->getOpcode() == TargetOpcode::COPY && + if (NonCopyInstr->isCopy() && !Register::isPhysicalRegister(NonCopyInstr->getOperand(0).getReg())) addDefUses(NonCopyInstr->getOperand(0).getReg(), MRI); else @@ -207,7 +207,7 @@ const MachineFunction &MF = *MI->getParent()->getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); MachineInstr *Ret = MI; - while (Ret->getOpcode() == TargetOpcode::COPY && + while (Ret->isCopy() && !Register::isPhysicalRegister(Ret->getOperand(0).getReg()) && MRI.hasOneUse(Ret->getOperand(0).getReg())) { Ret = &(*MRI.use_instr_begin(Ret->getOperand(0).getReg())); @@ -221,7 +221,7 @@ const MachineFunction &MF = *MI->getParent()->getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); MachineInstr *Ret = MI; - while (Ret->getOpcode() == TargetOpcode::COPY && + while (Ret->isCopy() && !Register::isPhysicalRegister(Ret->getOperand(1).getReg())) Ret = MRI.getVRegDef(Ret->getOperand(1).getReg()); return Ret; @@ -324,7 +324,7 @@ // Determine InstType from register bank of phys register that is // 'isDefUse ? def : use' of this copy. - if (AdjMI->getOpcode() == TargetOpcode::COPY) { + if (AdjMI->isCopy()) { setTypesAccordingToPhysicalRegister(MI, AdjMI, isDefUse ? 0 : 1); return true; } diff --git a/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp b/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp --- a/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -153,6 +153,7 @@ MBB.erase(I); return false; case TargetOpcode::COPY: + case TargetOpcode::TCOPY: if (!expandCopy(MBB, I)) return false; break; diff --git a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp --- a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp @@ -174,7 +174,8 @@ return true; } case NVPTX::nvvm_move_i64: - case TargetOpcode::COPY: { + case TargetOpcode::COPY: + case TargetOpcode::TCOPY: { bool Res = findIndexForHandle(TexHandleDef.getOperand(1), MF, Idx); if (Res) { InstrsToRemove.insert(&TexHandleDef); diff --git a/llvm/lib/Target/X86/X86DomainReassignment.cpp b/llvm/lib/Target/X86/X86DomainReassignment.cpp --- a/llvm/lib/Target/X86/X86DomainReassignment.cpp +++ b/llvm/lib/Target/X86/X86DomainReassignment.cpp @@ -235,7 +235,7 @@ double getExtraCost(const MachineInstr *MI, MachineRegisterInfo *MRI) const override { - assert(MI->getOpcode() == TargetOpcode::COPY && "Expected a COPY"); + assert(MI->isCopy() && "Expected a COPY"); for (auto &MO : MI->operands()) { // Physical registers will not be converted. Assume that converting the diff --git a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp --- a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp +++ b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp @@ -384,8 +384,7 @@ ReversePostOrderTraversal RPOT(&MF); for (MachineBasicBlock *MBB : RPOT) for (MachineInstr &MI : *MBB) - if (MI.getOpcode() == TargetOpcode::COPY && - MI.getOperand(0).getReg() == X86::EFLAGS) + if (MI.isCopy() && MI.getOperand(0).getReg() == X86::EFLAGS) Copies.push_back(&MI); for (MachineInstr *CopyI : Copies) { @@ -395,7 +394,7 @@ assert(VOp.isReg() && "The input to the copy for EFLAGS should always be a register!"); MachineInstr &CopyDefI = *MRI->getVRegDef(VOp.getReg()); - if (CopyDefI.getOpcode() != TargetOpcode::COPY) { + if (!CopyDefI.isCopy()) { // FIXME: The big likely candidate here are PHI nodes. We could in theory // handle PHI nodes, but it gets really, really hard. Insanely hard. Hard // enough that it is probably better to change every other part of LLVM @@ -624,7 +623,7 @@ rewriteFCMov(*TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs); } else if (X86::getCondFromSETCC(MI) != X86::COND_INVALID) { rewriteSetCC(*TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs); - } else if (MI.getOpcode() == TargetOpcode::COPY) { + } else if (MI.isCopy()) { rewriteCopy(MI, *FlagUse, CopyDefI); } else { // We assume all other instructions that use flags also def them. @@ -716,9 +715,8 @@ #ifndef NDEBUG for (MachineBasicBlock &MBB : MF) for (MachineInstr &MI : MBB) - if (MI.getOpcode() == TargetOpcode::COPY && - (MI.getOperand(0).getReg() == X86::EFLAGS || - MI.getOperand(1).getReg() == X86::EFLAGS)) { + if (MI.isCopy() && (MI.getOperand(0).getReg() == X86::EFLAGS || + MI.getOperand(1).getReg() == X86::EFLAGS)) { LLVM_DEBUG(dbgs() << "ERROR: Found a COPY involving EFLAGS: "; MI.dump()); llvm_unreachable("Unlowered EFLAGS copy!"); diff --git a/llvm/lib/Target/X86/X86FloatingPoint.cpp b/llvm/lib/Target/X86/X86FloatingPoint.cpp --- a/llvm/lib/Target/X86/X86FloatingPoint.cpp +++ b/llvm/lib/Target/X86/X86FloatingPoint.cpp @@ -1458,7 +1458,8 @@ switch (MI.getOpcode()) { default: llvm_unreachable("Unknown SpecialFP instruction!"); - case TargetOpcode::COPY: { + case TargetOpcode::COPY: + case TargetOpcode::TCOPY: { // We handle three kinds of copies: FP <- FP, FP <- ST, and ST <- FP. const MachineOperand &MO1 = MI.getOperand(1); const MachineOperand &MO0 = MI.getOperand(0); diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -144,6 +144,7 @@ // Some target-independent operations that trivially lower to data-invariant // instructions. case TargetOpcode::COPY: + case TargetOpcode::TCOPY: case TargetOpcode::INSERT_SUBREG: case TargetOpcode::SUBREG_TO_REG: return true; diff --git a/llvm/test/CodeGen/AArch64/callbr-asm-label.ll b/llvm/test/CodeGen/AArch64/callbr-asm-label.ll --- a/llvm/test/CodeGen/AArch64/callbr-asm-label.ll +++ b/llvm/test/CodeGen/AArch64/callbr-asm-label.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-linux-gnu -O0 | FileCheck %s --check-prefix=CHECK-O0 @X = common local_unnamed_addr global i32 0, align 4 @@ -9,6 +10,13 @@ ; CHECK-LABEL: .LBB0_1: // %cleanup ; CHECK-LABEL: .Ltmp0: ; CHECK-LABEL: .LBB0_2: // %indirect + +; CHECK-O0-LABEL: test1: +; CHECK-O0: .word b +; CHECK-O0-NEXT: .word .Ltmp1 +; CHECK-O0-LABEL: .Ltmp1: +; CHECK-O0-LABEL: .LBB0_1: // %indirect +; CHECK-O0-LABEL: .LBB0_2: // %cleanup entry: callbr void asm sideeffect "1:\0A\09.word b, ${0:l}\0A\09", "X"(i8* blockaddress(@test1, %indirect)) to label %cleanup [label %indirect] @@ -22,7 +30,8 @@ } define void @test2() { -; CHECK-LABEL: test2: +; CHECK-LABEL: test2: +; CHECK-O0-LABEL: test2: entry: %0 = load i32, i32* @X, align 4 %and = and i32 %0, 1 @@ -34,6 +43,11 @@ ; CHECK-NEXT: .word .Ltmp2 ; CHECK-LABEL: .Ltmp2: ; CHECK-NEXT: .LBB1_3: // %if.end6 + +; CHECK-O0: .word b +; CHECK-O0-NEXT: .word .Ltmp3 +; CHECK-O0-LABEL: .Ltmp3: +; CHECK-O0-NEXT: .LBB1_3: // %if.end6 callbr void asm sideeffect "1:\0A\09.word b, ${0:l}\0A\09", "X"(i8* blockaddress(@test2, %if.end6)) to label %if.then4 [label %if.end6] @@ -50,6 +64,9 @@ if.then9: ; CHECK-LABEL: .Ltmp4: ; CHECK-NEXT: .LBB1_5: // %l_yes + +; CHECK-O0-LABEL: .Ltmp5: +; CHECK-O0-NEXT: .LBB1_6: // %l_yes callbr void asm sideeffect "", "X"(i8* blockaddress(@test2, %l_yes)) to label %if.end10 [label %l_yes] diff --git a/llvm/test/CodeGen/SystemZ/asm-20.ll b/llvm/test/CodeGen/SystemZ/asm-20.ll --- a/llvm/test/CodeGen/SystemZ/asm-20.ll +++ b/llvm/test/CodeGen/SystemZ/asm-20.ll @@ -1,6 +1,7 @@ ; Test that asm goto can be compiled. ; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -O0 define i32 @c() { entry: diff --git a/llvm/test/CodeGen/X86/callbr-asm-label-addr.ll b/llvm/test/CodeGen/X86/callbr-asm-label-addr.ll --- a/llvm/test/CodeGen/X86/callbr-asm-label-addr.ll +++ b/llvm/test/CodeGen/X86/callbr-asm-label-addr.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -O0 | FileCheck %s --check-prefix=CHECK-O0 define i32 @test1(i32 %x) { ; CHECK-LABEL: test1: @@ -9,6 +10,16 @@ ; CHECK-NEXT: callq foo ; CHECK-LABEL: .Ltmp0: ; CHECK-NEXT: # %bb.2: # %baz + +; CHECK-O0-LABEL: test1: +; CHECK-O0: .quad .Ltmp0 +; CHECK-O0-NEXT: .quad .Ltmp1 +; CHECK-O0-LABEL: .Ltmp1: +; CHECK-O0-LABEL: .LBB0_2: # %bar +; CHECK-O0-NEXT: movl +; CHECK-O0-NEXT: callq foo +; CHECK-O0-LABEL: .Ltmp0: +; CHECK-O0-NEXT: # %bb.3: # %baz entry: callbr void asm sideeffect ".quad ${0:l}\0A\09.quad ${1:l}", "i,X,~{dirflag},~{fpsr},~{flags}"(i8* blockaddress(@test1, %baz), i8* blockaddress(@test1, %bar)) to label %asm.fallthrough [label %bar] diff --git a/llvm/test/CodeGen/X86/callbr-asm-outputs-tcopy-spilling.ll b/llvm/test/CodeGen/X86/callbr-asm-outputs-tcopy-spilling.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/callbr-asm-outputs-tcopy-spilling.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -O0 | FileCheck %s --check-prefix=CHECK + +%struct.kernel_rseq = type { i32, i32, i8*, i32, [12 x i8] } + +@__rseq_abi = external thread_local global %struct.kernel_rseq, align 32 + +define i32 @test1(i8* %percpu_data, i64 %lock_value) #0 { +; CHECK-LABEL: test1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rsp, %rbp +; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: movq %rdi, -16(%rbp) +; CHECK-NEXT: movq %rsi, -24(%rbp) +; CHECK-NEXT: movq -16(%rbp), %rax +; CHECK-NEXT: movq -24(%rbp), %rcx +; CHECK-NEXT: movq __rseq_abi@{{.*}}(%rip), %rdx +; CHECK-NEXT: movq %fs:0, %rsi +; CHECK-NEXT: leaq 8(%rsi,%rdx), %rdi +; CHECK-NEXT: leaq 4(%rsi,%rdx), %rdx +; CHECK-NEXT: #APP +; CHECK-NEXT: .Ltmp1: +; CHECK-NEXT: leaq __rseq_cs_RseqFunction_PerCpuTryLock_0(%rip), %rsi +; CHECK-NEXT: movq %rsi, (%rdi) +; CHECK-NEXT: .Ltmp2: +; CHECK-NEXT: movl (%rdx), %r8d +; CHECK-NEXT: movl %r8d, %esi +; CHECK-NEXT: shlq $12, %rsi +; CHECK-NEXT: addq %rax, %rsi +; CHECK-NEXT: cmpq $0, (%rsi) +; CHECK-NEXT: jne .Ltmp0 +; CHECK-NEXT: movq %rcx, (%rsi) +; CHECK-NEXT: .Ltmp3: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: movl %r8d, -40(%rbp) # 4-byte Spill +; CHECK-NEXT: movq %rsi, -48(%rbp) # 8-byte Spill +; CHECK-NEXT: # %bb.4: # %entry +; CHECK-NEXT: jmp .LBB0_1 +entry: + %retval = alloca i32, align 4 + %percpu_data.addr = alloca i8*, align 8 + %lock_value.addr = alloca i64, align 8 + %scratch = alloca i64, align 8 + %cpu = alloca i32, align 4 + store i8* %percpu_data, i8** %percpu_data.addr, align 8 + store i64 %lock_value, i64* %lock_value.addr, align 8 + %0 = load i8*, i8** %percpu_data.addr, align 8 + %1 = load i64, i64* %lock_value.addr, align 8 + %2 = callbr { i64, i32 } asm "3:\0Alea __rseq_cs_RseqFunction_PerCpuTryLock_${:uid}(%rip), $0\0Amov $0, ($2)\0A4:\0Amov ($3), $1\0Amov $1, ${0:k}\0Ashl $5, $0\0Aadd $6, $0\0Acmpq $$0, ($0)\0Ajne ${8:l}\0Amov $7, ($0)\0A5:", "=&r,=&r,r,r,n,n,r,r,X,~{cc},~{memory},~{dirflag},~{fpsr},~{flags}"(i8** getelementptr inbounds (%struct.kernel_rseq, %struct.kernel_rseq* @__rseq_abi, i32 0, i32 2), i32* getelementptr inbounds (%struct.kernel_rseq, %struct.kernel_rseq* @__rseq_abi, i32 0, i32 1), i32 1392848979, i32 12, i8* %0, i64 %1, i8* blockaddress(@test1, %fail_contended)) #1 + to label %asm.fallthrough [label %fail_contended] + +asm.fallthrough: ; preds = %entry + %asmresult = extractvalue { i64, i32 } %2, 0 + %asmresult1 = extractvalue { i64, i32 } %2, 1 + store i64 %asmresult, i64* %scratch, align 8 + store i32 %asmresult1, i32* %cpu, align 4 + %3 = load i32, i32* %cpu, align 4 + store i32 %3, i32* %retval, align 4 + br label %return + +fail_contended: ; preds = %entry + store i32 -1, i32* %retval, align 4 + br label %return + +return: ; preds = %fail_contended, %asm.fallthrough + %4 = load i32, i32* %retval, align 4 + ret i32 %4 +} + +attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind } diff --git a/llvm/test/CodeGen/X86/callbr-asm-outputs.ll b/llvm/test/CodeGen/X86/callbr-asm-outputs.ll --- a/llvm/test/CodeGen/X86/callbr-asm-outputs.ll +++ b/llvm/test/CodeGen/X86/callbr-asm-outputs.ll @@ -51,8 +51,8 @@ ; CHECK-NEXT: jne .Ltmp1 ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: .LBB1_2: # %if.then +; CHECK-NEXT: addl %esi, %edi ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: addl %esi, %eax ; CHECK-NEXT: .Ltmp2: # Block address taken ; CHECK-NEXT: .LBB1_6: # %return ; CHECK-NEXT: popl %esi