diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -170,9 +170,6 @@
   // Indicate that this basic block ends a section.
   bool IsEndSection = false;
 
-  /// Default target of the callbr of a basic block.
-  bool InlineAsmBrDefaultTarget = false;
-
   /// List of indirect targets of the callbr of a basic block.
   SmallPtrSet<const MachineBasicBlock*, 4> InlineAsmBrIndirectTargets;
 
@@ -484,22 +481,8 @@
     InlineAsmBrIndirectTargets.insert(Tgt);
   }
 
-  /// Transfers indirect targets to INLINEASM_BR's copy block.
-  void transferInlineAsmBrIndirectTargets(MachineBasicBlock *CopyBB) {
-    for (auto *Target : InlineAsmBrIndirectTargets)
-      CopyBB->addInlineAsmBrIndirectTarget(Target);
-    return InlineAsmBrIndirectTargets.clear();
-  }
-
-  /// Returns true if this is the default dest of an INLINEASM_BR.
-  bool isInlineAsmBrDefaultTarget() const {
-    return InlineAsmBrDefaultTarget;
-  }
-
-  /// Indicates if this is the default deft of an INLINEASM_BR.
-  void setInlineAsmBrDefaultTarget() {
-    InlineAsmBrDefaultTarget = true;
-  }
+  /// Returns the default target of this block's INLINEASM_BR, or null.
+  MachineBasicBlock *getInlineAsmBrDefaultTarget();
 
   /// Returns true if it is legal to hoist instructions into this block.
   bool isLegalToHoistInto() const;
diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h
--- a/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -1128,7 +1128,8 @@
   }
 
   bool isCopy() const {
-    return getOpcode() == TargetOpcode::COPY;
+    return getOpcode() == TargetOpcode::COPY ||
+           getOpcode() == TargetOpcode::TCOPY;
   }
 
   bool isFullCopy() const {
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -72,7 +72,7 @@
 /// virtual registers have been created for all the instructions, and it's
 /// only needed in cases where the register classes implied by the
 /// instructions are insufficient. It is emitted as a COPY MachineInstr.
-  HANDLE_TARGET_OPCODE(COPY_TO_REGCLASS)
+HANDLE_TARGET_OPCODE(COPY_TO_REGCLASS)
 
 /// DBG_VALUE - a mapping of the llvm.dbg.value intrinsic
 HANDLE_TARGET_OPCODE(DBG_VALUE)
@@ -91,11 +91,17 @@
 /// e.g. v1027 = REG_SEQUENCE v1024, 3, v1025, 4, v1026, 5
 /// After register coalescing references of v1024 should be replace with
 /// v1027:3, v1025 with v1027:4, etc.
-  HANDLE_TARGET_OPCODE(REG_SEQUENCE)
+HANDLE_TARGET_OPCODE(REG_SEQUENCE)
 
 /// COPY - Target-independent register copy. This instruction can also be
 /// used to copy between subregisters of virtual registers.
-  HANDLE_TARGET_OPCODE(COPY)
+HANDLE_TARGET_OPCODE(COPY)
+
+/// TCOPY - This instruction is the terminator version of COPY. The purpose
+/// is to allow copies from terminators to be properly represented (e.g. an
+/// INLINEASM_BR that defines a physical register) without having
+/// to introduce "live-ins" for physical registers before register allocation.
+HANDLE_TARGET_OPCODE(TCOPY)
 
 /// BUNDLE - This instruction represents an instruction bundle. Instructions
 /// which immediately follow a BUNDLE instruction which are marked with
diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td
--- a/llvm/include/llvm/Target/Target.td
+++ b/llvm/include/llvm/Target/Target.td
@@ -1121,6 +1121,14 @@
   let isAsCheapAsAMove = 1;
   let hasNoSchedulingInfo = 0;
 }
+def TCOPY : StandardPseudoInstruction { // Terminator version of COPY.
+  let OutOperandList = (outs unknown:$dst);
+  let InOperandList = (ins unknown:$src);
+  let AsmString = "";
+  let hasSideEffects = 0;
+  let isAsCheapAsAMove = 1;
+  let isTerminator = 1; // Marks this copy as a block terminator.
+}
 def BUNDLE : StandardPseudoInstruction {
   let OutOperandList = (outs);
   let InOperandList = (ins variable_ops);
diff --git a/llvm/lib/CodeGen/DetectDeadLanes.cpp b/llvm/lib/CodeGen/DetectDeadLanes.cpp
--- a/llvm/lib/CodeGen/DetectDeadLanes.cpp
+++ b/llvm/lib/CodeGen/DetectDeadLanes.cpp
@@ -140,6 +140,7 @@
   // are not lowered to a COPY.
   switch (MI.getOpcode()) {
   case TargetOpcode::COPY:
+  case TargetOpcode::TCOPY:
   case TargetOpcode::PHI:
   case TargetOpcode::INSERT_SUBREG:
   case TargetOpcode::REG_SEQUENCE:
@@ -235,6 +236,7 @@
 
   switch (MI.getOpcode()) {
   case TargetOpcode::COPY:
+  case TargetOpcode::TCOPY:
   case TargetOpcode::PHI:
     return UsedLanes;
   case TargetOpcode::REG_SEQUENCE: {
@@ -337,6 +339,7 @@
     break;
   }
   case TargetOpcode::COPY:
+  case TargetOpcode::TCOPY:
   case TargetOpcode::PHI:
     break;
   default:
diff --git a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
--- a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -33,6 +33,8 @@
   const TargetRegisterInfo *TRI;
   const TargetInstrInfo *TII;
 
+  MachineBasicBlock *TCopyDestBlock = nullptr; ///< Where TCOPYs are lowered.
+
 public:
   static char ID; // Pass identification, replacement for typeid
   ExpandPostRA() : MachineFunctionPass(ID) {}
@@ -133,7 +135,6 @@
 }
 
 bool ExpandPostRA::LowerCopy(MachineInstr *MI) {
-
   if (MI->allDefsAreDead()) {
     LLVM_DEBUG(dbgs() << "dead copy: " << *MI);
     MI->setDesc(TII->get(TargetOpcode::KILL));
@@ -163,8 +164,14 @@
   }
 
   LLVM_DEBUG(dbgs() << "real copy:   " << *MI);
-  TII->copyPhysReg(*MI->getParent(), MI, MI->getDebugLoc(),
-                   DstMO.getReg(), SrcMO.getReg(), SrcMO.isKill());
+  MachineBasicBlock *CopyBlock = MI->getParent();
+  MachineBasicBlock::iterator MII(MI);
+  if (MI->getOpcode() == TargetOpcode::TCOPY) {
+    CopyBlock = TCopyDestBlock;
+    MII = TCopyDestBlock->getFirstTerminator();
+  }
+  TII->copyPhysReg(*CopyBlock, MII, MI->getDebugLoc(), DstMO.getReg(),
+                   SrcMO.getReg(), SrcMO.isKill());
 
   if (MI->getNumOperands() > 2)
     TransferImplicitOperands(MI);
@@ -188,13 +195,11 @@
 
   bool MadeChange = false;
 
-  for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end();
-       mbbi != mbbe; ++mbbi) {
-    for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end();
-         mi != me;) {
-      MachineInstr &MI = *mi;
+  for (auto &MBB : MF) {
+    for (auto MII = MBB.begin(), ME = MBB.end(); MII != ME;) {
+      MachineInstr &MI = *MII;
       // Advance iterator here because MI may be erased.
-      ++mi;
+      ++MII;
 
       // Only expand pseudos.
       if (!MI.isPseudo())
@@ -208,10 +213,22 @@
 
       // Expand standard pseudos.
       switch (MI.getOpcode()) {
+      case TargetOpcode::INLINEASM_BR: {
+        MachineBasicBlock::iterator Next(MI);
+        Next = detail::next_or_end(Next, MBB.end());
+        if (Next == MBB.end() || Next->getOpcode() != TargetOpcode::TCOPY)
+          break;
+
+        // Find the destination for any TCOPY instructions to sink into.
+        TCopyDestBlock = MBB.getInlineAsmBrDefaultTarget();
+        assert(TCopyDestBlock && "Cannot find default dest block for callbr!");
+        break;
+      }
       case TargetOpcode::SUBREG_TO_REG:
         MadeChange |= LowerSubregToReg(&MI);
         break;
       case TargetOpcode::COPY:
+      case TargetOpcode::TCOPY:
         MadeChange |= LowerCopy(&MI);
         break;
       case TargetOpcode::DBG_VALUE:
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -73,7 +73,7 @@
   return false;
 }
 bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
-  if (MI.getOpcode() != TargetOpcode::COPY)
+  if (!MI.isCopy())
     return false;
   Register DstReg = MI.getOperand(0).getReg();
   Register SrcReg = MI.getOperand(1).getReg();
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
--- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -157,6 +157,7 @@
                                       Depth);
     break;
   case TargetOpcode::COPY:
+  case TargetOpcode::TCOPY:
   case TargetOpcode::G_PHI:
   case TargetOpcode::PHI: {
     Known.One = APInt::getAllOnesValue(BitWidth);
@@ -191,7 +192,7 @@
           MRI.getType(SrcReg).isValid()) {
         // For COPYs we don't do anything, don't increase the depth.
         computeKnownBitsImpl(SrcReg, Known2, DemandedElts,
-                             Depth + (Opcode != TargetOpcode::COPY));
+                             Depth + !MI.isCopy());
         Known.One &= Known2.One;
         Known.Zero &= Known2.Zero;
         // If we reach a point where we don't know anything
@@ -435,7 +436,8 @@
 
   unsigned FirstAnswer = 1;
   switch (Opcode) {
-  case TargetOpcode::COPY: {
+  case TargetOpcode::COPY:
+  case TargetOpcode::TCOPY: {
     MachineOperand &Src = MI.getOperand(1);
     if (Src.getReg().isVirtual() && Src.getSubReg() == 0 &&
         MRI.getType(Src.getReg()).isValid()) {
diff --git a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
--- a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
@@ -165,7 +165,7 @@
         ReachedBegin = true;
       else
         --MII;
-      if (MI.getOpcode() != TargetOpcode::COPY)
+      if (!MI.isCopy())
         continue;
       Register SrcReg = MI.getOperand(1).getReg();
       Register DstReg = MI.getOperand(0).getReg();
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -1007,6 +1007,7 @@
     break;
   }
   case TargetOpcode::COPY:
+  case TargetOpcode::TCOPY:
     assert(DstOps.size() == 1 && "Invalid Dst");
     // If the caller wants to add a subreg source it has to be done separately
     // so we may not have any SrcOps at this point yet.
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -291,6 +291,7 @@
       VReg = MI->getOperand(1).getReg();
       break;
     case TargetOpcode::COPY:
+    case TargetOpcode::TCOPY:
       VReg = MI->getOperand(1).getReg();
       if (Register::isPhysicalRegister(VReg))
         return None;
@@ -352,7 +353,7 @@
   auto DstTy = MRI.getType(DefMI->getOperand(0).getReg());
   if (!DstTy.isValid())
     return None;
-  while (DefMI->getOpcode() == TargetOpcode::COPY) {
+  while (DefMI->isCopy()) {
     Register SrcReg = DefMI->getOperand(1).getReg();
     auto SrcTy = MRI.getType(SrcReg);
     if (!SrcTy.isValid() || SrcTy != DstTy)
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -271,6 +271,31 @@
   return false;
 }
 
+/// Returns the default destination of this block's INLINEASM_BR terminator.
+/// Returns nullptr if the block has no INLINEASM_BR terminator or if no
+/// default destination can be identified.
+MachineBasicBlock *MachineBasicBlock::getInlineAsmBrDefaultTarget() {
+  const auto IsInlineAsmBr = [](const MachineInstr &Term) {
+    return Term.getOpcode() == TargetOpcode::INLINEASM_BR;
+  };
+  if (llvm::none_of(terminators(), IsInlineAsmBr))
+    return nullptr;
+
+  // Prefer the first successor that is not an indirect target of the callbr.
+  for (MachineBasicBlock *Succ : successors())
+    if (!isInlineAsmBrIndirectTarget(Succ))
+      return Succ;
+
+  // Otherwise use the first block operand of a trailing unconditional branch.
+  const MachineInstr &Br = back();
+  if (!Br.isUnconditionalBranch())
+    return nullptr;
+  for (const MachineOperand &MO : Br.operands())
+    if (MO.isMBB())
+      return MO.getMBB();
+  return nullptr;
+}
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 LLVM_DUMP_METHOD void MachineBasicBlock::dump() const {
   print(dbgs());
diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp
--- a/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/llvm/lib/CodeGen/MachineInstr.cpp
@@ -1192,7 +1192,7 @@
     return false;
   }
 
-  if (isPosition() || isDebugInstr() || isTerminator() ||
+  if (isPosition() || isDebugInstr() || (isTerminator() && !isCopy()) ||
       mayRaiseFPException() || hasUnmodeledSideEffects())
     return false;
 
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -838,6 +838,7 @@
 /// Sink an instruction and its associated debug instructions.
 static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo,
                         MachineBasicBlock::iterator InsertPos,
+                        const TargetInstrInfo *TII,
                         SmallVectorImpl<MachineInstr *> &DbgValuesToSink) {
 
   // If we cannot find a location to use (merge with), then we erase the debug
@@ -854,15 +855,17 @@
   SuccToSinkTo.splice(InsertPos, ParentBlock, MI,
                       ++MachineBasicBlock::iterator(MI));
 
+  // The copy no longer needs to be a terminator, so convert it to a normal
+  // COPY.
+  if (MI.getOpcode() == TargetOpcode::TCOPY)
+    MI.setDesc(TII->get(TargetOpcode::COPY));
+
   // Sink a copy of debug users to the insert position. Mark the original
   // DBG_VALUE location as 'undef', indicating that any earlier variable
   // location should be terminated as we've optimised away the value at this
   // point.
-  for (SmallVectorImpl<MachineInstr *>::iterator DBI = DbgValuesToSink.begin(),
-                                                 DBE = DbgValuesToSink.end();
-       DBI != DBE; ++DBI) {
-    MachineInstr *DbgMI = *DBI;
-    MachineInstr *NewDbgMI = DbgMI->getMF()->CloneMachineInstr(*DBI);
+  for (auto *DbgMI : DbgValuesToSink) {
+    MachineInstr *NewDbgMI = DbgMI->getMF()->CloneMachineInstr(DbgMI);
     SuccToSinkTo.insert(InsertPos, NewDbgMI);
 
     if (!attemptDebugCopyProp(MI, *DbgMI))
@@ -887,6 +890,11 @@
   if (MI.isConvergent())
     return false;
 
+  // Don't sink TCOPYs here; they are sunk after register allocation to avoid
+  // mucking with live-ins.
+  if (MI.getOpcode() == TargetOpcode::TCOPY)
+    return false;
+
   // Don't break implicit null checks.  This is a performance heuristic, and not
   // required for correctness.
   if (SinkingPreventsImplicitNullCheck(MI, TII, TRI))
@@ -1013,7 +1021,7 @@
   if (MI.getMF()->getFunction().getSubprogram() && MI.isCopy())
     SalvageUnsunkDebugUsersOfCopy(MI, SuccToSinkTo);
 
-  performSink(MI, *SuccToSinkTo, InsertPos, DbgUsersToSink);
+  performSink(MI, *SuccToSinkTo, InsertPos, TII, DbgUsersToSink);
 
   // Conservatively, clear any kill flags, since it's possible that they are no
   // longer correct.
@@ -1376,7 +1384,7 @@
     // block.
     clearKillFlags(MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);
     MachineBasicBlock::iterator InsertPos = SuccBB->getFirstNonPHI();
-    performSink(*MI, *SuccBB, InsertPos, DbgValsToSink);
+    performSink(*MI, *SuccBB, InsertPos, TII, DbgValsToSink);
     updateLiveIn(MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);
 
     Changed = true;
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -590,7 +590,6 @@
     // it is an entry block or landing pad.
     for (const auto &LI : MBB->liveins()) {
       if (isAllocatable(LI.PhysReg) && !MBB->isEHPad() &&
-          !MBB->isInlineAsmBrDefaultTarget() &&
           MBB->getIterator() != MBB->getParent()->begin()) {
         report("MBB has allocatable live-in, but isn't entry or landing-pad.", MBB);
         report_context(LI.PhysReg);
@@ -1503,6 +1502,19 @@
 
   // Verify properties of various specific instruction types
   switch (MI->getOpcode()) {
+  case TargetOpcode::TCOPY: {
+    MachineBasicBlock::const_iterator MII(MI), MIE = MI->getParent()->end();
+    for (; MII != MIE; ++MII) {
+      if (MII->getOpcode() != TargetOpcode::COPY)
+        continue;
+      report("TCOPY and COPY instructions are intermixed", &*MII);
+      errs() << "- TCOPY instruction: ";
+      if (Indexes && Indexes->hasIndex(*MI))
+        errs() << Indexes->getInstructionIndex(*MI) << '\t';
+      MI->print(errs(), /*SkipOpers=*/true);
+    }
+    LLVM_FALLTHROUGH;
+  }
   case TargetOpcode::COPY: {
     if (foundErrors)
       break;
diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
--- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -1094,6 +1094,7 @@
   default:
     return nullptr;
   case TargetOpcode::COPY:
+  case TargetOpcode::TCOPY:
     return new CopyRewriter(MI);
   case TargetOpcode::INSERT_SUBREG:
     return new InsertSubregRewriter(MI);
diff --git a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
--- a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
+++ b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
@@ -512,7 +512,7 @@
 
 static bool mayHaveSideEffects(MachineInstr &MI) {
   return MI.mayLoadOrStore() || MI.mayRaiseFPException() ||
-         MI.hasUnmodeledSideEffects() || MI.isTerminator() ||
+         MI.hasUnmodeledSideEffects() || (MI.isTerminator() && !MI.isCopy()) ||
          MI.isCall() || MI.isBarrier() || MI.isBranch() || MI.isReturn();
 }
 
diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -82,6 +82,7 @@
 
     /// Everything we know about a live virtual register.
     struct LiveReg {
+      MachineInstr *OrigMI = nullptr;  ///< Instr the LiveReg comes from.
       MachineInstr *LastUse = nullptr; ///< Last instr to use reg.
       Register VirtReg;                ///< Virtual register number.
       MCPhysReg PhysReg = 0;           ///< Currently held here.
@@ -443,7 +444,8 @@
       continue;
     if (OnlyLiveOut && !mayLiveOut(LR.VirtReg))
       continue;
-    spillVirtReg(MI, LR);
+    spillVirtReg(LR.OrigMI->getOpcode() == TargetOpcode::TCOPY ? LR.OrigMI : MI,
+                 LR);
   }
   LiveVirtRegs.clear();
 }
@@ -798,6 +800,7 @@
       addKillFlag(*LRI);
   }
   assert(LRI->PhysReg && "Register not assigned");
+  LRI->OrigMI = &MI;
   LRI->LastUse = &MI;
   LRI->LastOpNum = OpNum;
   LRI->Dirty = true;
@@ -844,6 +847,7 @@
     MO.setIsDead(false);
   }
   assert(LRI->PhysReg && "Register not assigned");
+  LRI->OrigMI = &MI;
   LRI->LastUse = &MI;
   LRI->LastOpNum = OpNum;
   markRegUsedInInstr(LRI->PhysReg);
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -175,8 +175,19 @@
   } else {
     // Create the reg, emit the copy.
     VRBase = MRI->createVirtualRegister(DstRC);
-    BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
-            VRBase).addReg(SrcReg);
+
+    // FIXME: The predicate to determine whether an instruction is a COPY or
+    // TCOPY should be generic. At this time, though, the criteria aren't
+    // well-known except for INLINEASM_BR instructions.
+    unsigned TgtOpc =
+        llvm::any_of(MBB->terminators(),
+                     [](const MachineInstr &Term) {
+                       return Term.getOpcode() == TargetOpcode::INLINEASM_BR;
+                     })
+            ? TargetOpcode::TCOPY
+            : TargetOpcode::COPY;
+    BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TgtOpc), VRBase)
+        .addReg(SrcReg);
   }
 
   SDValue Op(Node, ResNo);
@@ -1010,8 +1021,15 @@
     if (SrcReg == DestReg) // Coalesced away the copy? Ignore.
       break;
 
-    BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
-            DestReg).addReg(SrcReg);
+    unsigned TgtOpc =
+        llvm::any_of(MBB->terminators(),
+                     [](const MachineInstr &Term) {
+                       return Term.getOpcode() == TargetOpcode::INLINEASM_BR;
+                     })
+            ? TargetOpcode::TCOPY
+            : TargetOpcode::COPY;
+    BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TgtOpc), DestReg)
+        .addReg(SrcReg);
     break;
   }
   case ISD::CopyFromReg: {
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -1028,53 +1028,35 @@
     }
   }
 
-  // Split after an INLINEASM_BR block with outputs. This allows us to keep the
-  // copy to/from register instructions from being between two terminator
-  // instructions, which causes the machine instruction verifier agita.
-  auto TI = llvm::find_if(*BB, [](const MachineInstr &MI){
-    return MI.getOpcode() == TargetOpcode::INLINEASM_BR;
+  // Split after an INLINEASM_BR block with outputs. This gives us a place to
+  // store output values.
+  auto InlineAsmBr = llvm::find_if(BB->terminators(), [](MachineInstr &term) {
+    return term.getOpcode() == TargetOpcode::INLINEASM_BR;
   });
-  auto SplicePt = TI != BB->end() ? std::next(TI) : BB->end();
-  if (TI != BB->end() && SplicePt != BB->end() &&
-      TI->getOpcode() == TargetOpcode::INLINEASM_BR &&
-      SplicePt->getOpcode() == TargetOpcode::COPY) {
-    MachineBasicBlock *FallThrough = BB->getFallThrough();
-    if (!FallThrough)
-      for (const MachineOperand &MO : BB->back().operands())
-        if (MO.isMBB()) {
-          FallThrough = MO.getMBB();
-          break;
-        }
-    assert(FallThrough && "Cannot find default dest block for callbr!");
+  auto TermIter = detail::next_or_end(InlineAsmBr, BB->end());
+  if (InlineAsmBr != BB->end() && TermIter != BB->end() &&
+      TermIter->getOpcode() == TargetOpcode::TCOPY) {
+    do {
+      ++TermIter;
+    } while (TermIter != BB->end() &&
+             TermIter->getOpcode() == TargetOpcode::TCOPY);
+
+    MachineBasicBlock *DefaultTarget = BB->getInlineAsmBrDefaultTarget();
+    assert(DefaultTarget && "Cannot find default dest block for callbr!");
 
     MachineBasicBlock *CopyBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
     MachineFunction::iterator BBI(*BB);
     MF.insert(++BBI, CopyBB);
+    if (TermIter != BB->end())
+      CopyBB->splice(CopyBB->begin(), BB, TermIter, BB->end());
 
-    CopyBB->splice(CopyBB->begin(), BB, SplicePt, BB->end());
-    CopyBB->setInlineAsmBrDefaultTarget();
-
-    CopyBB->addSuccessor(FallThrough, BranchProbability::getOne());
-    BB->removeSuccessor(FallThrough);
+    CopyBB->addSuccessor(DefaultTarget, BranchProbability::getOne());
+    BB->removeSuccessor(DefaultTarget);
     BB->addSuccessor(CopyBB, BranchProbability::getOne());
 
-    // Mark all physical registers defined in the original block as being live
-    // on entry to the copy block.
-    for (const auto &MI : *CopyBB)
-      for (const MachineOperand &MO : MI.operands())
-        if (MO.isReg()) {
-          Register reg = MO.getReg();
-          if (Register::isPhysicalRegister(reg)) {
-            CopyBB->addLiveIn(reg);
-            break;
-          }
-        }
-
     CopyBB->normalizeSuccProbs();
     BB->normalizeSuccProbs();
 
-    BB->transferInlineAsmBrIndirectTargets(CopyBB);
-
     InsertPos = CopyBB->end();
     return CopyBB;
   }
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -2865,7 +2865,6 @@
 
   // Retrieve successors.
   MachineBasicBlock *Return = FuncInfo.MBBMap[I.getDefaultDest()];
-  Return->setInlineAsmBrDefaultTarget();
 
   // Update successor info.
   addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne());
diff --git a/llvm/lib/Target/AArch64/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
@@ -652,7 +652,7 @@
     // registers. Note that getDefIgnoringCopies does not ignore copies from
     // physical registers.
     MachineInstr *RegDef = getDefIgnoringCopies(OutInfo.Regs[0], MRI);
-    if (!RegDef || RegDef->getOpcode() != TargetOpcode::COPY) {
+    if (!RegDef || !RegDef->isCopy()) {
       LLVM_DEBUG(
           dbgs()
           << "... Parameter was not copied into a VReg, cannot tail call.\n");
diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -4547,7 +4547,7 @@
   // have emitted a zero-extending load, but we need a sign-extending load.
   bool IsZExt = isa<ZExtInst>(I);
   const auto *LoadMI = MI;
-  if (LoadMI->getOpcode() == TargetOpcode::COPY &&
+  if (LoadMI->isCopy() &&
       LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
     Register LoadReg = MI->getOperand(1).getReg();
     LoadMI = MRI.getUniqueVRegDef(LoadReg);
@@ -4571,8 +4571,7 @@
         .addImm(AArch64::sub_32);
     Reg = Reg64;
   } else {
-    assert((MI->getOpcode() == TargetOpcode::COPY &&
-            MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
+    assert((MI->isCopy() && MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
            "Expected copy instruction");
     Reg = MI->getOperand(1).getReg();
     MachineBasicBlock::iterator I(MI);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -719,9 +719,8 @@
   }
 
   if (Subtarget.hasZeroCycleZeroingGP()) {
-    if (Opcode == TargetOpcode::COPY &&
-        (MI.getOperand(1).getReg() == AArch64::WZR ||
-         MI.getOperand(1).getReg() == AArch64::XZR))
+    if (MI.isCopy() && (MI.getOperand(1).getReg() == AArch64::WZR ||
+                        MI.getOperand(1).getReg() == AArch64::XZR))
       return true;
   }
 
@@ -1616,6 +1615,7 @@
   case AArch64::ANDXri:
     return MI.getOperand(1).getReg() == AArch64::XZR;
   case TargetOpcode::COPY:
+  case TargetOpcode::TCOPY:
     return MI.getOperand(1).getReg() == AArch64::WZR;
   }
   return false;
@@ -1627,7 +1627,8 @@
   switch (MI.getOpcode()) {
   default:
     break;
-  case TargetOpcode::COPY: {
+  case TargetOpcode::COPY:
+  case TargetOpcode::TCOPY: {
     // GPR32 copies will by lowered to ORRXrs
     Register DstReg = MI.getOperand(0).getReg();
     return (AArch64::GPR32RegClass.contains(DstReg) ||
@@ -1657,7 +1658,8 @@
   switch (MI.getOpcode()) {
   default:
     break;
-  case TargetOpcode::COPY: {
+  case TargetOpcode::COPY:
+  case TargetOpcode::TCOPY: {
     // FPR64 copies will by lowered to ORR.16b
     Register DstReg = MI.getOperand(0).getReg();
     return (AArch64::FPR64RegClass.contains(DstReg) ||
diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -3873,11 +3873,11 @@
     // We can skip over G_TRUNC since the condition is 1-bit.
     // Truncating/extending can have no impact on the value.
     unsigned Opc = CondDef->getOpcode();
-    if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
+    if (!CondDef->isCopy() && Opc != TargetOpcode::G_TRUNC)
       break;
 
     // Can't see past copies from physregs.
-    if (Opc == TargetOpcode::COPY &&
+    if (CondDef->isCopy() &&
         Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
       return false;
 
@@ -5495,6 +5495,7 @@
   default:
     return true;
   case TargetOpcode::COPY:
+  case TargetOpcode::TCOPY:
   case TargetOpcode::G_BITCAST:
   case TargetOpcode::G_TRUNC:
   case TargetOpcode::G_PHI:
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
@@ -467,15 +467,13 @@
 bool AArch64RegisterBankInfo::hasFPConstraints(
     const MachineInstr &MI, const MachineRegisterInfo &MRI,
     const TargetRegisterInfo &TRI) const {
-  unsigned Op = MI.getOpcode();
-
   // Do we have an explicit floating point instruction?
-  if (isPreISelGenericFloatingPointOpcode(Op))
+  if (isPreISelGenericFloatingPointOpcode(MI.getOpcode()))
     return true;
 
   // No. Check if we have a copy-like instruction. If we do, then we could
   // still be fed by floating point instructions.
-  if (Op != TargetOpcode::COPY && !MI.isPHI())
+  if (!MI.isCopy() && !MI.isPHI())
     return false;
 
   // MI is copy-like. Return true if it outputs an FPR.
@@ -518,7 +516,7 @@
 
   // Try the default logic for non-generic instructions that are either copies
   // or already have some operands assigned to banks.
-  if ((Opc != TargetOpcode::COPY && !isPreISelGenericOpcode(Opc)) ||
+  if ((!MI.isCopy() && !isPreISelGenericOpcode(Opc)) ||
       Opc == TargetOpcode::G_PHI) {
     const RegisterBankInfo::InstructionMapping &Mapping =
         getInstrMappingImpl(MI);
@@ -569,7 +567,8 @@
                                    &ValMappings[Shift64Imm], 3);
     return getSameKindOfOperandsMapping(MI);
   }
-  case TargetOpcode::COPY: {
+  case TargetOpcode::COPY:
+  case TargetOpcode::TCOPY: {
     Register DstReg = MI.getOperand(0).getReg();
     Register SrcReg = MI.getOperand(1).getReg();
     // Check if one of the register is not a generic register.
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3820,8 +3820,7 @@
 
     // Move the COPY of the input reg to the beginning, so that we can use it.
     for (auto I = BB->begin(); I != &MI; I++) {
-      if (I->getOpcode() != TargetOpcode::COPY ||
-          I->getOperand(0).getReg() != InputReg)
+      if (!I->isCopy() || I->getOperand(0).getReg() != InputReg)
         continue;
 
       if (I == FirstMI) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -802,7 +802,7 @@
     else
       SubIdx = SubIndices[SubIndices.size() - Idx - 1];
 
-    if (Opcode == TargetOpcode::COPY) {
+    if (Opcode == TargetOpcode::COPY || Opcode == TargetOpcode::TCOPY) {
       copyPhysReg(MBB, MI, DL, RI.getSubReg(DestReg, SubIdx),
                   RI.getSubReg(SrcReg, SubIdx), KillSrc);
       continue;
diff --git a/llvm/lib/Target/Hexagon/BitTracker.cpp b/llvm/lib/Target/Hexagon/BitTracker.cpp
--- a/llvm/lib/Target/Hexagon/BitTracker.cpp
+++ b/llvm/lib/Target/Hexagon/BitTracker.cpp
@@ -739,7 +739,8 @@
       break;
     }
 
-    case TargetOpcode::COPY: {
+    case TargetOpcode::COPY:
+    case TargetOpcode::TCOPY: {
       // COPY can transfer a smaller register into a wider one.
       // If that is the case, fill the remaining high bits with 0.
       RegisterRef RD = MI.getOperand(0);
diff --git a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
--- a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -1308,7 +1308,7 @@
     NextI = std::next(I);
     MachineInstr *MI = &*I;
 
-    if (MI->getOpcode() == TargetOpcode::COPY)
+    if (MI->isCopy())
       continue;
     if (MI->isPHI() || MI->hasUnmodeledSideEffects() || MI->isInlineAsm())
       continue;
@@ -1652,6 +1652,7 @@
 bool CopyPropagation::isCopyReg(unsigned Opc, bool NoConv) {
   switch (Opc) {
     case TargetOpcode::COPY:
+    case TargetOpcode::TCOPY:
     case TargetOpcode::REG_SEQUENCE:
     case Hexagon::A4_combineir:
     case Hexagon::A4_combineri:
@@ -1675,6 +1676,7 @@
 
   switch (Opc) {
     case TargetOpcode::COPY:
+    case TargetOpcode::TCOPY:
     case Hexagon::A2_tfr:
     case Hexagon::A2_tfrp: {
       BitTracker::RegisterRef RS = MI.getOperand(1);
@@ -2713,8 +2715,7 @@
     Defs.clear();
     HBS::getInstrDefs(*MI, Defs);
 
-    unsigned Opc = MI->getOpcode();
-    if (Opc == TargetOpcode::COPY || Opc == TargetOpcode::REG_SEQUENCE)
+    if (MI->isCopy() || MI->getOpcode() == TargetOpcode::REG_SEQUENCE)
       continue;
 
     if (MI->mayStore()) {
@@ -2987,6 +2988,7 @@
   unsigned Opc = MI->getOpcode();
   switch (Opc) {
     case TargetOpcode::COPY:
+    case TargetOpcode::TCOPY:
     case Hexagon::S2_lsr_i_r:
     case Hexagon::S2_asr_i_r:
     case Hexagon::S2_asl_i_r:
diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
--- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -2091,6 +2091,7 @@
 
       switch (Opc) {
         case TargetOpcode::COPY:
+        case TargetOpcode::TCOPY:
           Changed |= expandCopy(B, I, MRI, HII, NewRegs);
           break;
         case Hexagon::STriw_pred:
diff --git a/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp b/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
--- a/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
@@ -212,6 +212,7 @@
       switch (Opc) {
         case Hexagon::C2_tfrpr:
         case TargetOpcode::COPY:
+        case TargetOpcode::TCOPY:
           if (isPredReg(MI->getOperand(1).getReg())) {
             RegisterSubReg RD = MI->getOperand(0);
             if (Register::isVirtualRegister(RD.R))
@@ -255,7 +256,7 @@
   MachineInstr *DefI = MRI->getVRegDef(Reg.R);
   assert(DefI);
   unsigned Opc = DefI->getOpcode();
-  if (Opc == Hexagon::C2_tfrpr || Opc == TargetOpcode::COPY) {
+  if (Opc == Hexagon::C2_tfrpr || DefI->isCopy()) {
     assert(DefI->getOperand(0).isDef() && DefI->getOperand(1).isUse());
     RegisterSubReg PR = DefI->getOperand(1);
     G2P.insert(std::make_pair(Reg, PR));
@@ -331,7 +332,8 @@
       return false;
     unsigned DefOpc = DefI->getOpcode();
     switch (DefOpc) {
-      case TargetOpcode::COPY: {
+      case TargetOpcode::COPY:
+      case TargetOpcode::TCOPY: {
         const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass;
         if (MRI->getRegClass(PR.R) != PredRC)
           return false;
@@ -468,7 +470,7 @@
 
   for (MachineBasicBlock &MBB : MF) {
     for (MachineInstr &MI : MBB) {
-      if (MI.getOpcode() != TargetOpcode::COPY)
+      if (!MI.isCopy())
         continue;
       RegisterSubReg DR = MI.getOperand(0);
       RegisterSubReg SR = MI.getOperand(1);
diff --git a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
--- a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -1516,6 +1516,7 @@
   unsigned DOpc = DI->getOpcode();
   switch (DOpc) {
     case TargetOpcode::COPY:
+    case TargetOpcode::TCOPY:
     case Hexagon::A2_tfrsi:
     case Hexagon::A2_tfrpi:
     case Hexagon::CONST32:
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
@@ -1019,9 +1019,10 @@
     } // for (Node : Results)
 
     assert(Node.Ty != MVT::Other);
-    SDNode *ResN = (Node.Opc == TargetOpcode::COPY)
-                      ? Ops.front().getNode()
-                      : DAG.getMachineNode(Node.Opc, dl, Node.Ty, Ops);
+    SDNode *ResN =
+        (Node.Opc == TargetOpcode::COPY || Node.Opc == TargetOpcode::TCOPY)
+            ? Ops.front().getNode()
+            : DAG.getMachineNode(Node.Opc, dl, Node.Ty, Ops);
     Output.push_back(SDValue(ResN, 0));
   }
 
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -1031,7 +1031,8 @@
   };
 
   switch (Opc) {
-    case TargetOpcode::COPY: {
+    case TargetOpcode::COPY:
+    case TargetOpcode::TCOPY: {
       MachineOperand &MD = MI.getOperand(0);
       MachineOperand &MS = MI.getOperand(1);
       MachineBasicBlock::iterator MBBI = MI.getIterator();
@@ -2359,6 +2360,7 @@
   case TargetOpcode::REG_SEQUENCE:
   case TargetOpcode::IMPLICIT_DEF:
   case TargetOpcode::COPY:
+  case TargetOpcode::TCOPY:
   case TargetOpcode::INLINEASM:
   case TargetOpcode::PHI:
     return false;
diff --git a/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
--- a/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -111,6 +111,7 @@
   case TargetOpcode::REG_SEQUENCE:
   case TargetOpcode::IMPLICIT_DEF:
   case TargetOpcode::COPY:
+  case TargetOpcode::TCOPY:
   case TargetOpcode::INLINEASM:
   case TargetOpcode::INLINEASM_BR:
     break;
@@ -167,6 +168,7 @@
   case TargetOpcode::CFI_INSTRUCTION:
   case TargetOpcode::EH_LABEL:
   case TargetOpcode::COPY:
+  case TargetOpcode::TCOPY:
   case TargetOpcode::INLINEASM:
   case TargetOpcode::INLINEASM_BR:
     break;
diff --git a/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp b/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
--- a/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -219,8 +219,7 @@
     // PHI can be anything after RA.
     // COPY can remateriaze things in between feeder, compare and nvj.
     if (MII->getOpcode() == TargetOpcode::KILL ||
-        MII->getOpcode() == TargetOpcode::PHI ||
-        MII->getOpcode() == TargetOpcode::COPY)
+        MII->getOpcode() == TargetOpcode::PHI || MII->isCopy())
       return false;
 
     // The following pseudo Hexagon instructions sets "use" and "def"
@@ -293,7 +292,7 @@
     MachineRegisterInfo &MRI = MF.getRegInfo();
     if (secondReg && !Register::isPhysicalRegister(cmpOp2)) {
       MachineInstr *def = MRI.getVRegDef(cmpOp2);
-      if (def->getOpcode() == TargetOpcode::COPY)
+      if (def->isCopy())
         return false;
     }
   }
diff --git a/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp b/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp
--- a/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp
@@ -172,6 +172,7 @@
 
     case TargetOpcode::PHI:
     case TargetOpcode::COPY:
+    case TargetOpcode::TCOPY:
       break;
 
     case Hexagon::L2_loadrd_io:
@@ -322,6 +323,7 @@
           return 0;
       return 10;
     case TargetOpcode::COPY:
+    case TargetOpcode::TCOPY:
       if (MI->getOperand(1).getSubReg() != 0)
         return 10;
       return 0;
@@ -1002,7 +1004,8 @@
 
   switch (Opc) {
     case TargetOpcode::PHI:
-    case TargetOpcode::COPY: {
+    case TargetOpcode::COPY:
+    case TargetOpcode::TCOPY: {
       Register DstR = MI->getOperand(0).getReg();
       if (MRI->getRegClass(DstR) == DoubleRC) {
         createHalfInstr(Opc, MI, PairMap, isub_lo);
diff --git a/llvm/lib/Target/Hexagon/RDFCopy.cpp b/llvm/lib/Target/Hexagon/RDFCopy.cpp
--- a/llvm/lib/Target/Hexagon/RDFCopy.cpp
+++ b/llvm/lib/Target/Hexagon/RDFCopy.cpp
@@ -40,7 +40,8 @@
 bool CopyPropagation::interpretAsCopy(const MachineInstr *MI, EqualityMap &EM) {
   unsigned Opc = MI->getOpcode();
   switch (Opc) {
-    case TargetOpcode::COPY: {
+    case TargetOpcode::COPY:
+    case TargetOpcode::TCOPY: {
       const MachineOperand &Dst = MI->getOperand(0);
       const MachineOperand &Src = MI->getOperand(1);
       RegisterRef DstR = DFG.makeRegRef(Dst.getReg(), Dst.getSubReg());
diff --git a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
--- a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
@@ -185,7 +185,7 @@
   for (MachineInstr &UseMI : MRI.use_instructions(Reg)) {
     MachineInstr *NonCopyInstr = skipCopiesOutgoing(&UseMI);
     // Copy with many uses.
-    if (NonCopyInstr->getOpcode() == TargetOpcode::COPY &&
+    if (NonCopyInstr->isCopy() &&
         !Register::isPhysicalRegister(NonCopyInstr->getOperand(0).getReg()))
       addDefUses(NonCopyInstr->getOperand(0).getReg(), MRI);
     else
@@ -207,7 +207,7 @@
   const MachineFunction &MF = *MI->getParent()->getParent();
   const MachineRegisterInfo &MRI = MF.getRegInfo();
   MachineInstr *Ret = MI;
-  while (Ret->getOpcode() == TargetOpcode::COPY &&
+  while (Ret->isCopy() &&
          !Register::isPhysicalRegister(Ret->getOperand(0).getReg()) &&
          MRI.hasOneUse(Ret->getOperand(0).getReg())) {
     Ret = &(*MRI.use_instr_begin(Ret->getOperand(0).getReg()));
@@ -221,7 +221,7 @@
   const MachineFunction &MF = *MI->getParent()->getParent();
   const MachineRegisterInfo &MRI = MF.getRegInfo();
   MachineInstr *Ret = MI;
-  while (Ret->getOpcode() == TargetOpcode::COPY &&
+  while (Ret->isCopy() &&
          !Register::isPhysicalRegister(Ret->getOperand(1).getReg()))
     Ret = MRI.getVRegDef(Ret->getOperand(1).getReg());
   return Ret;
@@ -324,7 +324,7 @@
 
     // Determine InstType from register bank of phys register that is
     // 'isDefUse ? def : use' of this copy.
-    if (AdjMI->getOpcode() == TargetOpcode::COPY) {
+    if (AdjMI->isCopy()) {
       setTypesAccordingToPhysicalRegister(MI, AdjMI, isDefUse ? 0 : 1);
       return true;
     }
diff --git a/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp b/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
--- a/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -153,6 +153,7 @@
       MBB.erase(I);
     return false;
   case TargetOpcode::COPY:
+  case TargetOpcode::TCOPY:
     if (!expandCopy(MBB, I))
       return false;
     break;
diff --git a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
--- a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
@@ -174,7 +174,8 @@
     return true;
   }
   case NVPTX::nvvm_move_i64:
-  case TargetOpcode::COPY: {
+  case TargetOpcode::COPY:
+  case TargetOpcode::TCOPY: {
     bool Res = findIndexForHandle(TexHandleDef.getOperand(1), MF, Idx);
     if (Res) {
       InstrsToRemove.insert(&TexHandleDef);
diff --git a/llvm/lib/Target/X86/X86DomainReassignment.cpp b/llvm/lib/Target/X86/X86DomainReassignment.cpp
--- a/llvm/lib/Target/X86/X86DomainReassignment.cpp
+++ b/llvm/lib/Target/X86/X86DomainReassignment.cpp
@@ -235,7 +235,7 @@
 
   double getExtraCost(const MachineInstr *MI,
                       MachineRegisterInfo *MRI) const override {
-    assert(MI->getOpcode() == TargetOpcode::COPY && "Expected a COPY");
+    assert(MI->isCopy() && "Expected a COPY");
 
     for (auto &MO : MI->operands()) {
       // Physical registers will not be converted. Assume that converting the
diff --git a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
--- a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
+++ b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
@@ -384,8 +384,7 @@
   ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
   for (MachineBasicBlock *MBB : RPOT)
     for (MachineInstr &MI : *MBB)
-      if (MI.getOpcode() == TargetOpcode::COPY &&
-          MI.getOperand(0).getReg() == X86::EFLAGS)
+      if (MI.isCopy() && MI.getOperand(0).getReg() == X86::EFLAGS)
         Copies.push_back(&MI);
 
   for (MachineInstr *CopyI : Copies) {
@@ -395,7 +394,7 @@
     assert(VOp.isReg() &&
            "The input to the copy for EFLAGS should always be a register!");
     MachineInstr &CopyDefI = *MRI->getVRegDef(VOp.getReg());
-    if (CopyDefI.getOpcode() != TargetOpcode::COPY) {
+    if (!CopyDefI.isCopy()) {
       // FIXME: The big likely candidate here are PHI nodes. We could in theory
       // handle PHI nodes, but it gets really, really hard. Insanely hard. Hard
       // enough that it is probably better to change every other part of LLVM
@@ -624,7 +623,7 @@
           rewriteFCMov(*TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
         } else if (X86::getCondFromSETCC(MI) != X86::COND_INVALID) {
           rewriteSetCC(*TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
-        } else if (MI.getOpcode() == TargetOpcode::COPY) {
+        } else if (MI.isCopy()) {
           rewriteCopy(MI, *FlagUse, CopyDefI);
         } else {
           // We assume all other instructions that use flags also def them.
@@ -716,9 +715,8 @@
 #ifndef NDEBUG
   for (MachineBasicBlock &MBB : MF)
     for (MachineInstr &MI : MBB)
-      if (MI.getOpcode() == TargetOpcode::COPY &&
-          (MI.getOperand(0).getReg() == X86::EFLAGS ||
-           MI.getOperand(1).getReg() == X86::EFLAGS)) {
+      if (MI.isCopy() && (MI.getOperand(0).getReg() == X86::EFLAGS ||
+                          MI.getOperand(1).getReg() == X86::EFLAGS)) {
         LLVM_DEBUG(dbgs() << "ERROR: Found a COPY involving EFLAGS: ";
                    MI.dump());
         llvm_unreachable("Unlowered EFLAGS copy!");
diff --git a/llvm/lib/Target/X86/X86FloatingPoint.cpp b/llvm/lib/Target/X86/X86FloatingPoint.cpp
--- a/llvm/lib/Target/X86/X86FloatingPoint.cpp
+++ b/llvm/lib/Target/X86/X86FloatingPoint.cpp
@@ -1458,7 +1458,8 @@
 
   switch (MI.getOpcode()) {
   default: llvm_unreachable("Unknown SpecialFP instruction!");
-  case TargetOpcode::COPY: {
+  case TargetOpcode::COPY:
+  case TargetOpcode::TCOPY: {
     // We handle three kinds of copies: FP <- FP, FP <- ST, and ST <- FP.
     const MachineOperand &MO1 = MI.getOperand(1);
     const MachineOperand &MO0 = MI.getOperand(0);
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -144,6 +144,7 @@
     // Some target-independent operations that trivially lower to data-invariant
     // instructions.
   case TargetOpcode::COPY:
+  case TargetOpcode::TCOPY:
   case TargetOpcode::INSERT_SUBREG:
   case TargetOpcode::SUBREG_TO_REG:
     return true;
diff --git a/llvm/test/CodeGen/AArch64/callbr-asm-label.ll b/llvm/test/CodeGen/AArch64/callbr-asm-label.ll
--- a/llvm/test/CodeGen/AArch64/callbr-asm-label.ll
+++ b/llvm/test/CodeGen/AArch64/callbr-asm-label.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-linux-gnu -O0 | FileCheck %s --check-prefix=CHECK-O0
 
 @X = common local_unnamed_addr global i32 0, align 4
 
@@ -9,6 +10,13 @@
 ; CHECK-LABEL: .LBB0_1: // %cleanup
 ; CHECK-LABEL: .Ltmp0:
 ; CHECK-LABEL: .LBB0_2: // %indirect
+
+; CHECK-O0-LABEL:  test1:
+; CHECK-O0:          .word b
+; CHECK-O0-NEXT:     .word .Ltmp1
+; CHECK-O0-LABEL: .Ltmp1:
+; CHECK-O0-LABEL: .LBB0_1: // %indirect
+; CHECK-O0-LABEL: .LBB0_2: // %cleanup
 entry:
   callbr void asm sideeffect "1:\0A\09.word b, ${0:l}\0A\09", "X"(i8* blockaddress(@test1, %indirect))
           to label %cleanup [label %indirect]
@@ -22,7 +30,8 @@
 }
 
 define void @test2() {
-; CHECK-LABEL: test2:
+; CHECK-LABEL:    test2:
+; CHECK-O0-LABEL: test2:
 entry:
   %0 = load i32, i32* @X, align 4
   %and = and i32 %0, 1
@@ -34,6 +43,11 @@
 ; CHECK-NEXT:  .word .Ltmp2
 ; CHECK-LABEL: .Ltmp2:
 ; CHECK-NEXT:  .LBB1_3: // %if.end6
+
+; CHECK-O0:       .word b
+; CHECK-O0-NEXT:  .word .Ltmp3
+; CHECK-O0-LABEL: .Ltmp3:
+; CHECK-O0-NEXT:  .LBB1_3: // %if.end6
   callbr void asm sideeffect "1:\0A\09.word b, ${0:l}\0A\09", "X"(i8* blockaddress(@test2, %if.end6))
           to label %if.then4 [label %if.end6]
 
@@ -50,6 +64,9 @@
 if.then9:
 ; CHECK-LABEL: .Ltmp4:
 ; CHECK-NEXT:  .LBB1_5: // %l_yes
+
+; CHECK-O0-LABEL:  .Ltmp5:
+; CHECK-O0-NEXT:   .LBB1_6: // %l_yes
   callbr void asm sideeffect "", "X"(i8* blockaddress(@test2, %l_yes))
           to label %if.end10 [label %l_yes]
 
diff --git a/llvm/test/CodeGen/SystemZ/asm-20.ll b/llvm/test/CodeGen/SystemZ/asm-20.ll
--- a/llvm/test/CodeGen/SystemZ/asm-20.ll
+++ b/llvm/test/CodeGen/SystemZ/asm-20.ll
@@ -1,6 +1,7 @@
 ; Test that asm goto can be compiled.
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -O0
 
 define i32 @c() {
 entry:
diff --git a/llvm/test/CodeGen/X86/callbr-asm-label-addr.ll b/llvm/test/CodeGen/X86/callbr-asm-label-addr.ll
--- a/llvm/test/CodeGen/X86/callbr-asm-label-addr.ll
+++ b/llvm/test/CodeGen/X86/callbr-asm-label-addr.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -O0 | FileCheck %s --check-prefix=CHECK-O0
 
 define i32 @test1(i32 %x) {
 ; CHECK-LABEL: test1:
@@ -9,6 +10,16 @@
 ; CHECK-NEXT:    callq foo
 ; CHECK-LABEL: .Ltmp0:
 ; CHECK-NEXT:  # %bb.2: # %baz
+
+; CHECK-O0-LABEL: test1:
+; CHECK-O0:         .quad .Ltmp0
+; CHECK-O0-NEXT:    .quad .Ltmp1
+; CHECK-O0-LABEL: .Ltmp1:
+; CHECK-O0-LABEL: .LBB0_2: # %bar
+; CHECK-O0-NEXT:    movl
+; CHECK-O0-NEXT:    callq foo
+; CHECK-O0-LABEL: .Ltmp0:
+; CHECK-O0-NEXT:  # %bb.3: # %baz
 entry:
   callbr void asm sideeffect ".quad ${0:l}\0A\09.quad ${1:l}", "i,X,~{dirflag},~{fpsr},~{flags}"(i8* blockaddress(@test1, %baz), i8* blockaddress(@test1, %bar))
           to label %asm.fallthrough [label %bar]
diff --git a/llvm/test/CodeGen/X86/callbr-asm-outputs-tcopy-spilling.ll b/llvm/test/CodeGen/X86/callbr-asm-outputs-tcopy-spilling.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/callbr-asm-outputs-tcopy-spilling.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -O0 | FileCheck %s --check-prefix=CHECK
+
+%struct.kernel_rseq = type { i32, i32, i8*, i32, [12 x i8] }
+
+@__rseq_abi = external thread_local global %struct.kernel_rseq, align 32
+
+define i32 @test1(i8* %percpu_data, i64 %lock_value) #0 {
+; CHECK-LABEL: test1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset %rbp, -16
+; CHECK-NEXT:    movq %rsp, %rbp
+; CHECK-NEXT:    .cfi_def_cfa_register %rbp
+; CHECK-NEXT:    movq %rdi, -16(%rbp)
+; CHECK-NEXT:    movq %rsi, -24(%rbp)
+; CHECK-NEXT:    movq -16(%rbp), %rax
+; CHECK-NEXT:    movq -24(%rbp), %rcx
+; CHECK-NEXT:    movq __rseq_abi@{{.*}}(%rip), %rdx
+; CHECK-NEXT:    movq %fs:0, %rsi
+; CHECK-NEXT:    leaq 8(%rsi,%rdx), %rdi
+; CHECK-NEXT:    leaq 4(%rsi,%rdx), %rdx
+; CHECK-NEXT:    #APP
+; CHECK-NEXT: .Ltmp1:
+; CHECK-NEXT:    leaq __rseq_cs_RseqFunction_PerCpuTryLock_0(%rip), %rsi
+; CHECK-NEXT:    movq %rsi, (%rdi)
+; CHECK-NEXT: .Ltmp2:
+; CHECK-NEXT:    movl (%rdx), %r8d
+; CHECK-NEXT:    movl %r8d, %esi
+; CHECK-NEXT:    shlq $12, %rsi
+; CHECK-NEXT:    addq %rax, %rsi
+; CHECK-NEXT:    cmpq $0, (%rsi)
+; CHECK-NEXT:    jne .Ltmp0
+; CHECK-NEXT:    movq %rcx, (%rsi)
+; CHECK-NEXT: .Ltmp3:
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    movl %r8d, -40(%rbp) # 4-byte Spill
+; CHECK-NEXT:    movq %rsi, -48(%rbp) # 8-byte Spill
+; CHECK-NEXT: # %bb.4: # %entry
+; CHECK-NEXT:    jmp .LBB0_1
+entry:
+  %retval = alloca i32, align 4
+  %percpu_data.addr = alloca i8*, align 8
+  %lock_value.addr = alloca i64, align 8
+  %scratch = alloca i64, align 8
+  %cpu = alloca i32, align 4
+  store i8* %percpu_data, i8** %percpu_data.addr, align 8
+  store i64 %lock_value, i64* %lock_value.addr, align 8
+  %0 = load i8*, i8** %percpu_data.addr, align 8
+  %1 = load i64, i64* %lock_value.addr, align 8
+  %2 = callbr { i64, i32 } asm "3:\0Alea __rseq_cs_RseqFunction_PerCpuTryLock_${:uid}(%rip), $0\0Amov $0, ($2)\0A4:\0Amov ($3), $1\0Amov $1, ${0:k}\0Ashl $5, $0\0Aadd $6, $0\0Acmpq $$0, ($0)\0Ajne ${8:l}\0Amov $7, ($0)\0A5:", "=&r,=&r,r,r,n,n,r,r,X,~{cc},~{memory},~{dirflag},~{fpsr},~{flags}"(i8** getelementptr inbounds (%struct.kernel_rseq, %struct.kernel_rseq* @__rseq_abi, i32 0, i32 2), i32* getelementptr inbounds (%struct.kernel_rseq, %struct.kernel_rseq* @__rseq_abi, i32 0, i32 1), i32 1392848979, i32 12, i8* %0, i64 %1, i8* blockaddress(@test1, %fail_contended)) #1
+          to label %asm.fallthrough [label %fail_contended]
+
+asm.fallthrough:                                  ; preds = %entry
+  %asmresult = extractvalue { i64, i32 } %2, 0
+  %asmresult1 = extractvalue { i64, i32 } %2, 1
+  store i64 %asmresult, i64* %scratch, align 8
+  store i32 %asmresult1, i32* %cpu, align 4
+  %3 = load i32, i32* %cpu, align 4
+  store i32 %3, i32* %retval, align 4
+  br label %return
+
+fail_contended:                                   ; preds = %entry
+  store i32 -1, i32* %retval, align 4
+  br label %return
+
+return:                                           ; preds = %fail_contended, %asm.fallthrough
+  %4 = load i32, i32* %retval, align 4
+  ret i32 %4
+}
+
+attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/X86/callbr-asm-outputs.ll b/llvm/test/CodeGen/X86/callbr-asm-outputs.ll
--- a/llvm/test/CodeGen/X86/callbr-asm-outputs.ll
+++ b/llvm/test/CodeGen/X86/callbr-asm-outputs.ll
@@ -51,8 +51,8 @@
 ; CHECK-NEXT:    jne .Ltmp1
 ; CHECK-NEXT:    #NO_APP
 ; CHECK-NEXT:  .LBB1_2: # %if.then
+; CHECK-NEXT:    addl %esi, %edi
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    addl %esi, %eax
 ; CHECK-NEXT:  .Ltmp2: # Block address taken
 ; CHECK-NEXT:  .LBB1_6: # %return
 ; CHECK-NEXT:    popl %esi