Index: llvm/trunk/include/llvm/CodeGen/TailDuplicator.h
===================================================================
--- llvm/trunk/include/llvm/CodeGen/TailDuplicator.h
+++ llvm/trunk/include/llvm/CodeGen/TailDuplicator.h
@@ -56,16 +56,18 @@
                               MachineBasicBlock *MBB);
 
 private:
+  typedef TargetInstrInfo::RegSubRegPair RegSubRegPair;
+
   void addSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
                          MachineBasicBlock *BB);
   void processPHI(MachineInstr *MI, MachineBasicBlock *TailBB,
                   MachineBasicBlock *PredBB,
-                  DenseMap<unsigned, unsigned> &LocalVRMap,
-                  SmallVectorImpl<std::pair<unsigned, unsigned>> &Copies,
+                  DenseMap<unsigned, RegSubRegPair> &LocalVRMap,
+                  SmallVectorImpl<std::pair<unsigned, RegSubRegPair>> &Copies,
                   const DenseSet<unsigned> &UsedByPhi, bool Remove);
   void duplicateInstruction(MachineInstr *MI, MachineBasicBlock *TailBB,
                             MachineBasicBlock *PredBB, MachineFunction &MF,
-                            DenseMap<unsigned, unsigned> &LocalVRMap,
+                            DenseMap<unsigned, RegSubRegPair> &LocalVRMap,
                             const DenseSet<unsigned> &UsedByPhi);
   void updateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
                             SmallVectorImpl<MachineBasicBlock *> &TDBBs,
@@ -79,6 +81,9 @@
                      MachineBasicBlock *TailBB,
                      SmallVectorImpl<MachineBasicBlock *> &TDBBs,
                      SmallVectorImpl<MachineInstr *> &Copies);
+  void appendCopies(MachineBasicBlock *MBB,
+                 SmallVectorImpl<std::pair<unsigned,RegSubRegPair>> &CopyInfos,
+                 SmallVectorImpl<MachineInstr *> &Copies);
 
   void removeDeadBlock(MachineBasicBlock *MBB);
 };
Index: llvm/trunk/lib/CodeGen/TailDuplicator.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/TailDuplicator.cpp
+++ llvm/trunk/lib/CodeGen/TailDuplicator.cpp
@@ -303,20 +303,21 @@
 /// source register that's contributed by PredBB and update SSA update map.
 void TailDuplicator::processPHI(
     MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB,
-    DenseMap<unsigned, unsigned> &LocalVRMap,
-    SmallVectorImpl<std::pair<unsigned, unsigned>> &Copies,
+    DenseMap<unsigned, RegSubRegPair> &LocalVRMap,
+    SmallVectorImpl<std::pair<unsigned, RegSubRegPair>> &Copies,
     const DenseSet<unsigned> &RegsUsedByPhi, bool Remove) {
   unsigned DefReg = MI->getOperand(0).getReg();
   unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB);
   assert(SrcOpIdx && "Unable to find matching PHI source?");
   unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg();
+  unsigned SrcSubReg = MI->getOperand(SrcOpIdx).getSubReg();
   const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
-  LocalVRMap.insert(std::make_pair(DefReg, SrcReg));
+  LocalVRMap.insert(std::make_pair(DefReg, RegSubRegPair(SrcReg, SrcSubReg)));
 
   // Insert a copy from source to the end of the block. The def register is the
   // available value liveout of the block.
   unsigned NewDef = MRI->createVirtualRegister(RC);
-  Copies.push_back(std::make_pair(NewDef, SrcReg));
+  Copies.push_back(std::make_pair(NewDef, RegSubRegPair(SrcReg, SrcSubReg)));
   if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg))
     addSSAUpdateEntry(DefReg, NewDef, PredBB);
 
@@ -334,7 +335,8 @@
 /// the source operands due to earlier PHI translation.
 void TailDuplicator::duplicateInstruction(
     MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB,
-    MachineFunction &MF, DenseMap<unsigned, unsigned> &LocalVRMap,
+    MachineFunction &MF,
+    DenseMap<unsigned, RegSubRegPair> &LocalVRMap,
     const DenseSet<unsigned> &UsedByPhi) {
   MachineInstr *NewMI = TII->duplicate(MI, MF);
   if (PreRegAlloc) {
@@ -349,17 +351,63 @@
         const TargetRegisterClass *RC = MRI->getRegClass(Reg);
         unsigned NewReg = MRI->createVirtualRegister(RC);
         MO.setReg(NewReg);
-        LocalVRMap.insert(std::make_pair(Reg, NewReg));
+        LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0)));
         if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg))
           addSSAUpdateEntry(Reg, NewReg, PredBB);
       } else {
-        DenseMap<unsigned, unsigned>::iterator VI = LocalVRMap.find(Reg);
+        auto VI = LocalVRMap.find(Reg);
         if (VI != LocalVRMap.end()) {
-          MO.setReg(VI->second);
+          // Need to make sure that the register class of the mapped register
+          // will satisfy the constraints of the class of the register being
+          // replaced.
+          auto *OrigRC = MRI->getRegClass(Reg);
+          auto *MappedRC = MRI->getRegClass(VI->second.Reg);
+          const TargetRegisterClass *ConstrRC;
+          if (VI->second.SubReg != 0) {
+            ConstrRC = TRI->getMatchingSuperRegClass(MappedRC, OrigRC,
+                                                     VI->second.SubReg);
+            if (ConstrRC) {
+              // The actual constraining (as in "find appropriate new class")
+              // is done by getMatchingSuperRegClass, so now we only need to
+              // change the class of the mapped register.
+              MRI->setRegClass(VI->second.Reg, ConstrRC);
+            }
+          } else {
+            // For mapped registers that do not have sub-registers, simply
+            // restrict their class to match the original one.
+            ConstrRC = MRI->constrainRegClass(VI->second.Reg, OrigRC);
+          }
+
+          if (ConstrRC) {
+            // If the class constraining succeeded, we can simply replace
+            // the old register with the mapped one.
+            MO.setReg(VI->second.Reg);
+            // Reg maps to VI.Reg:VI.SubReg, so a use of Reg:Sub must become
+            // VI.Reg:VI.SubReg:Sub -- the mapped index is composed first.
+            MO.setSubReg(TRI->composeSubRegIndices(VI->second.SubReg,
+                                                   MO.getSubReg()));
+          } else {
+            // Direct replacement is not possible because the register class
+            // constraints could not be met, so an explicit COPY is needed.
+            // Create one and record it in LocalVRMap so later uses reuse it.
+            auto *NewRC = MI->getRegClassConstraint(i, TII, TRI);
+            if (NewRC == nullptr)
+              NewRC = OrigRC;
+            unsigned NewReg = MRI->createVirtualRegister(NewRC);
+            BuildMI(*PredBB, MI, MI->getDebugLoc(),
+                    TII->get(TargetOpcode::COPY), NewReg)
+                .addReg(VI->second.Reg, 0, VI->second.SubReg);
+            LocalVRMap.erase(VI);
+            LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0)));
+            MO.setReg(NewReg);
+            // The composed VI.Reg:VI.SubReg is replaced with NewReg, which
+            // is equivalent to the whole register Reg. Hence, Reg:subreg
+            // is same as NewReg:subreg, so keep the sub-register index
+            // unchanged.
+          }
           // Clear any kill flags from this operand.  The new register could
           // have uses after this one, so kills are not valid here.
           MO.setIsKill(false);
-          MRI->constrainRegClass(VI->second, MRI->getRegClass(Reg));
         }
       }
     }
@@ -734,8 +782,8 @@
     }
 
     // Clone the contents of TailBB into PredBB.
-    DenseMap<unsigned, unsigned> LocalVRMap;
-    SmallVector<std::pair<unsigned, unsigned>, 4> CopyInfos;
+    DenseMap<unsigned, RegSubRegPair> LocalVRMap;
+    SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos;
     // Use instr_iterator here to properly handle bundles, e.g.
     // ARM Thumb2 IT block.
     MachineBasicBlock::instr_iterator I = TailBB->instr_begin();
@@ -752,12 +800,7 @@
         duplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap, UsedByPhi);
       }
     }
-    MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator();
-    for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
-      Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(),
-                               TII->get(TargetOpcode::COPY), CopyInfos[i].first)
-                           .addReg(CopyInfos[i].second));
-    }
+    appendCopies(PredBB, CopyInfos, Copies);
 
     // Simplify
     TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true);
@@ -792,8 +835,8 @@
     DEBUG(dbgs() << "\nMerging into block: " << *PrevBB
                  << "From MBB: " << *TailBB);
     if (PreRegAlloc) {
-      DenseMap<unsigned, unsigned> LocalVRMap;
-      SmallVector<std::pair<unsigned, unsigned>, 4> CopyInfos;
+      DenseMap<unsigned, RegSubRegPair> LocalVRMap;
+      SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos;
       MachineBasicBlock::iterator I = TailBB->begin();
       // Process PHI instructions first.
       while (I != TailBB->end() && I->isPHI()) {
@@ -812,13 +855,7 @@
         duplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi);
         MI->eraseFromParent();
       }
-      MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator();
-      for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
-        Copies.push_back(BuildMI(*PrevBB, Loc, DebugLoc(),
-                                 TII->get(TargetOpcode::COPY),
-                                 CopyInfos[i].first)
-                             .addReg(CopyInfos[i].second));
-      }
+      appendCopies(PrevBB, CopyInfos, Copies);
     } else {
       // No PHIs to worry about, just splice the instructions over.
       PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());
@@ -863,8 +900,8 @@
     if (PredBB->succ_size() != 1)
       continue;
 
-    DenseMap<unsigned, unsigned> LocalVRMap;
-    SmallVector<std::pair<unsigned, unsigned>, 4> CopyInfos;
+    DenseMap<unsigned, RegSubRegPair> LocalVRMap;
+    SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos;
     MachineBasicBlock::iterator I = TailBB->begin();
     // Process PHI instructions first.
     while (I != TailBB->end() && I->isPHI()) {
@@ -873,17 +910,26 @@
       MachineInstr *MI = &*I++;
       processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false);
     }
-    MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator();
-    for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
-      Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(),
-                               TII->get(TargetOpcode::COPY), CopyInfos[i].first)
-                           .addReg(CopyInfos[i].second));
-    }
+    appendCopies(PredBB, CopyInfos, Copies);
   }
 
   return Changed;
 }
 
+/// Emit a COPY into each register recorded in \p CopyInfos, placing it before
+/// the first terminator of \p MBB, and collect the new COPYs in \p Copies.
+void TailDuplicator::appendCopies(MachineBasicBlock *MBB,
+      SmallVectorImpl<std::pair<unsigned,RegSubRegPair>> &CopyInfos,
+      SmallVectorImpl<MachineInstr*> &Copies) {
+  const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
+  MachineBasicBlock::iterator InsertPt = MBB->getFirstTerminator();
+  for (const auto &Info : CopyInfos) {
+    const RegSubRegPair &Src = Info.second;
+    Copies.push_back(BuildMI(*MBB, InsertPt, DebugLoc(), CopyDesc, Info.first)
+                         .addReg(Src.Reg, 0, Src.SubReg));
+  }
+}
+
 /// Remove the specified dead machine basic block from the function, updating
 /// the CFG.
 void TailDuplicator::removeDeadBlock(MachineBasicBlock *MBB) {
Index: llvm/trunk/test/CodeGen/Hexagon/tail-dup-subreg-map.ll
===================================================================
--- llvm/trunk/test/CodeGen/Hexagon/tail-dup-subreg-map.ll
+++ llvm/trunk/test/CodeGen/Hexagon/tail-dup-subreg-map.ll
@@ -0,0 +1,67 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; REQUIRES: asserts
+
+; When tail-duplicating a block with PHI nodes that use subregisters, the
+; subregisters were dropped by the tail duplicator, resulting in invalid
+; COPY instructions being generated.
+
+; CHECK: = extractu(r{{[0-9]+}}, #15, #17)
+
+target triple = "hexagon"
+
+%struct.0 = type { i64, i16 }
+%struct.1 = type { i64, i64 }
+
+declare hidden fastcc void @foo(%struct.0* noalias nocapture, i8 signext, i8 zeroext, i32, i64, i64) unnamed_addr #0
+
+define void @fred(%struct.0* noalias nocapture sret %agg.result, %struct.1* byval nocapture readonly align 8 %a) #1 {
+entry:
+  %0 = load i64, i64* undef, align 8
+  switch i32 undef, label %if.else [
+    i32 32767, label %if.then
+    i32 0, label %if.then7
+  ]
+
+if.then:                                          ; preds = %entry
+  ret void
+
+if.then7:                                         ; preds = %entry
+  br i1 undef, label %if.then.i, label %if.else16.i
+
+if.then.i:                                        ; preds = %if.then7
+  br i1 undef, label %if.then5.i, label %if.else.i
+
+if.then5.i:                                       ; preds = %if.then.i
+  %shl.i21 = shl i64 %0, 0
+  br label %if.end.i
+
+if.else.i:                                        ; preds = %if.then.i
+  %shl12.i = shl i64 %0, undef
+  br label %if.end.i
+
+if.end.i:                                         ; preds = %if.else.i, %if.then5.i
+  %aSig0.0 = phi i64 [ undef, %if.then5.i ], [ %shl12.i, %if.else.i ]
+  %storemerge43.i = phi i64 [ %shl.i21, %if.then5.i ], [ 0, %if.else.i ]
+  %sub15.i = sub nsw i32 -63, undef
+  br label %if.end13
+
+if.else16.i:                                      ; preds = %if.then7
+  br label %if.end13
+
+if.else:                                          ; preds = %entry
+  %or12 = or i64 undef, 281474976710656
+  br label %if.end13
+
+if.end13:                                         ; preds = %if.else, %if.else16.i, %if.end.i
+  %aSig1.1 = phi i64 [ %0, %if.else ], [ %storemerge43.i, %if.end.i ], [ undef, %if.else16.i ]
+  %aSig0.2 = phi i64 [ %or12, %if.else ], [ %aSig0.0, %if.end.i ], [ undef, %if.else16.i ]
+  %aExp.0 = phi i32 [ undef, %if.else ], [ %sub15.i, %if.end.i ], [ undef, %if.else16.i ]
+  %shl2.i = shl i64 %aSig0.2, 15
+  %shr.i = lshr i64 %aSig1.1, 49
+  %or.i = or i64 %shl2.i, %shr.i
+  tail call fastcc void @foo(%struct.0* noalias %agg.result, i8 signext 80, i8 zeroext undef, i32 %aExp.0, i64 %or.i, i64 undef)
+  unreachable
+}
+
+attributes #0 = { norecurse nounwind }
+attributes #1 = { nounwind }