Index: llvm/trunk/include/llvm/CodeGen/TailDuplicator.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/TailDuplicator.h +++ llvm/trunk/include/llvm/CodeGen/TailDuplicator.h @@ -56,16 +56,18 @@ MachineBasicBlock *MBB); private: + typedef TargetInstrInfo::RegSubRegPair RegSubRegPair; + void addSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, MachineBasicBlock *BB); void processPHI(MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, - DenseMap &LocalVRMap, - SmallVectorImpl> &Copies, + DenseMap &LocalVRMap, + SmallVectorImpl> &Copies, const DenseSet &UsedByPhi, bool Remove); void duplicateInstruction(MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, MachineFunction &MF, - DenseMap &LocalVRMap, + DenseMap &LocalVRMap, const DenseSet &UsedByPhi); void updateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, SmallVectorImpl &TDBBs, @@ -79,6 +81,9 @@ MachineBasicBlock *TailBB, SmallVectorImpl &TDBBs, SmallVectorImpl &Copies); + void appendCopies(MachineBasicBlock *MBB, + SmallVectorImpl> &CopyInfos, + SmallVectorImpl &Copies); void removeDeadBlock(MachineBasicBlock *MBB); }; Index: llvm/trunk/lib/CodeGen/TailDuplicator.cpp =================================================================== --- llvm/trunk/lib/CodeGen/TailDuplicator.cpp +++ llvm/trunk/lib/CodeGen/TailDuplicator.cpp @@ -303,20 +303,21 @@ /// source register that's contributed by PredBB and update SSA update map. void TailDuplicator::processPHI( MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, - DenseMap &LocalVRMap, - SmallVectorImpl> &Copies, + DenseMap &LocalVRMap, + SmallVectorImpl> &Copies, const DenseSet &RegsUsedByPhi, bool Remove) { unsigned DefReg = MI->getOperand(0).getReg(); unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB); assert(SrcOpIdx && "Unable to find matching PHI source?"); unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg(); + unsigned SrcSubReg = MI->getOperand(SrcOpIdx).getSubReg(); const TargetRegisterClass *RC = MRI->getRegClass(DefReg); - LocalVRMap.insert(std::make_pair(DefReg, SrcReg)); + LocalVRMap.insert(std::make_pair(DefReg, RegSubRegPair(SrcReg, SrcSubReg))); // Insert a copy from source to the end of the block. The def register is the // available value liveout of the block. unsigned NewDef = MRI->createVirtualRegister(RC); - Copies.push_back(std::make_pair(NewDef, SrcReg)); + Copies.push_back(std::make_pair(NewDef, RegSubRegPair(SrcReg, SrcSubReg))); if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg)) addSSAUpdateEntry(DefReg, NewDef, PredBB); @@ -334,7 +335,8 @@ /// the source operands due to earlier PHI translation. void TailDuplicator::duplicateInstruction( MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, - MachineFunction &MF, DenseMap &LocalVRMap, + MachineFunction &MF, + DenseMap &LocalVRMap, const DenseSet &UsedByPhi) { MachineInstr *NewMI = TII->duplicate(MI, MF); if (PreRegAlloc) { @@ -349,17 +351,63 @@ const TargetRegisterClass *RC = MRI->getRegClass(Reg); unsigned NewReg = MRI->createVirtualRegister(RC); MO.setReg(NewReg); - LocalVRMap.insert(std::make_pair(Reg, NewReg)); + LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0))); if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg)) addSSAUpdateEntry(Reg, NewReg, PredBB); } else { - DenseMap::iterator VI = LocalVRMap.find(Reg); + auto VI = LocalVRMap.find(Reg); if (VI != LocalVRMap.end()) { - MO.setReg(VI->second); + // Need to make sure that the register class of the mapped register + // will satisfy the constraints of the class of the register being + // replaced. + auto *OrigRC = MRI->getRegClass(Reg); + auto *MappedRC = MRI->getRegClass(VI->second.Reg); + const TargetRegisterClass *ConstrRC; + if (VI->second.SubReg != 0) { + ConstrRC = TRI->getMatchingSuperRegClass(MappedRC, OrigRC, + VI->second.SubReg); + if (ConstrRC) { + // The actual constraining (as in "find appropriate new class") + // is done by getMatchingSuperRegClass, so now we only need to + // change the class of the mapped register. + MRI->setRegClass(VI->second.Reg, ConstrRC); + } + } else { + // For mapped registers that do not have sub-registers, simply + // restrict their class to match the original one. + ConstrRC = MRI->constrainRegClass(VI->second.Reg, OrigRC); + } + + if (ConstrRC) { + // If the class constraining succeeded, we can simply replace + // the old register with the mapped one. + MO.setReg(VI->second.Reg); + // We have Reg -> VI.Reg:VI.SubReg, so if Reg is used with a + // sub-register, we need to compose the sub-register indices. + MO.setSubReg(TRI->composeSubRegIndices(MO.getSubReg(), + VI->second.SubReg)); + } else { + // The direct replacement is not possible, due to failing register + // class constraints. An explicit COPY is necessary. Create one + // that can be reused + auto *NewRC = MI->getRegClassConstraint(i, TII, TRI); + if (NewRC == nullptr) + NewRC = OrigRC; + unsigned NewReg = MRI->createVirtualRegister(NewRC); + BuildMI(*PredBB, MI, MI->getDebugLoc(), + TII->get(TargetOpcode::COPY), NewReg) + .addReg(VI->second.Reg, 0, VI->second.SubReg); + LocalVRMap.erase(VI); + LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0))); + MO.setReg(NewReg); + // The composed VI.Reg:VI.SubReg is replaced with NewReg, which + // is equivalent to the whole register Reg. Hence, Reg:subreg + // is same as NewReg:subreg, so keep the sub-register index + // unchanged. + } // Clear any kill flags from this operand. The new register could // have uses after this one, so kills are not valid here. MO.setIsKill(false); - MRI->constrainRegClass(VI->second, MRI->getRegClass(Reg)); } } } @@ -734,8 +782,8 @@ } // Clone the contents of TailBB into PredBB. - DenseMap LocalVRMap; - SmallVector, 4> CopyInfos; + DenseMap LocalVRMap; + SmallVector, 4> CopyInfos; // Use instr_iterator here to properly handle bundles, e.g. // ARM Thumb2 IT block. MachineBasicBlock::instr_iterator I = TailBB->instr_begin(); @@ -752,12 +800,7 @@ duplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap, UsedByPhi); } } - MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); - for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { - Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(), - TII->get(TargetOpcode::COPY), CopyInfos[i].first) - .addReg(CopyInfos[i].second)); - } + appendCopies(PredBB, CopyInfos, Copies); // Simplify TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true); @@ -792,8 +835,8 @@ DEBUG(dbgs() << "\nMerging into block: " << *PrevBB << "From MBB: " << *TailBB); if (PreRegAlloc) { - DenseMap LocalVRMap; - SmallVector, 4> CopyInfos; + DenseMap LocalVRMap; + SmallVector, 4> CopyInfos; MachineBasicBlock::iterator I = TailBB->begin(); // Process PHI instructions first. while (I != TailBB->end() && I->isPHI()) { @@ -812,13 +855,7 @@ duplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi); MI->eraseFromParent(); } - MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator(); - for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { - Copies.push_back(BuildMI(*PrevBB, Loc, DebugLoc(), - TII->get(TargetOpcode::COPY), - CopyInfos[i].first) - .addReg(CopyInfos[i].second)); - } + appendCopies(PrevBB, CopyInfos, Copies); } else { // No PHIs to worry about, just splice the instructions over. PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end()); @@ -863,8 +900,8 @@ if (PredBB->succ_size() != 1) continue; - DenseMap LocalVRMap; - SmallVector, 4> CopyInfos; + DenseMap LocalVRMap; + SmallVector, 4> CopyInfos; MachineBasicBlock::iterator I = TailBB->begin(); // Process PHI instructions first. while (I != TailBB->end() && I->isPHI()) { @@ -873,17 +910,26 @@ MachineInstr *MI = &*I++; processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false); } - MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); - for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { - Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(), - TII->get(TargetOpcode::COPY), CopyInfos[i].first) - .addReg(CopyInfos[i].second)); - } + appendCopies(PredBB, CopyInfos, Copies); } return Changed; } +/// At the end of the block \p MBB generate COPY instructions between registers +/// described by \p CopyInfos. Append resulting instructions to \p Copies. +void TailDuplicator::appendCopies(MachineBasicBlock *MBB, + SmallVectorImpl> &CopyInfos, + SmallVectorImpl &Copies) { + MachineBasicBlock::iterator Loc = MBB->getFirstTerminator(); + const MCInstrDesc &CopyD = TII->get(TargetOpcode::COPY); + for (auto &CI : CopyInfos) { + auto C = BuildMI(*MBB, Loc, DebugLoc(), CopyD, CI.first) + .addReg(CI.second.Reg, 0, CI.second.SubReg); + Copies.push_back(C); + } +} + /// Remove the specified dead machine basic block from the function, updating /// the CFG. void TailDuplicator::removeDeadBlock(MachineBasicBlock *MBB) { Index: llvm/trunk/test/CodeGen/Hexagon/tail-dup-subreg-map.ll =================================================================== --- llvm/trunk/test/CodeGen/Hexagon/tail-dup-subreg-map.ll +++ llvm/trunk/test/CodeGen/Hexagon/tail-dup-subreg-map.ll @@ -0,0 +1,67 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; REQUIRES: asserts + +; When tail-duplicating a block with PHI nodes that use subregisters, the +; subregisters were dropped by the tail duplicator, resulting in invalid +; COPY instructions being generated. + +; CHECK: = extractu(r{{[0-9]+}}, #15, #17) + +target triple = "hexagon" + +%struct.0 = type { i64, i16 } +%struct.1 = type { i64, i64 } + +declare hidden fastcc void @foo(%struct.0* noalias nocapture, i8 signext, i8 zeroext, i32, i64, i64) unnamed_addr #0 + +define void @fred(%struct.0* noalias nocapture sret %agg.result, %struct.1* byval nocapture readonly align 8 %a) #1 { +entry: + %0 = load i64, i64* undef, align 8 + switch i32 undef, label %if.else [ + i32 32767, label %if.then + i32 0, label %if.then7 + ] + +if.then: ; preds = %entry + ret void + +if.then7: ; preds = %entry + br i1 undef, label %if.then.i, label %if.else16.i + +if.then.i: ; preds = %if.then7 + br i1 undef, label %if.then5.i, label %if.else.i + +if.then5.i: ; preds = %if.then.i + %shl.i21 = shl i64 %0, 0 + br label %if.end.i + +if.else.i: ; preds = %if.then.i + %shl12.i = shl i64 %0, undef + br label %if.end.i + +if.end.i: ; preds = %if.else.i, %if.then5.i + %aSig0.0 = phi i64 [ undef, %if.then5.i ], [ %shl12.i, %if.else.i ] + %storemerge43.i = phi i64 [ %shl.i21, %if.then5.i ], [ 0, %if.else.i ] + %sub15.i = sub nsw i32 -63, undef + br label %if.end13 + +if.else16.i: ; preds = %if.then7 + br label %if.end13 + +if.else: ; preds = %entry + %or12 = or i64 undef, 281474976710656 + br label %if.end13 + +if.end13: ; preds = %if.else, %if.else16.i, %if.end.i + %aSig1.1 = phi i64 [ %0, %if.else ], [ %storemerge43.i, %if.end.i ], [ undef, %if.else16.i ] + %aSig0.2 = phi i64 [ %or12, %if.else ], [ %aSig0.0, %if.end.i ], [ undef, %if.else16.i ] + %aExp.0 = phi i32 [ undef, %if.else ], [ %sub15.i, %if.end.i ], [ undef, %if.else16.i ] + %shl2.i = shl i64 %aSig0.2, 15 + %shr.i = lshr i64 %aSig1.1, 49 + %or.i = or i64 %shl2.i, %shr.i + tail call fastcc void @foo(%struct.0* noalias %agg.result, i8 signext 80, i8 zeroext undef, i32 %aExp.0, i64 %or.i, i64 undef) + unreachable +} + +attributes #0 = { norecurse nounwind } +attributes #1 = { nounwind }