Index: lib/CodeGen/RegUsageInfoCollector.cpp =================================================================== --- lib/CodeGen/RegUsageInfoCollector.cpp +++ lib/CodeGen/RegUsageInfoCollector.cpp @@ -56,6 +56,10 @@ bool runOnMachineFunction(MachineFunction &MF) override; + // Call determineCalleeSaves and then also set the bits for subregs and + // fully saved superregs. + static void computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF); + static char ID; }; } // end of anonymous namespace @@ -103,6 +107,9 @@ LLVM_DEBUG(dbgs() << "Clobbered Registers: "); + BitVector SavedRegs; + computeCalleeSavedRegs(SavedRegs, MF); + const BitVector &UsedPhysRegsMask = MRI->getUsedPhysRegsMask(); auto SetRegAsDefined = [&RegMask] (unsigned Reg) { RegMask[Reg / 32] &= ~(1u << Reg % 32); @@ -110,11 +117,15 @@ // Scan all the physical registers. When a register is defined in the current // function set it and all the aliasing registers as defined in the regmask. for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) { + // Don't count registers that are saved and restored. + if (SavedRegs.test(PReg)) + continue; // If a register is defined by an instruction mark it as defined together - // with all it's aliases. + // with all its unsaved aliases. if (!MRI->def_empty(PReg)) { for (MCRegAliasIterator AI(PReg, TRI, true); AI.isValid(); ++AI) - SetRegAsDefined(*AI); + if (!SavedRegs.test(*AI)) + SetRegAsDefined(*AI); continue; } // If a register is in the UsedPhysRegsMask set then mark it as defined. @@ -124,15 +135,7 @@ SetRegAsDefined(PReg); } - if (!TargetFrameLowering::isSafeForNoCSROpt(F)) { - const uint32_t *CallPreservedMask = - TRI->getCallPreservedMask(MF, F.getCallingConv()); - if (CallPreservedMask) { - // Set callee saved register as preserved. 
- for (unsigned i = 0; i < RegMaskSize; ++i) - RegMask[i] = RegMask[i] | CallPreservedMask[i]; - } - } else { + if (TargetFrameLowering::isSafeForNoCSROpt(F)) { ++NumCSROpt; LLVM_DEBUG(dbgs() << MF.getName() << " function optimized for not having CSR.\n"); @@ -148,3 +151,48 @@ return false; } + +void RegUsageInfoCollector:: +computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF) { + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + + // Target will return the set of registers that it saves/restores as needed. + SavedRegs.clear(); + TFI->determineCalleeSaves(MF, SavedRegs); + + // Insert subregs. + const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF); + for (unsigned i = 0; CSRegs[i]; ++i) { + unsigned Reg = CSRegs[i]; + if (SavedRegs.test(Reg)) + for (MCSubRegIterator SR(Reg, TRI, false); SR.isValid(); ++SR) + SavedRegs.set(*SR); + } + + // Insert any register fully saved via subregisters. + for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) { + if (SavedRegs.test(PReg)) + continue; + + // Check if PReg is fully covered by its subregs. + bool CoveredBySubRegs = false; + for (const TargetRegisterClass *RC : TRI->regclasses()) + if (RC->CoveredBySubRegs && RC->contains(PReg)) { + CoveredBySubRegs = true; + break; + } + if (!CoveredBySubRegs) + continue; + + // Add PReg to SavedRegs if all subregs are saved. + bool AllSubRegsSaved = true; + for (MCSubRegIterator SR(PReg, TRI, false); SR.isValid(); ++SR) + if (!SavedRegs.test(*SR)) { + AllSubRegsSaved = false; + break; + } + if (AllSubRegsSaved) + SavedRegs.set(PReg); + } +} Index: test/CodeGen/SystemZ/ipra-04.ll =================================================================== --- /dev/null +++ test/CodeGen/SystemZ/ipra-04.ll @@ -0,0 +1,37 @@ +; Test that the updated regmask on the call to @fun1 preserves %r14 and +; %r15. 
@fun1 will save and restore these registers since it contains a call. +; +; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -enable-ipra -print-regmask-num-regs=-1 \ +; RUN: -debug-only=ip-regalloc 2>&1 < %s | FileCheck --check-prefix=DBG %s +; +; DBG: fun1 function optimized for not having CSR +; DBG: Call Instruction After Register Usage Info Propagation : CallBRASL @fun1{{.*}} $r14d $r15d + +%0 = type { [3 x i64] } + +; Function Attrs: norecurse nounwind +declare dso_local fastcc signext i32 @foo(i16*, i32 signext) unnamed_addr + +; Function Attrs: norecurse nounwind +define internal fastcc void @fun1(i16*, i16* nocapture) unnamed_addr #0 { + %3 = load i16, i16* undef, align 2 + %4 = shl i16 %3, 4 + %5 = tail call fastcc signext i32 @foo(i16* nonnull %0, i32 signext 5) + %6 = or i16 0, %4 + %7 = or i16 %6, 0 + store i16 %7, i16* undef, align 2 + %8 = getelementptr inbounds i16, i16* %0, i64 5 + %9 = load i16, i16* %8, align 2 + store i16 %9, i16* %1, align 2 + ret void +} + +; Function Attrs: nounwind +define fastcc void @fun0(i8* nocapture readonly, i16* nocapture, i32 signext) unnamed_addr { + %a = alloca i8, i64 undef + call fastcc void @fun1(i16* nonnull undef, i16* %1) + ret void +} + +attributes #0 = { norecurse nounwind "no-frame-pointer-elim"="false" } +