Index: include/llvm/CodeGen/MachineRegisterInfo.h =================================================================== --- include/llvm/CodeGen/MachineRegisterInfo.h +++ include/llvm/CodeGen/MachineRegisterInfo.h @@ -62,6 +62,16 @@ VirtReg2IndexFunctor> VRegInfo; + /// The flag is true upon \p UpdatedCSRs initialization + /// and false otherwise. + bool IsUpdatedCSRsInitizialied; + + /// Contains the updated callee saved register list. + /// As opposed to the static list defined in register info, + /// all registers that were disabled (in CalleeSaveDisableRegs) + /// are removed from the list. + SmallVector UpdatedCSRs; + /// RegAllocHints - This vector records register allocation hints for virtual /// registers. For each virtual register, it keeps a register and hint type /// pair making up the allocation hint. Hint type is target specific except @@ -196,6 +206,16 @@ // Register Info //===--------------------------------------------------------------------===// + /// Disables the register from the list of CSRs. + /// I.e. the register will not appear as part of the CSR mask. + /// \see UpdatedCalleeSavedRegs. + void disableCalleeSavedRegister(unsigned Reg); + + /// Returns list of callee saved registers. + /// The function returns the updated CSR list (after taking into account + /// registers that are disabled from the CSR list). + const MCPhysReg *getCalleeSavedRegs() const; + // Strictly for use by MachineInstr.cpp. void addRegOperandToUseList(MachineOperand *MO); Index: include/llvm/CodeGen/RegisterClassInfo.h =================================================================== --- include/llvm/CodeGen/RegisterClassInfo.h +++ include/llvm/CodeGen/RegisterClassInfo.h @@ -53,10 +53,11 @@ // Callee saved registers of last MF. Assumed to be valid until the next // runOnFunction() call. - const MCPhysReg *CalleeSaved; + // Used only to determine if an update was made to CalleeSavedAliases. 
+ const MCPhysReg *CalleeSavedRegs = nullptr; - // Map register number to CalleeSaved index + 1; - SmallVector CSRNum; + // Map register alias to the callee saved Register. + SmallVector CalleeSavedAliases; // Reserved registers in the current MF. BitVector Reserved; @@ -105,11 +106,11 @@ } /// getLastCalleeSavedAlias - Returns the last callee saved register that - /// overlaps PhysReg, or 0 if Reg doesn't overlap a CSR. + /// overlaps PhysReg, or 0 if PhysReg doesn't overlap a CSR. unsigned getLastCalleeSavedAlias(unsigned PhysReg) const { assert(TargetRegisterInfo::isPhysicalRegister(PhysReg)); - if (unsigned N = CSRNum[PhysReg]) - return CalleeSaved[N-1]; + if (PhysReg < CalleeSavedAliases.size()) + return CalleeSavedAliases[PhysReg]; return 0; } Index: include/llvm/Target/TargetRegisterInfo.h =================================================================== --- include/llvm/Target/TargetRegisterInfo.h +++ include/llvm/Target/TargetRegisterInfo.h @@ -426,7 +426,8 @@ /// this target. The register should be in the order of desired callee-save /// stack frame offset. The first register is closest to the incoming stack /// pointer if stack grows down, and vice versa. - /// + /// Note: In most cases you will want to use the getCalleeSavedRegs + /// function that is implemented in MachineRegisterInfo instead. virtual const MCPhysReg* getCalleeSavedRegs(const MachineFunction *MF) const = 0; Index: lib/CodeGen/AggressiveAntiDepBreaker.cpp =================================================================== --- lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -163,7 +163,8 @@ // callee-saved register that is not saved in the prolog. 
const MachineFrameInfo &MFI = MF.getFrameInfo(); BitVector Pristine = MFI.getPristineRegs(MF); - for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) { + for (const MCPhysReg *I = MF.getRegInfo().getCalleeSavedRegs(); *I; + ++I) { unsigned Reg = *I; if (!IsReturnBlock && !Pristine.test(Reg)) continue; for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { Index: lib/CodeGen/CriticalAntiDepBreaker.cpp =================================================================== --- lib/CodeGen/CriticalAntiDepBreaker.cpp +++ lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -71,7 +71,8 @@ // callee-saved register that is not saved in the prolog. const MachineFrameInfo &MFI = MF.getFrameInfo(); BitVector Pristine = MFI.getPristineRegs(MF); - for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) { + for (const MCPhysReg *I = MF.getRegInfo().getCalleeSavedRegs(); *I; + ++I) { if (!IsReturnBlock && !Pristine.test(*I)) continue; for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) { unsigned Reg = *AI; Index: lib/CodeGen/LivePhysRegs.cpp =================================================================== --- lib/CodeGen/LivePhysRegs.cpp +++ lib/CodeGen/LivePhysRegs.cpp @@ -161,7 +161,9 @@ static void addPristines(LivePhysRegs &LiveRegs, const MachineFunction &MF, const MachineFrameInfo &MFI, const TargetRegisterInfo &TRI) { - for (const MCPhysReg *CSR = TRI.getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR) + const MachineRegisterInfo &MRI = MF.getRegInfo(); + for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR; + ++CSR) LiveRegs.addReg(*CSR); for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) LiveRegs.removeReg(Info.getReg()); @@ -180,7 +182,8 @@ if (MBB.isReturnBlock()) { // The return block has no successors whose live-ins we could merge // below. So instead we add the callee saved registers manually. 
- for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) + const MachineRegisterInfo &MRI = MF.getRegInfo(); + for (const MCPhysReg *I = MRI.getCalleeSavedRegs(); *I; ++I) addReg(*I); } else { addPristines(*this, MF, MFI, *TRI); Index: lib/CodeGen/MachineFunction.cpp =================================================================== --- lib/CodeGen/MachineFunction.cpp +++ lib/CodeGen/MachineFunction.cpp @@ -859,7 +859,9 @@ if (!isCalleeSavedInfoValid()) return BV; - for (const MCPhysReg *CSR = TRI->getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR) + const MachineRegisterInfo &MRI = MF.getRegInfo(); + for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR; + ++CSR) BV.set(*CSR); // Saved CSRs are not pristine. Index: lib/CodeGen/MachineInstr.cpp =================================================================== --- lib/CodeGen/MachineInstr.cpp +++ lib/CodeGen/MachineInstr.cpp @@ -262,8 +262,30 @@ return getBlockAddress() == Other.getBlockAddress() && getOffset() == Other.getOffset(); case MachineOperand::MO_RegisterMask: - case MachineOperand::MO_RegisterLiveOut: - return getRegMask() == Other.getRegMask(); + case MachineOperand::MO_RegisterLiveOut: { + // Shallow compare of the two RegMasks + const uint32_t *RegMask = getRegMask(); + const uint32_t *OtherRegMask = Other.getRegMask(); + if (RegMask == OtherRegMask) + return true; + + // The RegMask size is derived from the owning function's register info. + // An operand that is not attached to an instruction, block and function + // cannot learn that size, so the masks cannot be deep compared; report + // them as different instead of dereferencing a null parent. + const MachineInstr *MI = getParent(); + const MachineBasicBlock *MBB = MI ? MI->getParent() : nullptr; + const MachineFunction *MF = MBB ? MBB->getParent() : nullptr; + if (!MF) + return false; + + // Calculate the size of the RegMask + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32; + + // Deep compare of the two RegMasks + return std::equal(RegMask, RegMask + RegMaskSize, OtherRegMask); + } case MachineOperand::MO_MCSymbol: return getMCSymbol() == Other.getMCSymbol(); case MachineOperand::MO_CFIIndex: Index: lib/CodeGen/MachineRegisterInfo.cpp =================================================================== --- lib/CodeGen/MachineRegisterInfo.cpp +++ 
lib/CodeGen/MachineRegisterInfo.cpp @@ -28,7 +28,7 @@ void MachineRegisterInfo::Delegate::anchor() {} MachineRegisterInfo::MachineRegisterInfo(MachineFunction *MF) - : MF(MF), TheDelegate(nullptr), + : MF(MF), TheDelegate(nullptr), IsUpdatedCSRsInitizialied(false), TracksSubRegLiveness(MF->getSubtarget().enableSubRegLiveness() && EnableSubRegLiveness) { unsigned NumRegs = getTargetRegisterInfo()->getNumRegs(); @@ -543,3 +543,34 @@ } return false; } + +void MachineRegisterInfo::disableCalleeSavedRegister(unsigned Reg) { + + const TargetRegisterInfo *TRI = getTargetRegisterInfo(); + assert(Reg && (Reg < TRI->getNumRegs()) && + "Trying to disable an invalid register"); + + if (!IsUpdatedCSRsInitizialied) { + const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF); + for (const MCPhysReg *I = CSR; *I; ++I) + UpdatedCSRs.push_back(*I); + + // Zero value represents the end of the register list + // (no more registers should be pushed). + UpdatedCSRs.push_back(0); + + IsUpdatedCSRsInitizialied = true; + } + + // Remove the register (and its aliases from the list). 
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + UpdatedCSRs.erase(std::remove(UpdatedCSRs.begin(), UpdatedCSRs.end(), *AI), + UpdatedCSRs.end()); +} + +const MCPhysReg *MachineRegisterInfo::getCalleeSavedRegs() const { + if (IsUpdatedCSRsInitizialied) + return UpdatedCSRs.data(); + + return getTargetRegisterInfo()->getCalleeSavedRegs(MF); +} Index: lib/CodeGen/PrologEpilogInserter.cpp =================================================================== --- lib/CodeGen/PrologEpilogInserter.cpp +++ lib/CodeGen/PrologEpilogInserter.cpp @@ -336,7 +336,7 @@ return; const TargetRegisterInfo *RegInfo = F.getSubtarget().getRegisterInfo(); - const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&F); + const MCPhysReg *CSRegs = F.getRegInfo().getCalleeSavedRegs(); std::vector CSI; for (unsigned i = 0; CSRegs[i]; ++i) { Index: lib/CodeGen/RegAllocPBQP.cpp =================================================================== --- lib/CodeGen/RegAllocPBQP.cpp +++ lib/CodeGen/RegAllocPBQP.cpp @@ -554,7 +554,7 @@ static bool isACalleeSavedRegister(unsigned reg, const TargetRegisterInfo &TRI, const MachineFunction &MF) { - const MCPhysReg *CSR = TRI.getCalleeSavedRegs(&MF); + const MCPhysReg *CSR = MF.getRegInfo().getCalleeSavedRegs(); for (unsigned i = 0; CSR[i] != 0; ++i) if (TRI.regsOverlap(reg, CSR[i])) return true; Index: lib/CodeGen/RegisterClassInfo.cpp =================================================================== --- lib/CodeGen/RegisterClassInfo.cpp +++ lib/CodeGen/RegisterClassInfo.cpp @@ -20,6 +20,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetFrameLowering.h" using namespace llvm; @@ -29,8 +30,7 @@ StressRA("stress-regalloc", cl::Hidden, cl::init(0), cl::value_desc("N"), cl::desc("Limit all regclasses to N registers")); -RegisterClassInfo::RegisterClassInfo() - : Tag(0), MF(nullptr), TRI(nullptr), CalleeSaved(nullptr) {} 
+RegisterClassInfo::RegisterClassInfo() : Tag(0), MF(nullptr), TRI(nullptr) {} void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { bool Update = false; @@ -48,18 +48,23 @@ // Does this MF have different CSRs? assert(TRI && "no register info set"); - const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF); - if (Update || CSR != CalleeSaved) { - // Build a CSRNum map. Every CSR alias gets an entry pointing to the last + + // Get the callee saved registers. + const MCPhysReg *CSR = MF->getRegInfo().getCalleeSavedRegs(); + if (Update || CSR != CalleeSavedRegs) { + // Build a CSRAlias map. Every CSR alias saves the last // overlapping CSR. - CSRNum.clear(); - CSRNum.resize(TRI->getNumRegs(), 0); - for (unsigned N = 0; unsigned Reg = CSR[N]; ++N) - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - CSRNum[*AI] = N + 1; // 0 means no CSR, 1 means CalleeSaved[0], ... + // Reset every entry to 0 first (assign, not resize): resize() leaves + // existing elements untouched, so aliases of registers that are no longer + // callee saved would otherwise survive from the previous CSR list. + CalleeSavedAliases.assign(TRI->getNumRegs(), 0); + for (const MCPhysReg *I = CSR; *I; ++I) + for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) + CalleeSavedAliases[*AI] = *I; + Update = true; } - CalleeSaved = CSR; + CalleeSavedRegs = CSR; // Different reserved registers? const BitVector &RR = MF->getRegInfo().getReservedRegs(); @@ -103,7 +108,7 @@ unsigned Cost = TRI->getCostPerUse(PhysReg); MinCost = std::min(MinCost, Cost); - if (CSRNum[PhysReg]) + if (CalleeSavedAliases[PhysReg]) // PhysReg aliases a CSR, save it for later. CSRAlias.push_back(PhysReg); else { Index: lib/CodeGen/TargetFrameLoweringImpl.cpp =================================================================== --- lib/CodeGen/TargetFrameLoweringImpl.cpp +++ lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -73,7 +73,7 @@ return; // Get the callee saved register list... - const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF); + const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs(); // Early exit if there are no callee saved registers. 
if (!CSRegs || CSRegs[0] == 0) Index: lib/Target/X86/X86ISelLowering.h =================================================================== --- lib/Target/X86/X86ISelLowering.h +++ lib/Target/X86/X86ISelLowering.h @@ -1077,7 +1077,8 @@ CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const; + SmallVectorImpl &InVals, + uint32_t *RegMask) const; SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, const SmallVectorImpl &ArgInfo, const SDLoc &dl, SelectionDAG &DAG, Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -2205,6 +2205,11 @@ ++I, ++OutsIndex) { CCValAssign &VA = RVLocs[I]; assert(VA.isRegLoc() && "Can only return in registers!"); + + // Add the register to the CalleeSaveDisableRegs list. + if (CallConv == CallingConv::X86_RegCall) + MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg()); + SDValue ValToCopy = OutVals[OutsIndex]; EVT ValVT = ValToCopy.getValueType(); @@ -2279,6 +2284,10 @@ assert(2 == RegsToPass.size() && "Expecting two registers after Pass64BitArgInRegs"); + + // Add the second register to the CalleeSaveDisableRegs list. + if (CallConv == CallingConv::X86_RegCall) + MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg()); } else { RegsToPass.push_back(std::make_pair(VA.getLocReg(), ValToCopy)); } @@ -2335,6 +2344,10 @@ // RAX/EAX now acts like a return value. RetOps.push_back( DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout()))); + + // Add the returned register to the CalleeSaveDisableRegs list. 
+ if (CallConv == CallingConv::X86_RegCall) + MF.getRegInfo().disableCalleeSavedRegister(RetValReg); } const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); @@ -2514,8 +2527,10 @@ SDValue X86TargetLowering::LowerCallResult( SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, - SelectionDAG &DAG, SmallVectorImpl &InVals) const { + SelectionDAG &DAG, SmallVectorImpl &InVals, + uint32_t *RegMask) const { + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); // Assign locations to each value returned by this call. SmallVector RVLocs; bool Is64Bit = Subtarget.is64Bit(); @@ -2529,6 +2544,14 @@ CCValAssign &VA = RVLocs[I]; EVT CopyVT = VA.getLocVT(); + // In some calling conventions we need to remove the used registers + // from the register mask. + if (RegMask && CallConv == CallingConv::X86_RegCall) { + for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32)); + } + // If this is x86-64, and we disabled SSE, we can't return FP values if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) && ((Is64Bit || Ins[InsIndex].Flags.isInReg()) && !Subtarget.hasSSE1())) { @@ -3172,6 +3195,12 @@ } } + if (CallConv == CallingConv::X86_RegCall) { + const MachineRegisterInfo &MRI = MF.getRegInfo(); + for (const auto &Pair : make_range(MRI.livein_begin(), MRI.livein_end())) + MF.getRegInfo().disableCalleeSavedRegister(Pair.first); + } + return Chain; } @@ -3688,7 +3717,34 @@ Mask = RegInfo->getNoPreservedMask(); } - Ops.push_back(DAG.getRegisterMask(Mask)); + // Define a new register mask from the existing mask. + uint32_t *RegMask = nullptr; + + // In some calling conventions we need to remove the used physical registers + // from the reg mask. + if (CallConv == CallingConv::X86_RegCall) { + /// @todo Need to add support in MIPrinter and MIParser to represent + /// the customed RegMask. 
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); + + // Allocate a new Reg Mask and copy Mask. + RegMask = MF.allocateRegisterMask(TRI->getNumRegs()); + unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32; + memcpy(RegMask, Mask, sizeof(uint32_t) * RegMaskSize); + + // Make sure all sub registers of the argument registers are reset + // in the RegMask. + for (auto const &RegPair : RegsToPass) + for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32)); + + // Create the RegMask Operand according to our updated mask. + Ops.push_back(DAG.getRegisterMask(RegMask)); + } else { + // Create the RegMask Operand according to the static mask. + Ops.push_back(DAG.getRegisterMask(Mask)); + } if (InFlag.getNode()) Ops.push_back(InFlag); @@ -3741,8 +3797,8 @@ // Handle result values, copying them out of physregs into vregs that we // return. - return LowerCallResult(Chain, InFlag, CallConv, isVarArg, - Ins, dl, DAG, InVals); + return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG, + InVals, RegMask); } //===----------------------------------------------------------------------===// @@ -25521,8 +25577,7 @@ // N.B. the order the invoke BBs are processed in doesn't matter here. SmallVector MBBLPads; - const MCPhysReg *SavedRegs = - Subtarget.getRegisterInfo()->getCalleeSavedRegs(MF); + const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs(); for (MachineBasicBlock *MBB : InvokeBBs) { // Remove the landing pad successor from the invoke block and replace it // with the new dispatch block. 
Index: lib/Target/X86/X86MachineFunctionInfo.cpp =================================================================== --- lib/Target/X86/X86MachineFunctionInfo.cpp +++ lib/Target/X86/X86MachineFunctionInfo.cpp @@ -9,6 +9,7 @@ #include "X86MachineFunctionInfo.h" #include "X86RegisterInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -20,11 +21,8 @@ const X86RegisterInfo *RegInfo = static_cast( MF->getSubtarget().getRegisterInfo()); unsigned SlotSize = RegInfo->getSlotSize(); - for (const MCPhysReg *CSR = - RegInfo->X86RegisterInfo::getCalleeSavedRegs(MF); - unsigned Reg = *CSR; - ++CSR) - { + for (const MCPhysReg *CSR = MF->getRegInfo().getCalleeSavedRegs(); + unsigned Reg = *CSR; ++CSR) { if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg)) RestoreBasePointerOffset -= SlotSize; } Index: test/CodeGen/X86/DynamicCalleeSavedRegisters.ll =================================================================== --- test/CodeGen/X86/DynamicCalleeSavedRegisters.ll +++ test/CodeGen/X86/DynamicCalleeSavedRegisters.ll @@ -0,0 +1,60 @@ +; RUN: llc < %s -mtriple=i386-linux-gnu | FileCheck --check-prefix=CHECK %s + +declare x86_regcallcc i32 @callee(i32 %a0, i32 %b0, i32 %c0, i32 %d0, i32 %e0); + +; In RegCall calling convention, ESI and EDI are callee saved registers. +; One might think that the caller could assume that ESI value is the same before +; and after calling the callee. +; However, RegCall also says that a register that was used for +; passing/returning arguments, can be assumed to be modified by the callee. +; In other words, it is no longer a callee saved register. +; In this case we want to see that EDX/ECX values are saved and EDI/ESI are assumed +; to be modified by the callee. +; This is a hipe CC function that doesn't save any register for the caller. +; So we can be sure that there is no other reason to save EDX/ECX. 
+; The caller arguments are expected to be passed (in the following order) +; in registers: ESI, EBP, EAX, EDX and ECX. +define cc 11 i32 @caller(i32 %a0, i32 %b0, i32 %c0, i32 %d0, i32 %e0) nounwind { + %b1 = call x86_regcallcc i32 @callee(i32 %a0, i32 %b0, i32 %c0, i32 %d0, i32 %e0) + %b2 = add i32 %b1, %d0 + %b3 = add i32 %b2, %e0 + ret i32 %b3 +} +; CHECK-LABEL: caller +; CHECK: subl $12, %esp +; CHECK-NEXT: movl %ecx, 8(%esp) +; CHECK-NEXT: movl %edx, %ebx +; CHECK-NEXT: movl %eax, %edx +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movl %ebp, %ecx +; CHECK-NEXT: movl %ebx, %edi +; CHECK-NEXT: movl 8(%esp), %ebp +; CHECK-NEXT: movl %ebp, %esi +; CHECK-NEXT: calll callee +; CHECK-NEXT: leal (%eax,%ebx), %esi +; CHECK-NEXT: addl %ebp, %esi +; CHECK-NEXT: addl $12, %esp +; CHECK-NEXT: retl + +!hipe.literals = !{ !0, !1, !2 } +!0 = !{ !"P_NSP_LIMIT", i32 120 } +!1 = !{ !"X86_LEAF_WORDS", i32 24 } +!2 = !{ !"AMD64_LEAF_WORDS", i32 18 } + +; Make sure that the callee doesn't save parameters that were passed as arguments. +; The caller arguments are expected to be passed (in the following order) +; in registers: EAX, ECX, EDX, EDI and ESI. +; The result will return in EAX, ECX and EDX. 
+define x86_regcallcc {i32, i32, i32} @test_callee(i32 %a0, i32 %b0, i32 %c0, i32 %d0, i32 %e0) nounwind { + %b1 = mul i32 7, %e0 + %b2 = udiv i32 5, %e0 + %b3 = mul i32 7, %d0 + %b4 = insertvalue {i32, i32, i32} undef, i32 %b1, 0 + %b5 = insertvalue {i32, i32, i32} %b4, i32 %b2, 1 + %b6 = insertvalue {i32, i32, i32} %b5, i32 %b3, 2 + ret {i32, i32, i32} %b6 +} +; CHECK-LABEL: test_callee +; CHECK-NOT: pushl %esi +; CHECK-NOT: pushl %edi +; CHECK: retl Index: test/CodeGen/X86/avx512-regcall-NoMask.ll =================================================================== --- test/CodeGen/X86/avx512-regcall-NoMask.ll +++ test/CodeGen/X86/avx512-regcall-NoMask.ll @@ -469,32 +469,27 @@ ret <32 x float> %x4 } -; X32-LABEL: pushl {{%e(si|di|bx|bp)}} -; X32: pushl {{%e(si|di|bx|bp)}} -; X32: pushl {{%e(si|di|bx|bp)}} -; X32: pushl {{%e(si|di|bx|bp)}} -; X32: popl {{%e(si|di|bx|bp)}} -; X32: popl {{%e(si|di|bx|bp)}} -; X32: popl {{%e(si|di|bx|bp)}} -; X32: popl {{%e(si|di|bx|bp)}} +; X32-LABEL: testi32_inp +; X32: pushl {{%e(bx|bp)}} +; X32: pushl {{%e(bx|bp)}} +; X32: popl {{%e(bx|bp)}} +; X32: popl {{%e(bx|bp)}} ; X32: retl -; WIN64-LABEL: pushq {{%r(bp|bx|1[0-5])}} +; WIN64-LABEL: testi32_inp ; WIN64: pushq {{%r(bp|bx|1[0-5])}} ; WIN64: pushq {{%r(bp|bx|1[0-5])}} ; WIN64: pushq {{%r(bp|bx|1[0-5])}} ; WIN64: popq {{%r(bp|bx|1[0-5])}} ; WIN64: popq {{%r(bp|bx|1[0-5])}} ; WIN64: popq {{%r(bp|bx|1[0-5])}} -; WIN64: popq {{%r(bp|bx|1[0-5])}} ; WIN64: retq -; LINUXOSX64-LABEL: pushq {{%r(bp|bx|1[2-5])}} +; LINUXOSX64-LABEL: testi32_inp ; LINUXOSX64: pushq {{%r(bp|bx|1[2-5])}} ; LINUXOSX64: pushq {{%r(bp|bx|1[2-5])}} ; LINUXOSX64: popq {{%r(bp|bx|1[2-5])}} ; LINUXOSX64: popq {{%r(bp|bx|1[2-5])}} -; LINUXOSX64: popq {{%r(bp|bx|1[2-5])}} ; LINUXOSX64: retq ; Test regcall when running multiple input parameters - callee saved GPRs Index: test/CodeGen/X86/sse-regcall.ll =================================================================== --- test/CodeGen/X86/sse-regcall.ll +++ 
test/CodeGen/X86/sse-regcall.ll @@ -37,48 +37,42 @@ } ; WIN64-LABEL: testf32_inp -; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill -; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill -; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill -; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill +; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill +; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill +; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill +; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill ; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}} ; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}} ; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}} ; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}} -; WIN64: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload -; WIN64: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload -; WIN64: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload -; WIN64: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload +; WIN64: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload +; WIN64: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload +; WIN64: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload +; WIN64: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload ; WIN64: retq ; WIN32-LABEL: testf32_inp -; WIN32: movaps {{%xmm([4-7])}}, {{.*(%ebp).*}} {{#+}} 16-byte Spill -; WIN32: movaps {{%xmm([4-7])}}, {{.*(%ebp).*}} {{#+}} 16-byte Spill -; WIN32: movaps {{%xmm([4-7])}}, {{.*(%ebp).*}} {{#+}} 16-byte Spill -; WIN32: movaps {{%xmm([4-7])}}, {{.*(%ebp).*}} {{#+}} 16-byte Spill +; WIN32: movaps {{%xmm([0-7])}}, {{.*(%e(b|s)p).*}} {{#+}} 16-byte Spill ; WIN32: {{.*}} {{%xmm[0-7]}}, {{%xmm[4-7]}} ; WIN32: {{.*}} {{%xmm[0-7]}}, {{%xmm[4-7]}} ; WIN32: {{.*}} 
{{%xmm[0-7]}}, {{%xmm[4-7]}} ; WIN32: {{.*}} {{%xmm[0-7]}}, {{%xmm[4-7]}} -; WIN32: movaps {{.*(%ebp).*}}, {{%xmm([4-7])}} {{#+}} 16-byte Reload -; WIN32: movaps {{.*(%ebp).*}}, {{%xmm([4-7])}} {{#+}} 16-byte Reload -; WIN32: movaps {{.*(%ebp).*}}, {{%xmm([4-7])}} {{#+}} 16-byte Reload -; WIN32: movaps {{.*(%ebp).*}}, {{%xmm([4-7])}} {{#+}} 16-byte Reload +; WIN32: movaps {{.*(%e(b|s)p).*}}, {{%xmm([0-7])}} {{#+}} 16-byte Reload ; WIN32: retl ; LINUXOSX-LABEL: testf32_inp -; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill -; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill -; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill -; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill +; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill +; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill +; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill +; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill ; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}} ; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}} ; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}} ; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}} -; LINUXOSX: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload -; LINUXOSX: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload -; LINUXOSX: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload -; LINUXOSX: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload +; LINUXOSX: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload +; LINUXOSX: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload +; LINUXOSX: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload +; LINUXOSX: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload ; LINUXOSX: retq ;test calling conventions - input 
parameters, callee saved XMMs @@ -93,10 +87,6 @@ ; WIN32-LABEL: testi32_inp ; WIN32: pushl {{%e(si|di|bx|bp)}} ; WIN32: pushl {{%e(si|di|bx|bp)}} -; WIN32: pushl {{%e(si|di|bx|bp)}} -; WIN32: pushl {{%e(si|di|bx|bp)}} -; WIN32: popl {{%e(si|di|bx|bp)}} -; WIN32: popl {{%e(si|di|bx|bp)}} ; WIN32: popl {{%e(si|di|bx|bp)}} ; WIN32: popl {{%e(si|di|bx|bp)}} ; WIN32: retl @@ -105,10 +95,6 @@ ; WIN64: pushq {{%r(bp|bx|1[0-5])}} ; WIN64: pushq {{%r(bp|bx|1[0-5])}} ; WIN64: pushq {{%r(bp|bx|1[0-5])}} -; WIN64: pushq {{%r(bp|bx|1[0-5])}} -; WIN64: pushq {{%r(bp|bx|1[0-5])}} -; WIN64: popq {{%r(bp|bx|1[0-5])}} -; WIN64: popq {{%r(bp|bx|1[0-5])}} ; WIN64: popq {{%r(bp|bx|1[0-5])}} ; WIN64: popq {{%r(bp|bx|1[0-5])}} ; WIN64: popq {{%r(bp|bx|1[0-5])}} @@ -117,10 +103,6 @@ ; LINUXOSX-LABEL: testi32_inp ; LINUXOSX: pushq {{%r(bp|bx|1[2-5])}} ; LINUXOSX: pushq {{%r(bp|bx|1[2-5])}} -; LINUXOSX: pushq {{%r(bp|bx|1[2-5])}} -; LINUXOSX: pushq {{%r(bp|bx|1[2-5])}} -; LINUXOSX: popq {{%r(bp|bx|1[2-5])}} -; LINUXOSX: popq {{%r(bp|bx|1[2-5])}} ; LINUXOSX: popq {{%r(bp|bx|1[2-5])}} ; LINUXOSX: popq {{%r(bp|bx|1[2-5])}} ; LINUXOSX: retq