Index: include/llvm/CodeGen/MachineFunction.h
===================================================================
--- include/llvm/CodeGen/MachineFunction.h
+++ include/llvm/CodeGen/MachineFunction.h
@@ -678,6 +678,16 @@
     return Mask;
   }
 
+  /// Allocates a register list with room for \p NumRegisters entries from
+  /// Allocator and sets every entry to 0.
+  /// \returns a pointer to the first entry of the new list.
+  MCPhysReg *allocateRegisterList(unsigned NumRegisters) {
+    MCPhysReg *RegList = Allocator.Allocate<MCPhysReg>(NumRegisters);
+    for (unsigned I = 0; I != NumRegisters; ++I)
+      RegList[I] = 0;
+    return RegList;
+  }
+
   /// allocateMemRefsArray - Allocate an array to hold MachineMemOperand
   /// pointers. This array is owned by the MachineFunction.
   MachineInstr::mmo_iterator allocateMemRefsArray(unsigned long Num);
Index: include/llvm/CodeGen/MachineRegisterInfo.h
===================================================================
--- include/llvm/CodeGen/MachineRegisterInfo.h
+++ include/llvm/CodeGen/MachineRegisterInfo.h
@@ -62,6 +62,18 @@
              VirtReg2IndexFunctor>
       VRegInfo;
 
+  /// CalleeSaveDisableRegs - Lists physical registers that should not be
+  /// preserved.
+  /// Some calling conventions disable CSRs that are passed to the
+  /// function as arguments or are returned by the function.
+  BitVector CalleeSaveDisableRegs;
+
+  // This list contains the updated callee saved register list.
+  // Contrary to the static list defined in register info, all
+  // registers that were disabled (in CalleeSaveDisableRegs) are
+  // removed from this list.
+  MCPhysReg *UpdatedCalleeSavedRegs;
+
   /// RegAllocHints - This vector records register allocation hints for virtual
   /// registers. For each virtual register, it keeps a register and hint type
   /// pair making up the allocation hint. Hint type is target specific except
@@ -117,6 +129,13 @@
 
   MachineRegisterInfo(const MachineRegisterInfo&) = delete;
   void operator=(const MachineRegisterInfo&) = delete;
+
+  /// \returns a bit vector of registers that should not be
+  /// callee saved.
+  const BitVector &getCalleeSaveDisabledRegs() const {
+    return CalleeSaveDisableRegs;
+  }
+
 public:
   explicit MachineRegisterInfo(MachineFunction *MF);
 
@@ -196,6 +215,25 @@
   // Register Info
   //===--------------------------------------------------------------------===//
 
+  /// Marks \p Reg and all of its sub-registers in the CalleeSaveDisableRegs
+  /// bit vector. Any previously built UpdatedCalleeSavedRegs list is dropped
+  /// so that it cannot go stale.
+  void addCalleeSaveDisableRegister(unsigned Reg) {
+    assert(Reg < CalleeSaveDisableRegs.size() &&
+           "Trying to disable an invalid register");
+    const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+    for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
+         SubRegs.isValid(); ++SubRegs)
+      CalleeSaveDisableRegs.set(*SubRegs);
+    // Force getUpdatedCalleeSavedRegs() to rebuild its list on the next call.
+    UpdatedCalleeSavedRegs = nullptr;
+  }
+
+  /// Returns an updated CSR list (after taking into account registers that
+  /// are disabled from the CSR list). The list is zero-terminated.
+  /// The new list is allocated lazily and saved in UpdatedCalleeSavedRegs.
+  const MCPhysReg *getUpdatedCalleeSavedRegs();
+
   // Strictly for use by MachineInstr.cpp.
   void addRegOperandToUseList(MachineOperand *MO);
 
Index: include/llvm/CodeGen/RegisterClassInfo.h
===================================================================
--- include/llvm/CodeGen/RegisterClassInfo.h
+++ include/llvm/CodeGen/RegisterClassInfo.h
@@ -52,11 +52,11 @@
   const TargetRegisterInfo *TRI;
 
   // Callee saved registers of last MF. Assumed to be valid until the next
-  // runOnFunction() call.
-  const MCPhysReg *CalleeSaved;
+  // runOnFunction() call. Used only to determine if an update was made.
+  SmallVector<MCPhysReg, 16> CalleeSavedRegs;
 
-  // Map register number to CalleeSaved index + 1;
-  SmallVector<uint8_t, 8> CSRNum;
+  // Map register alias to the callee saved Register.
+  SmallVector<MCPhysReg, 4> CalleeSavedAliases;
 
   // Reserved registers in the current MF.
   BitVector Reserved;
 
@@ -105,11 +105,11 @@
   }
 
   /// getLastCalleeSavedAlias - Returns the last callee saved register that
-  /// overlaps PhysReg, or 0 if Reg doesn't overlap a CSR.
+  /// overlaps PhysReg, or 0 if PhysReg doesn't overlap a CSR.
   unsigned getLastCalleeSavedAlias(unsigned PhysReg) const {
     assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
-    if (unsigned N = CSRNum[PhysReg])
-      return CalleeSaved[N-1];
+    if (PhysReg < CalleeSavedAliases.size())
+      return CalleeSavedAliases[PhysReg];
     return 0;
   }
 
Index: lib/CodeGen/AggressiveAntiDepBreaker.cpp
===================================================================
--- lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -158,12 +158,14 @@
     }
   }
 
-  // Mark live-out callee-saved registers. In a return block this is
+  // Mark live-out callee-saved registers (that are not
+  // passed/returned as arguments). In a return block this is
   // all callee-saved registers. In non-return this is any
   // callee-saved register that is not saved in the prolog.
   const MachineFrameInfo &MFI = MF.getFrameInfo();
   BitVector Pristine = MFI.getPristineRegs(MF);
-  for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
+  for (const MCPhysReg *I = MF.getRegInfo().getUpdatedCalleeSavedRegs(); *I;
+       ++I) {
     unsigned Reg = *I;
     if (!IsReturnBlock && !Pristine.test(Reg)) continue;
     for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
Index: lib/CodeGen/CriticalAntiDepBreaker.cpp
===================================================================
--- lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -66,12 +66,14 @@
     }
   }
 
-  // Mark live-out callee-saved registers. In a return block this is
+  // Mark live-out callee-saved registers (that are not
+  // passed/returned as arguments). In a return block this is
   // all callee-saved registers. In non-return this is any
   // callee-saved register that is not saved in the prolog.
   const MachineFrameInfo &MFI = MF.getFrameInfo();
   BitVector Pristine = MFI.getPristineRegs(MF);
-  for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
+  for (const MCPhysReg *I = MF.getRegInfo().getUpdatedCalleeSavedRegs(); *I;
+       ++I) {
     if (!IsReturnBlock && !Pristine.test(*I)) continue;
     for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
       unsigned Reg = *AI;
Index: lib/CodeGen/LivePhysRegs.cpp
===================================================================
--- lib/CodeGen/LivePhysRegs.cpp
+++ lib/CodeGen/LivePhysRegs.cpp
@@ -161,7 +161,9 @@
 static void addPristines(LivePhysRegs &LiveRegs, const MachineFunction &MF,
                          const MachineFrameInfo &MFI,
                          const TargetRegisterInfo &TRI) {
-  for (const MCPhysReg *CSR = TRI.getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR)
+  MachineRegisterInfo &MRI = const_cast<MachineRegisterInfo &>(MF.getRegInfo());
+  for (const MCPhysReg *CSR = MRI.getUpdatedCalleeSavedRegs(); CSR && *CSR;
+       ++CSR)
     LiveRegs.addReg(*CSR);
   for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo())
     LiveRegs.removeReg(Info.getReg());
@@ -175,12 +177,13 @@
 
 void LivePhysRegs::addLiveOuts(const MachineBasicBlock &MBB) {
   const MachineFunction &MF = *MBB.getParent();
+  MachineRegisterInfo &MRI = const_cast<MachineRegisterInfo &>(MF.getRegInfo());
   const MachineFrameInfo &MFI = MF.getFrameInfo();
   if (MFI.isCalleeSavedInfoValid()) {
     if (MBB.isReturnBlock()) {
       // The return block has no successors whose live-ins we could merge
       // below. So instead we add the callee saved registers manually.
-      for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I)
+      for (const MCPhysReg *I = MRI.getUpdatedCalleeSavedRegs(); *I; ++I)
         addReg(*I);
     } else {
       addPristines(*this, MF, MFI, *TRI);
Index: lib/CodeGen/MachineFunction.cpp
===================================================================
--- lib/CodeGen/MachineFunction.cpp
+++ lib/CodeGen/MachineFunction.cpp
@@ -859,7 +859,9 @@
   if (!isCalleeSavedInfoValid())
     return BV;
 
-  for (const MCPhysReg *CSR = TRI->getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR)
+  MachineRegisterInfo &MRI = const_cast<MachineRegisterInfo &>(MF.getRegInfo());
+  for (const MCPhysReg *CSR = MRI.getUpdatedCalleeSavedRegs(); CSR && *CSR;
+       ++CSR)
     BV.set(*CSR);
 
   // Saved CSRs are not pristine.
Index: lib/CodeGen/MachineInstr.cpp
===================================================================
--- lib/CodeGen/MachineInstr.cpp
+++ lib/CodeGen/MachineInstr.cpp
@@ -262,8 +262,27 @@
     return getBlockAddress() == Other.getBlockAddress() &&
            getOffset() == Other.getOffset();
   case MachineOperand::MO_RegisterMask:
-  case MachineOperand::MO_RegisterLiveOut:
-    return getRegMask() == Other.getRegMask();
+  case MachineOperand::MO_RegisterLiveOut: {
+    // Shallow compare: two operands that point at the same mask array are
+    // identical, and no parent lookup is needed to decide that.
+    const uint32_t *RegMask = getRegMask();
+    const uint32_t *OtherRegMask = Other.getRegMask();
+    if (RegMask == OtherRegMask)
+      return true;
+
+    // Calculate the size of the RegMask: one bit per register, packed into
+    // 32-bit words.
+    const MachineFunction *MF = getParent()->getParent()->getParent();
+    const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+    unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32;
+
+    // Deep compare of the two RegMasks.
+    for (unsigned I = 0; I != RegMaskSize; ++I)
+      if (RegMask[I] != OtherRegMask[I])
+        return false;
+
+    return true;
+  }
   case MachineOperand::MO_MCSymbol:
     return getMCSymbol() == Other.getMCSymbol();
   case MachineOperand::MO_CFIIndex:
Index: lib/CodeGen/MachineRegisterInfo.cpp
===================================================================
--- lib/CodeGen/MachineRegisterInfo.cpp
+++ lib/CodeGen/MachineRegisterInfo.cpp
@@ -35,6 +35,8 @@
   VRegInfo.reserve(256);
   RegAllocHints.reserve(256);
   UsedPhysRegMask.resize(NumRegs);
+  CalleeSaveDisableRegs.resize(NumRegs);
+  UpdatedCalleeSavedRegs = nullptr;
   PhysRegUseDefLists.reset(new MachineOperand*[NumRegs]());
 }
 
@@ -543,3 +545,37 @@
   }
   return false;
 }
+
+/// Returns the zero-terminated callee saved register list with every
+/// register that is set in CalleeSaveDisableRegs removed. If nothing is
+/// disabled, the target's static list is returned unchanged. Otherwise the
+/// filtered list is allocated through the MachineFunction and cached in the
+/// UpdatedCalleeSavedRegs member.
+const MCPhysReg *MachineRegisterInfo::getUpdatedCalleeSavedRegs() {
+  if (UpdatedCalleeSavedRegs != nullptr)
+    return UpdatedCalleeSavedRegs;
+
+  const TargetRegisterInfo *TRI = getTargetRegisterInfo();
+  const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF);
+
+  // If there are no disabled CSRs, return the static list. The bit vector is
+  // resized to NumRegs in the constructor, so empty() would never be true;
+  // none() is the test for "no bit set".
+  const BitVector &CSRDisabled = getCalleeSaveDisabledRegs();
+  if (!CSR || CSRDisabled.none())
+    return CSR;
+
+  // Create a vector of the updated callee saved registers.
+  SmallVector<MCPhysReg, 16> CSRVector;
+  for (const MCPhysReg *I = CSR; *I; ++I)
+    if (!CSRDisabled.test(*I))
+      CSRVector.push_back(*I);
+
+  // Copy the vector into the allocated list. The list is requested with one
+  // extra entry, which stays 0 and terminates it. Store the result in the
+  // member rather than in a shadowing local so that it is really cached.
+  UpdatedCalleeSavedRegs = MF->allocateRegisterList(CSRVector.size() + 1);
+  for (unsigned I = 0, E = CSRVector.size(); I != E; ++I)
+    UpdatedCalleeSavedRegs[I] = CSRVector[I];
+  return UpdatedCalleeSavedRegs;
+}
Index: lib/CodeGen/RegAllocPBQP.cpp
===================================================================
--- lib/CodeGen/RegAllocPBQP.cpp
+++ lib/CodeGen/RegAllocPBQP.cpp
@@ -554,7 +554,8 @@
 
 static bool isACalleeSavedRegister(unsigned reg, const TargetRegisterInfo &TRI,
                                    const MachineFunction &MF) {
-  const MCPhysReg *CSR = TRI.getCalleeSavedRegs(&MF);
+  MachineRegisterInfo &MRI = const_cast<MachineRegisterInfo &>(MF.getRegInfo());
+  const MCPhysReg *CSR = MRI.getUpdatedCalleeSavedRegs();
   for (unsigned i = 0; CSR[i] != 0; ++i)
     if (TRI.regsOverlap(reg, CSR[i]))
       return true;
Index: lib/CodeGen/RegisterClassInfo.cpp
===================================================================
--- lib/CodeGen/RegisterClassInfo.cpp
+++ lib/CodeGen/RegisterClassInfo.cpp
@@ -20,6 +20,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
 
 using namespace llvm;
 
@@ -29,8 +30,7 @@
 StressRA("stress-regalloc", cl::Hidden, cl::init(0), cl::value_desc("N"),
          cl::desc("Limit all regclasses to N registers"));
 
-RegisterClassInfo::RegisterClassInfo()
-  : Tag(0), MF(nullptr), TRI(nullptr), CalleeSaved(nullptr) {}
+RegisterClassInfo::RegisterClassInfo() : Tag(0), MF(nullptr), TRI(nullptr) {}
 
 void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
   bool Update = false;
@@ -48,18 +48,28 @@
 
   // Does this MF have different CSRs?
   assert(TRI && "no register info set");
-  const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF);
-  if (Update || CSR != CalleeSaved) {
-    // Build a CSRNum map. Every CSR alias gets an entry pointing to the last
+
+  // Calculate the callee saved registers.
+  MachineRegisterInfo &MRI =
+      const_cast<MachineRegisterInfo &>(MF->getRegInfo());
+  const MCPhysReg *CSR = MRI.getUpdatedCalleeSavedRegs();
+  SmallVector<MCPhysReg, 16> ActualCalleeSaved;
+  for (unsigned I = 0; unsigned Reg = CSR[I]; ++I) {
+    ActualCalleeSaved.push_back(Reg);
+  }
+
+  if (Update || ActualCalleeSaved != CalleeSavedRegs) {
+    // Build a CSRAlias map. Every CSR alias saves the last
     // overlapping CSR.
-    CSRNum.clear();
-    CSRNum.resize(TRI->getNumRegs(), 0);
-    for (unsigned N = 0; unsigned Reg = CSR[N]; ++N)
+    // Reset every entry first: entries left over from a previous CSR list
+    // must not survive, and resize() keeps existing elements untouched.
+    CalleeSavedAliases.assign(TRI->getNumRegs(), 0);
+    for (auto Reg : ActualCalleeSaved)
       for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
-        CSRNum[*AI] = N + 1; // 0 means no CSR, 1 means CalleeSaved[0], ...
+        CalleeSavedAliases[*AI] = Reg;
     Update = true;
+    CalleeSavedRegs = ActualCalleeSaved;
   }
-  CalleeSaved = CSR;
 
   // Different reserved registers?
   const BitVector &RR = MF->getRegInfo().getReservedRegs();
@@ -103,7 +113,7 @@
       unsigned Cost = TRI->getCostPerUse(PhysReg);
       MinCost = std::min(MinCost, Cost);
 
-      if (CSRNum[PhysReg])
+      if (CalleeSavedAliases[PhysReg])
         // PhysReg aliases a CSR, save it for later.
         CSRAlias.push_back(PhysReg);
       else {
Index: lib/CodeGen/TargetFrameLoweringImpl.cpp
===================================================================
--- lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -73,7 +73,7 @@
     return;
 
   // Get the callee saved register list...
-  const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF);
+  const MCPhysReg *CSRegs = MF.getRegInfo().getUpdatedCalleeSavedRegs();
 
   // Early exit if there are no callee saved registers.
   if (!CSRegs || CSRegs[0] == 0)
Index: lib/Target/X86/X86ISelLowering.h
===================================================================
--- lib/Target/X86/X86ISelLowering.h
+++ lib/Target/X86/X86ISelLowering.h
@@ -1077,7 +1077,8 @@
                             CallingConv::ID CallConv, bool isVarArg,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             const SDLoc &dl, SelectionDAG &DAG,
-                            SmallVectorImpl<SDValue> &InVals) const;
+                            SmallVectorImpl<SDValue> &InVals,
+                            uint32_t *RegMask) const;
     SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
                              const SmallVectorImpl<ISD::InputArg> &ArgInfo,
                              const SDLoc &dl, SelectionDAG &DAG,
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -2205,6 +2205,11 @@
        ++I, ++OutsIndex) {
     CCValAssign &VA = RVLocs[I];
     assert(VA.isRegLoc() && "Can only return in registers!");
+
+    // Add the register to the CalleeSaveDisableRegs list.
+    if (CallConv == CallingConv::X86_RegCall)
+      MF.getRegInfo().addCalleeSaveDisableRegister(VA.getLocReg());
+
     SDValue ValToCopy = OutVals[OutsIndex];
     EVT ValVT = ValToCopy.getValueType();
 
@@ -2279,6 +2284,10 @@
 
       assert(2 == RegsToPass.size() &&
              "Expecting two registers after Pass64BitArgInRegs");
+
+      // Add the second register to the CalleeSaveDisableRegs list.
+      if (CallConv == CallingConv::X86_RegCall)
+        MF.getRegInfo().addCalleeSaveDisableRegister(RVLocs[I].getLocReg());
     } else {
       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
     }
@@ -2335,6 +2344,10 @@
     // RAX/EAX now acts like a return value.
     RetOps.push_back(
         DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
+
+    // Add the returned register to the CalleeSaveDisableRegs list.
+    if (CallConv == CallingConv::X86_RegCall)
+      MF.getRegInfo().addCalleeSaveDisableRegister(RetValReg);
   }
 
   const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
@@ -2514,8 +2527,10 @@
 SDValue X86TargetLowering::LowerCallResult(
     SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
-    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
+    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
+    uint32_t *RegMask) const {
 
+  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
   // Assign locations to each value returned by this call.
   SmallVector<CCValAssign, 16> RVLocs;
   bool Is64Bit = Subtarget.is64Bit();
@@ -2529,6 +2544,14 @@
     CCValAssign &VA = RVLocs[I];
     EVT CopyVT = VA.getLocVT();
 
+    // In some calling conventions we need to remove the used registers
+    // from the register mask.
+    if (RegMask && CallConv == CallingConv::X86_RegCall) {
+      for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
+           SubRegs.isValid(); ++SubRegs)
+        RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
+    }
+
     // If this is x86-64, and we disabled SSE, we can't return FP values
     if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
         ((Is64Bit || Ins[InsIndex].Flags.isInReg()) && !Subtarget.hasSSE1())) {
@@ -3172,6 +3195,12 @@
     }
   }
 
+  // For RegCall, add every live-in register to the CalleeSaveDisableRegs list.
+  if (CallConv == CallingConv::X86_RegCall)
+    for (auto I = MF.getRegInfo().livein_begin(),
+              E = MF.getRegInfo().livein_end(); I != E; ++I)
+      MF.getRegInfo().addCalleeSaveDisableRegister(I->first);
+
   return Chain;
 }
 
@@ -3688,7 +3717,33 @@
     Mask = RegInfo->getNoPreservedMask();
   }
 
-  Ops.push_back(DAG.getRegisterMask(Mask));
+  // Define a new register mask from the existing mask.
+  uint32_t *RegMask;
+
+  // In some calling conventions we need to remove the used physical registers
+  // from the reg mask.
+  if (CallConv == CallingConv::X86_RegCall) {
+    /// @todo Need to add support in MIPrinter and MIParser to represent
+    ///       the customized RegMask.
+    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+
+    // Allocate a new Reg Mask and copy Mask.
+    RegMask = MF.allocateRegisterMask(TRI->getNumRegs());
+    unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32;
+    memcpy(RegMask, Mask, sizeof(uint32_t) * RegMaskSize);
+
+    // Make sure all sub registers of the argument registers are reset
+    // in the RegMask.
+    for (auto const &RegPair : RegsToPass)
+      for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
+           SubRegs.isValid(); ++SubRegs)
+        RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
+  } else
+    // Simply use the same mask instead of allocating a new one.
+    RegMask = const_cast<uint32_t *>(Mask);
+
+  // Create the RegMask Operand according to our updated mask.
+  Ops.push_back(DAG.getRegisterMask(RegMask));
 
   if (InFlag.getNode())
     Ops.push_back(InFlag);
@@ -3741,8 +3796,8 @@
 
   // Handle result values, copying them out of physregs into vregs that we
   // return.
-  return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
-                         Ins, dl, DAG, InVals);
+  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
+                         InVals, RegMask);
 }
 
 //===----------------------------------------------------------------------===//
@@ -25521,8 +25576,7 @@
 
   // N.B. the order the invoke BBs are processed in doesn't matter here.
   SmallVector<MachineBasicBlock *, 64> MBBLPads;
-  const MCPhysReg *SavedRegs =
-      Subtarget.getRegisterInfo()->getCalleeSavedRegs(MF);
+  const MCPhysReg *SavedRegs = MF->getRegInfo().getUpdatedCalleeSavedRegs();
   for (MachineBasicBlock *MBB : InvokeBBs) {
     // Remove the landing pad successor from the invoke block and replace it
     // with the new dispatch block.
Index: lib/Target/X86/X86MachineFunctionInfo.cpp
===================================================================
--- lib/Target/X86/X86MachineFunctionInfo.cpp
+++ lib/Target/X86/X86MachineFunctionInfo.cpp
@@ -9,6 +9,7 @@
 
 #include "X86MachineFunctionInfo.h"
 #include "X86RegisterInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
 
 using namespace llvm;
@@ -20,11 +21,10 @@
     const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
       MF->getSubtarget().getRegisterInfo());
     unsigned SlotSize = RegInfo->getSlotSize();
-    for (const MCPhysReg *CSR =
-      RegInfo->X86RegisterInfo::getCalleeSavedRegs(MF);
-      unsigned Reg = *CSR;
-       ++CSR)
-    {
+    MachineRegisterInfo &MRI =
+        const_cast<MachineRegisterInfo &>(MF->getRegInfo());
+    for (const MCPhysReg *CSR = MRI.getUpdatedCalleeSavedRegs();
+         unsigned Reg = *CSR; ++CSR) {
       if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
         RestoreBasePointerOffset -= SlotSize;
     }
Index: test/CodeGen/X86/DynamicCalleeSavedRegisters.ll
===================================================================
---
test/CodeGen/X86/DynamicCalleeSavedRegisters.ll
+++ test/CodeGen/X86/DynamicCalleeSavedRegisters.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -mtriple=i386-linux-gnu | FileCheck --check-prefix=CHECK %s
+
+declare x86_regcallcc i32 @callee(i32 %a0, i32 %b0, i32 %c0, i32 %d0, i32 %e0);
+
+; In RegCall calling convention, %esi and %edi are callee saved registers.
+; One might think that the caller could assume that %esi value is the same before
+; and after calling the callee.
+; However, RegCall also says that a register that was used for
+; passing/returning arguments can be assumed to be used by the callee.
+; In other words, it is no longer a callee saved register.
+; In this case we want to see that %edi and %esi are saved and %esi is assumed
+; to be used by the caller.
+; This is a hipe CC function that doesn't save any register for the caller
+; of the callee. So we can be sure that there is no other reason to save
+; %esi, %edi and other registers (it returns the result in %esi).
+define cc 11 i32 @caller(i32 %a0, i32 %b0, i32 %c0, i32 %d0, i32 %e0) nounwind {
+  %b1 = call x86_regcallcc i32 @callee(i32 %a0, i32 %b0, i32 %c0, i32 %d0, i32 %e0)
+  %b2 = add i32 %b1, %d0
+  %b3 = add i32 %b2, %e0
+  ret i32 %b3
+}
+; CHECK-LABEL: caller
+; CHECK: movl %edx, %ebx
+; CHECK: movl %ebp, %esi
+; CHECK: calll callee
+; CHECK: leal (%eax,%ebx), %esi
+; CHECK: addl %ebp, %esi
+; CHECK: retl
+
+!hipe.literals = !{ !0, !1, !2 }
+!0 = !{ !"P_NSP_LIMIT", i32 120 }
+!1 = !{ !"X86_LEAF_WORDS", i32 24 }
+!2 = !{ !"AMD64_LEAF_WORDS", i32 18 }
+
+; Make sure that the callee doesn't save parameters that were passed as arguments.
+define x86_regcallcc {i32, i32, i32} @test_callee(i32 %a0, i32 %b0, i32 %c0, i32 %d0, i32 %e0) nounwind {
+  %b1 = mul i32 7, %e0
+  %b2 = udiv i32 5, %e0
+  %b3 = mul i32 7, %d0
+  %b4 = insertvalue {i32, i32, i32} undef, i32 %b1, 0
+  %b5 = insertvalue {i32, i32, i32} %b4, i32 %b2, 1
+  %b6 = insertvalue {i32, i32, i32} %b5, i32 %b3, 2
+  ret {i32, i32, i32} %b6
+}
+; CHECK-LABEL: test_callee
+; CHECK-NOT: pushl %esi
+; CHECK-NOT: pushl %edi
+; CHECK: retl
Index: test/CodeGen/X86/avx512-regcall-NoMask.ll
===================================================================
--- test/CodeGen/X86/avx512-regcall-NoMask.ll
+++ test/CodeGen/X86/avx512-regcall-NoMask.ll
@@ -469,17 +469,14 @@
   ret <32 x float> %x4
 }
 
-; X32-LABEL: pushl {{%e(si|di|bx|bp)}}
-; X32: pushl {{%e(si|di|bx|bp)}}
-; X32: pushl {{%e(si|di|bx|bp)}}
-; X32: pushl {{%e(si|di|bx|bp)}}
-; X32: popl {{%e(si|di|bx|bp)}}
-; X32: popl {{%e(si|di|bx|bp)}}
-; X32: popl {{%e(si|di|bx|bp)}}
-; X32: popl {{%e(si|di|bx|bp)}}
+; X32-LABEL: testi32_inp
+; X32: pushl {{%e(bx|bp)}}
+; X32: pushl {{%e(bx|bp)}}
+; X32: popl {{%e(bx|bp)}}
+; X32: popl {{%e(bx|bp)}}
 ; X32: retl
 
-; WIN64-LABEL: pushq {{%r(bp|bx|1[0-5])}}
+; WIN64-LABEL: testi32_inp
 ; WIN64: pushq {{%r(bp|bx|1[0-5])}}
 ; WIN64: pushq {{%r(bp|bx|1[0-5])}}
 ; WIN64: pushq {{%r(bp|bx|1[0-5])}}
@@ -489,7 +486,7 @@
 ; WIN64: popq {{%r(bp|bx|1[0-5])}}
 ; WIN64: retq
 
-; LINUXOSX64-LABEL: pushq {{%r(bp|bx|1[2-5])}}
+; LINUXOSX64-LABEL: testi32_inp
 ; LINUXOSX64: pushq {{%r(bp|bx|1[2-5])}}
 ; LINUXOSX64: pushq {{%r(bp|bx|1[2-5])}}
 ; LINUXOSX64: popq {{%r(bp|bx|1[2-5])}}
Index: test/CodeGen/X86/sse-regcall.ll
===================================================================
--- test/CodeGen/X86/sse-regcall.ll
+++ test/CodeGen/X86/sse-regcall.ll
@@ -37,48 +37,42 @@
 }
 
 ; WIN64-LABEL: testf32_inp
-; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill
-; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill
-; WIN64: movaps {{%xmm(1[2-5])}},
{{.*(%rsp).*}} {{#+}} 16-byte Spill -; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill +; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill +; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill +; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill +; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill ; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}} ; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}} ; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}} ; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}} -; WIN64: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload -; WIN64: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload -; WIN64: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload -; WIN64: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload +; WIN64: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload +; WIN64: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload +; WIN64: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload +; WIN64: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload ; WIN64: retq ; WIN32-LABEL: testf32_inp -; WIN32: movaps {{%xmm([4-7])}}, {{.*(%ebp).*}} {{#+}} 16-byte Spill -; WIN32: movaps {{%xmm([4-7])}}, {{.*(%ebp).*}} {{#+}} 16-byte Spill -; WIN32: movaps {{%xmm([4-7])}}, {{.*(%ebp).*}} {{#+}} 16-byte Spill -; WIN32: movaps {{%xmm([4-7])}}, {{.*(%ebp).*}} {{#+}} 16-byte Spill +; WIN32: movaps {{%xmm([0-7])}}, {{.*(%e(b|s)p).*}} {{#+}} 16-byte Spill ; WIN32: {{.*}} {{%xmm[0-7]}}, {{%xmm[4-7]}} ; WIN32: {{.*}} {{%xmm[0-7]}}, {{%xmm[4-7]}} ; WIN32: {{.*}} {{%xmm[0-7]}}, {{%xmm[4-7]}} ; WIN32: {{.*}} {{%xmm[0-7]}}, {{%xmm[4-7]}} -; WIN32: movaps {{.*(%ebp).*}}, {{%xmm([4-7])}} {{#+}} 16-byte Reload -; WIN32: movaps {{.*(%ebp).*}}, {{%xmm([4-7])}} {{#+}} 16-byte Reload -; WIN32: movaps {{.*(%ebp).*}}, {{%xmm([4-7])}} {{#+}} 
16-byte Reload -; WIN32: movaps {{.*(%ebp).*}}, {{%xmm([4-7])}} {{#+}} 16-byte Reload +; WIN32: movaps {{.*(%e(b|s)p).*}}, {{%xmm([0-7])}} {{#+}} 16-byte Reload ; WIN32: retl ; LINUXOSX-LABEL: testf32_inp -; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill -; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill -; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill -; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill +; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill +; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill +; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill +; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill ; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}} ; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}} ; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}} ; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}} -; LINUXOSX: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload -; LINUXOSX: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload -; LINUXOSX: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload -; LINUXOSX: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload +; LINUXOSX: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload +; LINUXOSX: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload +; LINUXOSX: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload +; LINUXOSX: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload ; LINUXOSX: retq ;test calling conventions - input parameters, callee saved XMMs @@ -93,10 +87,6 @@ ; WIN32-LABEL: testi32_inp ; WIN32: pushl {{%e(si|di|bx|bp)}} ; WIN32: pushl {{%e(si|di|bx|bp)}} -; WIN32: pushl {{%e(si|di|bx|bp)}} -; WIN32: pushl {{%e(si|di|bx|bp)}} -; WIN32: popl {{%e(si|di|bx|bp)}} -; WIN32: popl 
{{%e(si|di|bx|bp)}} ; WIN32: popl {{%e(si|di|bx|bp)}} ; WIN32: popl {{%e(si|di|bx|bp)}} ; WIN32: retl