Index: include/llvm/CodeGen/MachineRegisterInfo.h =================================================================== --- include/llvm/CodeGen/MachineRegisterInfo.h +++ include/llvm/CodeGen/MachineRegisterInfo.h @@ -62,6 +62,12 @@ VirtReg2IndexFunctor> VRegInfo; + /// CalleeSaveDisableRegs - Lists physical registers that should not be + /// preserved. + /// Some calling conventions disable CSRs that are passed to the + /// function as arguments or are returned by the function. + BitVector CalleeSaveDisableRegs; + /// RegAllocHints - This vector records register allocation hints for virtual /// registers. For each virtual register, it keeps a register and hint type /// pair making up the allocation hint. Hint type is target specific except @@ -196,6 +202,29 @@ // Register Info //===--------------------------------------------------------------------===// + /// Marks a register (and its sub-registers) in CalleeSaveDisableRegs. + void addCalleeSaveDisableRegister(unsigned Reg) { + assert(Reg < CalleeSaveDisableRegs.size() && + "Trying to disable an invalid register"); + const TargetRegisterInfo * TRI = MF->getSubtarget().getRegisterInfo(); + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + CalleeSaveDisableRegs.set(*SubRegs); + } + + /// \returns a bit vector of registers that should not be + /// callee saved. + const BitVector &getCalleeSaveDisabledRegs() const { + return CalleeSaveDisableRegs; + } + + /// \returns a flipped copy of CalleeSaveDisableRegs; the member + /// itself is left unchanged. + BitVector getCalleeSaveEnabledRegs() const { + BitVector CalleeSaveEnabledRegs = CalleeSaveDisableRegs; + return CalleeSaveEnabledRegs.flip(); + } + // Strictly for use by MachineInstr.cpp. 
void addRegOperandToUseList(MachineOperand *MO); Index: include/llvm/CodeGen/RegisterClassInfo.h =================================================================== --- include/llvm/CodeGen/RegisterClassInfo.h +++ include/llvm/CodeGen/RegisterClassInfo.h @@ -52,11 +52,11 @@ const TargetRegisterInfo *TRI; // Callee saved registers of last MF. Assumed to be valid until the next - // runOnFunction() call. - const MCPhysReg *CalleeSaved; + // runOnFunction() call. Used only to determine if an update was made. + SmallVector CalleeSavedRegs; - // Map register number to CalleeSaved index + 1; - SmallVector CSRNum; + // Map register alias to the callee saved Register. + SmallVector CalleeSavedAliases; // Reserved registers in the current MF. BitVector Reserved; @@ -105,11 +105,11 @@ } /// getLastCalleeSavedAlias - Returns the last callee saved register that - /// overlaps PhysReg, or 0 if Reg doesn't overlap a CSR. + /// overlaps PhysReg, or 0 if PhysReg doesn't overlap a callee saved register. unsigned getLastCalleeSavedAlias(unsigned PhysReg) const { assert(TargetRegisterInfo::isPhysicalRegister(PhysReg)); - if (unsigned N = CSRNum[PhysReg]) - return CalleeSaved[N-1]; + if (PhysReg < CalleeSavedAliases.size()) + return CalleeSavedAliases[PhysReg]; return 0; } Index: lib/CodeGen/AggressiveAntiDepBreaker.cpp =================================================================== --- lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -158,13 +158,16 @@ } } - // Mark live-out callee-saved registers. In a return block this is + // Mark live-out callee-saved registers (that are not + // passed/returned as arguments). In a return block this is // all callee-saved registers. In non-return this is any // callee-saved register that is not saved in the prolog. 
const MachineFrameInfo &MFI = MF.getFrameInfo(); BitVector Pristine = MFI.getPristineRegs(MF); + BitVector CSRDisabled = MF.getRegInfo().getCalleeSaveDisabledRegs(); for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) { unsigned Reg = *I; + if (CSRDisabled.test(Reg)) continue; if (!IsReturnBlock && !Pristine.test(Reg)) continue; for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { unsigned AliasReg = *AI; Index: lib/CodeGen/CriticalAntiDepBreaker.cpp =================================================================== --- lib/CodeGen/CriticalAntiDepBreaker.cpp +++ lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -66,12 +66,15 @@ } } - // Mark live-out callee-saved registers. In a return block this is + // Mark live-out callee-saved registers (that are not + // passed/returned as arguments). In a return block this is // all callee-saved registers. In non-return this is any // callee-saved register that is not saved in the prolog. const MachineFrameInfo &MFI = MF.getFrameInfo(); BitVector Pristine = MFI.getPristineRegs(MF); + BitVector CSRDisabled = MF.getRegInfo().getCalleeSaveDisabledRegs(); for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) { + if (CSRDisabled.test(*I)) continue; if (!IsReturnBlock && !Pristine.test(*I)) continue; for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) { unsigned Reg = *AI; Index: lib/CodeGen/LivePhysRegs.cpp =================================================================== --- lib/CodeGen/LivePhysRegs.cpp +++ lib/CodeGen/LivePhysRegs.cpp @@ -161,8 +161,11 @@ static void addPristines(LivePhysRegs &LiveRegs, const MachineFunction &MF, const MachineFrameInfo &MFI, const TargetRegisterInfo &TRI) { + BitVector CSRDisabled = MF.getRegInfo().getCalleeSaveDisabledRegs(); for (const MCPhysReg *CSR = TRI.getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR) - LiveRegs.addReg(*CSR); + if (!CSRDisabled.test(*CSR)) + LiveRegs.addReg(*CSR); + for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) 
LiveRegs.removeReg(Info.getReg()); } @@ -180,8 +183,10 @@ if (MBB.isReturnBlock()) { // The return block has no successors whose live-ins we could merge // below. So instead we add the callee saved registers manually. + BitVector CSRDisabled = MF.getRegInfo().getCalleeSaveDisabledRegs(); for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) - addReg(*I); + if (!CSRDisabled.test(*I)) + addReg(*I); } else { addPristines(*this, MF, MFI, *TRI); } Index: lib/CodeGen/MachineFunction.cpp =================================================================== --- lib/CodeGen/MachineFunction.cpp +++ lib/CodeGen/MachineFunction.cpp @@ -862,6 +862,9 @@ for (const MCPhysReg *CSR = TRI->getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR) BV.set(*CSR); + // Reset all callee save disabled registers + BV &= MF.getRegInfo().getCalleeSaveEnabledRegs(); + // Saved CSRs are not pristine. for (auto &I : getCalleeSavedInfo()) for (MCSubRegIterator S(I.getReg(), TRI, true); S.isValid(); ++S) Index: lib/CodeGen/MachineInstr.cpp =================================================================== --- lib/CodeGen/MachineInstr.cpp +++ lib/CodeGen/MachineInstr.cpp @@ -262,8 +262,21 @@ return getBlockAddress() == Other.getBlockAddress() && getOffset() == Other.getOffset(); case MachineOperand::MO_RegisterMask: - case MachineOperand::MO_RegisterLiveOut: - return getRegMask() == Other.getRegMask(); + case MachineOperand::MO_RegisterLiveOut: { + // Calculate the size of the RegMask + const MachineFunction *MF = getParent()->getParent()->getParent(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32; + + // Compare the two RegMasks + const uint32_t *RegMask = getRegMask(); + const uint32_t *OtherRegMask = Other.getRegMask(); + for (unsigned I = 0, E = RegMaskSize; I < E; ++I) + if (RegMask[I] != OtherRegMask[I]) + return false; + + return true; + } case MachineOperand::MO_MCSymbol: return getMCSymbol() == 
Other.getMCSymbol(); case MachineOperand::MO_CFIIndex: Index: lib/CodeGen/MachineRegisterInfo.cpp =================================================================== --- lib/CodeGen/MachineRegisterInfo.cpp +++ lib/CodeGen/MachineRegisterInfo.cpp @@ -35,6 +35,7 @@ VRegInfo.reserve(256); RegAllocHints.reserve(256); UsedPhysRegMask.resize(NumRegs); + CalleeSaveDisableRegs.resize(NumRegs); PhysRegUseDefLists.reset(new MachineOperand*[NumRegs]()); } Index: lib/CodeGen/RegAllocPBQP.cpp =================================================================== --- lib/CodeGen/RegAllocPBQP.cpp +++ lib/CodeGen/RegAllocPBQP.cpp @@ -555,8 +555,9 @@ static bool isACalleeSavedRegister(unsigned reg, const TargetRegisterInfo &TRI, const MachineFunction &MF) { const MCPhysReg *CSR = TRI.getCalleeSavedRegs(&MF); + BitVector CSRDisabled = MF.getRegInfo().getCalleeSaveDisabledRegs(); for (unsigned i = 0; CSR[i] != 0; ++i) - if (TRI.regsOverlap(reg, CSR[i])) + if (!CSRDisabled.test(CSR[i]) && TRI.regsOverlap(reg, CSR[i])) return true; return false; } Index: lib/CodeGen/RegisterClassInfo.cpp =================================================================== --- lib/CodeGen/RegisterClassInfo.cpp +++ lib/CodeGen/RegisterClassInfo.cpp @@ -20,6 +20,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetFrameLowering.h" using namespace llvm; @@ -29,8 +30,7 @@ StressRA("stress-regalloc", cl::Hidden, cl::init(0), cl::value_desc("N"), cl::desc("Limit all regclasses to N registers")); -RegisterClassInfo::RegisterClassInfo() - : Tag(0), MF(nullptr), TRI(nullptr), CalleeSaved(nullptr) {} +RegisterClassInfo::RegisterClassInfo() : Tag(0), MF(nullptr), TRI(nullptr) {} void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { bool Update = false; @@ -48,18 +48,26 @@ // Does this MF have different CSRs? assert(TRI && "no register info set"); + + // Calculate the callee saved registers. 
const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF); - if (Update || CSR != CalleeSaved) { - // Build a CSRNum map. Every CSR alias gets an entry pointing to the last + SmallVector ActualCalleeSaved; + for (unsigned I = 0; unsigned Reg = CSR[I]; ++I) { + if (MF->getRegInfo().getCalleeSaveDisabledRegs().test(Reg)) + continue; + ActualCalleeSaved.push_back(Reg); + } + + if (Update || ActualCalleeSaved != CalleeSavedRegs) { + // Rebuild the CSRAlias map from scratch. Every CSR alias saves the last // overlapping CSR. - CSRNum.clear(); - CSRNum.resize(TRI->getNumRegs(), 0); - for (unsigned N = 0; unsigned Reg = CSR[N]; ++N) + CalleeSavedAliases.assign(TRI->getNumRegs(), 0); + for (auto Reg : ActualCalleeSaved) for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - CSRNum[*AI] = N + 1; // 0 means no CSR, 1 means CalleeSaved[0], ... + CalleeSavedAliases[*AI] = Reg; Update = true; + CalleeSavedRegs = ActualCalleeSaved; } - CalleeSaved = CSR; // Different reserved registers? const BitVector &RR = MF->getRegInfo().getReservedRegs(); @@ -103,7 +111,7 @@ unsigned Cost = TRI->getCostPerUse(PhysReg); MinCost = std::min(MinCost, Cost); - if (CSRNum[PhysReg]) + if (CalleeSavedAliases[PhysReg]) // PhysReg aliases a CSR, save it for later. CSRAlias.push_back(PhysReg); else { Index: lib/CodeGen/TargetFrameLoweringImpl.cpp =================================================================== --- lib/CodeGen/TargetFrameLoweringImpl.cpp +++ lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -86,8 +86,10 @@ // Functions which call __builtin_unwind_init get all their registers saved. 
bool CallsUnwindInit = MF.callsUnwindInit(); const MachineRegisterInfo &MRI = MF.getRegInfo(); + BitVector CSRDisabled = MRI.getCalleeSaveDisabledRegs(); for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; + if (CSRDisabled.test(Reg)) continue; if (CallsUnwindInit || MRI.isPhysRegModified(Reg)) SavedRegs.set(Reg); } Index: lib/Target/X86/X86ISelLowering.h =================================================================== --- lib/Target/X86/X86ISelLowering.h +++ lib/Target/X86/X86ISelLowering.h @@ -1077,7 +1077,8 @@ CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const; + SmallVectorImpl &InVals, + uint32_t *RegMask) const; SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, const SmallVectorImpl &ArgInfo, const SDLoc &dl, SelectionDAG &DAG, Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -2205,6 +2205,11 @@ ++I, ++OutsIndex) { CCValAssign &VA = RVLocs[I]; assert(VA.isRegLoc() && "Can only return in registers!"); + + // Add the register to the CalleeSaveDisableRegs list. + if (CallConv == CallingConv::X86_RegCall) + MF.getRegInfo().addCalleeSaveDisableRegister(VA.getLocReg()); + SDValue ValToCopy = OutVals[OutsIndex]; EVT ValVT = ValToCopy.getValueType(); @@ -2279,6 +2284,10 @@ assert(2 == RegsToPass.size() && "Expecting two registers after Pass64BitArgInRegs"); + + // Add the second register to the CalleeSaveDisableRegs list. + if (CallConv == CallingConv::X86_RegCall) + MF.getRegInfo().addCalleeSaveDisableRegister(RVLocs[I].getLocReg()); } else { RegsToPass.push_back(std::make_pair(VA.getLocReg(), ValToCopy)); } @@ -2335,6 +2344,10 @@ // RAX/EAX now acts like a return value. 
RetOps.push_back( DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout()))); + + // Add the returned register to the CalleeSaveDisableRegs list. + if (CallConv == CallingConv::X86_RegCall) + MF.getRegInfo().addCalleeSaveDisableRegister(RetValReg); } const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); @@ -2514,8 +2527,10 @@ SDValue X86TargetLowering::LowerCallResult( SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, - SelectionDAG &DAG, SmallVectorImpl &InVals) const { + SelectionDAG &DAG, SmallVectorImpl &InVals, + uint32_t *RegMask) const { + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); // Assign locations to each value returned by this call. SmallVector RVLocs; bool Is64Bit = Subtarget.is64Bit(); @@ -2529,6 +2544,14 @@ CCValAssign &VA = RVLocs[I]; EVT CopyVT = VA.getLocVT(); + // In some calling conventions we need to remove the used registers + // from the register mask. + if (RegMask && CallConv == CallingConv::X86_RegCall) { + for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32)); + } + // If this is x86-64, and we disabled SSE, we can't return FP values if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) && ((Is64Bit || Ins[InsIndex].Flags.isInReg()) && !Subtarget.hasSSE1())) { @@ -2872,6 +2895,10 @@ CCValAssign &VA = ArgLocs[I]; if (VA.isRegLoc()) { + // Add the register to the CalleeSaveDisableRegs list. + if (CallConv == CallingConv::X86_RegCall) + MF.getRegInfo().addCalleeSaveDisableRegister(VA.getLocReg()); + EVT RegVT = VA.getLocVT(); if (VA.needsCustom()) { assert( @@ -2882,6 +2909,10 @@ // compiled to 32 bit arch, are splited up into two registers. ArgValue = getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget); + + // Add the second register to the CalleeSaveDisableRegs list. 
+ if (CallConv == CallingConv::X86_RegCall) + MF.getRegInfo().addCalleeSaveDisableRegister(ArgLocs[I].getLocReg()); } else { const TargetRegisterClass *RC; if (RegVT == MVT::i32) @@ -3688,7 +3719,33 @@ Mask = RegInfo->getNoPreservedMask(); } - Ops.push_back(DAG.getRegisterMask(Mask)); + // Define a new register mask from the existing mask. + uint32_t *RegMask; + + // In some calling conventions we need to remove the used physical registers + // from the reg mask. + if (CallConv == CallingConv::X86_RegCall) { + /// @todo Need to add support in MIPrinter and MIParser to represent + /// the customized RegMask. + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); + + // Allocate a new Reg Mask and copy Mask. + RegMask = MF.allocateRegisterMask(TRI->getNumRegs()); + unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32; + memcpy(RegMask, Mask, sizeof(uint32_t) * RegMaskSize); + + // Make sure all sub registers of the argument registers are reset + // in the RegMask. + for (auto const &RegPair : RegsToPass) + for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32)); + } else + // Simply use the same mask instead of allocating a new one. + RegMask = const_cast(Mask); + + // Create the RegMask Operand according to our updated mask. + Ops.push_back(DAG.getRegisterMask(RegMask)); if (InFlag.getNode()) Ops.push_back(InFlag); @@ -3741,8 +3798,8 @@ // Handle result values, copying them out of physregs into vregs that we // return. 
- return LowerCallResult(Chain, InFlag, CallConv, isVarArg, - Ins, dl, DAG, InVals); + return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG, + InVals, RegMask); } //===----------------------------------------------------------------------===// @@ -25555,6 +25612,7 @@ MachineInstrBuilder MIB(*MF, &II); for (unsigned RI = 0; SavedRegs[RI]; ++RI) { unsigned Reg = SavedRegs[RI]; + if (MRI->getCalleeSaveDisabledRegs().test(Reg)) continue; if (!DefRegs[Reg]) MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead); } Index: lib/Target/X86/X86MachineFunctionInfo.cpp =================================================================== --- lib/Target/X86/X86MachineFunctionInfo.cpp +++ lib/Target/X86/X86MachineFunctionInfo.cpp @@ -9,6 +9,7 @@ #include "X86MachineFunctionInfo.h" #include "X86RegisterInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -25,6 +26,8 @@ unsigned Reg = *CSR; ++CSR) { + if (MF->getRegInfo().getCalleeSaveDisabledRegs().test(Reg)) + continue; if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg)) RestoreBasePointerOffset -= SlotSize; } Index: test/CodeGen/X86/DynamicCalleeSavedRegisters.ll =================================================================== --- test/CodeGen/X86/DynamicCalleeSavedRegisters.ll +++ test/CodeGen/X86/DynamicCalleeSavedRegisters.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -mtriple=i386-linux-gnu | FileCheck --check-prefix=CHECK %s + +define x86_regcallcc {i32, i32, i32} @test_callee(i32 %a0, i32 %b0, i32 %c0, i32 %d0, i32 %e0) nounwind { + %b1 = mul i32 7, %e0 + %b2 = udiv i32 5, %e0 + %b3 = mul i32 7, %d0 + %b4 = insertvalue {i32, i32, i32} undef, i32 %b1, 0 + %b5 = insertvalue {i32, i32, i32} %b4, i32 %b2, 1 + %b6 = insertvalue {i32, i32, i32} %b5, i32 %b3, 2 + ret {i32, i32, i32} %b6 +} + +declare x86_regcallcc i32 @test_caller(i32 %a0, i32 %b0, i32 %c0, i32 %d0, i32 %e0); + +; This is a hipe CC function that doesn't save 
any register for the caller of the caller +; In this case we see that %edi and %esi are not saved and %esi is assumed to be saved by test function +define cc 11 i32 @caller(i32 %a0) nounwind { + %b1 = call x86_regcallcc i32 @test_caller(i32 %a0, i32 %a0, i32 %a0, i32 %a0, i32 %a0) + %b2 = add i32 %b1, %a0 + ret i32 %b2 +} +; CHECK-LABEL: caller +; CHECK: movl %esi, %ebx +; CHECK: calll test +; CHECK: movl %ebx, %esi +; CHECK: ret{{l|q}} + +!hipe.literals = !{ !0, !1, !2 } +!0 = !{ !"P_NSP_LIMIT", i32 120 } +!1 = !{ !"X86_LEAF_WORDS", i32 24 } +!2 = !{ !"AMD64_LEAF_WORDS", i32 18 } Index: test/CodeGen/X86/avx512-regcall-NoMask.ll =================================================================== --- test/CodeGen/X86/avx512-regcall-NoMask.ll +++ test/CodeGen/X86/avx512-regcall-NoMask.ll @@ -469,17 +469,14 @@ ret <32 x float> %x4 } -; X32-LABEL: pushl {{%e(si|di|bx|bp)}} -; X32: pushl {{%e(si|di|bx|bp)}} -; X32: pushl {{%e(si|di|bx|bp)}} -; X32: pushl {{%e(si|di|bx|bp)}} -; X32: popl {{%e(si|di|bx|bp)}} -; X32: popl {{%e(si|di|bx|bp)}} -; X32: popl {{%e(si|di|bx|bp)}} -; X32: popl {{%e(si|di|bx|bp)}} +; X32-LABEL: testi32_inp +; X32: pushl {{%e(bx|bp)}} +; X32: pushl {{%e(bx|bp)}} +; X32: popl {{%e(bx|bp)}} +; X32: popl {{%e(bx|bp)}} ; X32: retl -; WIN64-LABEL: pushq {{%r(bp|bx|1[0-5])}} +; WIN64-LABEL: testi32_inp ; WIN64: pushq {{%r(bp|bx|1[0-5])}} ; WIN64: pushq {{%r(bp|bx|1[0-5])}} ; WIN64: pushq {{%r(bp|bx|1[0-5])}} @@ -489,7 +486,7 @@ ; WIN64: popq {{%r(bp|bx|1[0-5])}} ; WIN64: retq -; LINUXOSX64-LABEL: pushq {{%r(bp|bx|1[2-5])}} +; LINUXOSX64-LABEL: testi32_inp ; LINUXOSX64: pushq {{%r(bp|bx|1[2-5])}} ; LINUXOSX64: pushq {{%r(bp|bx|1[2-5])}} ; LINUXOSX64: popq {{%r(bp|bx|1[2-5])}} Index: test/CodeGen/X86/sse-regcall.ll =================================================================== --- test/CodeGen/X86/sse-regcall.ll +++ test/CodeGen/X86/sse-regcall.ll @@ -37,48 +37,42 @@ } ; WIN64-LABEL: testf32_inp -; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 
16-byte Spill -; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill -; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill -; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill +; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill +; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill +; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill +; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill ; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}} ; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}} ; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}} ; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}} -; WIN64: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload -; WIN64: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload -; WIN64: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload -; WIN64: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload +; WIN64: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload +; WIN64: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload +; WIN64: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload +; WIN64: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload ; WIN64: retq ; WIN32-LABEL: testf32_inp -; WIN32: movaps {{%xmm([4-7])}}, {{.*(%ebp).*}} {{#+}} 16-byte Spill -; WIN32: movaps {{%xmm([4-7])}}, {{.*(%ebp).*}} {{#+}} 16-byte Spill -; WIN32: movaps {{%xmm([4-7])}}, {{.*(%ebp).*}} {{#+}} 16-byte Spill -; WIN32: movaps {{%xmm([4-7])}}, {{.*(%ebp).*}} {{#+}} 16-byte Spill +; WIN32: movaps {{%xmm([0-7])}}, {{.*(%e(b|s)p).*}} {{#+}} 16-byte Spill ; WIN32: {{.*}} {{%xmm[0-7]}}, {{%xmm[4-7]}} ; WIN32: {{.*}} {{%xmm[0-7]}}, {{%xmm[4-7]}} ; WIN32: {{.*}} {{%xmm[0-7]}}, {{%xmm[4-7]}} ; WIN32: {{.*}} {{%xmm[0-7]}}, {{%xmm[4-7]}} -; WIN32: movaps {{.*(%ebp).*}}, {{%xmm([4-7])}} {{#+}} 16-byte Reload -; 
WIN32: movaps {{.*(%ebp).*}}, {{%xmm([4-7])}} {{#+}} 16-byte Reload -; WIN32: movaps {{.*(%ebp).*}}, {{%xmm([4-7])}} {{#+}} 16-byte Reload -; WIN32: movaps {{.*(%ebp).*}}, {{%xmm([4-7])}} {{#+}} 16-byte Reload +; WIN32: movaps {{.*(%e(b|s)p).*}}, {{%xmm([0-7])}} {{#+}} 16-byte Reload ; WIN32: retl ; LINUXOSX-LABEL: testf32_inp -; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill -; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill -; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill -; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill +; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill +; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill +; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill +; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill ; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}} ; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}} ; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}} ; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}} -; LINUXOSX: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload -; LINUXOSX: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload -; LINUXOSX: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload -; LINUXOSX: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload +; LINUXOSX: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload +; LINUXOSX: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload +; LINUXOSX: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload +; LINUXOSX: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload ; LINUXOSX: retq ;test calling conventions - input parameters, callee saved XMMs @@ -93,10 +87,6 @@ ; WIN32-LABEL: testi32_inp ; WIN32: pushl {{%e(si|di|bx|bp)}} ; WIN32: pushl {{%e(si|di|bx|bp)}} -; WIN32: 
pushl {{%e(si|di|bx|bp)}} -; WIN32: pushl {{%e(si|di|bx|bp)}} -; WIN32: popl {{%e(si|di|bx|bp)}} -; WIN32: popl {{%e(si|di|bx|bp)}} ; WIN32: popl {{%e(si|di|bx|bp)}} ; WIN32: popl {{%e(si|di|bx|bp)}} ; WIN32: retl