Index: llvm/include/llvm/CodeGen/Passes.h
===================================================================
--- llvm/include/llvm/CodeGen/Passes.h
+++ llvm/include/llvm/CodeGen/Passes.h
@@ -473,6 +473,9 @@
   /// Create IR Type Promotion pass. \see TypePromotion.cpp
   FunctionPass *createTypePromotionPass();
 
+  /// This pass fixes up statepoint machine instructions to replace usage of
+  /// caller-saved registers with stack slots.
+  extern char &FixupStatepointCallerSavedID;
 } // End llvm namespace
 
 #endif
Index: llvm/include/llvm/InitializePasses.h
===================================================================
--- llvm/include/llvm/InitializePasses.h
+++ llvm/include/llvm/InitializePasses.h
@@ -151,6 +151,7 @@
 void initializeFEntryInserterPass(PassRegistry&);
 void initializeFinalizeISelPass(PassRegistry&);
 void initializeFinalizeMachineBundlesPass(PassRegistry&);
+void initializeFixupStatepointCallerSavedPass(PassRegistry&);
 void initializeFlattenCFGPassPass(PassRegistry&);
 void initializeFloat2IntLegacyPassPass(PassRegistry&);
 void initializeForceFunctionAttrsLegacyPassPass(PassRegistry&);
Index: llvm/lib/CodeGen/CMakeLists.txt
===================================================================
--- llvm/lib/CodeGen/CMakeLists.txt
+++ llvm/lib/CodeGen/CMakeLists.txt
@@ -30,6 +30,7 @@
   FaultMaps.cpp
   FEntryInserter.cpp
   FinalizeISel.cpp
+  FixupStatepointCallerSaved.cpp
   FuncletLayout.cpp
   GCMetadata.cpp
   GCMetadataPrinter.cpp
Index: llvm/lib/CodeGen/CodeGen.cpp
===================================================================
--- llvm/lib/CodeGen/CodeGen.cpp
+++ llvm/lib/CodeGen/CodeGen.cpp
@@ -38,6 +38,7 @@
   initializeFEntryInserterPass(Registry);
   initializeFinalizeISelPass(Registry);
   initializeFinalizeMachineBundlesPass(Registry);
+  initializeFixupStatepointCallerSavedPass(Registry);
   initializeFuncletLayoutPass(Registry);
   initializeGCMachineCodeAnalysisPass(Registry);
   initializeGCModuleInfoPass(Registry);
Index: llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
===================================================================
--- /dev/null
+++ llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
@@ -0,0 +1,332 @@
+//===-- FixupStatepointCallerSaved.cpp - Fixup caller saved registers ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// The deopt parameters of a statepoint instruction hold values that are
+/// meaningful to the runtime, and the runtime must be able to read them.
+/// When the placement of such a value is left to the register allocator, the
+/// value may end up either on the stack or in a register.
+/// If the value is in a callee-saved register, it eventually gets spilled
+/// somewhere on the stack (in one of the callee frames or on entry to the
+/// runtime).
+/// A caller-saved register, however, may not be spilled at all, and if the
+/// frame containing the statepoint is in the middle of the stack, the value
+/// can be completely lost by the time the runtime is entered, because the
+/// ABI does not oblige the callee to preserve it.
+/// This pass inspects all deopt parameters, and for each value held in a
+/// caller-saved register it forces a spill to the frame and rewrites the
+/// statepoint to reference the stack location instead.
+///
+/// Note: A value can be in a caller-saved register and at the same time be
+/// spilled by the register allocator.
+/// Detecting that in a post-RA pass is difficult, so for simplicity this
+/// pass forces another spill in that case.
+/// It is worth considering teaching the register allocator to perform this
+/// rewrite itself, at the point where all virtual registers have been
+/// assigned to physical registers and spill slots have been chosen, but
+/// virtual registers have not been replaced yet.
+/// That might be better, but it is also more complex, because it requires
+/// updating all the data structures the register allocator maintains.
+/// For now this pass seems good enough and is much easier to implement;
+/// still, integration into the register allocator is something to look into.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/IR/Statepoint.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/Debug.h"
+
+#include <cassert>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "fixup-statepoint-caller-saved"
+STATISTIC(NumSpilledRegisters, "Number of spilled registers");
+STATISTIC(NumSpillSlotsAllocated, "Number of spill slots allocated");
+STATISTIC(NumSpillSlotsExtended, "Number of spill slots extended");
+
+static cl::opt<bool> FixupSCSExtendSlotSize(
+    "fixup-scs-extend-slot-size", cl::Hidden, cl::init(false),
+    cl::desc("Allow spill in spill slot of greater size than register size"));
+
+namespace {
+
+class FixupStatepointCallerSaved : public MachineFunctionPass {
+public:
+  static char ID;
+
+  FixupStatepointCallerSaved() : MachineFunctionPass(ID) {
+    initializeFixupStatepointCallerSavedPass(*PassRegistry::getPassRegistry());
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  StringRef getPassName() const override {
+    return "Fixup Statepoint Caller Saved";
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // End anonymous namespace.
+
+char FixupStatepointCallerSaved::ID = 0;
+char &llvm::FixupStatepointCallerSavedID = FixupStatepointCallerSaved::ID;
+
+INITIALIZE_PASS_BEGIN(FixupStatepointCallerSaved, DEBUG_TYPE,
+                      "Fixup Statepoint Caller Saved", false, false)
+INITIALIZE_PASS_END(FixupStatepointCallerSaved, DEBUG_TYPE,
+                    "Fixup Statepoint Caller Saved", false, false)
+
+// Cache of frame indexes used while rewriting statepoints, so they can be
+// reused when processing the next statepoint instruction.
+// There are two strategies: one preserves the size of each spill slot, the
+// other extends spill slots so they can be shared across register sizes,
+// reducing the number of slots and hence the total frame size, at the price
+// of reloads ("unspills") becoming implicitly any-extending.
+class FrameIndexesCache {
+private:
+  struct FrameIndexesPerSize {
+    // Frame indexes used while processing previous statepoints.
+    SmallVector<int, 8> Slots;
+    // Index of the first not-yet-used frame index in Slots.
+    unsigned Index = 0;
+  };
+  MachineFrameInfo &MFI;
+  const TargetRegisterInfo &TRI;
+  // Map from size to the list of frame indexes of that size. If the mode is
+  // FixupSCSExtendSlotSize, the key 0 is used to keep all frame indexes.
+  // If a required spill slot is bigger than the cached one, the cached
+  // slot's size is increased.
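+  // Example of the intended behavior (a sketch, not tied to a particular
+  // target): suppose statepoint A spills a 4-byte register and statepoint B
+  // spills an 8-byte one. With -fixup-scs-extend-slot-size both spills share
+  // the single bucket-0 slot, which is grown to 8 bytes when B is processed;
+  // with the option off, the 4-byte and 8-byte spills get separate slots
+  // kept in separate buckets.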
+  DenseMap<unsigned, FrameIndexesPerSize> Cache;
+  // Cache of the spill slot size required to spill each register.
+  DenseMap<Register, unsigned> RegToSpillSize;
+
+public:
+  FrameIndexesCache(MachineFrameInfo &MFI, const TargetRegisterInfo &TRI)
+      : MFI(MFI), TRI(TRI) {}
+  // Reset the current state of used frame indexes. After a call to
+  // this function all frame indexes are available for allocation again.
+  void reset() {
+    for (auto &It : Cache)
+      It.second.Index = 0;
+  }
+  // Get a frame index to spill the register to.
+  int getFrameIndex(Register Reg) {
+    assert(RegToSpillSize.count(Reg) && "No size for Register");
+    unsigned Size = RegToSpillSize[Reg];
+    // In FixupSCSExtendSlotSize mode the bucket with index 0 is used
+    // for all sizes.
+    unsigned Bucket = FixupSCSExtendSlotSize ? 0 : Size;
+    FrameIndexesPerSize &Line = Cache[Bucket];
+    if (Line.Index < Line.Slots.size()) {
+      int FI = Line.Slots[Line.Index++];
+      // If all sizes share one bucket we may need to extend the
+      // spill slot size.
+      if (FixupSCSExtendSlotSize && MFI.getObjectSize(FI) < Size) {
+        MFI.setObjectSize(FI, Size);
+        MFI.setObjectAlignment(FI, Align(Size));
+        NumSpillSlotsExtended++;
+      }
+      return FI;
+    }
+    int FI = MFI.CreateSpillStackObject(Size, Size);
+    NumSpillSlotsAllocated++;
+    Line.Slots.push_back(FI);
+    ++Line.Index;
+    return FI;
+  }
+  // Cache the spill size of the register.
+  void addRegSize(Register Reg) {
+    if (RegToSpillSize.count(Reg))
+      return;
+    const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg);
+    RegToSpillSize[Reg] = TRI.getSpillSize(*RC);
+  }
+  // Sort the registers to spill in descending order of spill size. In
+  // FixupSCSExtendSlotSize mode this minimizes the total frame size.
+  // In non-FixupSCSExtendSlotSize mode this step can be skipped.
+  void sortRegisters(SmallVectorImpl<Register> &Regs) {
+    if (!FixupSCSExtendSlotSize)
+      return;
+    llvm::sort(Regs, [&](Register &A, Register &B) {
+      return RegToSpillSize[A] > RegToSpillSize[B];
+    });
+  }
+};
+
+// Describes the state of the statepoint instruction currently being
+// processed.
+class StatepointState {
+private:
+  // The statepoint instruction.
+  MachineInstr &MI;
+  MachineFunction &MF;
+  const TargetRegisterInfo &TRI;
+  const TargetInstrInfo &TII;
+  MachineFrameInfo &MFI;
+  // Mask of callee-saved registers.
+  const uint32_t *Mask;
+  // Cache of frame indexes used while processing previous statepoints.
+  FrameIndexesCache &CacheFI;
+  // Indexes of operands holding physical registers that require spilling.
+  SmallVector<unsigned, 8> Ops;
+  // Set of registers to spill.
+  SmallSet<Register, 8> VisitedRegs;
+  SmallVector<Register, 8> RegsToSpill;
+  // Map from Register to its frame slot index.
+  DenseMap<Register, int> RegToSlotIdx;
+
+public:
+  StatepointState(MachineInstr &MI, const uint32_t *Mask,
+                  FrameIndexesCache &CacheFI)
+      : MI(MI), MF(*MI.getMF()), TRI(*MF.getSubtarget().getRegisterInfo()),
+        TII(*MF.getSubtarget().getInstrInfo()), MFI(MF.getFrameInfo()),
+        Mask(Mask), CacheFI(CacheFI) {}
+  // Return true if the register is callee-saved.
+  bool isCalleeSaved(Register Reg) { return (Mask[Reg / 32] >> Reg % 32) & 1; }
+  // Iterates over the statepoint meta args to find caller-saved registers.
+  // Also caches the spill size of each register found.
+  // Returns true if any caller-saved register was found.
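+  // Note that only explicit register operands in the meta-argument area are
+  // considered; implicit operands (such as the call's implicit register defs
+  // and uses) are skipped, so a register is spilled only when a deopt or gc
+  // operand was actually assigned to it.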
+  bool findRegistersToSpill() {
+    for (unsigned Idx = StatepointOpers(&MI).getVarIdx(),
+                  EndIdx = MI.getNumOperands();
+         Idx < EndIdx; ++Idx) {
+      MachineOperand &MO = MI.getOperand(Idx);
+      if (!MO.isReg() || MO.isImplicit())
+        continue;
+      Register Reg = MO.getReg();
+      assert(Reg.isPhysical() && "Only physical regs are expected");
+      if (isCalleeSaved(Reg))
+        continue;
+      if (VisitedRegs.insert(Reg).second)
+        RegsToSpill.push_back(Reg);
+      Ops.push_back(Idx);
+      CacheFI.addRegSize(Reg);
+    }
+    CacheFI.sortRegisters(RegsToSpill);
+    return !RegsToSpill.empty();
+  }
+  // Spill all caller-saved registers right before the statepoint instruction
+  // and remember the frame index each register is spilled to.
+  void spillRegisters() {
+    for (Register Reg : RegsToSpill) {
+      int FI = CacheFI.getFrameIndex(Reg);
+      const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg);
+      TII.storeRegToStackSlot(*MI.getParent(), MI, Reg, true /*is_Kill*/, FI,
+                              RC, &TRI);
+      NumSpilledRegisters++;
+      RegToSlotIdx[Reg] = FI;
+    }
+  }
+  // Rewrite the statepoint machine instruction, replacing caller-saved
+  // register operands with indirect memory locations (frame indexes).
+  void rewriteStatepoint() {
+    MachineInstr *NewMI =
+        MF.CreateMachineInstr(TII.get(MI.getOpcode()), MI.getDebugLoc(), true);
+    MachineInstrBuilder MIB(MF, NewMI);
+
+    // Add an end marker.
+    Ops.push_back(MI.getNumOperands());
+    unsigned CurOpIdx = 0;
+
+    for (unsigned I = 0; I < MI.getNumOperands(); ++I) {
+      MachineOperand &MO = MI.getOperand(I);
+      if (I == Ops[CurOpIdx]) {
+        assert(MO.isReg() && "Should be register");
+        assert(MO.getReg().isPhysical() && "Should be physical register");
+        int FI = RegToSlotIdx[MO.getReg()];
+        MIB.addImm(StackMaps::IndirectMemRefOp);
+        MIB.addImm(MFI.getObjectSize(FI));
+        MIB.addFrameIndex(FI);
+        MIB.addImm(0);
+        ++CurOpIdx;
+      } else
+        MIB.add(MO);
+    }
+    assert(CurOpIdx == (Ops.size() - 1) && "Not all operands processed");
+    // Add mem operands.
+    NewMI->setMemRefs(MF, MI.memoperands());
+    for (auto It : RegToSlotIdx) {
+      int FrameIndex = It.second;
+      auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
+      auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
+                                          MFI.getObjectSize(FrameIndex),
+                                          MFI.getObjectAlign(FrameIndex));
+      NewMI->addMemOperand(MF, MMO);
+    }
+    // Insert the new statepoint and erase the old one.
+    MI.getParent()->insert(MI, NewMI);
+    MI.eraseFromParent();
+  }
+};
+
+class StatepointProcessor {
+private:
+  MachineFunction &MF;
+  const TargetRegisterInfo &TRI;
+  FrameIndexesCache CacheFI;
+
+public:
+  StatepointProcessor(MachineFunction &MF)
+      : MF(MF), TRI(*MF.getSubtarget().getRegisterInfo()),
+        CacheFI(MF.getFrameInfo(), TRI) {}
+
+  bool process(MachineInstr &MI) {
+    unsigned VarIdx = StatepointOpers(&MI).getVarIdx();
+    uint64_t Flags =
+        MI.getOperand(VarIdx + StatepointOpers::FlagsOffset).getImm();
+    // Do nothing for LiveIn; it supports all registers.
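+    // (A statepoint lowered with "deopt-lowering"="live-in" explicitly
+    // tolerates deopt values living in call-clobbered registers, so there
+    // is nothing to rewrite; see the DeoptLiveIn handling in
+    // StatepointLowering.cpp.)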
+    if (Flags & (uint64_t)StatepointFlags::DeoptLiveIn)
+      return false;
+    CallingConv::ID CC =
+        MI.getOperand(VarIdx + StatepointOpers::CCOffset).getImm();
+    const uint32_t *Mask = TRI.getCallPreservedMask(MF, CC);
+    CacheFI.reset();
+    StatepointState SS(MI, Mask, CacheFI);
+
+    if (!SS.findRegistersToSpill())
+      return false;
+
+    SS.spillRegisters();
+    SS.rewriteStatepoint();
+    return true;
+  }
+};
+
+bool FixupStatepointCallerSaved::runOnMachineFunction(MachineFunction &MF) {
+  if (skipFunction(MF.getFunction()))
+    return false;
+
+  const Function &F = MF.getFunction();
+  if (!F.hasGC())
+    return false;
+
+  SmallVector<MachineInstr *, 16> Statepoints;
+  for (MachineBasicBlock &BB : MF)
+    for (MachineInstr &I : BB)
+      if (I.getOpcode() == TargetOpcode::STATEPOINT)
+        Statepoints.push_back(&I);
+
+  if (Statepoints.empty())
+    return false;
+
+  bool Changed = false;
+  StatepointProcessor SPP(MF);
+  for (MachineInstr *I : Statepoints)
+    Changed |= SPP.process(*I);
+  return Changed;
+}
Index: llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -61,6 +61,10 @@
 STATISTIC(StatepointMaxSlotsRequired,
           "Maximum number of stack slots required for a singe statepoint");
 
+cl::opt<bool> UseRegistersForStatepoint(
+    "use-register-for-statepoint", cl::Hidden, cl::init(false),
+    cl::desc("Allow using registers for non pointer deopt args"));
+
 static void pushStackMapConstant(SmallVectorImpl<SDValue>& Ops,
                                  SelectionDAGBuilder &Builder, uint64_t Value) {
   SDLoc L = Builder.getCurSDLoc();
@@ -372,10 +376,11 @@
 /// Lower a single value incoming to a statepoint node. This value can be
 /// either a deopt value or a gc value, the handling is the same. We special
 /// case constants and allocas, then fall back to spilling if required.
-static void lowerIncomingStatepointValue(SDValue Incoming, bool LiveInOnly,
-                                         SmallVectorImpl<SDValue> &Ops,
-                                         SmallVectorImpl<MachineMemOperand *> &MemRefs,
-                                         SelectionDAGBuilder &Builder) {
+static void
+lowerIncomingStatepointValue(SDValue Incoming, bool RequireSpillSlot,
+                             SmallVectorImpl<SDValue> &Ops,
+                             SmallVectorImpl<MachineMemOperand *> &MemRefs,
+                             SelectionDAGBuilder &Builder) {
   // Note: We know all of these spills are independent, but don't bother to
   // exploit that chain wise. DAGCombine will happily do so as needed, so
   // doing it here would be a small compile time win at most.
@@ -401,14 +406,15 @@
     auto &MF = Builder.DAG.getMachineFunction();
     auto *MMO = getMachineMemOperand(MF, *FI);
     MemRefs.push_back(MMO);
-
-  } else if (LiveInOnly) {
+
+  } else if (!RequireSpillSlot) {
     // If this value is live in (not live-on-return, or live-through), we can
     // treat it the same way patchpoint treats it's "live in" values. We'll
     // end up folding some of these into stack references, but they'll be
    // handled by the register allocator. Note that we do not have the notion
     // of a late use so these values might be placed in registers which are
-    // clobbered by the call. This is fine for live-in.
+    // clobbered by the call. This is fine for live-in. For live-through
+    // values the fixup pass must be run to force spilling of such registers.
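+    // Sketch of the overall flow under -use-register-for-statepoint: a
+    // non-pointer deopt value is pushed here as a plain SDValue operand, may
+    // be assigned any register by the allocator, and, if that register is
+    // caller-saved, the FixupStatepointCallerSaved pass later replaces the
+    // operand with an IndirectMemRefOp (stack slot) reference.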
     Ops.push_back(Incoming);
   } else {
     // Otherwise, locate a spill slot and explicitly spill it so it
@@ -482,14 +488,17 @@
   const bool LiveInDeopt =
       SI.StatepointFlags & (uint64_t)StatepointFlags::DeoptLiveIn;
 
-  auto isGCValue = [&](const Value *V) {
+  auto requireSpillSlot = [&](const Value *V) {
     auto *Ty = V->getType();
-    if (!Ty->isPtrOrPtrVectorTy())
+    if (Ty->isPtrOrPtrVectorTy()) {
+      if (auto *GFI = Builder.GFI)
+        if (auto IsManaged = GFI->getStrategy().isGCManagedPointer(Ty))
+          return *IsManaged;
+      return true; // conservative
+    }
+    if (LiveInDeopt)
       return false;
-    if (auto *GFI = Builder.GFI)
-      if (auto IsManaged = GFI->getStrategy().isGCManagedPointer(Ty))
-        return *IsManaged;
-    return true; // conservative
+    return !UseRegistersForStatepoint;
   };
 
   // Before we actually start lowering (and allocating spill slots for values),
@@ -498,7 +507,7 @@
   // doesn't change semantics at all. It is important for performance that we
   // reserve slots for both deopt and gc values before lowering either.
   for (const Value *V : SI.DeoptState) {
-    if (!LiveInDeopt || isGCValue(V))
+    if (requireSpillSlot(V))
       reservePreviousStackSlotForValue(V, Builder);
   }
   for (unsigned i = 0; i < SI.Bases.size(); ++i) {
@@ -525,8 +534,8 @@
     }
     if (!Incoming.getNode())
       Incoming = Builder.getValue(V);
-    const bool LiveInValue = LiveInDeopt && !isGCValue(V);
-    lowerIncomingStatepointValue(Incoming, LiveInValue, Ops, MemRefs, Builder);
+    lowerIncomingStatepointValue(Incoming, requireSpillSlot(V), Ops, MemRefs,
+                                 Builder);
   }
 
   // Finally, go ahead and lower all the gc arguments. There's no prefixed
@@ -536,12 +545,14 @@
   // (base[0], ptr[0], base[1], ptr[1], ...)
   for (unsigned i = 0; i < SI.Bases.size(); ++i) {
     const Value *Base = SI.Bases[i];
-    lowerIncomingStatepointValue(Builder.getValue(Base), /*LiveInOnly*/ false,
-                                 Ops, MemRefs, Builder);
+    lowerIncomingStatepointValue(Builder.getValue(Base),
+                                 /*RequireSpillSlot*/ true, Ops, MemRefs,
+                                 Builder);
 
     const Value *Ptr = SI.Ptrs[i];
-    lowerIncomingStatepointValue(Builder.getValue(Ptr), /*LiveInOnly*/ false,
-                                 Ops, MemRefs, Builder);
+    lowerIncomingStatepointValue(Builder.getValue(Ptr),
+                                 /*RequireSpillSlot*/ true, Ops, MemRefs,
+                                 Builder);
   }
 
   // If there are any explicit spill slots passed to the statepoint, record
Index: llvm/lib/CodeGen/TargetPassConfig.cpp
===================================================================
--- llvm/lib/CodeGen/TargetPassConfig.cpp
+++ llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -204,6 +204,8 @@
     cl::desc("Stop compilation before a specific pass"),
     cl::value_desc("pass-name"), cl::init(""), cl::Hidden);
 
+extern cl::opt<bool> UseRegistersForStatepoint;
+
 /// Allow standard passes to be disabled by command line options. This supports
 /// simple binary flags that either suppress the pass or do nothing.
 /// i.e. -disable-mypass=false has no effect.
@@ -911,6 +913,9 @@
   // Run post-ra passes.
   addPostRegAlloc();
 
+  if (UseRegistersForStatepoint)
+    addPass(&FixupStatepointCallerSavedID);
+
   // Insert prolog/epilog code. Eliminate abstract frame index references...
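+  // (The statepoint fixup above must run at exactly this point: after
+  // register allocation, when physical registers are final, and before
+  // prologue/epilogue insertion, so the spill slots it creates are still
+  // laid out during frame finalization.)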
if (getOptLevel() != CodeGenOpt::None) { addPass(&PostRAMachineSinkingID); Index: llvm/test/CodeGen/X86/statepoint-regs.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/statepoint-regs.ll @@ -0,0 +1,679 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -O3 -restrict-statepoint-remat -use-register-for-statepoint < %s | FileCheck %s +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.11.0" + +declare void @bar() #0 +declare void @baz() + +; Spill caller saved register for %a. +define void @test1(i32 %a) gc "statepoint-example" { +; CHECK-LABEL: test1: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: callq _bar ## 4-byte Folded Reload +; CHECK-NEXT: Ltmp0: +; CHECK-NEXT: popq %rax +; CHECK-NEXT: retq +entry: + %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 1, i32 %a) + ret void +} + +; Callee save registers are ok. +define void @test2(i32 %a, i32 %b) gc "statepoint-example" { +; CHECK-LABEL: test2: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset %rbx, -24 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movl %esi, %ebx +; CHECK-NEXT: movl %edi, %ebp +; CHECK-NEXT: callq _bar +; CHECK-NEXT: Ltmp1: +; CHECK-NEXT: callq _bar +; CHECK-NEXT: Ltmp2: +; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: retq +entry: + call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 2, i32 %a, i32 %b) + call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 2, i32 %b, i32 %a) + ret void +} + +; Arguments in caller saved registers, so they must be spilled. +define void @test3(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i) gc "statepoint-example" { +; CHECK-LABEL: test3: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: subq $24, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl %r9d, (%rsp) ## 4-byte Spill +; CHECK-NEXT: callq _bar ## 24-byte Folded Reload +; CHECK-NEXT: Ltmp3: +; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: retq +entry: + %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 9, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i) + ret void +} + +; This case just confirms that we don't crash when given more live values +; than registers. This is a case where we *have* to use a stack slot. 
This +; also ends up being a good test of whether we can fold loads from immutable +; stack slots into the statepoint. +define void @test4(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z) gc "statepoint-example" { +; CHECK-LABEL: test4: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: subq $24, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl %r9d, (%rsp) ## 4-byte Spill +; CHECK-NEXT: callq _bar ## 24-byte Folded Reload +; CHECK-NEXT: Ltmp4: +; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: retq +entry: + %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 26, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z) + ret void +} + +; A gc-value must be spilled even if it is also a deopt value. +define i32 addrspace(1)* @test5(i32 %a, i32 addrspace(1)* %p) gc "statepoint-example" { +; CHECK-LABEL: test5: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: subq $16, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movl %edi, %ebx +; CHECK-NEXT: movq %rsi, {{[0-9]+}}(%rsp) +; CHECK-NEXT: callq _bar +; CHECK-NEXT: Ltmp5: +; CHECK-NEXT: callq _bar +; CHECK-NEXT: Ltmp6: +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax +; CHECK-NEXT: addq $16, %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq +entry: + %token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 1, i32 %a, i32 addrspace(1)* %p, i32 addrspace(1)* %p) + %p2 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %token, i32 9, i32 9) + %token2 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 1, i32 %a, i32 addrspace(1)* %p2, i32 addrspace(1)* %p2) + %p3 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %token2, i32 9, i32 9) + ret i32 addrspace(1)* %p3 +} + +; Callee saved are ok again. +define void @test6(i32 %a) gc "statepoint-example" { +; CHECK-LABEL: test6: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movl %edi, %ebx +; CHECK-NEXT: callq _baz +; CHECK-NEXT: Ltmp7: +; CHECK-NEXT: callq _bar +; CHECK-NEXT: Ltmp8: +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq +entry: + call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @baz, i32 0, i32 0, i32 0, i32 1, i32 %a) + call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 1, i32 %a) + ret void +} + +; Many deopt values. 
+define void @test7(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z) gc "statepoint-example" { +; The code for this is terrible, check simply for correctness for the moment +; CHECK-LABEL: test7: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: pushq %r13 +; CHECK-NEXT: .cfi_def_cfa_offset 40 +; CHECK-NEXT: pushq %r12 +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 56 +; CHECK-NEXT: subq $168, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 224 +; CHECK-NEXT: .cfi_offset %rbx, -56 +; CHECK-NEXT: .cfi_offset %r12, -48 +; CHECK-NEXT: .cfi_offset %r13, -40 +; CHECK-NEXT: .cfi_offset %r14, -32 +; CHECK-NEXT: .cfi_offset %r15, -24 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl %edi, %edi +; CHECK-NEXT: movl %esi, %esi +; CHECK-NEXT: movl %edx, %edx +; CHECK-NEXT: movl %ecx, %ecx +; CHECK-NEXT: movl %r8d, %r8d +; CHECK-NEXT: movl %r9d, %r9d +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %ebp +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r13d +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r12d +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r15d +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r14d +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %ebx +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r11d +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r10d +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: callq _bar ## 160-byte Folded Reload +; CHECK-NEXT: Ltmp9: +; CHECK-NEXT: addq $168, 
%rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r12 +; CHECK-NEXT: popq %r13 +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: retq +entry: + %a64 = zext i32 %a to i64 + %b64 = zext i32 %b to i64 + %c64 = zext i32 %c to i64 + %d64 = zext i32 %d to i64 + %e64 = zext i32 %e to i64 + %f64 = zext i32 %f to i64 + %g64 = zext i32 %g to i64 + %h64 = zext i32 %h to i64 + %i64 = zext i32 %i to i64 + %j64 = zext i32 %j to i64 + %k64 = zext i32 %k to i64 + %l64 = zext i32 %l to i64 + %m64 = zext i32 %m to i64 + %n64 = zext i32 %n to i64 + %o64 = zext i32 %o to i64 + %p64 = zext i32 %p to i64 + %q64 = zext i32 %q to i64 + %r64 = zext i32 %r to i64 + %s64 = zext i32 %s to i64 + %t64 = zext i32 %t to i64 + %u64 = zext i32 %u to i64 + %v64 = zext i32 %v to i64 + %w64 = zext i32 %w to i64 + %x64 = zext i32 %x to i64 + %y64 = zext i32 %y to i64 + %z64 = zext i32 %z to i64 + %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 26, i64 %a64, i64 %b64, i64 %c64, i64 %d64, i64 %e64, i64 %f64, i64 %g64, i64 %h64, i64 %i64, i64 %j64, i64 %k64, i64 %l64, i64 %m64, i64 %n64, i64 %o64, i64 %p64, i64 %q64, i64 %r64, i64 %s64, i64 %t64, i64 %u64, i64 %v64, i64 %w64, i64 %x64, i64 %y64, i64 %z64) + ret void +} + +; a variant of test7 with mixed types chosen to exercise register aliases +define void @test8(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z) gc "statepoint-example" { +; The code for this is terrible, check simply for correctness for the moment +; CHECK-LABEL: test8: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: pushq %r13 +; CHECK-NEXT: .cfi_def_cfa_offset 40 +; CHECK-NEXT: pushq %r12 +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 56 +; CHECK-NEXT: subq $104, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 160 +; CHECK-NEXT: .cfi_offset %rbx, -56 +; CHECK-NEXT: .cfi_offset %r12, -48 +; CHECK-NEXT: .cfi_offset %r13, -40 +; CHECK-NEXT: .cfi_offset %r14, -32 +; CHECK-NEXT: .cfi_offset %r15, -24 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movl %r9d, %r10d +; CHECK-NEXT: movl %r8d, %r9d +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte 
Spill +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %ebp +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r13d +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r12d +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r15d +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r14d +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %ebx +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r11d +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r8d +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movb %dil, {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Spill +; CHECK-NEXT: movb %sil, {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Spill +; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Spill +; CHECK-NEXT: movb %cl, (%rsp) ## 1-byte Spill +; CHECK-NEXT: movw %r9w, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; CHECK-NEXT: movw %r10w, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: callq _bar ## 104-byte Folded Reload +; CHECK-NEXT: Ltmp10: +; CHECK-NEXT: addq $104, %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r12 +; CHECK-NEXT: popq %r13 +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: retq +entry: + %a8 = trunc i32 %a to i8 + %b8 = trunc i32 %b to i8 + %c8 = trunc i32 %c to i8 + %d8 = trunc i32 %d to i8 + %e16 = trunc i32 %e to i16 + %f16 = trunc i32 %f to i16 + %g16 = trunc i32 %g to i16 + %h16 = trunc i32 %h to i16 + %i64 = zext i32 %i to i64 + %j64 = zext i32 %j to i64 + %k64 = zext i32 %k to i64 + %l64 = zext i32 %l to i64 + %m64 = zext i32 %m to i64 + %n64 = zext i32 %n to i64 + %o64 = zext i32 %o to i64 + %p64 = zext i32 %p to i64 + %q64 = zext i32 %q to i64 + %r64 = zext i32 %r to i64 + %s64 = zext i32 %s to i64 + %t64 = zext i32 %t to i64 + %u64 = zext i32 %u to i64 + %v64 = zext i32 %v to i64 + %w64 = zext i32 %w to i64 + %x64 = zext i32 %x to i64 + %y64 = zext i32 %y to i64 + %z64 = zext i32 %z to i64 + %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 26, i8 %a8, i8 %b8, i8 %c8, i8 %d8, i16 %e16, i16 %f16, i16 %g16, i16 %h16, i64 %i64, i64 %j64, i64 %k64, i64 %l64, i64 %m64, i64 %n64, i64 %o64, i64 %p64, i64 %q64, i64 %r64, i64 %s64, i64 %t64, i64 %u64, i64 %v64, i64 %w64, i64 %x64, i64 %y64, i64 %z64) + ret void +} + +; Test perfect forwarding of argument registers and stack slots to the +; deopt bundle uses +define void @test9(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z) gc "statepoint-example" { +; CHECK-LABEL: test9: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: subq $24, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl %r9d, (%rsp) ## 4-byte Spill +; CHECK-NEXT: callq _bar ## 24-byte Folded Reload +; CHECK-NEXT: Ltmp11: +; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: retq + +entry: + %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 26, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z) + ret void +} + +; Test enough folding of argument slots when we have one call which clobbers +; registers before a second which needs them - i.e. we must do something with +; arguments originally passed in registers +define void @test10(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z) gc "statepoint-example" { +; FIXME (minor): It would be better to just spill (and fold reload) for +; argument registers then spill and fill all the CSRs. +; CHECK-LABEL: test10: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: pushq %r13 +; CHECK-NEXT: .cfi_def_cfa_offset 40 +; CHECK-NEXT: pushq %r12 +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 56 +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset %rbx, -56 +; CHECK-NEXT: .cfi_offset %r12, -48 +; CHECK-NEXT: .cfi_offset %r13, -40 +; CHECK-NEXT: .cfi_offset %r14, -32 +; CHECK-NEXT: .cfi_offset %r15, -24 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movl %r9d, %r15d +; CHECK-NEXT: movl %r8d, %r14d +; CHECK-NEXT: movl %ecx, %r12d +; CHECK-NEXT: movl %edx, %r13d +; CHECK-NEXT: movl %esi, %ebx +; CHECK-NEXT: movl %edi, %ebp +; CHECK-NEXT: callq _bar +; CHECK-NEXT: Ltmp12: +; CHECK-NEXT: callq _bar +; CHECK-NEXT: Ltmp13: +; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r12 +; CHECK-NEXT: popq %r13 +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: retq + +entry: + %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 26, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z) + %statepoint_token2 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 26, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z) + ret void +} + +; Check that we can remat some uses of a def despite not remating before the +; statepoint user. +define i64 @test11(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z) gc "statepoint-example" { +; FIXME: The codegen for this is correct, but horrible. Lots of room for +; improvement if we so desire. 
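+; Each deopt value is also summed after the call, so every value is live
+; across the statepoint and has to be reloaded after it.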
+; CHECK-LABEL: test11: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: pushq %r13 +; CHECK-NEXT: .cfi_def_cfa_offset 40 +; CHECK-NEXT: pushq %r12 +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 56 +; CHECK-NEXT: subq $168, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 224 +; CHECK-NEXT: .cfi_offset %rbx, -56 +; CHECK-NEXT: .cfi_offset %r12, -48 +; CHECK-NEXT: .cfi_offset %r13, -40 +; CHECK-NEXT: .cfi_offset %r14, -32 +; CHECK-NEXT: .cfi_offset %r15, -24 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl %edi, %ebx +; CHECK-NEXT: movl %esi, %r15d +; CHECK-NEXT: movl %edx, %r12d +; CHECK-NEXT: movl %ecx, %r13d +; CHECK-NEXT: movl %r8d, %ebp +; CHECK-NEXT: movl %r9d, %r14d +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: callq _bar ## 160-byte Folded Reload +; CHECK-NEXT: Ltmp14: +; CHECK-NEXT: addq %r15, %rbx +; CHECK-NEXT: addq %r12, %rbx +; CHECK-NEXT: addq %r13, %rbx +; CHECK-NEXT: addq %rbp, %rbx +; CHECK-NEXT: addq %r14, %rbx +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: addq %rax, %rbx +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: addq %rax, %rbx +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; 
CHECK-NEXT: addq %rax, %rbx +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: addq %rax, %rbx +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: addq %rax, %rbx +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: addq %rax, %rbx +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: addq %rax, %rbx +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: addq %rax, %rbx +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: addq %rax, %rbx +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: addq %rax, %rbx +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: addq %rax, %rbx +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: addq %rax, %rbx +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: addq %rax, %rbx +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: addq %rax, %rbx +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: addq %rax, %rbx +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: addq %rax, %rbx +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: addq %rax, %rbx +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: addq %rax, %rbx +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: addq %rax, %rbx +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: addq %rax, %rbx +; CHECK-NEXT: movq %rbx, %rax +; CHECK-NEXT: addq $168, %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r12 +; CHECK-NEXT: popq %r13 +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: retq + +entry: + %a64 = zext i32 %a to i64 + %b64 = zext i32 %b to i64 + %c64 = zext i32 %c to i64 + %d64 = zext i32 %d to i64 + %e64 = zext i32 %e to i64 + %f64 = zext i32 %f to i64 + %g64 = zext i32 %g to i64 + %h64 = zext i32 %h to i64 + %i64 = zext i32 %i to i64 + %j64 = zext i32 %j to i64 + %k64 = zext i32 %k to i64 + %l64 = zext i32 %l to i64 + %m64 = zext i32 %m to i64 + %n64 = zext i32 %n to i64 + %o64 = zext i32 %o to i64 + %p64 = zext i32 %p to i64 + %q64 = zext i32 %q to i64 + %r64 = zext i32 %r to i64 + %s64 = zext i32 %s to i64 + %t64 = zext i32 %t to i64 + %u64 = zext i32 %u to i64 + %v64 = zext i32 %v to i64 + %w64 = zext i32 %w to i64 + %x64 = zext i32 %x to i64 + %y64 = zext i32 %y to i64 + %z64 = zext i32 %z to i64 + call token (i64, i32, void ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i64 0, i64 26, i64 %a64, i64 %b64, i64 %c64, i64 %d64, i64 %e64, i64 %f64, i64 %g64, i64 %h64, i64 %i64, i64 %j64, i64 %k64, i64 %l64, i64 %m64, i64 %n64, i64 %o64, i64 %p64, i64 %q64, i64 %r64, i64 %s64, i64 %t64, i64 %u64, i64 %v64, i64 %w64, i64 %x64, i64 %y64, i64 %z64) + %addab = add i64 %a64, %b64 + %addc = add i64 %addab, %c64 + %addd = add i64 %addc, %d64 + %adde = add i64 %addd, %e64 + %addf = add i64 %adde, %f64 + %addg = add i64 %addf, %g64 + %addh = add i64 %addg, %h64 + %addi = add i64 %addh, %i64 + %addj = add i64 %addi, %j64 + %addk = add i64 %addj, %k64 + %addl = add i64 %addk, %l64 + %addm = add i64 %addl, %m64 + %addn = add i64 %addm, %n64 + %addo = add i64 %addn, %o64 + %addp = add i64 %addo, %p64 + %addq = add i64 %addp, %q64 + %addr = add i64 %addq, %r64 + %adds = add i64 %addr, %s64 + %addt = add i64 %adds, %t64 + %addu = add i64 %addt, %u64 + %addv = add i64 %addu, %v64 + %addw = add i64 %addv, %w64 + %addx = add i64 %addw, %x64 + %addy = add i64 %addx, %y64 + %addz = add i64 %addy, %z64 + ret i64 %addz +} + +; Demonstrate address of a function (w/ spilling due to caller saved register is used) +define void @addr_func() gc "statepoint-example" { +; CHECK-LABEL: addr_func: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: movq _bar@{{.*}}(%rip), %rax +; CHECK-NEXT: movq %rax, (%rsp) ## 8-byte Spill +; CHECK-NEXT: callq _bar ## 8-byte Folded Reload +; CHECK-NEXT: Ltmp15: +; CHECK-NEXT: popq %rax +; CHECK-NEXT: retq +entry: + %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i64 0, i64 3, void ()* @bar, void ()* @bar, void ()* @bar) + ret void +} + +; Demonstrate address of a global (w/ spilling due to caller saved register is used) +@G = external global i32 +define void @addr_global() gc "statepoint-example" { +; CHECK-LABEL: addr_global: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: movq _G@{{.*}}(%rip), %rax +; CHECK-NEXT: movq %rax, (%rsp) ## 8-byte Spill +; CHECK-NEXT: callq _bar ## 8-byte Folded Reload +; CHECK-NEXT: Ltmp16: +; CHECK-NEXT: popq %rax +; CHECK-NEXT: retq +entry: + %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i64 0, i64 3, i32* @G, i32* @G, i32* @G) + ret void +} + +define void @addr_alloca(i32 %v) gc "statepoint-example" { +; CHECK-LABEL: addr_alloca: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: movl %edi, {{[0-9]+}}(%rsp) +; CHECK-NEXT: callq _bar +; CHECK-NEXT: Ltmp17: +; CHECK-NEXT: popq %rax +; CHECK-NEXT: retq +entry: + %a = alloca i32 + store i32 %v, i32* %a + %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i64 0, i64 3, i32* %a, i32* %a, i32* %a) + ret void +} + +declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) +declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32) + +attributes #0 = { "deopt-lowering"="live-in" } +attributes #1 = { "deopt-lowering"="live-through" }