diff --git a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp --- a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp +++ b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp @@ -46,8 +46,23 @@ cl::desc("Allow spill in spill slot of greater size than register size"), cl::Hidden); +static cl::opt PassGCPtrInCSR( + "fixup-allow-gcptr-in-csr", cl::Hidden, cl::init(false), + cl::desc("Allow passing GC Pointer arguments in callee saved registers")); + +static cl::opt DebugSpillCSR( + "fixup-debug-spill-csr", cl::Hidden, cl::init(false), + cl::desc("Spill CSR for debug purposes, but do not modify statepoint")); + +// Debugging aid option. +static cl::opt MaxStatepointsWithRegs( + "fixup-max-csr-statepoints", cl::Hidden, cl::init(0), + cl::desc("Max number of statepoints allowed to pass GC Ptrs in registers")); + namespace { +class FrameIndexesCache; + class FixupStatepointCallerSaved : public MachineFunctionPass { public: static char ID; @@ -66,7 +81,12 @@ } bool runOnMachineFunction(MachineFunction &MF) override; + +private: + void collectGlobalFIs(MachineBasicBlock &BB, FrameIndexesCache &Cache, + const TargetRegisterInfo *TRI); }; + } // End anonymous namespace. 
char FixupStatepointCallerSaved::ID = 0; @@ -83,6 +103,49 @@ return TRI.getSpillSize(*RC); } +// Advance iterator to the next stack map entry +static MachineInstr::const_mop_iterator +advanceToNextStackMapElt(MachineInstr::const_mop_iterator MOI) { + if (MOI->isImm()) { + switch (MOI->getImm()) { + default: + llvm_unreachable("Unrecognized operand type."); + case StackMaps::DirectMemRefOp: + MOI += 2; // , + break; + case StackMaps::IndirectMemRefOp: + MOI += 3; // , , + break; + case StackMaps::ConstantOp: + MOI += 1; + break; + } + } + return ++MOI; +} + +// Return statepoint GC args as a set +static SmallSet collectGCRegs(MachineInstr &MI) { + StatepointOpers SO(&MI); + unsigned VarIdx = SO.getVarIdx(); + unsigned NumDeoptIdx = VarIdx + 5; + unsigned NumDeoptArgs = MI.getOperand(NumDeoptIdx).getImm(); + MachineInstr::const_mop_iterator MOI(MI.operands_begin() + NumDeoptIdx + 1), + MOE(MI.operands_end()); + + // Skip deopt args + for (unsigned i = 0; i < NumDeoptArgs; ++i) + MOI = advanceToNextStackMapElt(MOI); + + SmallSet Result; + while (MOI != MOE) { + if (MOI->isReg() && !MOI->isImplicit()) + Result.insert(MOI->getReg()); + MOI = advanceToNextStackMapElt(MOI); + } + return Result; +} + namespace { // Cache used frame indexes during statepoint re-write to re-use them in // processing next statepoint instruction. @@ -105,6 +168,13 @@ // size will be increased. DenseMap Cache; + // Landing pad can be destination of several statepoints. Every register + // defined by such statepoints must be spilled to the same stack slot. + // This map keeps that information. + // NOTE: we assume that spill slot live ranges do not intersect. + using RegStatepointPair = std::pair; + DenseMap GlobalIndices; + public: FrameIndexesCache(MachineFrameInfo &MFI, const TargetRegisterInfo &TRI) : MFI(MFI), TRI(TRI) {} @@ -114,8 +184,19 @@ for (auto &It : Cache) It.second.Index = 0; } + // Get frame index to spill the register. 
- int getFrameIndex(Register Reg) { + int getFrameIndex(Register Reg, MachineInstr *MI = nullptr) { + if (MI) { + auto It = GlobalIndices.find(std::make_pair(Reg, MI)); + if (It != GlobalIndices.end()) { + int FI = It->second; + LLVM_DEBUG(dbgs() << "Found global FI " << FI << " for register " + << printReg(Reg, &TRI) << " at " << *MI); + return FI; + } + } + unsigned Size = getRegisterSize(TRI, Reg); // In FixupSCSExtendSlotSize mode the bucket with 0 index is used // for all sizes. @@ -148,8 +229,32 @@ return getRegisterSize(TRI, A) > getRegisterSize(TRI, B); }); } + + // Record frame index to be used to spill register \p Reg at instr \p MI. + void addGlobalSpillSlot(Register Reg, MachineInstr *MI, int FI) { + auto P = std::make_pair(Reg, MI); + GlobalIndices.insert(std::make_pair(P, FI)); + } }; +// Check if we already inserted reload of register Reg from spill slot FI +// in basic block MBB. +// This can happen in EH pad block which is successor of several +// statepoints. +static bool hasRegReload(Register Reg, int FI, MachineBasicBlock *MBB, + const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI) { + auto I = MBB->SkipPHIsLabelsAndDebug(MBB->begin()), E = MBB->end(); + int Dummy; + for (; I != E; ++I) { + if (TII->isLoadFromStackSlot(*I, Dummy) == Reg && Dummy == FI) + return true; + if (I->modifiesRegister(Reg, TRI) || I->readsRegister(Reg, TRI)) + return false; + } + return false; +} + // Describes the state of the current processing statepoint instruction. class StatepointState { private: @@ -163,6 +268,7 @@ const uint32_t *Mask; // Cache of frame indexes used on previous instruction processing. FrameIndexesCache &CacheFI; + bool AllowGCPtrInCSR; // Operands with physical registers requiring spilling. SmallVector OpsToSpill; // Set of register to spill. 
@@ -172,17 +278,20 @@ public: StatepointState(MachineInstr &MI, const uint32_t *Mask, - FrameIndexesCache &CacheFI) + FrameIndexesCache &CacheFI, bool AllowGCPtrInCSR) : MI(MI), MF(*MI.getMF()), TRI(*MF.getSubtarget().getRegisterInfo()), TII(*MF.getSubtarget().getInstrInfo()), MFI(MF.getFrameInfo()), - Mask(Mask), CacheFI(CacheFI) {} + Mask(Mask), CacheFI(CacheFI), AllowGCPtrInCSR(AllowGCPtrInCSR) {} + // Return true if register is callee saved. bool isCalleeSaved(Register Reg) { return (Mask[Reg / 32] >> Reg % 32) & 1; } + // Iterates over statepoint meta args to find caller saver registers. // Also cache the size of found registers. // Returns true if caller save registers found. bool findRegistersToSpill() { SmallSet VisitedRegs; + SmallSet GCRegs = collectGCRegs(MI); for (unsigned Idx = StatepointOpers(&MI).getVarIdx(), EndIdx = MI.getNumOperands(); Idx < EndIdx; ++Idx) { @@ -191,8 +300,19 @@ continue; Register Reg = MO.getReg(); assert(Reg.isPhysical() && "Only physical regs are expected"); - if (isCalleeSaved(Reg)) + + if (isCalleeSaved(Reg) && + (AllowGCPtrInCSR || !is_contained(GCRegs, Reg))) { + // If debug switch on, add Reg to spill set, but do not record its + // operand as needed rewrite. + if (DebugSpillCSR && VisitedRegs.insert(Reg).second) + RegsToSpill.push_back(Reg); continue; + } + + LLVM_DEBUG(dbgs() << "Will spill " << printReg(Reg, &TRI) << " at index " + << Idx << "\n"); + if (VisitedRegs.insert(Reg).second) RegsToSpill.push_back(Reg); OpsToSpill.push_back(Idx); @@ -200,18 +320,97 @@ CacheFI.sortRegisters(RegsToSpill); return !RegsToSpill.empty(); } + // Spill all caller saved registers right before statepoint instruction. // Remember frame index where register is spilled. 
void spillRegisters() { for (Register Reg : RegsToSpill) { - int FI = CacheFI.getFrameIndex(Reg); + int FI = CacheFI.getFrameIndex(Reg, &MI); const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(*MI.getParent(), MI, Reg, true /*is_Kill*/, FI, - RC, &TRI); + NumSpilledRegisters++; RegToSlotIdx[Reg] = FI; + + LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, &TRI) << "\n"); + bool isKill = true; + MachineInstr *InsertBefore = &MI; + + // Perform trivial copy propagation + MachineBasicBlock *MBB = MI.getParent(); + MachineBasicBlock::reverse_iterator B(MI); + MachineInstr *Def = nullptr, *Use = nullptr; + for (auto It = std::next(B); It != MBB->rend(); ++It) { + if (It->readsRegister(Reg, &TRI) && !Use) + Use = &*It; + if (It->modifiesRegister(Reg, &TRI)) { + Def = &*It; + break; + } + } + if (Def) + if (auto DestSrc = TII.isCopyInstr(*Def)) + if (DestSrc->Destination->getReg() == Reg) { + Register SrcReg = DestSrc->Source->getReg(); + LLVM_DEBUG(dbgs() << "spillRegisters: perform copy propagation " + << printReg(Reg, &TRI) << " -> " << printReg(SrcReg, &TRI) + << "\n"); + Reg = SrcReg; + isKill = DestSrc->Source->isKill(); + InsertBefore = Def->getNextNode(); + if (!Use) + Def->eraseFromParent(); + } + + LLVM_DEBUG(dbgs() << "Insert spill before " << *InsertBefore); + TII.storeRegToStackSlot(*MI.getParent(), InsertBefore, Reg, isKill, FI, + RC, &TRI); + } + } + + void insertReloadBefore(unsigned Reg, MachineBasicBlock::iterator It, + MachineBasicBlock *MBB) { + const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg); + int FI = RegToSlotIdx[Reg]; + if (It != MBB->end()) { + TII.loadRegFromStackSlot(*MBB, It, Reg, FI, RC, &TRI); + return; + } + + // To insert reload at the end of MBB, insert it before last instruction + // and then swap them. 
+ assert(MBB->begin() != MBB->end() && "Empty block"); + --It; + TII.loadRegFromStackSlot(*MBB, It, Reg, FI, RC, &TRI); + MachineInstr *Reload = It->getPrevNode(); + int Dummy = 0; + assert(TII.isLoadFromStackSlot(*Reload, Dummy) == Reg); + assert(Dummy == FI); + MBB->remove(Reload); + MBB->insertAfter(It, Reload); + } + + // Insert reload of register Reg after it has been spilled in statepoint. + void insertReloads(unsigned Reg) { + MachineBasicBlock *MBB = MI.getParent(); + auto It = MI.getIterator(); + insertReloadBefore(Reg, ++It, MBB); + + // XXX: can we have two statepoints in the same MBB, first not throwing + // and second throwing? + if (!MBB->hasEHPadSuccessor()) + return; + + int FI = RegToSlotIdx[Reg]; + for (auto Succ : MBB->successors()) { + if (!Succ->isEHPad()) + continue; + if (hasRegReload(Reg, FI, Succ, &TII, &TRI)) + continue; + auto It = Succ->SkipPHIsLabelsAndDebug(Succ->begin()); + insertReloadBefore(Reg, It, Succ); } } + // Re-write statepoint machine instruction to replace caller saved operands // with indirect memory location (frame index). void rewriteStatepoint() { @@ -219,11 +418,36 @@ MF.CreateMachineInstr(TII.get(MI.getOpcode()), MI.getDebugLoc(), true); MachineInstrBuilder MIB(MF, NewMI); + unsigned NumOps = MI.getNumOperands(); + + // Set of registers to reload after statepoint. + SmallVector RegsToReload; + // New indices for the remaining defs. + SmallVector NewIndices; + unsigned NumDefs = MI.getNumDefs(); + for (unsigned I = 0; I < NumDefs; ++I) { + MachineOperand &DefMO = MI.getOperand(I); + assert(DefMO.isReg() && DefMO.isDef() && "Expected Reg Def operand"); + Register Reg = DefMO.getReg(); + if (!AllowGCPtrInCSR) { + assert(is_contained(RegsToSpill, Reg)); + RegsToReload.push_back(Reg); + } else { + if (isCalleeSaved(Reg)) { + NewIndices.push_back(NewMI->getNumOperands()); + MIB.addReg(Reg, RegState::Define); + } else { + NewIndices.push_back(NumOps); + RegsToReload.push_back(Reg); + } + } + } + // Add End marker. 
OpsToSpill.push_back(MI.getNumOperands()); unsigned CurOpIdx = 0; - for (unsigned I = 0; I < MI.getNumOperands(); ++I) { + for (unsigned I = NumDefs; I < MI.getNumOperands(); ++I) { MachineOperand &MO = MI.getOperand(I); if (I == OpsToSpill[CurOpIdx]) { int FI = RegToSlotIdx[MO.getReg()]; @@ -234,8 +458,15 @@ MIB.addFrameIndex(FI); MIB.addImm(0); ++CurOpIdx; - } else + } else { MIB.add(MO); + unsigned OldDef; + if (AllowGCPtrInCSR && MI.isRegTiedToDefOperand(I, &OldDef)) { + assert(OldDef < NumDefs); + assert(NewIndices[OldDef] < NumOps); + MIB->tieOperands(NewIndices[OldDef], MIB->getNumOperands() - 1); + } + } } assert(CurOpIdx == (OpsToSpill.size() - 1) && "Not all operands processed"); // Add mem operands. @@ -248,8 +479,14 @@ MFI.getObjectAlign(FrameIndex)); NewMI->addMemOperand(MF, MMO); } + // Insert new statepoint and erase old one. MI.getParent()->insert(MI, NewMI); + + for (Register Reg : RegsToReload) + insertReloads(Reg); + + LLVM_DEBUG(dbgs() << "rewritten statepoint to : " << *NewMI << "\n"); MI.eraseFromParent(); } }; @@ -265,16 +502,22 @@ : MF(MF), TRI(*MF.getSubtarget().getRegisterInfo()), CacheFI(MF.getFrameInfo(), TRI) {} - bool process(MachineInstr &MI) { + StatepointProcessor(MachineFunction &MF, FrameIndexesCache &Cache) + : MF(MF), TRI(*MF.getSubtarget().getRegisterInfo()), CacheFI(Cache) {} + + bool process(MachineInstr &MI, bool AllowGCPtrInCSR) { StatepointOpers SO(&MI); uint64_t Flags = SO.getFlags(); // Do nothing for LiveIn, it supports all registers. 
if (Flags & (uint64_t)StatepointFlags::DeoptLiveIn) return false; + LLVM_DEBUG(dbgs() << "\nMBB " << MI.getParent()->getNumber() << " " + << MI.getParent()->getName() << " : process statepoint " + << MI); CallingConv::ID CC = SO.getCallingConv(); const uint32_t *Mask = TRI.getCallPreservedMask(MF, CC); CacheFI.reset(); - StatepointState SS(MI, Mask, CacheFI); + StatepointState SS(MI, Mask, CacheFI, AllowGCPtrInCSR); if (!SS.findRegistersToSpill()) return false; @@ -286,6 +529,80 @@ }; } // namespace +// Return live out definition of Reg in MBB or null +static MachineInstr *findLiveOutDef(Register Reg, MachineBasicBlock *MBB, + const TargetRegisterInfo *TRI) { + for (auto I = MBB->rbegin(), E = MBB->rend(); I != E; ++I) { + // Special case for statepoint because we're looking specifically + // for explicit defs, not any implicit effects like regmask or register + // implicit def. + if (I->getOpcode() == TargetOpcode::STATEPOINT) + for (unsigned i = 0; i < I->getNumOperands(); ++i) { + MachineOperand &MO = I->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + return nullptr; + if (MO.getReg() == Reg) + return &*I; + } + if (I->modifiesRegister(Reg, TRI)) + return &*I; + } + return nullptr; +} + +// For EH pad block with multiple predecessors check if its live-in +// registers are defined by statepoints in preds. If so, assign same +// spill slot for each register at each statepoint. +// NOTE: It works only if all reaching definitions of register are statepoints, +// otherwise we cannot insert reload into EH pad and must insert multiple +// reloads on edges. +// ASSUMPTION: live ranges of spill slots from different statepoints do not +// intersect. +void FixupStatepointCallerSaved::collectGlobalFIs( + MachineBasicBlock &BB, FrameIndexesCache &Cache, + const TargetRegisterInfo *TRI) { + // NOTE: I've seen dead registers marked as live-ins in block. + // That's OK for us, but if it is broke the other way + // (live register not in live-in), we're screwed up. 
+ if (!BB.isEHPad() || BB.livein_empty() || BB.pred_size() == 1) + return; + SmallVector Preds(BB.predecessors()); + auto isStatepoint = [](MachineInstr *I) { + return I && I->getOpcode() == TargetOpcode::STATEPOINT; + }; + + // Resetting the cache allows us to reuse stack slots between + // different 'statepoint sets' (a set of statepoints reaching + // same EH Pad). This works under assumption that we allocate + // these 'global' spill slots before starting to process + // individual statepoints. + Cache.reset(); + + for (auto &LI : BB.liveins()) { + Register Reg = LI.PhysReg; + SmallVector RegDefs; + for (auto *B : Preds) + RegDefs.push_back(findLiveOutDef(Reg, B, TRI)); + if (llvm::all_of(RegDefs, isStatepoint)) { + int FI = Cache.getFrameIndex(Reg); + for (auto *Def : RegDefs) { + Cache.addGlobalSpillSlot(Reg, Def, FI); + LLVM_DEBUG(dbgs() << "EH Pad bb." << BB.getNumber() << ": reserving FI " + << FI << " to spill register " << printReg(Reg, TRI) + << " at statepoint in bb." + << Def->getParent()->getNumber() << "\n"); + } + } else { + // That spilling stuff is all-or-nothing: either all defining instructions + // are statepoints (and we can spill to the same slot) or none of them are + // statepoints (so we do not need any reloads). Otherwise we're in + // trouble. 
+ assert(llvm::none_of(RegDefs, isStatepoint) && + "Cannot safely reload register"); + } + } +} + bool FixupStatepointCallerSaved::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; @@ -294,18 +611,31 @@ if (!F.hasGC()) return false; + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + FrameIndexesCache FICache(MF.getFrameInfo(), *TRI); + SmallVector Statepoints; - for (MachineBasicBlock &BB : MF) + for (MachineBasicBlock &BB : MF) { + collectGlobalFIs(BB, FICache, TRI); for (MachineInstr &I : BB) if (I.getOpcode() == TargetOpcode::STATEPOINT) Statepoints.push_back(&I); + } if (Statepoints.empty()) return false; bool Changed = false; - StatepointProcessor SPP(MF); - for (MachineInstr *I : Statepoints) - Changed |= SPP.process(*I); + StatepointProcessor SPP(MF, FICache); + unsigned NumStatepoints = 0; + bool AllowGCPtrInCSR = PassGCPtrInCSR; + for (MachineInstr *I : Statepoints) { + ++NumStatepoints; + if (MaxStatepointsWithRegs.getNumOccurrences() && + NumStatepoints >= MaxStatepointsWithRegs) + AllowGCPtrInCSR = false; + + Changed |= SPP.process(*I, AllowGCPtrInCSR); + } return Changed; } diff --git a/llvm/test/CodeGen/X86/statepoint-call-lowering.ll b/llvm/test/CodeGen/X86/statepoint-call-lowering.ll --- a/llvm/test/CodeGen/X86/statepoint-call-lowering.ll +++ b/llvm/test/CodeGen/X86/statepoint-call-lowering.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,CHECK-ALL %s +; RUN: llc -verify-machineinstrs -use-registers-for-gcptrs=true < %s | FileCheck --check-prefixes=CHECK-VREG,CHECK-ALL %s ; This file contains a collection of basic tests to ensure we didn't ; screw up normal call lowering when there are no deopt or gc arguments. @@ -16,15 +17,15 @@ declare void @varargf(i32, ...) 
define i1 @test_i1_return() gc "statepoint-example" { -; CHECK-LABEL: test_i1_return: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: callq return_i1 -; CHECK-NEXT: .Ltmp0: -; CHECK-NEXT: popq %rcx -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq +; CHECK-ALL-LABEL: test_i1_return: +; CHECK-ALL: # %bb.0: # %entry +; CHECK-ALL-NEXT: pushq %rax +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ALL-NEXT: callq return_i1 +; CHECK-ALL-NEXT: .Ltmp0: +; CHECK-ALL-NEXT: popq %rcx +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 8 +; CHECK-ALL-NEXT: retq ; This is just checking that a i1 gets lowered normally when there's no extra ; state arguments to the statepoint entry: @@ -34,15 +35,15 @@ } define i32 @test_i32_return() gc "statepoint-example" { -; CHECK-LABEL: test_i32_return: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: callq return_i32 -; CHECK-NEXT: .Ltmp1: -; CHECK-NEXT: popq %rcx -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq +; CHECK-ALL-LABEL: test_i32_return: +; CHECK-ALL: # %bb.0: # %entry +; CHECK-ALL-NEXT: pushq %rax +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ALL-NEXT: callq return_i32 +; CHECK-ALL-NEXT: .Ltmp1: +; CHECK-ALL-NEXT: popq %rcx +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 8 +; CHECK-ALL-NEXT: retq entry: %safepoint_token = tail call token (i64, i32, i32 ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_i32f(i64 0, i32 0, i32 ()* @return_i32, i32 0, i32 0, i32 0, i32 0) %call1 = call zeroext i32 @llvm.experimental.gc.result.i32(token %safepoint_token) @@ -50,15 +51,15 @@ } define i32* @test_i32ptr_return() gc "statepoint-example" { -; CHECK-LABEL: test_i32ptr_return: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: callq return_i32ptr -; CHECK-NEXT: .Ltmp2: -; CHECK-NEXT: popq %rcx -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq +; CHECK-ALL-LABEL: test_i32ptr_return: +; CHECK-ALL: # %bb.0: # %entry +; CHECK-ALL-NEXT: pushq %rax +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ALL-NEXT: callq return_i32ptr +; CHECK-ALL-NEXT: .Ltmp2: +; CHECK-ALL-NEXT: popq %rcx +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 8 +; CHECK-ALL-NEXT: retq entry: %safepoint_token = tail call token (i64, i32, i32* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p0i32f(i64 0, i32 0, i32* ()* @return_i32ptr, i32 0, i32 0, i32 0, i32 0) %call1 = call i32* @llvm.experimental.gc.result.p0i32(token %safepoint_token) @@ -66,15 +67,15 @@ } define float @test_float_return() gc "statepoint-example" { -; CHECK-LABEL: test_float_return: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: callq return_float -; CHECK-NEXT: .Ltmp3: -; CHECK-NEXT: popq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq +; CHECK-ALL-LABEL: test_float_return: +; CHECK-ALL: # %bb.0: # %entry +; CHECK-ALL-NEXT: pushq %rax +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ALL-NEXT: callq return_float +; CHECK-ALL-NEXT: .Ltmp3: +; CHECK-ALL-NEXT: popq %rax +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 8 +; CHECK-ALL-NEXT: retq entry: %safepoint_token = tail call token (i64, i32, float ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_f32f(i64 0, i32 0, float ()* @return_float, i32 0, i32 0, i32 0, i32 0) %call1 = call float @llvm.experimental.gc.result.f32(token %safepoint_token) @@ -82,15 +83,15 @@ } define %struct @test_struct_return() gc "statepoint-example" { -; CHECK-LABEL: test_struct_return: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: callq return_struct -; CHECK-NEXT: .Ltmp4: -; CHECK-NEXT: popq %rcx -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq +; CHECK-ALL-LABEL: test_struct_return: +; CHECK-ALL: # %bb.0: # %entry +; CHECK-ALL-NEXT: pushq %rax +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ALL-NEXT: callq return_struct +; CHECK-ALL-NEXT: .Ltmp4: +; CHECK-ALL-NEXT: popq %rcx +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 8 +; CHECK-ALL-NEXT: retq entry: %safepoint_token = tail call token (i64, i32, %struct ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_structf(i64 0, i32 0, %struct ()* @return_struct, i32 0, i32 0, i32 0, i32 0) %call1 = call %struct @llvm.experimental.gc.result.struct(token %safepoint_token) @@ -108,6 +109,22 @@ ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq +; CHECK-VREG-LABEL: test_relocate: +; CHECK-VREG: # %bb.0: # %entry +; CHECK-VREG-NEXT: pushq %rbx +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-VREG-NEXT: subq $16, %rsp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 32 +; CHECK-VREG-NEXT: .cfi_offset %rbx, -16 +; CHECK-VREG-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) +; CHECK-VREG-NEXT: callq return_i1 +; CHECK-VREG-NEXT: .Ltmp5: +; CHECK-VREG-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx +; CHECK-VREG-NEXT: addq $16, %rsp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-VREG-NEXT: popq %rbx +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 8 +; CHECK-VREG-NEXT: retq ; Check that an ununsed relocate has no code-generation impact entry: %safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %a) @@ -117,17 +134,17 @@ } define void @test_void_vararg() gc "statepoint-example" { -; CHECK-LABEL: test_void_vararg: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: movl $42, %edi -; CHECK-NEXT: movl $43, %esi -; CHECK-NEXT: callq varargf -; CHECK-NEXT: .Ltmp6: -; CHECK-NEXT: popq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq +; CHECK-ALL-LABEL: test_void_vararg: +; CHECK-ALL: # %bb.0: # %entry +; CHECK-ALL-NEXT: pushq %rax +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ALL-NEXT: movl $42, %edi +; CHECK-ALL-NEXT: movl $43, %esi +; CHECK-ALL-NEXT: callq varargf +; CHECK-ALL-NEXT: .Ltmp6: +; CHECK-ALL-NEXT: popq %rax +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 8 +; CHECK-ALL-NEXT: retq ; Check a statepoint wrapping a *void* returning vararg function works entry: %safepoint_token = tail call token (i64, i32, void (i32, ...)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidi32varargf(i64 0, i32 0, void (i32, ...)* @varargf, i32 2, i32 0, i32 42, i32 43, i32 0, i32 0) @@ -137,15 +154,15 @@ } define i1 @test_i1_return_patchable() gc "statepoint-example" { -; CHECK-LABEL: test_i1_return_patchable: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: nopl (%rax) -; CHECK-NEXT: .Ltmp7: -; CHECK-NEXT: popq %rcx -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq +; CHECK-ALL-LABEL: test_i1_return_patchable: +; CHECK-ALL: # %bb.0: # %entry +; CHECK-ALL-NEXT: pushq %rax +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ALL-NEXT: nopl (%rax) +; CHECK-ALL-NEXT: .Ltmp7: +; CHECK-ALL-NEXT: popq %rcx +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 8 +; CHECK-ALL-NEXT: retq ; A patchable variant of test_i1_return entry: %safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 3, i1 ()*null, i32 0, i32 0, i32 0, i32 0) @@ -188,6 +205,44 @@ ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq +; CHECK-VREG-LABEL: test_cross_bb: +; CHECK-VREG: # %bb.0: # %entry +; CHECK-VREG-NEXT: pushq %rbp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-VREG-NEXT: pushq %r14 +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 24 +; CHECK-VREG-NEXT: pushq %rbx +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 32 +; CHECK-VREG-NEXT: subq $16, %rsp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 48 +; CHECK-VREG-NEXT: .cfi_offset %rbx, -32 +; CHECK-VREG-NEXT: .cfi_offset %r14, -24 +; CHECK-VREG-NEXT: .cfi_offset %rbp, -16 +; CHECK-VREG-NEXT: movl %esi, %ebp +; CHECK-VREG-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) +; CHECK-VREG-NEXT: callq return_i1 +; CHECK-VREG-NEXT: .Ltmp8: +; CHECK-VREG-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx +; CHECK-VREG-NEXT: testb $1, %bpl +; CHECK-VREG-NEXT: je .LBB8_2 +; CHECK-VREG-NEXT: # %bb.1: # %left +; CHECK-VREG-NEXT: movl %eax, %r14d +; CHECK-VREG-NEXT: movq %rbx, %rdi +; CHECK-VREG-NEXT: callq consume +; CHECK-VREG-NEXT: movl %r14d, %eax +; CHECK-VREG-NEXT: jmp .LBB8_3 +; CHECK-VREG-NEXT: .LBB8_2: # %right +; CHECK-VREG-NEXT: movb $1, %al +; CHECK-VREG-NEXT: .LBB8_3: # %right +; CHECK-VREG-NEXT: addq $16, %rsp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 32 +; CHECK-VREG-NEXT: popq %rbx +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 24 +; CHECK-VREG-NEXT: popq %r14 +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-VREG-NEXT: popq %rbp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 8 +; CHECK-VREG-NEXT: retq entry: %safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %a) br i1 %external_cond, label %left, label %right @@ -207,31 +262,31 @@ declare void @consume_attributes(i32, i8* nest, i32, %struct2* byval) define void @test_attributes(%struct2* byval %s) gc "statepoint-example" { -; CHECK-LABEL: test_attributes: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: subq $8, %rsp -; CHECK-NEXT: .cfi_adjust_cfa_offset 8 -; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax -; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rcx -; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdx -; CHECK-NEXT: movl $42, %edi -; CHECK-NEXT: xorl %r10d, %r10d -; CHECK-NEXT: movl $17, %esi -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_adjust_cfa_offset 8 -; CHECK-NEXT: pushq %rdx -; CHECK-NEXT: .cfi_adjust_cfa_offset 8 -; CHECK-NEXT: pushq %rcx -; CHECK-NEXT: .cfi_adjust_cfa_offset 8 -; CHECK-NEXT: callq consume_attributes -; CHECK-NEXT: .Ltmp9: -; CHECK-NEXT: addq $32, %rsp -; CHECK-NEXT: .cfi_adjust_cfa_offset -32 -; CHECK-NEXT: popq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq +; CHECK-ALL-LABEL: test_attributes: +; CHECK-ALL: # %bb.0: # %entry +; CHECK-ALL-NEXT: pushq %rax +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ALL-NEXT: subq $8, %rsp +; CHECK-ALL-NEXT: .cfi_adjust_cfa_offset 8 +; CHECK-ALL-NEXT: movq {{[0-9]+}}(%rsp), %rax +; CHECK-ALL-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; CHECK-ALL-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; CHECK-ALL-NEXT: movl $42, %edi +; CHECK-ALL-NEXT: xorl %r10d, %r10d +; CHECK-ALL-NEXT: movl $17, %esi +; CHECK-ALL-NEXT: pushq %rax +; CHECK-ALL-NEXT: .cfi_adjust_cfa_offset 8 +; CHECK-ALL-NEXT: pushq %rdx +; CHECK-ALL-NEXT: .cfi_adjust_cfa_offset 8 +; CHECK-ALL-NEXT: pushq %rcx +; CHECK-ALL-NEXT: .cfi_adjust_cfa_offset 8 +; CHECK-ALL-NEXT: callq consume_attributes +; CHECK-ALL-NEXT: .Ltmp9: +; CHECK-ALL-NEXT: addq $32, %rsp +; CHECK-ALL-NEXT: 
.cfi_adjust_cfa_offset -32 +; CHECK-ALL-NEXT: popq %rax +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 8 +; CHECK-ALL-NEXT: retq entry: ; Check that arguments with attributes are lowered correctly. ; We call a function that has a nest argument and a byval argument. diff --git a/llvm/test/CodeGen/X86/statepoint-duplicates-export.ll b/llvm/test/CodeGen/X86/statepoint-duplicates-export.ll --- a/llvm/test/CodeGen/X86/statepoint-duplicates-export.ll +++ b/llvm/test/CodeGen/X86/statepoint-duplicates-export.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,CHECK-ALL %s +; RUN: llc -verify-machineinstrs -use-registers-for-gcptrs=true < %s | FileCheck --check-prefixes=CHECK-VREG,CHECK-ALL %s ; Check that we can export values of "duplicated" gc.relocates without a crash ; "duplicate" here means maps to same SDValue. We previously had an @@ -12,18 +13,18 @@ declare void @func() define i1 @test() gc "statepoint-example" { -; CHECK-LABEL: test: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: callq func -; CHECK-NEXT: .Ltmp0: -; CHECK-NEXT: callq func -; CHECK-NEXT: .Ltmp1: -; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: popq %rcx -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq +; CHECK-ALL-LABEL: test: +; CHECK-ALL: # %bb.0: # %entry +; CHECK-ALL-NEXT: pushq %rax +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ALL-NEXT: callq func +; CHECK-ALL-NEXT: .Ltmp0: +; CHECK-ALL-NEXT: callq func +; CHECK-ALL-NEXT: .Ltmp1: +; CHECK-ALL-NEXT: movb $1, %al +; CHECK-ALL-NEXT: popq %rcx +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 8 +; CHECK-ALL-NEXT: retq entry: %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* null, i32 addrspace(1)* null) %base = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 7, i32 7) diff --git a/llvm/test/CodeGen/X86/statepoint-invoke.ll b/llvm/test/CodeGen/X86/statepoint-invoke.ll --- a/llvm/test/CodeGen/X86/statepoint-invoke.ll +++ b/llvm/test/CodeGen/X86/statepoint-invoke.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -verify-machineinstrs < %s 2>&1 | FileCheck %s +; RUN: llc -verify-machineinstrs < %s 2>&1 | FileCheck --check-prefixes=CHECK,CHECK-ALL %s +; RUN: llc -verify-machineinstrs -use-registers-for-gcptrs=true < %s | FileCheck --check-prefixes=CHECK-VREG,CHECK-ALL %s target triple = "x86_64-pc-linux-gnu" @@ -31,6 +32,41 @@ ; CHECK-NEXT: addq $24, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq +; CHECK-VREG-LABEL: test_basic: +; CHECK-VREG: # %bb.0: # %entry +; CHECK-VREG-NEXT: pushq %r14 +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-VREG-NEXT: pushq %rbx +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 24 +; CHECK-VREG-NEXT: subq $24, %rsp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 48 +; CHECK-VREG-NEXT: .cfi_offset %rbx, -24 +; CHECK-VREG-NEXT: .cfi_offset %r14, -16 +; CHECK-VREG-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) +; CHECK-VREG-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) +; CHECK-VREG-NEXT: .Ltmp0: +; CHECK-VREG-NEXT: callq some_call +; CHECK-VREG-NEXT: .Ltmp3: +; CHECK-VREG-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx +; CHECK-VREG-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 +; CHECK-VREG-NEXT: .Ltmp1: +; CHECK-VREG-NEXT: # %bb.1: # %normal_return +; CHECK-VREG-NEXT: movq %rbx, %rax +; CHECK-VREG-NEXT: .LBB0_2: # %normal_return +; CHECK-VREG-NEXT: addq $24, %rsp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 24 +; CHECK-VREG-NEXT: popq %rbx +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-VREG-NEXT: popq %r14 +; 
CHECK-VREG-NEXT: .cfi_def_cfa_offset 8 +; CHECK-VREG-NEXT: retq +; CHECK-VREG-NEXT: .LBB0_3: # %exceptional_return +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 48 +; CHECK-VREG-NEXT: .Ltmp2: +; CHECK-VREG-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx +; CHECK-VREG-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 +; CHECK-VREG-NEXT: movq %r14, %rax +; CHECK-VREG-NEXT: jmp .LBB0_2 i64 addrspace(1)* %obj1) gc "statepoint-example" personality i32 ()* @"personality_function" { entry: @@ -52,11 +88,11 @@ %obj1.relocated1 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %landing_pad, i32 14, i32 14) ret i64 addrspace(1)* %obj1.relocated1 } -; CHECK-LABEL: GCC_except_table{{[0-9]+}}: -; CHECK: .uleb128 .Ltmp{{[0-9]+}}-.Ltmp{{[0-9]+}} -; CHECK: .uleb128 .Ltmp{{[0-9]+}}-.Lfunc_begin{{[0-9]+}} -; CHECK: .byte 0 -; CHECK: .p2align 4 +; CHECK-ALL-LABEL: GCC_except_table{{[0-9]+}}: +; CHECK-ALL: .uleb128 .Ltmp{{[0-9]+}}-.Ltmp{{[0-9]+}} +; CHECK-ALL: .uleb128 .Ltmp{{[0-9]+}}-.Lfunc_begin{{[0-9]+}} +; CHECK-ALL: .byte 0 +; CHECK-ALL: .p2align 4 define i64 addrspace(1)* @test_result(i64 addrspace(1)* %obj, ; CHECK-LABEL: test_result: @@ -79,6 +115,31 @@ ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq +; CHECK-VREG-LABEL: test_result: +; CHECK-VREG: # %bb.0: # %entry +; CHECK-VREG-NEXT: pushq %rbx +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-VREG-NEXT: subq $16, %rsp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 32 +; CHECK-VREG-NEXT: .cfi_offset %rbx, -16 +; CHECK-VREG-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) +; CHECK-VREG-NEXT: .Ltmp4: +; CHECK-VREG-NEXT: callq some_other_call +; CHECK-VREG-NEXT: .Ltmp7: +; CHECK-VREG-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx +; CHECK-VREG-NEXT: .Ltmp5: +; CHECK-VREG-NEXT: .LBB1_1: # %normal_return +; CHECK-VREG-NEXT: addq $16, %rsp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-VREG-NEXT: popq %rbx +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 8 +; CHECK-VREG-NEXT: retq +; CHECK-VREG-NEXT: .LBB1_2: # 
%exceptional_return +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 32 +; CHECK-VREG-NEXT: .Ltmp6: +; CHECK-VREG-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx +; CHECK-VREG-NEXT: movq %rbx, %rax +; CHECK-VREG-NEXT: jmp .LBB1_1 i64 addrspace(1)* %obj1) gc "statepoint-example" personality i32 ()* @personality_function { entry: @@ -95,11 +156,11 @@ %obj.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %landing_pad, i32 13, i32 13) ret i64 addrspace(1)* %obj.relocated } -; CHECK-LABEL: GCC_except_table{{[0-9]+}}: -; CHECK: .uleb128 .Ltmp{{[0-9]+}}-.Ltmp{{[0-9]+}} -; CHECK: .uleb128 .Ltmp{{[0-9]+}}-.Lfunc_begin{{[0-9]+}} -; CHECK: .byte 0 -; CHECK: .p2align 4 +; CHECK-ALL-LABEL: GCC_except_table{{[0-9]+}}: +; CHECK-ALL: .uleb128 .Ltmp{{[0-9]+}}-.Ltmp{{[0-9]+}} +; CHECK-ALL: .uleb128 .Ltmp{{[0-9]+}}-.Lfunc_begin{{[0-9]+}} +; CHECK-ALL: .byte 0 +; CHECK-ALL: .p2align 4 define i64 addrspace(1)* @test_same_val(i1 %cond, i64 addrspace(1)* %val1, i64 addrspace(1)* %val2, i64 addrspace(1)* %val3) ; CHECK-LABEL: test_same_val: @@ -153,6 +214,80 @@ ; CHECK-NEXT: .Ltmp13: ; CHECK-NEXT: movq (%rsp), %rax ; CHECK-NEXT: jmp .LBB2_6 +; CHECK-VREG-LABEL: test_same_val: +; CHECK-VREG: # %bb.0: # %entry +; CHECK-VREG-NEXT: pushq %rbp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-VREG-NEXT: pushq %r15 +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 24 +; CHECK-VREG-NEXT: pushq %r14 +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 32 +; CHECK-VREG-NEXT: pushq %rbx +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 40 +; CHECK-VREG-NEXT: subq $24, %rsp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 64 +; CHECK-VREG-NEXT: .cfi_offset %rbx, -40 +; CHECK-VREG-NEXT: .cfi_offset %r14, -32 +; CHECK-VREG-NEXT: .cfi_offset %r15, -24 +; CHECK-VREG-NEXT: .cfi_offset %rbp, -16 +; CHECK-VREG-NEXT: movq %rdx, %rbx +; CHECK-VREG-NEXT: movq %rsi, %rbp +; CHECK-VREG-NEXT: movl %edi, %r14d +; CHECK-VREG-NEXT: testb $1, %r14b +; CHECK-VREG-NEXT: je .LBB2_2 +; CHECK-VREG-NEXT: # %bb.1: # %left +; CHECK-VREG-NEXT: 
.Ltmp11: +; CHECK-VREG-NEXT: movq %rbp, %rdi +; CHECK-VREG-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) +; CHECK-VREG-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) +; CHECK-VREG-NEXT: callq some_call +; CHECK-VREG-NEXT: .Ltmp14: +; CHECK-VREG-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp +; CHECK-VREG-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx +; CHECK-VREG-NEXT: .Ltmp12: +; CHECK-VREG-NEXT: jmp .LBB2_4 +; CHECK-VREG-NEXT: .LBB2_2: # %right +; CHECK-VREG-NEXT: movq %rcx, %r15 +; CHECK-VREG-NEXT: .Ltmp8: +; CHECK-VREG-NEXT: movq %rbp, %rdi +; CHECK-VREG-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) +; CHECK-VREG-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) +; CHECK-VREG-NEXT: callq some_call +; CHECK-VREG-NEXT: .Ltmp15: +; CHECK-VREG-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx +; CHECK-VREG-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 +; CHECK-VREG-NEXT: .Ltmp9: +; CHECK-VREG-NEXT: # %bb.3: # %right.relocs +; CHECK-VREG-NEXT: movq %r15, %rbp +; CHECK-VREG-NEXT: .LBB2_4: # %normal_return +; CHECK-VREG-NEXT: testb $1, %r14b +; CHECK-VREG-NEXT: cmoveq %rbx, %rbp +; CHECK-VREG-NEXT: .LBB2_5: # %normal_return +; CHECK-VREG-NEXT: movq %rbp, %rax +; CHECK-VREG-NEXT: .LBB2_6: # %normal_return +; CHECK-VREG-NEXT: addq $24, %rsp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 40 +; CHECK-VREG-NEXT: popq %rbx +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 32 +; CHECK-VREG-NEXT: popq %r14 +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 24 +; CHECK-VREG-NEXT: popq %r15 +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-VREG-NEXT: popq %rbp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 8 +; CHECK-VREG-NEXT: retq +; CHECK-VREG-NEXT: .LBB2_8: # %exceptional_return.right +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 64 +; CHECK-VREG-NEXT: .Ltmp10: +; CHECK-VREG-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx +; CHECK-VREG-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 +; CHECK-VREG-NEXT: movq %rbx, %rax +; CHECK-VREG-NEXT: jmp .LBB2_6 +; CHECK-VREG-NEXT: .LBB2_7: # %exceptional_return.left +; CHECK-VREG-NEXT: .Ltmp13: +; CHECK-VREG-NEXT: movq 
{{[-0-9]+}}(%r{{[sb]}}p), %rbp +; CHECK-VREG-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx +; CHECK-VREG-NEXT: jmp .LBB2_5 gc "statepoint-example" personality i32 ()* @"personality_function" { entry: br i1 %cond, label %left, label %right @@ -195,23 +330,23 @@ } define i64 addrspace(1)* @test_null_undef(i64 addrspace(1)* %val1) -; CHECK-LABEL: test_null_undef: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .Ltmp16: -; CHECK-NEXT: callq some_call -; CHECK-NEXT: .Ltmp19: -; CHECK-NEXT: .Ltmp17: -; CHECK-NEXT: .LBB3_1: # %normal_return -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: popq %rcx -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq -; CHECK-NEXT: .LBB3_2: # %exceptional_return -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .Ltmp18: -; CHECK-NEXT: jmp .LBB3_1 +; CHECK-ALL-LABEL: test_null_undef: +; CHECK-ALL: # %bb.0: # %entry +; CHECK-ALL-NEXT: pushq %rax +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ALL-NEXT: .Ltmp16: +; CHECK-ALL-NEXT: callq some_call +; CHECK-ALL-NEXT: .Ltmp19: +; CHECK-ALL-NEXT: .Ltmp17: +; CHECK-ALL-NEXT: .LBB3_1: # %normal_return +; CHECK-ALL-NEXT: xorl %eax, %eax +; CHECK-ALL-NEXT: popq %rcx +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 8 +; CHECK-ALL-NEXT: retq +; CHECK-ALL-NEXT: .LBB3_2: # %exceptional_return +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ALL-NEXT: .Ltmp18: +; CHECK-ALL-NEXT: jmp .LBB3_1 gc "statepoint-example" personality i32 ()* @"personality_function" { entry: %sp1 = invoke token (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i64 addrspace(1)* null, i64 addrspace(1)* undef) @@ -231,26 +366,26 @@ } define i64 addrspace(1)* @test_alloca_and_const(i64 addrspace(1)* %val1) -; CHECK-LABEL: test_alloca_and_const: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .Ltmp20: -; CHECK-NEXT: callq some_call -; CHECK-NEXT: .Ltmp23: -; CHECK-NEXT: .Ltmp21: -; CHECK-NEXT: # %bb.1: # %normal_return -; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rax -; CHECK-NEXT: popq %rcx -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq -; CHECK-NEXT: .LBB4_2: # %exceptional_return -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .Ltmp22: -; CHECK-NEXT: movl $15, %eax -; CHECK-NEXT: popq %rcx -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq +; CHECK-ALL-LABEL: test_alloca_and_const: +; CHECK-ALL: # %bb.0: # %entry +; CHECK-ALL-NEXT: pushq %rax +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ALL-NEXT: .Ltmp20: +; CHECK-ALL-NEXT: callq some_call +; CHECK-ALL-NEXT: .Ltmp23: +; CHECK-ALL-NEXT: .Ltmp21: +; CHECK-ALL-NEXT: # %bb.1: # %normal_return +; CHECK-ALL-NEXT: leaq {{[0-9]+}}(%rsp), %rax +; CHECK-ALL-NEXT: popq %rcx +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 8 +; CHECK-ALL-NEXT: retq +; CHECK-ALL-NEXT: .LBB4_2: # %exceptional_return +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ALL-NEXT: .Ltmp22: +; CHECK-ALL-NEXT: movl $15, %eax +; CHECK-ALL-NEXT: popq %rcx +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 8 +; CHECK-ALL-NEXT: retq gc "statepoint-example" personality i32 ()* @"personality_function" { entry: %a = alloca i32 diff --git a/llvm/test/CodeGen/X86/statepoint-no-extra-const.ll b/llvm/test/CodeGen/X86/statepoint-no-extra-const.ll --- a/llvm/test/CodeGen/X86/statepoint-no-extra-const.ll +++ b/llvm/test/CodeGen/X86/statepoint-no-extra-const.ll @@ -1,4 +1,5 @@ ; 
RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown -use-registers-for-gcptrs=true | FileCheck --check-prefix=CHECK-VREG %s define i8 addrspace(1)* @no_extra_const(i8 addrspace(1)* %obj) gc "statepoint-example" { ; CHECK-LABEL: no_extra_const: @@ -13,6 +14,23 @@ ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq +; CHECK-VREG-LABEL: no_extra_const: +; CHECK-VREG: # %bb.0: # %entry +; CHECK-VREG-NEXT: pushq %rbx +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-VREG-NEXT: subq $16, %rsp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 32 +; CHECK-VREG-NEXT: .cfi_offset %rbx, -16 +; CHECK-VREG-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) +; CHECK-VREG-NEXT: nopl 8(%rax) +; CHECK-VREG-NEXT: .Ltmp0: +; CHECK-VREG-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx +; CHECK-VREG-NEXT: movq %rbx, %rax +; CHECK-VREG-NEXT: addq $16, %rsp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-VREG-NEXT: popq %rbx +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 8 +; CHECK-VREG-NEXT: retq entry: %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 4, void ()* null, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %obj) %obj.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token, i32 7, i32 7) ; (%obj, %obj) diff --git a/llvm/test/CodeGen/X86/statepoint-regs.ll b/llvm/test/CodeGen/X86/statepoint-regs.ll --- a/llvm/test/CodeGen/X86/statepoint-regs.ll +++ b/llvm/test/CodeGen/X86/statepoint-regs.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -verify-machineinstrs -O3 -use-registers-for-deopt-values -restrict-statepoint-remat=true < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -O3 -use-registers-for-deopt-values -restrict-statepoint-remat=true < %s | FileCheck --check-prefixes=CHECK,CHECK-SPILL %s +; RUN: llc -verify-machineinstrs -O3 -use-registers-for-deopt-values -restrict-statepoint-remat=true -use-registers-for-gcptrs=true < %s | FileCheck --check-prefixes=CHECK,CHECK-VREG %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.11.0" @@ -97,23 +98,48 @@ ; A gc-value must be spilled even if it is also a deopt value. 
define i32 addrspace(1)* @test5(i32 %a, i32 addrspace(1)* %p) gc "statepoint-example" { -; CHECK-LABEL: test5: -; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: subq $16, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: .cfi_offset %rbx, -16 -; CHECK-NEXT: movl %edi, %ebx -; CHECK-NEXT: movq %rsi, {{[0-9]+}}(%rsp) -; CHECK-NEXT: callq _bar -; CHECK-NEXT: Ltmp5: -; CHECK-NEXT: callq _bar -; CHECK-NEXT: Ltmp6: -; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax -; CHECK-NEXT: addq $16, %rsp -; CHECK-NEXT: popq %rbx -; CHECK-NEXT: retq +; CHECK-SPILL-LABEL: test5: +; CHECK-SPILL: ## %bb.0: ## %entry +; CHECK-SPILL-NEXT: pushq %rbx +; CHECK-SPILL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SPILL-NEXT: subq $16, %rsp +; CHECK-SPILL-NEXT: .cfi_def_cfa_offset 32 +; CHECK-SPILL-NEXT: .cfi_offset %rbx, -16 +; CHECK-SPILL-NEXT: movl %edi, %ebx +; CHECK-SPILL-NEXT: movq %rsi, {{[0-9]+}}(%rsp) +; CHECK-SPILL-NEXT: callq _bar +; CHECK-SPILL-NEXT: Ltmp5: +; CHECK-SPILL-NEXT: callq _bar +; CHECK-SPILL-NEXT: Ltmp6: +; CHECK-SPILL-NEXT: movq {{[0-9]+}}(%rsp), %rax +; CHECK-SPILL-NEXT: addq $16, %rsp +; CHECK-SPILL-NEXT: popq %rbx +; CHECK-SPILL-NEXT: retq + +; CHECK-VREG-LABEL: test5: +; CHECK-VREG: ## %bb.0: ## %entry +; CHECK-VREG-NEXT: pushq %rbp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-VREG-NEXT: pushq %rbx +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 24 +; CHECK-VREG-NEXT: pushq %rax +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 32 +; CHECK-VREG-NEXT: .cfi_offset %rbx, -24 +; CHECK-VREG-NEXT: .cfi_offset %rbp, -16 +; CHECK-VREG-NEXT: movq %rsi, (%rsp) +; CHECK-VREG-NEXT: movl %edi, %ebp +; CHECK-VREG-NEXT: callq _bar +; CHECK-VREG-NEXT: Ltmp5: +; CHECK-VREG-NEXT: movq (%rsp), %rbx +; CHECK-VREG-NEXT: movq %rbx, (%rsp) +; CHECK-VREG-NEXT: callq _bar +; CHECK-VREG-NEXT: Ltmp6: +; CHECK-VREG-NEXT: movq (%rsp), %rbx +; CHECK-VREG-NEXT: movq %rbx, %rax +; CHECK-VREG-NEXT: addq $8, %rsp +; CHECK-VREG-NEXT: popq %rbx +; 
CHECK-VREG-NEXT: popq %rbp +; CHECK-VREG-NEXT: retq entry: %token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 1, i32 %a, i32 addrspace(1)* %p, i32 addrspace(1)* %p) %p2 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %token, i32 9, i32 9) diff --git a/llvm/test/CodeGen/X86/statepoint-uniqueing.ll b/llvm/test/CodeGen/X86/statepoint-uniqueing.ll --- a/llvm/test/CodeGen/X86/statepoint-uniqueing.ll +++ b/llvm/test/CodeGen/X86/statepoint-uniqueing.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK-SPILL,CHECK %s +; RUN: llc -verify-machineinstrs -use-registers-for-gcptrs=true < %s | FileCheck --check-prefixes=CHECK-VREG,CHECK %s target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-pc-linux-gnu" @@ -12,20 +13,41 @@ ;; Two gc.relocates of the same input, should require only a single spill/fill define void @test_gcrelocate_uniqueing(i32 addrspace(1)* %ptr) gc "statepoint-example" { -; CHECK-LABEL: test_gcrelocate_uniqueing: -; CHECK: # %bb.0: -; CHECK-NEXT: subq $24, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: movq %rdi, {{[0-9]+}}(%rsp) -; CHECK-NEXT: callq f -; CHECK-NEXT: .Ltmp0: -; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdi -; CHECK-NEXT: movq %rdi, %rsi -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: callq use -; CHECK-NEXT: addq $24, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq +; CHECK-SPILL-LABEL: test_gcrelocate_uniqueing: +; CHECK-SPILL: # %bb.0: +; CHECK-SPILL-NEXT: subq $24, %rsp +; CHECK-SPILL-NEXT: .cfi_def_cfa_offset 32 +; CHECK-SPILL-NEXT: movq %rdi, {{[0-9]+}}(%rsp) +; CHECK-SPILL-NEXT: callq f +; CHECK-SPILL-NEXT: .Ltmp0: +; CHECK-SPILL-NEXT: movq {{[0-9]+}}(%rsp), %rdi +; CHECK-SPILL-NEXT: movq %rdi, %rsi +;
CHECK-SPILL-NEXT: xorl %eax, %eax +; CHECK-SPILL-NEXT: callq use +; CHECK-SPILL-NEXT: addq $24, %rsp +; CHECK-SPILL-NEXT: .cfi_def_cfa_offset 8 +; CHECK-SPILL-NEXT: retq + +; CHECK-VREG-LABEL: test_gcrelocate_uniqueing: +; CHECK-VREG: # %bb.0: +; CHECK-VREG-NEXT: pushq %rbx +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-VREG-NEXT: subq $16, %rsp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 32 +; CHECK-VREG-NEXT: .cfi_offset %rbx, -16 +; CHECK-VREG-NEXT: movq %rdi, (%rsp) +; CHECK-VREG-NEXT: callq f +; CHECK-VREG-NEXT: .Ltmp0: +; CHECK-VREG-NEXT: movq (%rsp), %rbx +; CHECK-VREG-NEXT: movq %rbx, %rdi +; CHECK-VREG-NEXT: movq %rbx, %rsi +; CHECK-VREG-NEXT: xorl %eax, %eax +; CHECK-VREG-NEXT: callq use +; CHECK-VREG-NEXT: addq $16, %rsp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-VREG-NEXT: popq %rbx +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 8 +; CHECK-VREG-NEXT: retq %tok = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @f, i32 0, i32 0, i32 0, i32 2, i32 addrspace(1)* %ptr, i32 undef, i32 addrspace(1)* %ptr, i32 addrspace(1)* %ptr) %a = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %tok, i32 9, i32 9) @@ -36,20 +58,41 @@ ;; Two gc.relocates of a bitcasted pointer should only require a single spill/fill define void @test_gcptr_uniqueing(i32 addrspace(1)* %ptr) gc "statepoint-example" { -; CHECK-LABEL: test_gcptr_uniqueing: -; CHECK: # %bb.0: -; CHECK-NEXT: subq $24, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: movq %rdi, {{[0-9]+}}(%rsp) -; CHECK-NEXT: callq f -; CHECK-NEXT: .Ltmp1: -; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdi -; CHECK-NEXT: movq %rdi, %rsi -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: callq use -; CHECK-NEXT: addq $24, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq +; CHECK-SPILL-LABEL: test_gcptr_uniqueing: +; CHECK-SPILL: # %bb.0: +; CHECK-SPILL-NEXT: subq $24, %rsp +; CHECK-SPILL-NEXT: .cfi_def_cfa_offset 32 +; CHECK-SPILL-NEXT: movq %rdi,
{{[0-9]+}}(%rsp) +; CHECK-SPILL-NEXT: callq f +; CHECK-SPILL-NEXT: .Ltmp1: +; CHECK-SPILL-NEXT: movq {{[0-9]+}}(%rsp), %rdi +; CHECK-SPILL-NEXT: movq %rdi, %rsi +; CHECK-SPILL-NEXT: xorl %eax, %eax +; CHECK-SPILL-NEXT: callq use +; CHECK-SPILL-NEXT: addq $24, %rsp +; CHECK-SPILL-NEXT: .cfi_def_cfa_offset 8 +; CHECK-SPILL-NEXT: retq + +; CHECK-VREG-LABEL: test_gcptr_uniqueing: +; CHECK-VREG: # %bb.0: +; CHECK-VREG-NEXT: pushq %rbx +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-VREG-NEXT: subq $16, %rsp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 32 +; CHECK-VREG-NEXT: .cfi_offset %rbx, -16 +; CHECK-VREG-NEXT: movq %rdi, (%rsp) +; CHECK-VREG-NEXT: callq f +; CHECK-VREG-NEXT: .Ltmp1: +; CHECK-VREG-NEXT: movq (%rsp), %rbx +; CHECK-VREG-NEXT: movq %rbx, %rdi +; CHECK-VREG-NEXT: movq %rbx, %rsi +; CHECK-VREG-NEXT: xorl %eax, %eax +; CHECK-VREG-NEXT: callq use +; CHECK-VREG-NEXT: addq $16, %rsp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-VREG-NEXT: popq %rbx +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 8 +; CHECK-VREG-NEXT: retq %ptr2 = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)* %tok = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @f, i32 0, i32 0, i32 0, i32 2, i32 addrspace(1)* %ptr, i32 undef, i32 addrspace(1)* %ptr, i8 addrspace(1)* %ptr2)