Index: clang/docs/ReleaseNotes.rst =================================================================== --- clang/docs/ReleaseNotes.rst +++ clang/docs/ReleaseNotes.rst @@ -87,8 +87,8 @@ - -fstack-clash-protection will provide a protection against the stack clash - attack for x86 architecture through automatic probing of each page of - allocated stack. + attack for x86 and s390x architectures through automatic probing of each page + of allocated stack. - -ffp-exception-behavior={ignore,maytrap,strict} allows the user to specify the floating-point exception behavior. The default setting is ``ignore``. Index: clang/lib/Basic/Targets/SystemZ.h =================================================================== --- clang/lib/Basic/Targets/SystemZ.h +++ clang/lib/Basic/Targets/SystemZ.h @@ -64,6 +64,10 @@ ArrayRef getGCCAddlRegNames() const override; + bool isSPRegName(StringRef RegName) const override { + return RegName.equals("r15"); + } + bool validateAsmConstraint(const char *&Name, TargetInfo::ConstraintInfo &info) const override; Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -3037,7 +3037,7 @@ if (!EffectiveTriple.isOSLinux()) return; - if (!EffectiveTriple.isX86()) + if (!EffectiveTriple.isX86() && !EffectiveTriple.isSystemZ()) return; if (Args.hasFlag(options::OPT_fstack_clash_protection, Index: clang/test/CodeGen/stack-clash-protection.c =================================================================== --- clang/test/CodeGen/stack-clash-protection.c +++ clang/test/CodeGen/stack-clash-protection.c @@ -1,5 +1,6 @@ // Check the correct function attributes are generated // RUN: %clang_cc1 -triple x86_64-linux -O0 -S -emit-llvm -o- %s -fstack-clash-protection | FileCheck %s +// RUN: %clang_cc1 -triple s390x-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection | FileCheck %s // CHECK: define void 
@large_stack() #[[A:.*]] { void large_stack() { Index: clang/test/Driver/stack-clash-protection-02.c =================================================================== --- /dev/null +++ clang/test/Driver/stack-clash-protection-02.c @@ -0,0 +1,13 @@ +// RUN: %clang -target s390x-linux-gnu -fstack-clash-protection -### %s 2>&1 | FileCheck %s -check-prefix=SystemZ +// SystemZ: "-fstack-clash-protection" +// RUN: %clang -target s390x-linux-gnu -fstack-clash-protection -S -emit-llvm -o %t.ll %s 2>&1 | FileCheck %s -check-prefix=SystemZ-warn +// SystemZ-warn: warning: Unable to protect inline asm that clobbers stack pointer against stack clash + +int foo(int c) { + int r; + __asm__("ag %%r15, %0" + : + : "rm"(c) + : "r15"); + return r; +} Index: llvm/include/llvm/ADT/Triple.h =================================================================== --- llvm/include/llvm/ADT/Triple.h +++ llvm/include/llvm/ADT/Triple.h @@ -736,6 +736,11 @@ return getArch() == Triple::riscv32 || getArch() == Triple::riscv64; } + /// Tests whether the target is SystemZ. + bool isSystemZ() const { + return getArch() == Triple::systemz; + } + /// Tests whether the target is x86 (32- or 64-bit). 
bool isX86() const { return getArch() == Triple::x86 || getArch() == Triple::x86_64; Index: llvm/lib/Target/SystemZ/SystemZFrameLowering.h =================================================================== --- llvm/lib/Target/SystemZ/SystemZFrameLowering.h +++ llvm/lib/Target/SystemZ/SystemZFrameLowering.h @@ -43,6 +43,8 @@ RegScavenger *RS) const override; void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + void inlineStackProbe(MachineFunction &MF, + MachineBasicBlock &PrologMBB) const override; bool hasFP(const MachineFunction &MF) const override; bool hasReservedCallFrame(const MachineFunction &MF) const override; int getFrameIndexReference(const MachineFunction &MF, int FI, Index: llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp =================================================================== --- llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -373,6 +373,32 @@ } } +// Add CFI for the new CFA offset. +static void buildCFAOffs(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, int Offset, + const SystemZInstrInfo *ZII) { + unsigned CFIIndex = MBB.getParent()->addFrameInst( + MCCFIInstruction::createDefCfaOffset(nullptr, Offset)); + BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); +} + +// Add CFI for the new frame location. 
+static void buildDefCFAReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, unsigned Reg, + const SystemZInstrInfo *ZII) { + MachineFunction &MF = *MBB.getParent(); + MachineModuleInfo &MMI = MF.getMMI(); + const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); + unsigned RegNum = MRI->getDwarfRegNum(Reg, true); + unsigned CFIIndex = MF.addFrameInst( + MCCFIInstruction::createDefCfaRegister(nullptr, RegNum)); + BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); +} + void SystemZFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); @@ -461,13 +487,23 @@ // Allocate StackSize bytes. int64_t Delta = -int64_t(StackSize); - emitIncrement(MBB, MBBI, DL, SystemZ::R15D, Delta, ZII); - - // Add CFI for the allocation. - unsigned CFIIndex = MF.addFrameInst( - MCCFIInstruction::createDefCfaOffset(nullptr, SPOffsetFromCFA + Delta)); - BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + if (MF.getSubtarget().getTargetLowering()->hasInlineStackProbe(MF)) { + // Stack probing may involve looping, and control flow generation is + // disallowed at this point. Rely on later processing through + // `inlineStackProbe`. + MachineInstr *Stub = BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::CallBRASL)) + .addExternalSymbol("__chkstk_stub"); + + // Encode the static offset as metadata attached to the stub. 
+ LLVMContext &Context = MF.getFunction().getContext(); + MachineInstrBuilder(MF, Stub).addMetadata( + MDTuple::get(Context, {ConstantAsMetadata::get(ConstantInt::get( + IntegerType::get(Context, 64), StackSize))})); + } + else { + emitIncrement(MBB, MBBI, DL, SystemZ::R15D, Delta, ZII); + buildCFAOffs(MBB, MBBI, DL, SPOffsetFromCFA + Delta, ZII); + } SPOffsetFromCFA += Delta; if (StoreBackchain) { @@ -485,11 +521,7 @@ .addReg(SystemZ::R15D); // Add CFI for the new frame location. - unsigned HardFP = MRI->getDwarfRegNum(SystemZ::R11D, true); - unsigned CFIIndex = MF.addFrameInst( - MCCFIInstruction::createDefCfaRegister(nullptr, HardFP)); - BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + buildDefCFAReg(MBB, MBBI, DL, SystemZ::R11D, ZII); // Mark the FramePtr as live at the beginning of every block except // the entry block. (We'll have marked R11 as live on entry when @@ -582,6 +614,102 @@ } } +void SystemZFrameLowering::inlineStackProbe(MachineFunction &MF, + MachineBasicBlock &PrologMBB) const { + auto *ZII = + static_cast(MF.getSubtarget().getInstrInfo()); + const SystemZSubtarget &STI = MF.getSubtarget(); + const SystemZTargetLowering &TLI = *STI.getTargetLowering(); + + const StringRef ChkStkStubSymbol = "__chkstk_stub"; + MachineInstr *ChkStkStubMI = nullptr; + for (MachineInstr &MI : PrologMBB) + if (MI.isCall() && MI.getOperand(0).isSymbol() && + ChkStkStubSymbol == MI.getOperand(0).getSymbolName()) { + ChkStkStubMI = &MI; + break; + } + if (ChkStkStubMI == nullptr) + return; + assert(ChkStkStubMI->getOperand(1).isMetadata() && + "no metadata attached to that probe"); + uint64_t StackSize = + cast( + cast( + cast(ChkStkStubMI->getOperand(1).getMetadata()) + ->getOperand(0)) + ->getValue()) + ->getZExtValue(); + + const unsigned ProbeSize = TLI.getStackProbeSize(MF); + uint64_t NumFullBlocks = StackSize / ProbeSize; + uint64_t Residual = StackSize % ProbeSize; + int64_t SPOffsetFromCFA = 
-SystemZMC::CFAOffsetFromInitialSP; + MachineBasicBlock *MBB = &PrologMBB; + MachineBasicBlock::iterator MBBI = ChkStkStubMI; + const DebugLoc DL = ChkStkStubMI->getDebugLoc(); + + // Allocate a block of Size bytes on the stack and probe it. + auto allocateAndProbe = [&](MachineBasicBlock &InsMBB, + MachineBasicBlock::iterator InsPt, unsigned Size, + bool EmitCFI) -> void { + emitIncrement(InsMBB, InsPt, DL, SystemZ::R15D, -int64_t(Size), ZII); + if (EmitCFI) { + SPOffsetFromCFA -= Size; + buildCFAOffs(InsMBB, InsPt, DL, SPOffsetFromCFA, ZII); + } + // Probe by means of a volatile compare. + MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo(), + MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1)); + BuildMI(InsMBB, InsPt, DL, ZII->get(SystemZ::CG)) + .addReg(SystemZ::R0D, RegState::Undef) + .addReg(SystemZ::R15D).addImm(Size - 8).addReg(0) + .addMemOperand(MMO); + }; + + if (NumFullBlocks < 3) { + // Emit unrolled probe statements. + for (unsigned int i = 0; i < NumFullBlocks; i++) + allocateAndProbe(*MBB, MBBI, ProbeSize, true/*EmitCFI*/); + } else { + // Emit a loop probing the pages. 
+ uint64_t LoopAlloc = ProbeSize * NumFullBlocks; + SPOffsetFromCFA -= LoopAlloc; + + BuildMI(*MBB, MBBI, DL, ZII->get(SystemZ::LGR), SystemZ::R1D) + .addReg(SystemZ::R15D); + buildDefCFAReg(*MBB, MBBI, DL, SystemZ::R1D, ZII); + emitIncrement(*MBB, MBBI, DL, SystemZ::R1D, -int64_t(LoopAlloc), ZII); + buildCFAOffs(*MBB, MBBI, DL, -int64_t(SystemZMC::CallFrameSize + LoopAlloc), + ZII); + + MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MBBI, MBB); + MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(MBB); + MBB->addSuccessor(LoopMBB); + LoopMBB->addSuccessor(LoopMBB); + LoopMBB->addSuccessor(DoneMBB); + + MBB = LoopMBB; + allocateAndProbe(*MBB, MBB->end(), ProbeSize, false/*EmitCFI*/); + BuildMI(*MBB, MBB->end(), DL, ZII->get(SystemZ::CLGR)) + .addReg(SystemZ::R15D).addReg(SystemZ::R1D); + BuildMI(*MBB, MBB->end(), DL, ZII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_GT).addMBB(MBB); + + MBB = DoneMBB; + MBBI = DoneMBB->begin(); + buildDefCFAReg(*MBB, MBBI, DL, SystemZ::R15D, ZII); + + recomputeLiveIns(*DoneMBB); + recomputeLiveIns(*LoopMBB); + } + + if (Residual) + allocateAndProbe(*MBB, MBBI, Residual, true/*EmitCFI*/); + + ChkStkStubMI->eraseFromParent(); +} + bool SystemZFrameLowering::hasFP(const MachineFunction &MF) const { return (MF.getTarget().Options.DisableFramePointerElim(MF) || MF.getFrameInfo().hasVarSizedObjects() || Index: llvm/lib/Target/SystemZ/SystemZISelLowering.h =================================================================== --- llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -83,6 +83,10 @@ // base of the dynamically-allocatable area. ADJDYNALLOC, + // For allocating stack space when using stack clash protector. + // Allocation is performed by block, and each block is probed. + PROBED_ALLOCA, + // Count number of bits set in operand 0 per byte. 
POPCNT, @@ -428,6 +432,7 @@ EVT VT) const override; bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override; + bool hasInlineStackProbe(MachineFunction &MF) const override; bool isLegalICmpImmediate(int64_t Imm) const override; bool isLegalAddImmediate(int64_t Imm) const override; bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, @@ -556,6 +561,8 @@ return true; } + unsigned getStackProbeSize(MachineFunction &MF) const; + private: const SystemZSubtarget &Subtarget; @@ -690,6 +697,8 @@ MachineBasicBlock *emitLoadAndTestCmp0(MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const; + MachineBasicBlock *emitProbedAlloca(MachineInstr &MI, + MachineBasicBlock *MBB) const; MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override; Index: llvm/lib/Target/SystemZ/SystemZISelLowering.cpp =================================================================== --- llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -824,6 +824,15 @@ return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget); } +/// Returns true if stack probing through inline assembly is requested. +bool SystemZTargetLowering::hasInlineStackProbe(MachineFunction &MF) const { + // If the function specifically requests inline stack probes, emit them. + if (MF.getFunction().hasFnAttribute("probe-stack")) + return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() == + "inline-asm"; + return false; +} + bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const { // We can use CGFI or CLGFI. return isInt<32>(Imm) || isUInt<32>(Imm); @@ -3426,10 +3435,17 @@ DAG.getConstant(ExtraAlignSpace, DL, MVT::i64)); // Get the new stack pointer value. - SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace); - - // Copy the new stack pointer back. 
- Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP); + SDValue NewSP; + if (hasInlineStackProbe(MF)) { + NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL, + DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace); + Chain = NewSP.getValue(1); + } + else { + NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace); + // Copy the new stack pointer back. + Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP); + } // The allocated data lives above the 160 bytes allocated for the standard // frame, plus any outgoing stack arguments. We don't know how much that @@ -5343,6 +5359,7 @@ OPCODE(BR_CCMASK); OPCODE(SELECT_CCMASK); OPCODE(ADJDYNALLOC); + OPCODE(PROBED_ALLOCA); OPCODE(POPCNT); OPCODE(SMUL_LOHI); OPCODE(UMUL_LOHI); @@ -6738,38 +6755,34 @@ return 1; } +/// Return the stack probe size for MF: the value of the "stack-probe-size" +/// function attribute when present, 4096 otherwise. The result is rounded +/// down to the stack alignment and is never zero. +unsigned +SystemZTargetLowering::getStackProbeSize(MachineFunction &MF) const { + const unsigned StackAlign = + Subtarget.getFrameLowering()->getStackAlignment(); + assert(StackAlign != 0 && (StackAlign & (StackAlign - 1)) == 0 && + "Unexpected stack alignment"); + // The default stack probe size is 4096 if the function has no + // "stack-probe-size" attribute. + unsigned StackProbeSize = 4096; + const Function &Fn = MF.getFunction(); + if (Fn.hasFnAttribute("stack-probe-size")) + Fn.getFnAttribute("stack-probe-size") + .getValueAsString() + .getAsInteger(0, StackProbeSize); + // Round down to the stack alignment so probed blocks keep the stack pointer + // aligned. A size of zero would divide by zero in inlineStackProbe() and + // never terminate in emitProbedAlloca(), so use the alignment instead. + StackProbeSize &= ~(StackAlign - 1); + return StackProbeSize ? StackProbeSize : StackAlign; +} + //===----------------------------------------------------------------------===// // Custom insertion //===----------------------------------------------------------------------===// -// Create a new basic block after MBB. -static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) { - MachineFunction &MF = *MBB->getParent(); - MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock()); - MF.insert(std::next(MachineFunction::iterator(MBB)), NewMBB); - return NewMBB; -} - -// Split MBB after MI and return the new block (the one that contains -// instructions after MI). 
-static MachineBasicBlock *splitBlockAfter(MachineBasicBlock::iterator MI, - MachineBasicBlock *MBB) { - MachineBasicBlock *NewMBB = emitBlockAfter(MBB); - NewMBB->splice(NewMBB->begin(), MBB, - std::next(MachineBasicBlock::iterator(MI)), MBB->end()); - NewMBB->transferSuccessorsAndUpdatePHIs(MBB); - return NewMBB; -} - -// Split MBB before MI and return the new block (the one that contains MI). -static MachineBasicBlock *splitBlockBefore(MachineBasicBlock::iterator MI, - MachineBasicBlock *MBB) { - MachineBasicBlock *NewMBB = emitBlockAfter(MBB); - NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end()); - NewMBB->transferSuccessorsAndUpdatePHIs(MBB); - return NewMBB; -} - // Force base value Base into a register before MI. Return the register. static Register forceReg(MachineInstr &MI, MachineOperand &Base, const SystemZInstrInfo *TII) { @@ -6940,8 +6942,8 @@ bool CCKilled = (LastMI->killsRegister(SystemZ::CC) || checkCCKill(*LastMI, MBB)); MachineBasicBlock *StartMBB = MBB; - MachineBasicBlock *JoinMBB = splitBlockAfter(LastMI, MBB); - MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB); + MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(LastMI, MBB); + MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB); // Unless CC was killed in the last Select instruction, mark it as // live-in to both FalseMBB and JoinMBB. @@ -7034,8 +7036,8 @@ CCMask ^= CCValid; MachineBasicBlock *StartMBB = MBB; - MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB); - MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB); + MachineBasicBlock *JoinMBB = SystemZ::splitBlockBefore(MI, MBB); + MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB); // Unless CC was killed in the CondStore instruction, mark it as // live-in to both FalseMBB and JoinMBB. @@ -7118,8 +7120,8 @@ // Insert a basic block for the main loop. 
MachineBasicBlock *StartMBB = MBB; - MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB); - MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); + MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); + MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB); // StartMBB: // ... @@ -7236,10 +7238,10 @@ // Insert 3 basic blocks for the loop. MachineBasicBlock *StartMBB = MBB; - MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB); - MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); - MachineBasicBlock *UseAltMBB = emitBlockAfter(LoopMBB); - MachineBasicBlock *UpdateMBB = emitBlockAfter(UseAltMBB); + MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); + MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB); + MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB); + MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB); // StartMBB: // ... @@ -7347,9 +7349,9 @@ // Insert 2 basic blocks for the loop. MachineBasicBlock *StartMBB = MBB; - MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB); - MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); - MachineBasicBlock *SetMBB = emitBlockAfter(LoopMBB); + MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); + MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB); + MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB); // StartMBB: // ... @@ -7509,7 +7511,7 @@ // When generating more than one CLC, all but the last will need to // branch to the end when a difference is found. MachineBasicBlock *EndMBB = (Length > 256 && Opcode == SystemZ::CLC ? - splitBlockAfter(MI, MBB) : nullptr); + SystemZ::splitBlockAfter(MI, MBB) : nullptr); // Check for the loop form, in which operand 5 is the trip count. 
if (MI.getNumExplicitOperands() > 5) { @@ -7533,9 +7535,10 @@ Register NextCountReg = MRI.createVirtualRegister(RC); MachineBasicBlock *StartMBB = MBB; - MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB); - MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); - MachineBasicBlock *NextMBB = (EndMBB ? emitBlockAfter(LoopMBB) : LoopMBB); + MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); + MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB); + MachineBasicBlock *NextMBB = + (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB); // StartMBB: // # fall through to LoopMMB @@ -7651,7 +7654,7 @@ // If there's another CLC to go, branch to the end if a difference // was found. if (EndMBB && Length > 0) { - MachineBasicBlock *NextMBB = splitBlockBefore(MI, MBB); + MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB); BuildMI(MBB, DL, TII->get(SystemZ::BRC)) .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE) .addMBB(EndMBB); @@ -7691,8 +7694,8 @@ uint64_t End2Reg = MRI.createVirtualRegister(RC); MachineBasicBlock *StartMBB = MBB; - MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB); - MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); + MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); + MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB); // StartMBB: // # fall through to LoopMMB @@ -7803,6 +7806,97 @@ return MBB; } +MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca( + MachineInstr &MI, MachineBasicBlock *MBB) const { + MachineFunction &MF = *MBB->getParent(); + MachineRegisterInfo *MRI = &MF.getRegInfo(); + const SystemZInstrInfo *TII = + static_cast(Subtarget.getInstrInfo()); + DebugLoc DL = MI.getDebugLoc(); + const unsigned ProbeSize = getStackProbeSize(MF); + Register DstReg = MI.getOperand(0).getReg(); + Register SizeReg = MI.getOperand(2).getReg(); + + MachineBasicBlock *StartMBB = MBB; + MachineBasicBlock *DoneMBB = SystemZ::splitBlockAfter(MI, MBB); + MachineBasicBlock 
*LoopTestMBB = SystemZ::emitBlockAfter(StartMBB); + MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB); + MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB); + MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB); + + MachineMemOperand *VolLdMMO = MF.getMachineMemOperand(MachinePointerInfo(), + MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1)); + + Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass); + Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass); + + // LoopTestMBB + // BRC TailTestMBB + // # fallthrough to LoopBodyMBB + StartMBB->addSuccessor(LoopTestMBB); + MBB = LoopTestMBB; + BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg) + .addReg(SizeReg) + .addMBB(StartMBB) + .addReg(IncReg) + .addMBB(LoopBodyMBB); + BuildMI(MBB, DL, TII->get(SystemZ::CLGFI)) + .addReg(PHIReg) + .addImm(ProbeSize); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_LT) + .addMBB(TailTestMBB); + MBB->addSuccessor(LoopBodyMBB); + MBB->addSuccessor(TailTestMBB); + + // LoopBodyMBB: Allocate and probe by means of a volatile compare. 
+ // J LoopTestMBB + MBB = LoopBodyMBB; + BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg) + .addReg(PHIReg) + .addImm(ProbeSize); + BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D) + .addReg(SystemZ::R15D) + .addImm(ProbeSize); + BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D) + .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0) + .setMemRefs(VolLdMMO); + BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB); + MBB->addSuccessor(LoopTestMBB); + + // TailTestMBB + // BRC DoneMBB + // # fallthrough to TailMBB + MBB = TailTestMBB; + BuildMI(MBB, DL, TII->get(SystemZ::CGHI)) + .addReg(PHIReg) + .addImm(0); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ) + .addMBB(DoneMBB); + MBB->addSuccessor(TailMBB); + MBB->addSuccessor(DoneMBB); + + // TailMBB + // # fallthrough to DoneMBB + MBB = TailMBB; + BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D) + .addReg(SystemZ::R15D) + .addReg(PHIReg); + BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D) + .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg) + .setMemRefs(VolLdMMO); + MBB->addSuccessor(DoneMBB); + + // DoneMBB + MBB = DoneMBB; + BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg) + .addReg(SystemZ::R15D); + + MI.eraseFromParent(); + return DoneMBB; +} + MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter( MachineInstr &MI, MachineBasicBlock *MBB) const { switch (MI.getOpcode()) { @@ -8063,6 +8157,9 @@ case SystemZ::LTXBRCompare_VecPseudo: return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR); + case SystemZ::PROBED_ALLOCA: + return emitProbedAlloca(MI, MBB); + case TargetOpcode::STACKMAP: case TargetOpcode::PATCHPOINT: return emitPatchPoint(MI, MBB); Index: llvm/lib/Target/SystemZ/SystemZInstrInfo.h =================================================================== --- llvm/lib/Target/SystemZ/SystemZInstrInfo.h +++ llvm/lib/Target/SystemZ/SystemZInstrInfo.h @@ -159,6 +159,16 @@ 
// Return a version of comparison CC mask CCMask in which the LT and GT // actions are swapped. unsigned reverseCCMask(unsigned CCMask); + +// Create a new basic block after MBB. +MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB); +// Split MBB after MI and return the new block (the one that contains +// instructions after MI). +MachineBasicBlock *splitBlockAfter(MachineBasicBlock::iterator MI, + MachineBasicBlock *MBB); +// Split MBB before MI and return the new block (the one that contains MI). +MachineBasicBlock *splitBlockBefore(MachineBasicBlock::iterator MI, + MachineBasicBlock *MBB); } class SystemZInstrInfo : public SystemZGenInstrInfo { Index: llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp =================================================================== --- llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -1808,6 +1808,30 @@ (CCMask & SystemZ::CCMASK_CMP_UO)); } +MachineBasicBlock *SystemZ::emitBlockAfter(MachineBasicBlock *MBB) { + MachineFunction &MF = *MBB->getParent(); + MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock()); + MF.insert(std::next(MachineFunction::iterator(MBB)), NewMBB); + return NewMBB; +} + +MachineBasicBlock *SystemZ::splitBlockAfter(MachineBasicBlock::iterator MI, + MachineBasicBlock *MBB) { + MachineBasicBlock *NewMBB = emitBlockAfter(MBB); + NewMBB->splice(NewMBB->begin(), MBB, + std::next(MachineBasicBlock::iterator(MI)), MBB->end()); + NewMBB->transferSuccessorsAndUpdatePHIs(MBB); + return NewMBB; +} + +MachineBasicBlock *SystemZ::splitBlockBefore(MachineBasicBlock::iterator MI, + MachineBasicBlock *MBB) { + MachineBasicBlock *NewMBB = emitBlockAfter(MBB); + NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end()); + NewMBB->transferSuccessorsAndUpdatePHIs(MBB); + return NewMBB; +} + unsigned SystemZInstrInfo::getLoadAndTrap(unsigned Opcode) const { if (!STI.hasLoadAndTrap()) return 0; Index: llvm/lib/Target/SystemZ/SystemZInstrInfo.td 
=================================================================== --- llvm/lib/Target/SystemZ/SystemZInstrInfo.td +++ llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -29,6 +29,11 @@ def ADJDYNALLOC : Pseudo<(outs GR64:$dst), (ins dynalloc12only:$src), [(set GR64:$dst, dynalloc12only:$src)]>; +let Defs = [R15D, CC], Uses = [R15D], hasNoSchedulingInfo = 1, + usesCustomInserter = 1 in + def PROBED_ALLOCA : Pseudo<(outs GR64:$dst), + (ins GR64:$oldSP, GR64:$space), + [(set GR64:$dst, (z_probed_alloca GR64:$oldSP, GR64:$space))]>; //===----------------------------------------------------------------------===// // Branch instructions Index: llvm/lib/Target/SystemZ/SystemZOperators.td =================================================================== --- llvm/lib/Target/SystemZ/SystemZOperators.td +++ llvm/lib/Target/SystemZ/SystemZOperators.td @@ -40,6 +40,10 @@ SDTCisSameAs<0, 2>, SDTCisPtrTy<0>]>; def SDT_ZAdjDynAlloc : SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>; +def SDT_ZProbedAlloca : SDTypeProfile<1, 2, + [SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisPtrTy<0>]>; def SDT_ZGR128Binary : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>, SDTCisInt<1>, @@ -269,6 +273,8 @@ SDT_ZSelectCCMask>; def z_ipm_1 : SDNode<"SystemZISD::IPM", SDT_ZIPM>; def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>; +def z_probed_alloca : SDNode<"SystemZISD::PROBED_ALLOCA", SDT_ZProbedAlloca, + [SDNPHasChain]>; def z_popcnt : SDNode<"SystemZISD::POPCNT", SDTIntUnaryOp>; def z_smul_lohi : SDNode<"SystemZISD::SMUL_LOHI", SDT_ZGR128Binary>; def z_umul_lohi : SDNode<"SystemZISD::UMUL_LOHI", SDT_ZGR128Binary>; Index: llvm/test/CodeGen/SystemZ/stack-clash-dynamic-alloca.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/stack-clash-dynamic-alloca.ll @@ -0,0 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +define 
i32 @foo(i32 %n) #0 { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: stmg %r11, %r15, 88(%r15) +; CHECK-NEXT: .cfi_offset %r11, -72 +; CHECK-NEXT: .cfi_offset %r15, -40 +; CHECK-NEXT: aghi %r15, -160 +; CHECK-NEXT: .cfi_def_cfa_offset 320 +; CHECK-NEXT: cg %r0, 152(%r15) +; CHECK-NEXT: lgr %r11, %r15 +; CHECK-NEXT: .cfi_def_cfa_register %r11 +; CHECK-NEXT: # kill: def $r2l killed $r2l def $r2d +; CHECK-NEXT: risbgn %r1, %r2, 30, 189, 2 +; CHECK-NEXT: la %r0, 7(%r1) +; CHECK-NEXT: risbgn %r1, %r0, 29, 188, 0 +; CHECK-NEXT: clgfi %r1, 4096 +; CHECK-NEXT: jl .LBB0_2 +; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: slgfi %r1, 4096 +; CHECK-NEXT: slgfi %r15, 4096 +; CHECK-NEXT: cg %r15, 4088(%r15) +; CHECK-NEXT: clgfi %r1, 4096 +; CHECK-NEXT: jhe .LBB0_1 +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: cgije %r1, 0, .LBB0_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: slgr %r15, %r1 +; CHECK-NEXT: cg %r15, -8(%r1,%r15) +; CHECK-NEXT: .LBB0_4: +; CHECK-NEXT: la %r1, 160(%r15) +; CHECK-NEXT: lhi %r0, 1 +; CHECK-NEXT: sty %r0, 4792(%r1) +; CHECK-NEXT: l %r2, 0(%r1) +; CHECK-NEXT: lmg %r11, %r15, 248(%r11) +; CHECK-NEXT: br %r14 + + %a = alloca i32, i32 %n + %b = getelementptr inbounds i32, i32* %a, i64 1198 + store volatile i32 1, i32* %b + %c = load volatile i32, i32* %a + ret i32 %c +} + +attributes #0 = {"probe-stack"="inline-asm"} Index: llvm/test/CodeGen/SystemZ/stack-clash-large.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/stack-clash-large.ll @@ -0,0 +1,36 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +define i32 @foo() #0 { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: lgr %r1, %r15 +; CHECK-NEXT: .cfi_def_cfa_register %r1 +; CHECK-NEXT: agfi %r1, -69632 +; CHECK-NEXT: .cfi_def_cfa_offset 69792 +; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; 
CHECK-NEXT: aghi %r15, -4096 +; CHECK-NEXT: cg %r0, 4088(%r15) +; CHECK-NEXT: clgrjh %r15, %r1, .LBB0_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: .cfi_def_cfa_register %r15 +; CHECK-NEXT: aghi %r15, -2544 +; CHECK-NEXT: .cfi_def_cfa_offset 72336 +; CHECK-NEXT: cg %r0, 2536(%r15) +; CHECK-NEXT: lhi %r0, 1 +; CHECK-NEXT: mvhi 568(%r15), 1 +; CHECK-NEXT: sty %r0, 28968(%r15) +; CHECK-NEXT: l %r2, 176(%r15) +; CHECK-NEXT: agfi %r15, 72176 +; CHECK-NEXT: br %r14 + + %a = alloca i32, i64 18000 + %b0 = getelementptr inbounds i32, i32* %a, i64 98 + %b1 = getelementptr inbounds i32, i32* %a, i64 7198 + store volatile i32 1, i32* %b0 + store volatile i32 1, i32* %b1 + %c = load volatile i32, i32* %a + ret i32 %c +} + +attributes #0 = {"probe-stack"="inline-asm"} Index: llvm/test/CodeGen/SystemZ/stack-clash-medium-natural-probes-mutliple-objects.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/stack-clash-medium-natural-probes-mutliple-objects.ll @@ -0,0 +1,30 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +define i32 @foo() #0 { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: aghi %r15, -4096 +; CHECK-NEXT: .cfi_def_cfa_offset 4256 +; CHECK-NEXT: cg %r0, 4088(%r15) +; CHECK-NEXT: aghi %r15, -2080 +; CHECK-NEXT: .cfi_def_cfa_offset 6336 +; CHECK-NEXT: lay %r1, 4096(%r15) +; CHECK-NEXT: cg %r0, 2072(%r15) +; CHECK-NEXT: mvhi 80(%r1), 1 +; CHECK-NEXT: mvhi 976(%r15), 2 +; CHECK-NEXT: l %r2, 2176(%r15) +; CHECK-NEXT: aghi %r15, 6176 +; CHECK-NEXT: br %r14 + + %a = alloca i32, i64 1000 + %b = alloca i32, i64 500 + %a0 = getelementptr inbounds i32, i32* %a, i64 500 + %b0 = getelementptr inbounds i32, i32* %b, i64 200 + store volatile i32 1, i32* %a0 + store volatile i32 2, i32* %b0 + %c = load volatile i32, i32* %a + ret i32 %c +} + +attributes #0 = {"probe-stack"="inline-asm"} Index: 
llvm/test/CodeGen/SystemZ/stack-clash-medium-natural-probes.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/stack-clash-medium-natural-probes.ll @@ -0,0 +1,29 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +define i32 @foo() #0 { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: aghi %r15, -4096 +; CHECK-NEXT: .cfi_def_cfa_offset 4256 +; CHECK-NEXT: cg %r0, 4088(%r15) +; CHECK-NEXT: aghi %r15, -4080 +; CHECK-NEXT: .cfi_def_cfa_offset 8336 +; CHECK-NEXT: cg %r0, 4072(%r15) +; CHECK-NEXT: lhi %r0, 1 +; CHECK-NEXT: mvhi 568(%r15), 1 +; CHECK-NEXT: sty %r0, 4968(%r15) +; CHECK-NEXT: l %r2, 176(%r15) +; CHECK-NEXT: aghi %r15, 8176 +; CHECK-NEXT: br %r14 + + %a = alloca i32, i64 2000 + %b0 = getelementptr inbounds i32, i32* %a, i64 98 + %b1 = getelementptr inbounds i32, i32* %a, i64 1198 + store i32 1, i32* %b0 + store i32 1, i32* %b1 + %c = load volatile i32, i32* %a + ret i32 %c +} + +attributes #0 = {"probe-stack"="inline-asm"} Index: llvm/test/CodeGen/SystemZ/stack-clash-medium.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/stack-clash-medium.ll @@ -0,0 +1,25 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +define i32 @foo() #0 { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: aghi %r15, -4096 +; CHECK-NEXT: .cfi_def_cfa_offset 4256 +; CHECK-NEXT: cg %r0, 4088(%r15) +; CHECK-NEXT: aghi %r15, -4080 +; CHECK-NEXT: .cfi_def_cfa_offset 8336 +; CHECK-NEXT: cg %r0, 4072(%r15) +; CHECK-NEXT: mvhi 976(%r15), 1 +; CHECK-NEXT: l %r2, 176(%r15) +; CHECK-NEXT: aghi %r15, 8176 +; CHECK-NEXT: br %r14 + + %a = alloca i32, i64 2000 + %b = getelementptr inbounds i32, i32* %a, i64 200 + store volatile i32 1, i32* %b + %c = load volatile 
i32, i32* %a + ret i32 %c +} + +attributes #0 = {"probe-stack"="inline-asm"} Index: llvm/test/CodeGen/SystemZ/stack-clash-no-free-probe.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/stack-clash-no-free-probe.ll @@ -0,0 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +define i32 @foo(i64 %i) #0 { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: aghi %r15, -4096 +; CHECK-NEXT: .cfi_def_cfa_offset 4256 +; CHECK-NEXT: cg %r0, 4088(%r15) +; CHECK-NEXT: aghi %r15, -4080 +; CHECK-NEXT: .cfi_def_cfa_offset 8336 +; CHECK-NEXT: cg %r0, 4072(%r15) +; CHECK-NEXT: sllg %r1, %r2, 2 +; CHECK-NEXT: lhi %r0, 1 +; CHECK-NEXT: st %r0, 176(%r1,%r15) +; CHECK-NEXT: l %r2, 176(%r15) +; CHECK-NEXT: aghi %r15, 8176 +; CHECK-NEXT: br %r14 + + %a = alloca i32, i32 2000 + %b = getelementptr inbounds i32, i32* %a, i64 %i + store volatile i32 1, i32* %b + %c = load volatile i32, i32* %a + ret i32 %c +} + +attributes #0 = {"probe-stack"="inline-asm"} + Index: llvm/test/CodeGen/SystemZ/stack-clash-protection.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/stack-clash-protection.ll @@ -0,0 +1,122 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -O3 | FileCheck %s + +; Small enough to avoid a loop; the probes are unrolled.
+define void @fun0() #0 { +; CHECK-LABEL: fun0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: aghi %r15, -4096 +; CHECK-NEXT: .cfi_def_cfa_offset 4256 +; CHECK-NEXT: cg %r0, 4088(%r15) +; CHECK-NEXT: aghi %r15, -88 +; CHECK-NEXT: .cfi_def_cfa_offset 4344 +; CHECK-NEXT: cg %r0, 80(%r15) +; CHECK-NEXT: mvhi 180(%r15), 0 +; CHECK-NEXT: l %r0, 180(%r15) +; CHECK-NEXT: aghi %r15, 4184 +; CHECK-NEXT: br %r14 +entry: + %stack = alloca [1000 x i32], align 4 + %i = alloca i32, align 4 + %0 = bitcast [1000 x i32]* %stack to i8* + %i.0.i.0..sroa_cast = bitcast i32* %i to i8* + store volatile i32 0, i32* %i, align 4 + %i.0.i.0.6 = load volatile i32, i32* %i, align 4 + ret void +} + +; Uses a loop to allocate and probe in steps. +define void @fun1() #0 { +; CHECK-LABEL: fun1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lgr %r1, %r15 +; CHECK-NEXT: .cfi_def_cfa_register %r1 +; CHECK-NEXT: aghi %r1, -28672 +; CHECK-NEXT: .cfi_def_cfa_offset 28832 +; CHECK-NEXT: .LBB1_1: # %entry +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: aghi %r15, -4096 +; CHECK-NEXT: cg %r0, 4088(%r15) +; CHECK-NEXT: clgrjh %r15, %r1, .LBB1_1 +; CHECK-NEXT: # %bb.2: # %entry +; CHECK-NEXT: .cfi_def_cfa_register %r15 +; CHECK-NEXT: aghi %r15, -3512 +; CHECK-NEXT: .cfi_def_cfa_offset 32344 +; CHECK-NEXT: cg %r0, 3504(%r15) +; CHECK-NEXT: mvhi 180(%r15), 0 +; CHECK-NEXT: l %r0, 180(%r15) +; CHECK-NEXT: aghi %r15, 32184 +; CHECK-NEXT: br %r14 +entry: + %stack = alloca [8000 x i32], align 4 + %i = alloca i32, align 4 + %0 = bitcast [8000 x i32]* %stack to i8* + %i.0.i.0..sroa_cast = bitcast i32* %i to i8* + store volatile i32 0, i32* %i, align 4 + %i.0.i.0.6 = load volatile i32, i32* %i, align 4 + ret void +} + +; Loop with a bigger step (stack-probe-size of 8192).
+define void @fun2() #0 "stack-probe-size"="8192" { +; CHECK-LABEL: fun2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lgr %r1, %r15 +; CHECK-NEXT: .cfi_def_cfa_register %r1 +; CHECK-NEXT: aghi %r1, -24576 +; CHECK-NEXT: .cfi_def_cfa_offset 24736 +; CHECK-NEXT: .LBB2_1: # %entry +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: aghi %r15, -8192 +; CHECK-NEXT: cg %r0, 8184(%r15) +; CHECK-NEXT: clgrjh %r15, %r1, .LBB2_1 +; CHECK-NEXT: # %bb.2: # %entry +; CHECK-NEXT: .cfi_def_cfa_register %r15 +; CHECK-NEXT: aghi %r15, -7608 +; CHECK-NEXT: .cfi_def_cfa_offset 32344 +; CHECK-NEXT: cg %r0, 7600(%r15) +; CHECK-NEXT: mvhi 180(%r15), 0 +; CHECK-NEXT: l %r0, 180(%r15) +; CHECK-NEXT: aghi %r15, 32184 +; CHECK-NEXT: br %r14 +entry: + %stack = alloca [8000 x i32], align 4 + %i = alloca i32, align 4 + %0 = bitcast [8000 x i32]* %stack to i8* + %i.0.i.0..sroa_cast = bitcast i32* %i to i8* + store volatile i32 0, i32* %i, align 4 + %i.0.i.0.6 = load volatile i32, i32* %i, align 4 + ret void +} + +; Ends evenly on the step, so no remainder allocation is needed.
+define void @fun3() #0 { +; CHECK-LABEL: fun3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lgr %r1, %r15 +; CHECK-NEXT: .cfi_def_cfa_register %r1 +; CHECK-NEXT: aghi %r1, -28672 +; CHECK-NEXT: .cfi_def_cfa_offset 28832 +; CHECK-NEXT: .LBB3_1: # %entry +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: aghi %r15, -4096 +; CHECK-NEXT: cg %r0, 4088(%r15) +; CHECK-NEXT: clgrjh %r15, %r1, .LBB3_1 +; CHECK-NEXT: # %bb.2: # %entry +; CHECK-NEXT: .cfi_def_cfa_register %r15 +; CHECK-NEXT: mvhi 180(%r15), 0 +; CHECK-NEXT: l %r0, 180(%r15) +; CHECK-NEXT: aghi %r15, 28672 +; CHECK-NEXT: br %r14 +entry: + %stack = alloca [7122 x i32], align 4 + %i = alloca i32, align 4 + %0 = bitcast [7122 x i32]* %stack to i8* + %i.0.i.0..sroa_cast = bitcast i32* %i to i8* + store volatile i32 0, i32* %i, align 4 + %i.0.i.0.6 = load volatile i32, i32* %i, align 4 + ret void +} + +attributes #0 = { "probe-stack"="inline-asm" } + Index: llvm/test/CodeGen/SystemZ/stack-clash-small.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/stack-clash-small.ll @@ -0,0 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +define i32 @foo() #0 { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: aghi %r15, -560 +; CHECK-NEXT: .cfi_def_cfa_offset 720 +; CHECK-NEXT: cg %r0, 552(%r15) +; CHECK-NEXT: mvhi 552(%r15), 1 +; CHECK-NEXT: l %r2, 160(%r15) +; CHECK-NEXT: aghi %r15, 560 +; CHECK-NEXT: br %r14 + + %a = alloca i32, i64 100 + %b = getelementptr inbounds i32, i32* %a, i64 98 + store volatile i32 1, i32* %b + %c = load volatile i32, i32* %a + ret i32 %c +} + +attributes #0 = {"probe-stack"="inline-asm"} Index: llvm/test/CodeGen/SystemZ/stack-clash-unknown-call.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/stack-clash-unknown-call.ll @@ -0,0 +1,29 @@ +; 
NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +declare void @fun(i8* nocapture writeonly); + +define void @foo() #0 { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: stmg %r14, %r15, 112(%r15) +; CHECK-NEXT: .cfi_offset %r14, -48 +; CHECK-NEXT: .cfi_offset %r15, -40 +; CHECK-NEXT: aghi %r15, -4096 +; CHECK-NEXT: .cfi_def_cfa_offset 4256 +; CHECK-NEXT: cg %r0, 4088(%r15) +; CHECK-NEXT: aghi %r15, -4080 +; CHECK-NEXT: .cfi_def_cfa_offset 8336 +; CHECK-NEXT: cg %r0, 4072(%r15) +; CHECK-NEXT: la %r2, 176(%r15) +; CHECK-NEXT: brasl %r14, fun@PLT +; CHECK-NEXT: lmg %r14, %r15, 8288(%r15) +; CHECK-NEXT: br %r14 +; It's important that we don't use the call as a probe here. + + %a = alloca i8, i64 8000 + call void @fun(i8* align 16 %a) + ret void +} + +attributes #0 = {"probe-stack"="inline-asm"}