Index: clang/docs/ReleaseNotes.rst =================================================================== --- clang/docs/ReleaseNotes.rst +++ clang/docs/ReleaseNotes.rst @@ -75,8 +75,8 @@ - -fstack-clash-protection will provide a protection against the stack clash - attack for x86 architecture through automatic probing of each page of - allocated stack. + attack for x86 and s390x architectures through automatic probing of each page + of allocated stack. - -ffp-exception-behavior={ignore,maytrap,strict} allows the user to specify the floating-point exception behavior. The default setting is ``ignore``. Index: clang/lib/Basic/Targets/SystemZ.h =================================================================== --- clang/lib/Basic/Targets/SystemZ.h +++ clang/lib/Basic/Targets/SystemZ.h @@ -64,6 +64,10 @@ ArrayRef getGCCAddlRegNames() const override; + bool isSPRegName(StringRef RegName) const override { + return RegName.equals("r15"); + } + bool validateAsmConstraint(const char *&Name, TargetInfo::ConstraintInfo &info) const override; Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -3036,7 +3036,7 @@ if (!EffectiveTriple.isOSLinux()) return; - if (!EffectiveTriple.isX86()) + if (!EffectiveTriple.isX86() && !EffectiveTriple.isSystemZ()) return; if (Args.hasFlag(options::OPT_fstack_clash_protection, Index: clang/test/CodeGen/stack-clash-protection.c =================================================================== --- clang/test/CodeGen/stack-clash-protection.c +++ clang/test/CodeGen/stack-clash-protection.c @@ -1,5 +1,6 @@ // Check the correct function attributes are generated // RUN: %clang_cc1 -triple x86_64-linux -O0 -S -emit-llvm -o- %s -fstack-clash-protection | FileCheck %s +// RUN: %clang_cc1 -triple s390x-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection | FileCheck %s // CHECK: define void 
@large_stack() #[[A:.*]] { void large_stack() { Index: clang/test/Driver/stack-clash-protection-02.c =================================================================== --- /dev/null +++ clang/test/Driver/stack-clash-protection-02.c @@ -0,0 +1,13 @@ +// RUN: %clang -target s390x-linux-gnu -fstack-clash-protection -### %s 2>&1 | FileCheck %s -check-prefix=SystemZ +// SystemZ: "-fstack-clash-protection" +// RUN: %clang -target s390x-linux-gnu -fstack-clash-protection -S -emit-llvm -o %t.ll %s 2>&1 | FileCheck %s -check-prefix=SystemZ-warn +// SystemZ-warn: warning: Unable to protect inline asm that clobbers stack pointer against stack clash + +int foo(int c) { + int r; + __asm__("ag %%r15, %0" + : + : "rm"(c) + : "r15"); + return r; +} Index: llvm/include/llvm/ADT/Triple.h =================================================================== --- llvm/include/llvm/ADT/Triple.h +++ llvm/include/llvm/ADT/Triple.h @@ -736,6 +736,11 @@ return getArch() == Triple::riscv32 || getArch() == Triple::riscv64; } + /// Tests whether the target is SystemZ. + bool isSystemZ() const { + return getArch() == Triple::systemz; + } + /// Tests whether the target is x86 (32- or 64-bit). 
bool isX86() const { return getArch() == Triple::x86 || getArch() == Triple::x86_64; Index: llvm/lib/Target/SystemZ/SystemZFrameLowering.h =================================================================== --- llvm/lib/Target/SystemZ/SystemZFrameLowering.h +++ llvm/lib/Target/SystemZ/SystemZFrameLowering.h @@ -43,6 +43,8 @@ RegScavenger *RS) const override; void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + void inlineStackProbe(MachineFunction &MF, + MachineBasicBlock &PrologMBB) const override; bool hasFP(const MachineFunction &MF) const override; bool hasReservedCallFrame(const MachineFunction &MF) const override; int getFrameIndexReference(const MachineFunction &MF, int FI, Index: llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp =================================================================== --- llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -373,6 +373,49 @@ } } +// Create a new basic block after MBB. +static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) { + MachineFunction &MF = *MBB->getParent(); + MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock()); + MF.insert(std::next(MachineFunction::iterator(MBB)), NewMBB); + return NewMBB; +} + +// Split MBB before MI and return the new block (the one that contains MI). +static MachineBasicBlock *splitBlockBefore(MachineBasicBlock::iterator MI, + MachineBasicBlock *MBB) { + MachineBasicBlock *NewMBB = emitBlockAfter(MBB); + NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end()); + NewMBB->transferSuccessorsAndUpdatePHIs(MBB); + return NewMBB; +} + +// Add CFI for the new CFA offset. 
+static void buildCFAOffs(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, int Offset, + const SystemZInstrInfo *ZII) { + unsigned CFIIndex = MBB.getParent()->addFrameInst( + MCCFIInstruction::createDefCfaOffset(nullptr, Offset)); + BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); +} + +// Add CFI for the new frame location. +static void buildDefCFAReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, unsigned Reg, + const SystemZInstrInfo *ZII) { + MachineFunction &MF = *MBB.getParent(); + MachineModuleInfo &MMI = MF.getMMI(); + const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); + unsigned RegNum = MRI->getDwarfRegNum(Reg, true); + unsigned CFIIndex = MF.addFrameInst( + MCCFIInstruction::createDefCfaRegister(nullptr, RegNum)); + BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); +} + void SystemZFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); @@ -461,13 +504,23 @@ // Allocate StackSize bytes. int64_t Delta = -int64_t(StackSize); - emitIncrement(MBB, MBBI, DL, SystemZ::R15D, Delta, ZII); - - // Add CFI for the allocation. - unsigned CFIIndex = MF.addFrameInst( - MCCFIInstruction::createDefCfaOffset(nullptr, SPOffsetFromCFA + Delta)); - BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + if (MF.getSubtarget().getTargetLowering()->hasInlineStackProbe(MF)) { + // Stack probing may involve looping, and control flow generation is + // disallowed at this point. Rely on later processing through + // `inlineStackProbe`. + MachineInstr *Stub = BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::CallBRASL)) + .addExternalSymbol("__chkstk_stub"); + + // Encode the static offset as metadata attached to the stub. 
+ LLVMContext &Context = MF.getFunction().getContext(); + MachineInstrBuilder(MF, Stub).addMetadata( + MDTuple::get(Context, {ConstantAsMetadata::get(ConstantInt::get( + IntegerType::get(Context, 64), StackSize))})); + } + else { + emitIncrement(MBB, MBBI, DL, SystemZ::R15D, Delta, ZII); + buildCFAOffs(MBB, MBBI, DL, SPOffsetFromCFA + Delta, ZII); + } SPOffsetFromCFA += Delta; if (StoreBackchain) { @@ -485,11 +538,7 @@ .addReg(SystemZ::R15D); // Add CFI for the new frame location. - unsigned HardFP = MRI->getDwarfRegNum(SystemZ::R11D, true); - unsigned CFIIndex = MF.addFrameInst( - MCCFIInstruction::createDefCfaRegister(nullptr, HardFP)); - BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + buildDefCFAReg(MBB, MBBI, DL, SystemZ::R11D, ZII); // Mark the FramePtr as live at the beginning of every block except // the entry block. (We'll have marked R11 as live on entry when @@ -582,6 +631,100 @@ } } +void SystemZFrameLowering::inlineStackProbe(MachineFunction &MF, + MachineBasicBlock &PrologMBB) const { + auto *ZII = + static_cast(MF.getSubtarget().getInstrInfo()); + const SystemZSubtarget &STI = MF.getSubtarget(); + const SystemZTargetLowering &TLI = *STI.getTargetLowering(); + + const StringRef ChkStkStubSymbol = "__chkstk_stub"; + MachineInstr *ChkStkStubMI = nullptr; + for (MachineInstr &MI : PrologMBB) + if (MI.isCall() && MI.getOperand(0).isSymbol() && + ChkStkStubSymbol == MI.getOperand(0).getSymbolName()) { + ChkStkStubMI = &MI; + break; + } + if (ChkStkStubMI == nullptr) + return; + assert(ChkStkStubMI->getOperand(1).isMetadata() && + "no metadata attached to that probe"); + uint64_t StackSize = + cast( + cast( + cast(ChkStkStubMI->getOperand(1).getMetadata()) + ->getOperand(0)) + ->getValue()) + ->getZExtValue(); + + unsigned StackProbeSize = TLI.getStackProbeSize(MF); + uint64_t NumFullBlocks = StackSize / StackProbeSize; + uint64_t Residual = StackSize % StackProbeSize; + int64_t SPOffsetFromCFA = 
-SystemZMC::CFAOffsetFromInitialSP; + MachineBasicBlock *MBB = &PrologMBB; + MachineBasicBlock::iterator MBBI = ChkStkStubMI; + const DebugLoc DL = ChkStkStubMI->getDebugLoc(); + + // Allocate a block of Size bytes on the stack and probe it. + auto allocateAndProbe = [&](MachineBasicBlock &InsMBB, + MachineBasicBlock::iterator InsPt, unsigned Size, + bool EmitCFI) -> void { + emitIncrement(InsMBB, InsPt, DL, SystemZ::R15D, -int64_t(Size), ZII); + if (EmitCFI) { + SPOffsetFromCFA -= Size; + buildCFAOffs(InsMBB, InsPt, DL, SPOffsetFromCFA, ZII); + } + // Probe by means of a volatile compare. + MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo(), + MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1)); + BuildMI(InsMBB, InsPt, DL, ZII->get(SystemZ::CG)).addReg(SystemZ::R0D) + .addReg(SystemZ::R15D).addImm(Size - 8).addReg(0) + .addMemOperand(MMO); + }; + + if (NumFullBlocks < 3) { + // Emit unrolled probe statements. + for (unsigned int i = 0; i < NumFullBlocks; i++) + allocateAndProbe(*MBB, MBBI, StackProbeSize, true/*EmitCFI*/); + } else { + // Emit a loop probing the pages. 
+ uint64_t LoopAlloc = StackProbeSize * NumFullBlocks; + SPOffsetFromCFA -= LoopAlloc; + + BuildMI(*MBB, MBBI, DL, ZII->get(SystemZ::LGR), SystemZ::R1D) + .addReg(SystemZ::R15D); + buildDefCFAReg(*MBB, MBBI, DL, SystemZ::R1D, ZII); + emitIncrement(*MBB, MBBI, DL, SystemZ::R1D, -int64_t(LoopAlloc), ZII); + buildCFAOffs(*MBB, MBBI, DL, SystemZMC::CallFrameSize + LoopAlloc, ZII); + + MachineBasicBlock *DoneMBB = splitBlockBefore(MBBI, MBB); + MachineBasicBlock *LoopMBB = emitBlockAfter(MBB); + MBB->addSuccessor(LoopMBB); + LoopMBB->addSuccessor(LoopMBB); + LoopMBB->addSuccessor(DoneMBB); + + MBB = LoopMBB; + allocateAndProbe(*MBB, MBB->end(), StackProbeSize, false/*EmitCFI*/); + BuildMI(*MBB, MBB->end(), DL, ZII->get(SystemZ::CLGR)) + .addReg(SystemZ::R15D).addReg(SystemZ::R1D); + BuildMI(*MBB, MBB->end(), DL, ZII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_GT).addMBB(MBB); + + MBB = DoneMBB; + MBBI = DoneMBB->begin(); + buildDefCFAReg(*MBB, MBBI, DL, SystemZ::R15D, ZII); + if (!Residual) + buildCFAOffs(*MBB, MBBI, DL, SPOffsetFromCFA, ZII); + } + + // The residual part that is less than StackProbeSize + if (Residual) + allocateAndProbe(*MBB, MBBI, Residual, true/*EmitCFI*/); + + ChkStkStubMI->eraseFromParent(); +} + bool SystemZFrameLowering::hasFP(const MachineFunction &MF) const { return (MF.getTarget().Options.DisableFramePointerElim(MF) || MF.getFrameInfo().hasVarSizedObjects() || Index: llvm/lib/Target/SystemZ/SystemZISelLowering.h =================================================================== --- llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -83,6 +83,10 @@ // base of the dynamically-allocatable area. ADJDYNALLOC, + // For allocating stack space when using stack clash protector. + // Allocation is performed by block, and each block is probed. + PROBED_ALLOCA, + // Count number of bits set in operand 0 per byte. 
POPCNT, @@ -428,6 +432,7 @@ EVT VT) const override; bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override; + bool hasInlineStackProbe(MachineFunction &MF) const override; bool isLegalICmpImmediate(int64_t Imm) const override; bool isLegalAddImmediate(int64_t Imm) const override; bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, @@ -556,6 +561,8 @@ return true; } + unsigned getStackProbeSize(MachineFunction &MF) const; + private: const SystemZSubtarget &Subtarget; @@ -690,6 +697,8 @@ MachineBasicBlock *emitLoadAndTestCmp0(MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const; + MachineBasicBlock *emitProbedAlloca(MachineInstr &MI, + MachineBasicBlock *MBB) const; MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override; Index: llvm/lib/Target/SystemZ/SystemZISelLowering.cpp =================================================================== --- llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -824,6 +824,15 @@ return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget); } +/// Returns true if stack probing through inline assembly is requested. +bool SystemZTargetLowering::hasInlineStackProbe(MachineFunction &MF) const { + // If the function specifically requests inline stack probes, emit them. + if (MF.getFunction().hasFnAttribute("probe-stack")) + return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() == + "inline-asm"; + return false; +} + bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const { // We can use CGFI or CLGFI. return isInt<32>(Imm) || isUInt<32>(Imm); @@ -3426,10 +3435,17 @@ DAG.getConstant(ExtraAlignSpace, DL, MVT::i64)); // Get the new stack pointer value. - SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace); - - // Copy the new stack pointer back. 
- Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP); + SDValue NewSP; + if (hasInlineStackProbe(MF)) { + NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL, + DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace); + Chain = NewSP.getValue(1); + } + else { + NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace); + // Copy the new stack pointer back. + Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP); + } // The allocated data lives above the 160 bytes allocated for the standard // frame, plus any outgoing stack arguments. We don't know how much that @@ -5343,6 +5359,7 @@ OPCODE(BR_CCMASK); OPCODE(SELECT_CCMASK); OPCODE(ADJDYNALLOC); + OPCODE(PROBED_ALLOCA); OPCODE(POPCNT); OPCODE(SMUL_LOHI); OPCODE(UMUL_LOHI); @@ -6738,6 +6755,19 @@ return 1; } +unsigned +SystemZTargetLowering::getStackProbeSize(MachineFunction &MF) const { + // The default stack probe size is 4096 if the function has no stackprobesize + // attribute. + unsigned StackProbeSize = 4096; + const Function &Fn = MF.getFunction(); + if (Fn.hasFnAttribute("stack-probe-size")) + Fn.getFnAttribute("stack-probe-size") + .getValueAsString() + .getAsInteger(0, StackProbeSize); + return StackProbeSize; +} + //===----------------------------------------------------------------------===// // Custom insertion //===----------------------------------------------------------------------===// @@ -7803,6 +7833,73 @@ return MBB; } +MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca( + MachineInstr &MI, MachineBasicBlock *MBB) const { + MachineFunction &MF = *MBB->getParent(); + MachineRegisterInfo *MRI = &MF.getRegInfo(); + const SystemZInstrInfo *TII = + static_cast(Subtarget.getInstrInfo()); + DebugLoc DL = MI.getDebugLoc(); + const unsigned ProbeSize = getStackProbeSize(MF); + unsigned SizeReg = MI.getOperand(2).getReg(); + + MachineBasicBlock *StartMBB = MBB; + MachineBasicBlock *TailMBB = splitBlockAfter(MI, MBB); + MachineBasicBlock *TestMBB = emitBlockAfter(StartMBB); + 
MachineBasicBlock *BlockMBB = emitBlockAfter(TestMBB); + + unsigned TmpSizeReg = MRI->createVirtualRegister(&SystemZ::GR64BitRegClass); + unsigned TmpSizeReg2 = MRI->createVirtualRegister(&SystemZ::GR64BitRegClass); + + // TestMBB + BuildMI(TestMBB, DL, TII->get(SystemZ::PHI), TmpSizeReg) + .addReg(SizeReg) + .addMBB(MBB) + .addReg(TmpSizeReg2) + .addMBB(BlockMBB); + + BuildMI(TestMBB, DL, TII->get(SystemZ::CLGFI)) + .addReg(TmpSizeReg) + .addImm(ProbeSize); + + BuildMI(TestMBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_LT).addMBB(TailMBB); + StartMBB->addSuccessor(TestMBB); + TestMBB->addSuccessor(BlockMBB); + TestMBB->addSuccessor(TailMBB); + + // BlockMBB: Allocate and probe by means of a volatile compare. + BuildMI(BlockMBB, DL, TII->get(SystemZ::SLGFI), TmpSizeReg2) + .addReg(TmpSizeReg) + .addImm(ProbeSize); + + BuildMI(BlockMBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D) + .addReg(SystemZ::R15D) + .addImm(ProbeSize); + + MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo(), + MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1)); + BuildMI(BlockMBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D) + .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0) + .addMemOperand(MMO); + + BuildMI(BlockMBB, DL, TII->get(SystemZ::J)).addMBB(TestMBB); + BlockMBB->addSuccessor(TestMBB); + + // TailMBB + MachineBasicBlock::iterator InsPos = TailMBB->begin(); + BuildMI(*TailMBB, InsPos, DL, TII->get(SystemZ::SLGR), SystemZ::R15D) + .addReg(SystemZ::R15D) + .addReg(TmpSizeReg); + + BuildMI(*TailMBB, InsPos, DL, TII->get(TargetOpcode::COPY), + MI.getOperand(0).getReg()) + .addReg(SystemZ::R15D); + + MI.eraseFromParent(); + return TailMBB; +} + MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter( MachineInstr &MI, MachineBasicBlock *MBB) const { switch (MI.getOpcode()) { @@ -8063,6 +8160,9 @@ case SystemZ::LTXBRCompare_VecPseudo: return emitLoadAndTestCmp0(MI, MBB, 
SystemZ::LTXBR); + case SystemZ::PROBED_ALLOCA: + return emitProbedAlloca(MI, MBB); + case TargetOpcode::STACKMAP: case TargetOpcode::PATCHPOINT: return emitPatchPoint(MI, MBB); Index: llvm/lib/Target/SystemZ/SystemZInstrInfo.td =================================================================== --- llvm/lib/Target/SystemZ/SystemZInstrInfo.td +++ llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -29,6 +29,11 @@ def ADJDYNALLOC : Pseudo<(outs GR64:$dst), (ins dynalloc12only:$src), [(set GR64:$dst, dynalloc12only:$src)]>; +let Defs = [R15D, CC], Uses = [R15D], hasNoSchedulingInfo = 1, + usesCustomInserter = 1 in + def PROBED_ALLOCA : Pseudo<(outs GR64:$dst), + (ins GR64:$oldSP, GR64:$space), + [(set GR64:$dst, (z_probed_alloca GR64:$oldSP, GR64:$space))]>; //===----------------------------------------------------------------------===// // Branch instructions Index: llvm/lib/Target/SystemZ/SystemZOperators.td =================================================================== --- llvm/lib/Target/SystemZ/SystemZOperators.td +++ llvm/lib/Target/SystemZ/SystemZOperators.td @@ -40,6 +40,10 @@ SDTCisSameAs<0, 2>, SDTCisPtrTy<0>]>; def SDT_ZAdjDynAlloc : SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>; +def SDT_ZProbedAlloca : SDTypeProfile<1, 2, + [SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisPtrTy<0>]>; def SDT_ZGR128Binary : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>, SDTCisInt<1>, @@ -269,6 +273,8 @@ SDT_ZSelectCCMask>; def z_ipm_1 : SDNode<"SystemZISD::IPM", SDT_ZIPM>; def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>; +def z_probed_alloca : SDNode<"SystemZISD::PROBED_ALLOCA", SDT_ZProbedAlloca, + [SDNPHasChain]>; def z_popcnt : SDNode<"SystemZISD::POPCNT", SDTIntUnaryOp>; def z_smul_lohi : SDNode<"SystemZISD::SMUL_LOHI", SDT_ZGR128Binary>; def z_umul_lohi : SDNode<"SystemZISD::UMUL_LOHI", SDT_ZGR128Binary>; Index: llvm/test/CodeGen/SystemZ/stack-clash-dynamic-alloca.ll =================================================================== --- /dev/null +++ 
llvm/test/CodeGen/SystemZ/stack-clash-dynamic-alloca.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +define i32 @foo(i32 %n) #0 { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: stmg %r11, %r15, 88(%r15) +; CHECK-NEXT: .cfi_offset %r11, -72 +; CHECK-NEXT: .cfi_offset %r15, -40 +; CHECK-NEXT: aghi %r15, -160 +; CHECK-NEXT: .cfi_def_cfa_offset 320 +; CHECK-NEXT: cg %r0, 152(%r15) +; CHECK-NEXT: lgr %r11, %r15 +; CHECK-NEXT: .cfi_def_cfa_register %r11 +; CHECK-NEXT: # kill: def $r2l killed $r2l def $r2d +; CHECK-NEXT: risbgn %r1, %r2, 30, 189, 2 +; CHECK-NEXT: la %r0, 7(%r1) +; CHECK-NEXT: risbgn %r1, %r0, 29, 188, 0 +; CHECK-NEXT: la %r1, 8(%r1) +; CHECK-NEXT: clgfi %r1, 4096 +; CHECK-NEXT: jl .LBB0_2 +; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: slgfi %r1, 4096 +; CHECK-NEXT: slgfi %r15, 4096 +; CHECK-NEXT: cg %r15, 4088(%r15) +; CHECK-NEXT: clgfi %r1, 4096 +; CHECK-NEXT: jhe .LBB0_1 +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: slgr %r15, %r1 +; CHECK-NEXT: la %r1, 168(%r15) +; CHECK-NEXT: nill %r1, 65520 +; CHECK-NEXT: lhi %r0, 1 +; CHECK-NEXT: sty %r0, 4792(%r1) +; CHECK-NEXT: l %r2, 0(%r1) +; CHECK-NEXT: lmg %r11, %r15, 248(%r11) +; CHECK-NEXT: br %r14 + + %a = alloca i32, i32 %n, align 16 + %b = getelementptr inbounds i32, i32* %a, i64 1198 + store volatile i32 1, i32* %b + %c = load volatile i32, i32* %a + ret i32 %c +} + +attributes #0 = {"probe-stack"="inline-asm"} Index: llvm/test/CodeGen/SystemZ/stack-clash-large.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/stack-clash-large.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +define i32 @foo() #0 { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: stmg %r11, %r15, 
88(%r15) +; CHECK-NEXT: .cfi_offset %r11, -72 +; CHECK-NEXT: .cfi_offset %r15, -40 +; CHECK-NEXT: aghi %r15, -160 +; CHECK-NEXT: .cfi_def_cfa_offset 320 +; CHECK-NEXT: cg %r0, 152(%r15) +; CHECK-NEXT: lgr %r11, %r15 +; CHECK-NEXT: .cfi_def_cfa_register %r11 +; CHECK-NEXT: lgfi %r1, 72008 +; CHECK-NEXT: clgfi %r1, 4096 +; CHECK-NEXT: jl .LBB0_2 +; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: slgfi %r1, 4096 +; CHECK-NEXT: slgfi %r15, 4096 +; CHECK-NEXT: cg %r15, 4088(%r15) +; CHECK-NEXT: clgfi %r1, 4096 +; CHECK-NEXT: jhe .LBB0_1 +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: slgr %r15, %r1 +; CHECK-NEXT: la %r1, 168(%r15) +; CHECK-NEXT: nill %r1, 65520 +; CHECK-NEXT: lhi %r0, 1 +; CHECK-NEXT: mvhi 392(%r1), 1 +; CHECK-NEXT: sty %r0, 28792(%r1) +; CHECK-NEXT: l %r2, 0(%r1) +; CHECK-NEXT: lmg %r11, %r15, 248(%r11) +; CHECK-NEXT: br %r14 + + + %a = alloca i32, i64 18000, align 16 + %b0 = getelementptr inbounds i32, i32* %a, i64 98 + %b1 = getelementptr inbounds i32, i32* %a, i64 7198 + store volatile i32 1, i32* %b0 + store volatile i32 1, i32* %b1 + %c = load volatile i32, i32* %a + ret i32 %c +} + +attributes #0 = {"probe-stack"="inline-asm"} Index: llvm/test/CodeGen/SystemZ/stack-clash-medium-natural-probes-mutliple-objects.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/stack-clash-medium-natural-probes-mutliple-objects.ll @@ -0,0 +1,57 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +define i32 @foo() #0 { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: stmg %r11, %r15, 88(%r15) +; CHECK-NEXT: .cfi_offset %r11, -72 +; CHECK-NEXT: .cfi_offset %r15, -40 +; CHECK-NEXT: aghi %r15, -160 +; CHECK-NEXT: .cfi_def_cfa_offset 320 +; CHECK-NEXT: cg %r0, 152(%r15) +; CHECK-NEXT: lgr %r11, %r15 +; CHECK-NEXT: .cfi_def_cfa_register %r11 +; CHECK-NEXT: lghi %r1, 4008 +; CHECK-NEXT: 
clgfi %r1, 4096 +; CHECK-NEXT: jl .LBB0_2 +; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: slgfi %r1, 4096 +; CHECK-NEXT: slgfi %r15, 4096 +; CHECK-NEXT: cg %r15, 4088(%r15) +; CHECK-NEXT: clgfi %r1, 4096 +; CHECK-NEXT: jhe .LBB0_1 +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: slgr %r15, %r1 +; CHECK-NEXT: la %r1, 168(%r15) +; CHECK-NEXT: lghi %r2, 2008 +; CHECK-NEXT: nill %r1, 65520 +; CHECK-NEXT: clgfi %r2, 4096 +; CHECK-NEXT: jl .LBB0_4 +; CHECK-NEXT: .LBB0_3: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: slgfi %r2, 4096 +; CHECK-NEXT: slgfi %r15, 4096 +; CHECK-NEXT: cg %r15, 4088(%r15) +; CHECK-NEXT: clgfi %r2, 4096 +; CHECK-NEXT: jhe .LBB0_3 +; CHECK-NEXT: .LBB0_4: +; CHECK-NEXT: slgr %r15, %r2 +; CHECK-NEXT: la %r2, 168(%r15) +; CHECK-NEXT: nill %r2, 65520 +; CHECK-NEXT: mvhi 2000(%r1), 1 +; CHECK-NEXT: mvhi 800(%r2), 2 +; CHECK-NEXT: l %r2, 0(%r1) +; CHECK-NEXT: lmg %r11, %r15, 248(%r11) +; CHECK-NEXT: br %r14 + + %a = alloca i32, i64 1000, align 16 + %b = alloca i32, i64 500, align 16 + %a0 = getelementptr inbounds i32, i32* %a, i64 500 + %b0 = getelementptr inbounds i32, i32* %b, i64 200 + store volatile i32 1, i32* %a0 + store volatile i32 2, i32* %b0 + %c = load volatile i32, i32* %a + ret i32 %c +} + +attributes #0 = {"probe-stack"="inline-asm"} Index: llvm/test/CodeGen/SystemZ/stack-clash-medium-natural-probes.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/stack-clash-medium-natural-probes.ll @@ -0,0 +1,44 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +define i32 @foo() #0 { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: stmg %r11, %r15, 88(%r15) +; CHECK-NEXT: .cfi_offset %r11, -72 +; CHECK-NEXT: .cfi_offset %r15, -40 +; CHECK-NEXT: aghi %r15, -160 +; CHECK-NEXT: .cfi_def_cfa_offset 320 +; CHECK-NEXT: cg %r0, 152(%r15) +; CHECK-NEXT: lgr %r11, %r15 
+; CHECK-NEXT: .cfi_def_cfa_register %r11 +; CHECK-NEXT: lghi %r1, 8008 +; CHECK-NEXT: clgfi %r1, 4096 +; CHECK-NEXT: jl .LBB0_2 +; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: slgfi %r1, 4096 +; CHECK-NEXT: slgfi %r15, 4096 +; CHECK-NEXT: cg %r15, 4088(%r15) +; CHECK-NEXT: clgfi %r1, 4096 +; CHECK-NEXT: jhe .LBB0_1 +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: slgr %r15, %r1 +; CHECK-NEXT: la %r1, 168(%r15) +; CHECK-NEXT: nill %r1, 65520 +; CHECK-NEXT: lhi %r0, 1 +; CHECK-NEXT: mvhi 392(%r1), 1 +; CHECK-NEXT: sty %r0, 4792(%r1) +; CHECK-NEXT: l %r2, 0(%r1) +; CHECK-NEXT: lmg %r11, %r15, 248(%r11) +; CHECK-NEXT: br %r14 + + %a = alloca i32, i64 2000, align 16 + %b0 = getelementptr inbounds i32, i32* %a, i64 98 + %b1 = getelementptr inbounds i32, i32* %a, i64 1198 + store i32 1, i32* %b0 + store i32 1, i32* %b1 + %c = load volatile i32, i32* %a + ret i32 %c +} + +attributes #0 = {"probe-stack"="inline-asm"} Index: llvm/test/CodeGen/SystemZ/stack-clash-medium.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/stack-clash-medium.ll @@ -0,0 +1,40 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +define i32 @foo() #0 { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: stmg %r11, %r15, 88(%r15) +; CHECK-NEXT: .cfi_offset %r11, -72 +; CHECK-NEXT: .cfi_offset %r15, -40 +; CHECK-NEXT: aghi %r15, -160 +; CHECK-NEXT: .cfi_def_cfa_offset 320 +; CHECK-NEXT: cg %r0, 152(%r15) +; CHECK-NEXT: lgr %r11, %r15 +; CHECK-NEXT: .cfi_def_cfa_register %r11 +; CHECK-NEXT: lghi %r1, 8008 +; CHECK-NEXT: clgfi %r1, 4096 +; CHECK-NEXT: jl .LBB0_2 +; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: slgfi %r1, 4096 +; CHECK-NEXT: slgfi %r15, 4096 +; CHECK-NEXT: cg %r15, 4088(%r15) +; CHECK-NEXT: clgfi %r1, 4096 +; CHECK-NEXT: jhe .LBB0_1 +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: slgr 
%r15, %r1 +; CHECK-NEXT: la %r1, 168(%r15) +; CHECK-NEXT: nill %r1, 65520 +; CHECK-NEXT: mvhi 800(%r1), 1 +; CHECK-NEXT: l %r2, 0(%r1) +; CHECK-NEXT: lmg %r11, %r15, 248(%r11) +; CHECK-NEXT: br %r14 + + %a = alloca i32, i64 2000, align 16 + %b = getelementptr inbounds i32, i32* %a, i64 200 + store volatile i32 1, i32* %b + %c = load volatile i32, i32* %a + ret i32 %c +} + +attributes #0 = {"probe-stack"="inline-asm"} Index: llvm/test/CodeGen/SystemZ/stack-clash-no-free-probe.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/stack-clash-no-free-probe.ll @@ -0,0 +1,43 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +define i32 @foo(i64 %i) #0 { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: stmg %r11, %r15, 88(%r15) +; CHECK-NEXT: .cfi_offset %r11, -72 +; CHECK-NEXT: .cfi_offset %r15, -40 +; CHECK-NEXT: aghi %r15, -160 +; CHECK-NEXT: .cfi_def_cfa_offset 320 +; CHECK-NEXT: cg %r0, 152(%r15) +; CHECK-NEXT: lgr %r11, %r15 +; CHECK-NEXT: .cfi_def_cfa_register %r11 +; CHECK-NEXT: lghi %r1, 8008 +; CHECK-NEXT: clgfi %r1, 4096 +; CHECK-NEXT: jl .LBB0_2 +; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: slgfi %r1, 4096 +; CHECK-NEXT: slgfi %r15, 4096 +; CHECK-NEXT: cg %r15, 4088(%r15) +; CHECK-NEXT: clgfi %r1, 4096 +; CHECK-NEXT: jhe .LBB0_1 +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: slgr %r15, %r1 +; CHECK-NEXT: la %r1, 168(%r15) +; CHECK-NEXT: nill %r1, 65520 +; CHECK-NEXT: sllg %r2, %r2, 2 +; CHECK-NEXT: lhi %r0, 1 +; CHECK-NEXT: st %r0, 0(%r2,%r1) +; CHECK-NEXT: l %r2, 0(%r1) +; CHECK-NEXT: lmg %r11, %r15, 248(%r11) +; CHECK-NEXT: br %r14 + + %a = alloca i32, i32 2000, align 16 + %b = getelementptr inbounds i32, i32* %a, i64 %i + store volatile i32 1, i32* %b + %c = load volatile i32, i32* %a + ret i32 %c +} + +attributes #0 = {"probe-stack"="inline-asm"} + Index: 
llvm/test/CodeGen/SystemZ/stack-clash-protection.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/stack-clash-protection.ll @@ -0,0 +1,123 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -O3 | FileCheck %s + +; Small enough to avoid loop. +define void @fun0() #0 { +; CHECK-LABEL: fun0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: aghi %r15, -4096 +; CHECK-NEXT: .cfi_def_cfa_offset 4256 +; CHECK-NEXT: cg %r0, 4088(%r15) +; CHECK-NEXT: aghi %r15, -88 +; CHECK-NEXT: .cfi_def_cfa_offset 4344 +; CHECK-NEXT: cg %r0, 80(%r15) +; CHECK-NEXT: mvhi 180(%r15), 0 +; CHECK-NEXT: l %r0, 180(%r15) +; CHECK-NEXT: aghi %r15, 4184 +; CHECK-NEXT: br %r14 +entry: + %stack = alloca [1000 x i32], align 4 + %i = alloca i32, align 4 + %0 = bitcast [1000 x i32]* %stack to i8* + %i.0.i.0..sroa_cast = bitcast i32* %i to i8* + store volatile i32 0, i32* %i, align 4 + %i.0.i.0.6 = load volatile i32, i32* %i, align 4 + ret void +} + +; Uses a loop to allocate and probe in steps. 
+define void @fun1() #0 { +; CHECK-LABEL: fun1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lgr %r1, %r15 +; CHECK-NEXT: .cfi_def_cfa_register %r1 +; CHECK-NEXT: aghi %r1, -28672 +; CHECK-NEXT: .cfi_def_cfa_offset -28832 +; CHECK-NEXT: .LBB1_1: # %entry +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: aghi %r15, -4096 +; CHECK-NEXT: cg %r0, 4088(%r15) +; CHECK-NEXT: clgrjh %r15, %r1, .LBB1_1 +; CHECK-NEXT: # %bb.2: # %entry +; CHECK-NEXT: .cfi_def_cfa_register %r15 +; CHECK-NEXT: aghi %r15, -3512 +; CHECK-NEXT: .cfi_def_cfa_offset 32344 +; CHECK-NEXT: cg %r0, 3504(%r15) +; CHECK-NEXT: mvhi 180(%r15), 0 +; CHECK-NEXT: l %r0, 180(%r15) +; CHECK-NEXT: aghi %r15, 32184 +; CHECK-NEXT: br %r14 +entry: + %stack = alloca [8000 x i32], align 4 + %i = alloca i32, align 4 + %0 = bitcast [8000 x i32]* %stack to i8* + %i.0.i.0..sroa_cast = bitcast i32* %i to i8* + store volatile i32 0, i32* %i, align 4 + %i.0.i.0.6 = load volatile i32, i32* %i, align 4 + ret void +} + +; Loop with bigger step. 
+define void @fun2() #0 "stack-probe-size"="8192" { +; CHECK-LABEL: fun2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lgr %r1, %r15 +; CHECK-NEXT: .cfi_def_cfa_register %r1 +; CHECK-NEXT: aghi %r1, -24576 +; CHECK-NEXT: .cfi_def_cfa_offset -24736 +; CHECK-NEXT: .LBB2_1: # %entry +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: aghi %r15, -8192 +; CHECK-NEXT: cg %r0, 8184(%r15) +; CHECK-NEXT: clgrjh %r15, %r1, .LBB2_1 +; CHECK-NEXT: # %bb.2: # %entry +; CHECK-NEXT: .cfi_def_cfa_register %r15 +; CHECK-NEXT: aghi %r15, -7608 +; CHECK-NEXT: .cfi_def_cfa_offset 32344 +; CHECK-NEXT: cg %r0, 7600(%r15) +; CHECK-NEXT: mvhi 180(%r15), 0 +; CHECK-NEXT: l %r0, 180(%r15) +; CHECK-NEXT: aghi %r15, 32184 +; CHECK-NEXT: br %r14 +entry: + %stack = alloca [8000 x i32], align 4 + %i = alloca i32, align 4 + %0 = bitcast [8000 x i32]* %stack to i8* + %i.0.i.0..sroa_cast = bitcast i32* %i to i8* + store volatile i32 0, i32* %i, align 4 + %i.0.i.0.6 = load volatile i32, i32* %i, align 4 + ret void +} + +; Ends evenly on the step so no remainder needed. 
+define void @fun3() #0 {
+; CHECK-LABEL: fun3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lgr %r1, %r15
+; CHECK-NEXT: .cfi_def_cfa_register %r1
+; CHECK-NEXT: aghi %r1, -28672
+; CHECK-NEXT: .cfi_def_cfa_offset -28832
+; CHECK-NEXT: .LBB3_1: # %entry
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: aghi %r15, -4096
+; CHECK-NEXT: cg %r0, 4088(%r15)
+; CHECK-NEXT: clgrjh %r15, %r1, .LBB3_1
+; CHECK-NEXT: # %bb.2: # %entry
+; CHECK-NEXT: .cfi_def_cfa_register %r15
+; CHECK-NEXT: .cfi_def_cfa_offset 28832
+; CHECK-NEXT: mvhi 180(%r15), 0
+; CHECK-NEXT: l %r0, 180(%r15)
+; CHECK-NEXT: aghi %r15, 28672
+; CHECK-NEXT: br %r14
+entry:
+  %stack = alloca [7122 x i32], align 4
+  %i = alloca i32, align 4
+  %0 = bitcast [7122 x i32]* %stack to i8*
+  %i.0.i.0..sroa_cast = bitcast i32* %i to i8*
+  store volatile i32 0, i32* %i, align 4
+  %i.0.i.0.6 = load volatile i32, i32* %i, align 4
+  ret void
+}
+
+attributes #0 = { "probe-stack"="inline-asm" }
+
Index: llvm/test/CodeGen/SystemZ/stack-clash-small.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/SystemZ/stack-clash-small.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+define i32 @foo() #0 {
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r11, %r15, 88(%r15)
+; CHECK-NEXT: .cfi_offset %r11, -72
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -160
+; CHECK-NEXT: .cfi_def_cfa_offset 320
+; CHECK-NEXT: cg %r0, 152(%r15)
+; CHECK-NEXT: lgr %r11, %r15
+; CHECK-NEXT: .cfi_def_cfa_register %r11
+; CHECK-NEXT: lghi %r1, 408
+; CHECK-NEXT: clgfi %r1, 4096
+; CHECK-NEXT: jl .LBB0_2
+; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: slgfi %r1, 4096
+; CHECK-NEXT: slgfi %r15, 4096
+; CHECK-NEXT: cg %r15, 4088(%r15)
+; CHECK-NEXT: clgfi %r1, 4096
+; CHECK-NEXT: jhe .LBB0_1
+; CHECK-NEXT: .LBB0_2:
+; CHECK-NEXT: slgr %r15, %r1
+; CHECK-NEXT: la %r1, 168(%r15)
+; CHECK-NEXT: nill %r1, 65520
+; CHECK-NEXT: mvhi 392(%r1), 1
+; CHECK-NEXT: l %r2, 0(%r1)
+; CHECK-NEXT: lmg %r11, %r15, 248(%r11)
+; CHECK-NEXT: br %r14
+
+  %a = alloca i32, i64 100, align 16
+  %b = getelementptr inbounds i32, i32* %a, i64 98
+  store volatile i32 1, i32* %b
+  %c = load volatile i32, i32* %a
+  ret i32 %c
+}
+
+attributes #0 = {"probe-stack"="inline-asm"}
Index: llvm/test/CodeGen/SystemZ/stack-clash-unknown-call.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/SystemZ/stack-clash-unknown-call.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg);
+
+define void @foo() #0 {
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r11, %r15, 88(%r15)
+; CHECK-NEXT: .cfi_offset %r11, -72
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -160
+; CHECK-NEXT: .cfi_def_cfa_offset 320
+; CHECK-NEXT: cg %r0, 152(%r15)
+; CHECK-NEXT: lgr %r11, %r15
+; CHECK-NEXT: .cfi_def_cfa_register %r11
+; CHECK-NEXT: lghi %r1, 8008
+; CHECK-NEXT: clgfi %r1, 4096
+; CHECK-NEXT: jl .LBB0_2
+; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: slgfi %r1, 4096
+; CHECK-NEXT: slgfi %r15, 4096
+; CHECK-NEXT: cg %r15, 4088(%r15)
+; CHECK-NEXT: clgfi %r1, 4096
+; CHECK-NEXT: jhe .LBB0_1
+; CHECK-NEXT: .LBB0_2:
+; CHECK-NEXT: slgr %r15, %r1
+; CHECK-NEXT: la %r1, 168(%r15)
+; CHECK-NEXT: nill %r1, 65520
+; CHECK-NEXT: lghi %r0, 31
+; CHECK-NEXT: .LBB0_3: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: xc 0(256,%r1), 0(%r1)
+; CHECK-NEXT: la %r1, 256(%r1)
+; CHECK-NEXT: brctg %r0, .LBB0_3
+; CHECK-NEXT: # %bb.4:
+; CHECK-NEXT: xc 0(64,%r1), 0(%r1)
+; CHECK-NEXT: lmg %r11, %r15, 248(%r11)
+; CHECK-NEXT: br %r14
+; It is important that the @llvm.memset call below is not used as a stack probe here: the explicit probing loop (.LBB0_1) must still be emitted for the alloca.
+
+  %a = alloca i8, i64 8000, align 16
+  call void @llvm.memset.p0i8.i64(i8* align 16 %a, i8 0, i64 8000, i1 false)
+  ret void
+}
+
+attributes #0 = {"probe-stack"="inline-asm"}