Diff 282221

llvm/lib/Target/X86/X86FrameLowering.h

Show First 20 Lines • Show All 207 Lines • ▼ Show 20 Lines	void emitStackProbeInlineWindowsCoreCLR64(MachineFunction &MF,
bool InProlog) const;		bool InProlog) const;
void emitStackProbeInlineGeneric(MachineFunction &MF, MachineBasicBlock &MBB,		void emitStackProbeInlineGeneric(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,		MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, bool InProlog) const;		const DebugLoc &DL, bool InProlog) const;

void emitStackProbeInlineGenericBlock(MachineFunction &MF,		void emitStackProbeInlineGenericBlock(MachineFunction &MF,
MachineBasicBlock &MBB,		MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,		MachineBasicBlock::iterator MBBI,
const DebugLoc &DL,		const DebugLoc &DL, uint64_t Offset,
uint64_t Offset) const;		uint64_t Align) const;

void emitStackProbeInlineGenericLoop(MachineFunction &MF,		void emitStackProbeInlineGenericLoop(MachineFunction &MF,
MachineBasicBlock &MBB,		MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,		MachineBasicBlock::iterator MBBI,
const DebugLoc &DL,		const DebugLoc &DL, uint64_t Offset,
uint64_t Offset) const;		uint64_t Align) const;

/// Emit a stub to later inline the target stack probe.		/// Emit a stub to later inline the target stack probe.
MachineInstr *emitStackProbeInlineStub(MachineFunction &MF,		MachineInstr *emitStackProbeInlineStub(MachineFunction &MF,
MachineBasicBlock &MBB,		MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,		MachineBasicBlock::iterator MBBI,
const DebugLoc &DL,		const DebugLoc &DL,
bool InProlog) const;		bool InProlog) const;

Show All 29 Lines

llvm/lib/Target/X86/X86FrameLowering.cpp

Show First 20 Lines • Show All 580 Lines • ▼ Show 20 Lines	void X86FrameLowering::emitStackProbeInlineGeneric(
const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();		const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
const X86TargetLowering &TLI = *STI.getTargetLowering();		const X86TargetLowering &TLI = *STI.getTargetLowering();
assert(!(STI.is64Bit() && STI.isTargetWindowsCoreCLR()) &&		assert(!(STI.is64Bit() && STI.isTargetWindowsCoreCLR()) &&
"different expansion expected for CoreCLR 64 bit");		"different expansion expected for CoreCLR 64 bit");

const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);		const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
uint64_t ProbeChunk = StackProbeSize * 8;		uint64_t ProbeChunk = StackProbeSize * 8;

		uint64_t MaxAlign = calculateMaxStackAlign(MF);

// Synthesize a loop or unroll it, depending on the number of iterations.		// Synthesize a loop or unroll it, depending on the number of iterations.
if (Offset > ProbeChunk) {		if (Offset > ProbeChunk \|\| MaxAlign > ProbeChunk) {
emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset);		emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset, MaxAlign);
} else {		} else {
emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset);		emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset, MaxAlign);
}		}
}		}

void X86FrameLowering::emitStackProbeInlineGenericBlock(		void X86FrameLowering::emitStackProbeInlineGenericBlock(
MachineFunction &MF, MachineBasicBlock &MBB,		MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, const DebugLoc &DL,		MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
uint64_t Offset) const {		uint64_t Align) const {

const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();		const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
const X86TargetLowering &TLI = *STI.getTargetLowering();		const X86TargetLowering &TLI = *STI.getTargetLowering();
const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);		const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);
const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;		const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);		const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);

uint64_t CurrentOffset = 0;		uint64_t CurrentOffset = 0;
// 0 Thanks to return address being saved on the stack
uint64_t CurrentProbeOffset = 0;		uint64_t CurrentProbeOffset = 0;

		if (CurrentOffset + StackProbeSize < Offset) {
		assert(Align < StackProbeSize &&
		"Should be an emitStackProbeInlineGenericLoop");

		MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
		.addReg(StackPtr)
		.addImm(StackProbeSize - Align)
		.setMIFlag(MachineInstr::FrameSetup);
		MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.

		addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
		.setMIFlag(MachineInstr::FrameSetup),
		StackPtr, false, 0)
		.addImm(0)
		.setMIFlag(MachineInstr::FrameSetup);
		NumFrameExtraProbe++;
		CurrentOffset = StackProbeSize - Align;
		}

// For the first N - 1 pages, just probe. I tried to take advantage of		// For the first N - 1 pages, just probe. I tried to take advantage of
// natural probes but it implies much more logic and there was very few		// natural probes but it implies much more logic and there was very few
// interesting natural probes to interleave.		// interesting natural probes to interleave.
while (CurrentOffset + StackProbeSize < Offset) {		while (CurrentOffset + StackProbeSize < Offset) {
MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)		MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
.addReg(StackPtr)		.addReg(StackPtr)
.addImm(StackProbeSize)		.addImm(StackProbeSize)
.setMIFlag(MachineInstr::FrameSetup);		.setMIFlag(MachineInstr::FrameSetup);
Show All 15 Lines	MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
.addReg(StackPtr)		.addReg(StackPtr)
.addImm(ChunkSize)		.addImm(ChunkSize)
.setMIFlag(MachineInstr::FrameSetup);		.setMIFlag(MachineInstr::FrameSetup);
MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.		MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
}		}

void X86FrameLowering::emitStackProbeInlineGenericLoop(		void X86FrameLowering::emitStackProbeInlineGenericLoop(
MachineFunction &MF, MachineBasicBlock &MBB,		MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, const DebugLoc &DL,		MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
uint64_t Offset) const {		uint64_t Align) const {
assert(Offset && "null offset");		assert(Offset && "null offset");

const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();		const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
const X86TargetLowering &TLI = *STI.getTargetLowering();		const X86TargetLowering &TLI = *STI.getTargetLowering();
const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;		const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);		const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);

		if (Align) {
		lkail (inline comment, unsubmitted, not done): When `Align > StackProbeSize`, which should be rare, is there any chance that the first probe is performed on the old frame?
		serge-sans-paille (author, inline reply, unsubmitted, done): That's a concern I have. In that particular code that's OK, because we check `Align <= StackProbeSize` and we just adjust the first probe. In the other case, what we're basically doing is: `rsp += rem(rsp, align)`; `rsp -= align`; `rsp += StackProbeSize`; `*rsp = 0`. If rsp is already aligned, we end up doing `align/StackProbeSize` useless probing.
		const unsigned ADDOpc = getADDriOpcode(Uses64BitFramePtr, Align);
		BuildMI(MBB, MBBI, DL, TII.get(ADDOpc), StackPtr)
		.addReg(StackPtr)
		.addImm(Align)
		.setMIFlag(MachineInstr::FrameSetup);
		Offset += Align;
		}

// Synthesize a loop		// Synthesize a loop
NumFrameLoopProbe++;		NumFrameLoopProbe++;
const BasicBlock *LLVM_BB = MBB.getBasicBlock();		const BasicBlock *LLVM_BB = MBB.getBasicBlock();

MachineBasicBlock *testMBB = MF.CreateMachineBasicBlock(LLVM_BB);		MachineBasicBlock *testMBB = MF.CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *tailMBB = MF.CreateMachineBasicBlock(LLVM_BB);		MachineBasicBlock *tailMBB = MF.CreateMachineBasicBlock(LLVM_BB);

MachineFunction::iterator MBBIter = ++MBB.getIterator();		MachineFunction::iterator MBBIter = ++MBB.getIterator();
MF.insert(MBBIter, testMBB);		MF.insert(MBBIter, testMBB);
MF.insert(MBBIter, tailMBB);		MF.insert(MBBIter, tailMBB);

Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 : X86::R11D;		Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 : X86::R11D;
BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)		BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
.addReg(StackPtr)		.addReg(StackPtr)
.setMIFlag(MachineInstr::FrameSetup);		.setMIFlag(MachineInstr::FrameSetup);

// save loop bound		// save loop bound
{		{
const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);		const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, Offset);
BuildMI(MBB, MBBI, DL, TII.get(Opc), FinalStackProbed)		BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
.addReg(FinalStackProbed)		.addReg(FinalStackProbed)
.addImm(Offset / StackProbeSize * StackProbeSize)		.addImm(Offset / StackProbeSize * StackProbeSize)
.setMIFlag(MachineInstr::FrameSetup);		.setMIFlag(MachineInstr::FrameSetup);
}		}

// allocate a page		// allocate a page
{		{
const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);		const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
BuildMI(testMBB, DL, TII.get(Opc), StackPtr)		BuildMI(testMBB, DL, TII.get(SUBOpc), StackPtr)
.addReg(StackPtr)		.addReg(StackPtr)
.addImm(StackProbeSize)		.addImm(StackProbeSize)
.setMIFlag(MachineInstr::FrameSetup);		.setMIFlag(MachineInstr::FrameSetup);
}		}

// touch the page		// touch the page
addRegOffset(BuildMI(testMBB, DL, TII.get(MovMIOpc))		addRegOffset(BuildMI(testMBB, DL, TII.get(MovMIOpc))
.setMIFlag(MachineInstr::FrameSetup),		.setMIFlag(MachineInstr::FrameSetup),
▲ Show 20 Lines • Show All 371 Lines • ▼ Show 20 Lines	void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,

// The EFLAGS implicit def is dead.		// The EFLAGS implicit def is dead.
MI->getOperand(3).setIsDead();		MI->getOperand(3).setIsDead();
}		}

bool X86FrameLowering::has128ByteRedZone(const MachineFunction& MF) const {		bool X86FrameLowering::has128ByteRedZone(const MachineFunction& MF) const {
// x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be		// x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be
// clobbered by any interrupt handler.		// clobbered by any interrupt handler.
assert(&STI == &MF.getSubtarget<X86Subtarget>() &&		assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
		efriedma (inline comment, unsubmitted, not done): I don't think this condition is right. Say MaxAlign == StackProbeSize. Then an "and" can allocate up to StackProbeSize-4 bytes. So any subsequent stack allocation can jump over a guard page. (This is an extreme example. Really, it doesn't matter what the alignment is; it's just harder to cause a practical issue if the alignment is small.) In general, we can't skip a probe for a stack allocation. We can only merge the probes for adjacent stack allocations. Say, for example, we realign the stack then allocate "Offset" bytes of aligned memory. We can get away with considering both allocations as a single "allocation" if `MaxAlign+Offset <= StackProbeSize`. But that method of proof only works if you analyze them together. If you analyze each allocation independently, you can't prove the safety, so the realignment needs its own probe.
		lkail (inline reply, unsubmitted, not done): Good catch. It looks like PPC64's implementation also has the same issue. I'll post a patch to fix this issue for PPC64.
"MF used frame lowering for wrong subtarget");		"MF used frame lowering for wrong subtarget");
const Function &Fn = MF.getFunction();		const Function &Fn = MF.getFunction();
const bool IsWin64CC = STI.isCallingConvWin64(Fn.getCallingConv());		const bool IsWin64CC = STI.isCallingConvWin64(Fn.getCallingConv());
return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Attribute::NoRedZone);		return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Attribute::NoRedZone);
}		}


/// emitPrologue - Push callee-saved registers onto the stack, which		/// emitPrologue - Push callee-saved registers onto the stack, which
▲ Show 20 Lines • Show All 2,370 Lines • Show Last 20 Lines

llvm/test/CodeGen/X86/stack-clash-large.ll

Show All 10 Lines	define i32 @foo() local_unnamed_addr #0 {
%c = load volatile i32, i32* %a		%c = load volatile i32, i32* %a
ret i32 %c		ret i32 %c
}		}

attributes #0 = {"probe-stack"="inline-asm"}		attributes #0 = {"probe-stack"="inline-asm"}

; CHECK-X86-64-LABEL: foo:		; CHECK-X86-64-LABEL: foo:
; CHECK-X86-64: # %bb.0:		; CHECK-X86-64: # %bb.0:
		; CHECK-X86-64-NEXT: addq $16, %rsp
; CHECK-X86-64-NEXT: movq %rsp, %r11		; CHECK-X86-64-NEXT: movq %rsp, %r11
; CHECK-X86-64-NEXT: subq $69632, %r11 # imm = 0x11000		; CHECK-X86-64-NEXT: subq $69632, %r11 # imm = 0x11000
; CHECK-X86-64-NEXT: .LBB0_1:		; CHECK-X86-64-NEXT: .LBB0_1:
; CHECK-X86-64-NEXT: subq $4096, %rsp # imm = 0x1000		; CHECK-X86-64-NEXT: subq $4096, %rsp # imm = 0x1000
; CHECK-X86-64-NEXT: movq $0, (%rsp)		; CHECK-X86-64-NEXT: movq $0, (%rsp)
; CHECK-X86-64-NEXT: cmpq %r11, %rsp		; CHECK-X86-64-NEXT: cmpq %r11, %rsp
; CHECK-X86-64-NEXT: jne .LBB0_1		; CHECK-X86-64-NEXT: jne .LBB0_1
; CHECK-X86-64-NEXT:# %bb.2:		; CHECK-X86-64-NEXT:# %bb.2:
; CHECK-X86-64-NEXT: subq $2248, %rsp		; CHECK-X86-64-NEXT: subq $2264, %rsp
; CHECK-X86-64-NEXT: .cfi_def_cfa_offset 71888		; CHECK-X86-64-NEXT: .cfi_def_cfa_offset 71888
; CHECK-X86-64-NEXT: movl $1, 264(%rsp)		; CHECK-X86-64-NEXT: movl $1, 264(%rsp)
; CHECK-X86-64-NEXT: movl $1, 28664(%rsp)		; CHECK-X86-64-NEXT: movl $1, 28664(%rsp)
; CHECK-X86-64-NEXT: movl -128(%rsp), %eax		; CHECK-X86-64-NEXT: movl -128(%rsp), %eax
; CHECK-X86-64-NEXT: addq $71880, %rsp # imm = 0x118C8		; CHECK-X86-64-NEXT: addq $71880, %rsp # imm = 0x118C8
; CHECK-X86-64-NEXT: .cfi_def_cfa_offset 8		; CHECK-X86-64-NEXT: .cfi_def_cfa_offset 8
; CHECK-X86-64-NEXT: retq		; CHECK-X86-64-NEXT: retq

; CHECK-X86-32-LABEL: foo:		; CHECK-X86-32-LABEL: foo:
; CHECK-X86-32: # %bb.0:		; CHECK-X86-32: # %bb.0:
		; CHECK-X86-32-NEXT: addl $16, %esp
; CHECK-X86-32-NEXT: movl %esp, %r11d		; CHECK-X86-32-NEXT: movl %esp, %r11d
; CHECK-X86-32-NEXT: subl $69632, %r11d # imm = 0x11000		; CHECK-X86-32-NEXT: subl $69632, %r11d # imm = 0x11000
; CHECK-X86-32-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1		; CHECK-X86-32-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
; CHECK-X86-32-NEXT: subl $4096, %esp # imm = 0x1000		; CHECK-X86-32-NEXT: subl $4096, %esp # imm = 0x1000
; CHECK-X86-32-NEXT: movl $0, (%esp)		; CHECK-X86-32-NEXT: movl $0, (%esp)
; CHECK-X86-32-NEXT: cmpl %r11d, %esp		; CHECK-X86-32-NEXT: cmpl %r11d, %esp
; CHECK-X86-32-NEXT: jne .LBB0_1		; CHECK-X86-32-NEXT: jne .LBB0_1
; CHECK-X86-32-NEXT:# %bb.2:		; CHECK-X86-32-NEXT:# %bb.2:
; CHECK-X86-32-NEXT: subl $2380, %esp		; CHECK-X86-32-NEXT: subl $2396, %esp
; CHECK-X86-32-NEXT: .cfi_def_cfa_offset 72016		; CHECK-X86-32-NEXT: .cfi_def_cfa_offset 72016
; CHECK-X86-32-NEXT: movl $1, 392(%esp)		; CHECK-X86-32-NEXT: movl $1, 392(%esp)
; CHECK-X86-32-NEXT: movl $1, 28792(%esp)		; CHECK-X86-32-NEXT: movl $1, 28792(%esp)
; CHECK-X86-32-NEXT: movl (%esp), %eax		; CHECK-X86-32-NEXT: movl (%esp), %eax
; CHECK-X86-32-NEXT: addl $72012, %esp # imm = 0x1194C		; CHECK-X86-32-NEXT: addl $72012, %esp # imm = 0x1194C
; CHECK-X86-32-NEXT: .cfi_def_cfa_offset 4		; CHECK-X86-32-NEXT: .cfi_def_cfa_offset 4
; CHECK-X86-32-NEXT: retl		; CHECK-X86-32-NEXT: retl

llvm/test/CodeGen/X86/stack-clash-medium-natural-probes-mutliple-objects.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc < %s \| FileCheck %s			; RUN: llc < %s \| FileCheck %s

	target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"			target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
	target triple = "x86_64-unknown-linux-gnu"			target triple = "x86_64-unknown-linux-gnu"

	define i32 @foo() local_unnamed_addr #0 {			define i32 @foo() local_unnamed_addr #0 {
	; CHECK-LABEL: foo:			; CHECK-LABEL: foo:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000			; CHECK-NEXT: subq $4080, %rsp # imm = 0xFF0
	; CHECK-NEXT: movq $0, (%rsp)			; CHECK-NEXT: movq $0, (%rsp)
	; CHECK-NEXT: subq $1784, %rsp # imm = 0x6F8			; CHECK-NEXT: subq $1800, %rsp # imm = 0x708
	; CHECK-NEXT: .cfi_def_cfa_offset 5888			; CHECK-NEXT: .cfi_def_cfa_offset 5888
	; CHECK-NEXT: movl $1, {{[0-9]+}}(%rsp)			; CHECK-NEXT: movl $1, {{[0-9]+}}(%rsp)
	; CHECK-NEXT: movl $2, {{[0-9]+}}(%rsp)			; CHECK-NEXT: movl $2, {{[0-9]+}}(%rsp)
	; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax			; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
	; CHECK-NEXT: addq $5880, %rsp # imm = 0x16F8			; CHECK-NEXT: addq $5880, %rsp # imm = 0x16F8
	; CHECK-NEXT: .cfi_def_cfa_offset 8			; CHECK-NEXT: .cfi_def_cfa_offset 8
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%a = alloca i32, i64 1000, align 16			%a = alloca i32, i64 1000, align 16
	Show All 10 Lines

llvm/test/CodeGen/X86/stack-clash-medium-natural-probes.ll

	; RUN: llc < %s \| FileCheck %s			; RUN: llc < %s \| FileCheck %s


	target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"			target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
	target triple = "x86_64-unknown-linux-gnu"			target triple = "x86_64-unknown-linux-gnu"

	define i32 @foo() local_unnamed_addr #0 {			define i32 @foo() local_unnamed_addr #0 {

	; CHECK-LABEL: foo:			; CHECK-LABEL: foo:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000			; CHECK-NEXT: subq $4080, %rsp # imm = 0xFF0
	; CHECK-NEXT: movq $0, (%rsp)			; CHECK-NEXT: movq $0, (%rsp)
	; CHECK-NEXT: subq $3784, %rsp # imm = 0xEC8			; CHECK-NEXT: subq $3800, %rsp # imm = 0xED8
	; CHECK-NEXT: .cfi_def_cfa_offset 7888			; CHECK-NEXT: .cfi_def_cfa_offset 7888
	; CHECK-NEXT: movl $1, 264(%rsp)			; CHECK-NEXT: movl $1, 264(%rsp)
	; CHECK-NEXT: movl $1, 4664(%rsp)			; CHECK-NEXT: movl $1, 4664(%rsp)
	; CHECK-NEXT: movl -128(%rsp), %eax			; CHECK-NEXT: movl -128(%rsp), %eax
	; CHECK-NEXT: addq $7880, %rsp # imm = 0x1EC8			; CHECK-NEXT: addq $7880, %rsp # imm = 0x1EC8
	; CHECK-NEXT: .cfi_def_cfa_offset 8			; CHECK-NEXT: .cfi_def_cfa_offset 8
	; CHECK-NEXT: retq			; CHECK-NEXT: retq

	Show All 12 Lines

llvm/test/CodeGen/X86/stack-clash-medium.ll

	; RUN: llc -mtriple=x86_64-linux-android < %s \| FileCheck -check-prefix=CHECK-X86-64 %s			; RUN: llc -mtriple=x86_64-linux-android < %s \| FileCheck -check-prefix=CHECK-X86-64 %s
	; RUN: llc -mtriple=i686-linux-android < %s \| FileCheck -check-prefix=CHECK-X86-32 %s			; RUN: llc -mtriple=i686-linux-android < %s \| FileCheck -check-prefix=CHECK-X86-32 %s

	define i32 @foo() local_unnamed_addr #0 {			define i32 @foo() local_unnamed_addr #0 {
	%a = alloca i32, i64 2000, align 16			%a = alloca i32, i64 2000, align 16
	%b = getelementptr inbounds i32, i32* %a, i64 200			%b = getelementptr inbounds i32, i32* %a, i64 200
	store volatile i32 1, i32* %b			store volatile i32 1, i32* %b
	%c = load volatile i32, i32* %a			%c = load volatile i32, i32* %a
	ret i32 %c			ret i32 %c
	}			}

	attributes #0 = {"probe-stack"="inline-asm"}			attributes #0 = {"probe-stack"="inline-asm"}

	; CHECK-X86-64-LABEL: foo:			; CHECK-X86-64-LABEL: foo:
	; CHECK-X86-64: # %bb.0:			; CHECK-X86-64: # %bb.0:
	; CHECK-X86-64-NEXT: subq $4096, %rsp # imm = 0x1000			; CHECK-X86-64-NEXT: subq $4080, %rsp # imm = 0xFF0
	; CHECK-X86-64-NEXT: movq $0, (%rsp)			; CHECK-X86-64-NEXT: movq $0, (%rsp)
	; CHECK-X86-64-NEXT: subq $3784, %rsp # imm = 0xEC8			; CHECK-X86-64-NEXT: subq $3800, %rsp # imm = 0xED8
	; CHECK-X86-64-NEXT: .cfi_def_cfa_offset 7888			; CHECK-X86-64-NEXT: .cfi_def_cfa_offset 7888
	; CHECK-X86-64-NEXT: movl $1, 672(%rsp)			; CHECK-X86-64-NEXT: movl $1, 672(%rsp)
	; CHECK-X86-64-NEXT: movl -128(%rsp), %eax			; CHECK-X86-64-NEXT: movl -128(%rsp), %eax
	; CHECK-X86-64-NEXT: addq $7880, %rsp # imm = 0x1EC8			; CHECK-X86-64-NEXT: addq $7880, %rsp # imm = 0x1EC8
	; CHECK-X86-64-NEXT: .cfi_def_cfa_offset 8			; CHECK-X86-64-NEXT: .cfi_def_cfa_offset 8
	; CHECK-X86-64-NEXT: retq			; CHECK-X86-64-NEXT: retq


	; CHECK-X86-32-LABEL: foo:			; CHECK-X86-32-LABEL: foo:
	; CHECK-X86-32: # %bb.0:			; CHECK-X86-32: # %bb.0:
	; CHECK-X86-32-NEXT: subl $4096, %esp # imm = 0x1000			; CHECK-X86-32-NEXT: subl $4080, %esp # imm = 0xFF0
	; CHECK-X86-32-NEXT: movl $0, (%esp)			; CHECK-X86-32-NEXT: movl $0, (%esp)
	; CHECK-X86-32-NEXT: subl $3916, %esp # imm = 0xF4C			; CHECK-X86-32-NEXT: subl $3932, %esp # imm = 0xF5C
	; CHECK-X86-32-NEXT: .cfi_def_cfa_offset 8016			; CHECK-X86-32-NEXT: .cfi_def_cfa_offset 8016
	; CHECK-X86-32-NEXT: movl $1, 800(%esp)			; CHECK-X86-32-NEXT: movl $1, 800(%esp)
	; CHECK-X86-32-NEXT: movl (%esp), %eax			; CHECK-X86-32-NEXT: movl (%esp), %eax
	; CHECK-X86-32-NEXT: addl $8012, %esp # imm = 0x1F4C			; CHECK-X86-32-NEXT: addl $8012, %esp # imm = 0x1F4C
	; CHECK-X86-32-NEXT: .cfi_def_cfa_offset 4			; CHECK-X86-32-NEXT: .cfi_def_cfa_offset 4
	; CHECK-X86-32-NEXT: retl			; CHECK-X86-32-NEXT: retl

llvm/test/CodeGen/X86/stack-clash-no-free-probe.ll

	; RUN: llc < %s \| FileCheck %s			; RUN: llc < %s \| FileCheck %s

	target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"			target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
	target triple = "x86_64-unknown-linux-gnu"			target triple = "x86_64-unknown-linux-gnu"

	define i32 @foo(i64 %i) local_unnamed_addr #0 {			define i32 @foo(i64 %i) local_unnamed_addr #0 {
	; CHECK-LABEL: foo:			; CHECK-LABEL: foo:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000			; CHECK-NEXT: subq $4080, %rsp # imm = 0xFF0
	; CHECK-NEXT: movq $0, (%rsp)			; CHECK-NEXT: movq $0, (%rsp)
	; CHECK-NEXT: subq $3784, %rsp # imm = 0xEC8			; CHECK-NEXT: subq $3800, %rsp # imm = 0xED8
	; CHECK-NEXT: .cfi_def_cfa_offset 7888			; CHECK-NEXT: .cfi_def_cfa_offset 7888
	; CHECK-NEXT: movl $1, -128(%rsp,%rdi,4)			; CHECK-NEXT: movl $1, -128(%rsp,%rdi,4)
	; CHECK-NEXT: movl -128(%rsp), %eax			; CHECK-NEXT: movl -128(%rsp), %eax
	; CHECK-NEXT: addq $7880, %rsp # imm = 0x1EC8			; CHECK-NEXT: addq $7880, %rsp # imm = 0x1EC8
	; CHECK-NEXT: .cfi_def_cfa_offset 8			; CHECK-NEXT: .cfi_def_cfa_offset 8
	; CHECK-NEXT: retq			; CHECK-NEXT: retq

	%a = alloca i32, i32 2000, align 16			%a = alloca i32, i32 2000, align 16
	%b = getelementptr inbounds i32, i32* %a, i64 %i			%b = getelementptr inbounds i32, i32* %a, i64 %i
	store volatile i32 1, i32* %b			store volatile i32 1, i32* %b
	%c = load volatile i32, i32* %a			%c = load volatile i32, i32* %a
	ret i32 %c			ret i32 %c
	}			}

	attributes #0 = {"probe-stack"="inline-asm"}			attributes #0 = {"probe-stack"="inline-asm"}

llvm/test/CodeGen/X86/stack-clash-small-large-align.ll

This file was added.

				; RUN: llc < %s \| FileCheck %s


				target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
				target triple = "x86_64-unknown-linux-gnu"

				define i32 @foo_noprotect() local_unnamed_addr {
				; CHECK-LABEL: foo_noprotect:
				; CHECK: # %bb.0:
				; CHECK-NEXT: pushq %rbp
				; CHECK-NEXT: .cfi_def_cfa_offset 16
				; CHECK-NEXT: .cfi_offset %rbp, -16
				; CHECK-NEXT: movq %rsp, %rbp
				; CHECK-NEXT: .cfi_def_cfa_register %rbp
				; CHECK-NEXT: andq $-65536, %rsp
				; CHECK-NEXT: subq $65536, %rsp
				; CHECK-NEXT: movl $1, 392(%rsp)
				; CHECK-NEXT: movl (%rsp), %eax
				; CHECK-NEXT: movq %rbp, %rsp
				; CHECK-NEXT: popq %rbp
				; CHECK-NEXT: .cfi_def_cfa %rsp, 8
				; CHECK-NEXT: retq



				%a = alloca i32, i64 100, align 65536
				%b = getelementptr inbounds i32, i32* %a, i64 98
				store volatile i32 1, i32* %b
				%c = load volatile i32, i32* %a
				ret i32 %c
				}

				define i32 @foo_protect() local_unnamed_addr #0 {
				; CHECK-LABEL: foo_protect:
				; CHECK: # %bb.0:
				; CHECK-NEXT: pushq %rbp
				; CHECK-NEXT: .cfi_def_cfa_offset 16
				; CHECK-NEXT: .cfi_offset %rbp, -16
				; CHECK-NEXT: movq %rsp, %rbp
				; CHECK-NEXT: .cfi_def_cfa_register %rbp
				; CHECK-NEXT: andq $-65536, %rsp
				; CHECK-NEXT: addq $65536, %rsp
				cuviper (inline comment, unsubmitted, not done): There's an immediate race after `andq $-65536, %rsp` -- if we get a signal here, the stack pointer could be in a potentially bad place and start clobbering stuff. Then adding the full alignment puts us into arbitrary stack memory, or even could go past the top of the stack. If we start writing probes from there, who knows what memory we're clobbering. (akin to @lkail's concern) Consider if the stack is almost aligned to begin with, something like `0x1230010`. The `and` will only adjust a small distance to `0x1230000`, and then the `add` makes it `0x1240000`. The first probe will be a page below that, `0x123E000`, but that's still way out of our frame, not memory we should be writing. Actually, that plays just as badly if the stack is perfectly aligned to begin with. The other extreme was the original concern, perhaps with an incoming stack like `0x123FFE0`. Then it will again `and` to `0x1230000`, `add` to `0x1240000`, and start probing at `0x123E000`, which is OK in this case.
				cuviper (inline reply, unsubmitted, not done): Oops, those start probing at `0x123F000`, sorry for the bad math. The point stands though.
				; CHECK-NEXT: movq %rsp, %r11
				; CHECK-NEXT: subq $131072, %r11
				; CHECK-NEXT:.LBB0_1:
				; CHECK-NEXT: subq $4096, %rsp
				; CHECK-NEXT: movq $0, (%rsp)
				; CHECK-NEXT: cmpq %r11, %rsp
				; CHECK-NEXT: jne .LBB0_1
				; CHECK-NEXT:# %bb.2:
				; CHECK-NEXT: movl $1, 392(%rsp)
				; CHECK-NEXT: movl (%rsp), %eax
				; CHECK-NEXT: movq %rbp, %rsp
				; CHECK-NEXT: popq %rbp
				; CHECK-NEXT: .cfi_def_cfa %rsp, 8
				; CHECK-NEXT: retq


				%a = alloca i32, i64 100, align 65536
				%b = getelementptr inbounds i32, i32* %a, i64 98
				store volatile i32 1, i32* %b
				%c = load volatile i32, i32* %a
				ret i32 %c
				}

				attributes #0 = {"probe-stack"="inline-asm"}

llvm/test/CodeGen/X86/stack-clash-unknown-call.ll

	; RUN: llc < %s \| FileCheck %s			; RUN: llc < %s \| FileCheck %s


	target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"			target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
	target triple = "x86_64-unknown-linux-gnu"			target triple = "x86_64-unknown-linux-gnu"

	declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg);			declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg);

	define void @foo() local_unnamed_addr #0 {			define void @foo() local_unnamed_addr #0 {

	;CHECK-LABEL: foo:			;CHECK-LABEL: foo:
	;CHECK: # %bb.0:			;CHECK: # %bb.0:
	;CHECK-NEXT: subq $4096, %rsp # imm = 0x1000			;CHECK-NEXT: subq $4080, %rsp # imm = 0xFF0
	; it's important that we don't use the call as a probe here			; it's important that we don't use the call as a probe here
	;CHECK-NEXT: movq $0, (%rsp)			;CHECK-NEXT: movq $0, (%rsp)
	;CHECK-NEXT: subq $3912, %rsp # imm = 0xF48			;CHECK-NEXT: subq $3928, %rsp # imm = 0xF58
	;CHECK-NEXT: .cfi_def_cfa_offset 8016			;CHECK-NEXT: .cfi_def_cfa_offset 8016
	;CHECK-NEXT: movq %rsp, %rdi			;CHECK-NEXT: movq %rsp, %rdi
	;CHECK-NEXT: movl $8000, %edx # imm = 0x1F40			;CHECK-NEXT: movl $8000, %edx # imm = 0x1F40
	;CHECK-NEXT: xorl %esi, %esi			;CHECK-NEXT: xorl %esi, %esi
	;CHECK-NEXT: callq memset			;CHECK-NEXT: callq memset
	;CHECK-NEXT: addq $8008, %rsp # imm = 0x1F48			;CHECK-NEXT: addq $8008, %rsp # imm = 0x1F48
	;CHECK-NEXT: .cfi_def_cfa_offset 8			;CHECK-NEXT: .cfi_def_cfa_offset 8
	;CHECK-NEXT: retq			;CHECK-NEXT: retq

	%a = alloca i8, i64 8000, align 16			%a = alloca i8, i64 8000, align 16
	call void @llvm.memset.p0i8.i64(i8* align 16 %a, i8 0, i64 8000, i1 false)			call void @llvm.memset.p0i8.i64(i8* align 16 %a, i8 0, i64 8000, i1 false)
	ret void			ret void
	}			}

	attributes #0 = {"probe-stack"="inline-asm"}			attributes #0 = {"probe-stack"="inline-asm"}

This is an archive of the discontinued LLVM Phabricator instance.

Fix interaction between stack alignment and inline-asm stack clash protection
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 282221

llvm/lib/Target/X86/X86FrameLowering.h

llvm/lib/Target/X86/X86FrameLowering.cpp

llvm/test/CodeGen/X86/stack-clash-large.ll

llvm/test/CodeGen/X86/stack-clash-medium-natural-probes-mutliple-objects.ll

llvm/test/CodeGen/X86/stack-clash-medium-natural-probes.ll

llvm/test/CodeGen/X86/stack-clash-medium.ll

llvm/test/CodeGen/X86/stack-clash-no-free-probe.ll

llvm/test/CodeGen/X86/stack-clash-small-large-align.ll

llvm/test/CodeGen/X86/stack-clash-unknown-call.ll

This is an archive of the discontinued LLVM Phabricator instance.

Fix interaction between stack alignment and inline-asm stack clash protection (Closed, Public)

Details

Diff Detail

Event Timeline

Revision Contents

Diff 282221

llvm/lib/Target/X86/X86FrameLowering.h

llvm/lib/Target/X86/X86FrameLowering.cpp

llvm/test/CodeGen/X86/stack-clash-large.ll

llvm/test/CodeGen/X86/stack-clash-medium-natural-probes-mutliple-objects.ll

llvm/test/CodeGen/X86/stack-clash-medium-natural-probes.ll

llvm/test/CodeGen/X86/stack-clash-medium.ll

llvm/test/CodeGen/X86/stack-clash-no-free-probe.ll

llvm/test/CodeGen/X86/stack-clash-small-large-align.ll

llvm/test/CodeGen/X86/stack-clash-unknown-call.ll

Fix interaction between stack alignment and inline-asm stack clash protection
ClosedPublic