diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -273,9 +273,8 @@ // allocation is split in smaller chunks anyway. if (EmitInlineStackProbe && !InEpilogue) { - // stack probing may involve looping, and control flow generations is - // disallowed at this point. Rely to later processing through - // `inlineStackProbe`. + // Delegate stack probing to the `inlineStackProbe` mechanism to avoid + // complications. MachineInstr *Stub = emitStackProbeInlineStub(MF, MBB, MBBI, DL, true); // Encode the static offset as a metadata attached to the stub. @@ -643,6 +642,7 @@ MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset) const { + assert(Offset && "null offset"); const X86Subtarget &STI = MF.getSubtarget(); const X86TargetLowering &TLI = *STI.getTargetLowering(); @@ -661,7 +661,7 @@ MF.insert(MBBIter, tailMBB); unsigned FinalStackPtr = Uses64BitFramePtr ? X86::R11 : X86::R11D; - BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FinalStackPtr) + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackPtr) .addReg(StackPtr) .setMIFlag(MachineInstr::FrameSetup); @@ -691,7 +691,7 @@ .setMIFlag(MachineInstr::FrameSetup); // cmp with stack pointer bound - BuildMI(testMBB, DL, TII.get(IsLP64 ? X86::CMP64rr : X86::CMP32rr)) + BuildMI(testMBB, DL, TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr)) .addReg(StackPtr) .addReg(FinalStackPtr) .setMIFlag(MachineInstr::FrameSetup); @@ -699,23 +699,22 @@ // jump BuildMI(testMBB, DL, TII.get(X86::JCC_1)) .addMBB(testMBB) - .addImm(X86::COND_NE) + .addImm(X86::COND_L) .setMIFlag(MachineInstr::FrameSetup); testMBB->addSuccessor(testMBB); testMBB->addSuccessor(tailMBB); testMBB->addLiveIn(FinalStackPtr); - // allocate a block and touch it - + // BB management tailMBB->splice(tailMBB->end(), &MBB, MBBI, MBB.end()); tailMBB->transferSuccessorsAndUpdatePHIs(&MBB); MBB.addSuccessor(testMBB); + // handle tail if (Offset % StackProbeSize) { - const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset); - BuildMI(*tailMBB, tailMBB->begin(), DL, TII.get(Opc), StackPtr) - .addReg(StackPtr) - .addImm(Offset % StackProbeSize) + BuildMI(*tailMBB, tailMBB->begin(), DL, TII.get(TargetOpcode::COPY), + StackPtr) + .addReg(FinalStackPtr) .setMIFlag(MachineInstr::FrameSetup); } } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -31542,14 +31542,26 @@ return SinkMBB; } +static unsigned getSUBriOpcode(bool IsLP64, int64_t Imm) { + if (IsLP64) { + if (isInt<8>(Imm)) + return X86::SUB64ri8; + return X86::SUB64ri32; + } else { + if (isInt<8>(Imm)) + return X86::SUB32ri8; + return X86::SUB32ri; + } +} + MachineBasicBlock * X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI, - MachineBasicBlock *BB) const { - MachineFunction *MF = BB->getParent(); + MachineBasicBlock *MBB) const { + MachineFunction *MF = MBB->getParent(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); const X86FrameLowering &TFI = *Subtarget.getFrameLowering(); DebugLoc DL = MI.getDebugLoc(); - const BasicBlock *LLVM_BB = BB->getBasicBlock(); + const BasicBlock *LLVM_BB = MBB->getBasicBlock(); const unsigned ProbeSize = getStackProbeSize(*MF); @@ -31558,31 +31570,35 @@ MachineBasicBlock *tailMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *blockMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineFunction::iterator MBBIter = ++BB->getIterator(); + MachineFunction::iterator MBBIter = ++MBB->getIterator(); MF->insert(MBBIter, testMBB); MF->insert(MBBIter, blockMBB); MF->insert(MBBIter, tailMBB); - unsigned sizeVReg = MI.getOperand(1).getReg(); + Register sizeVReg = MI.getOperand(1).getReg(); - const TargetRegisterClass *SizeRegClass = MRI.getRegClass(sizeVReg); + Register physSPReg = TFI.Uses64BitFramePtr ? X86::RSP : X86::ESP; - unsigned tmpSizeVReg = MRI.createVirtualRegister(SizeRegClass); - unsigned tmpSizeVReg2 = MRI.createVirtualRegister(SizeRegClass); + Register TmpStackPtr = MRI.createVirtualRegister( + TFI.Uses64BitFramePtr ? &X86::GR64RegClass : &X86::GR32RegClass); + Register FinalStackPtr = MRI.createVirtualRegister( + TFI.Uses64BitFramePtr ? &X86::GR64RegClass : &X86::GR32RegClass); - unsigned physSPReg = TFI.Uses64BitFramePtr ? X86::RSP : X86::ESP; + BuildMI(*MBB, {MI}, DL, TII->get(TargetOpcode::COPY), TmpStackPtr) + .addReg(physSPReg); + { + const unsigned Opc = TFI.Uses64BitFramePtr ? X86::SUB64rr : X86::SUB32rr; + BuildMI(*MBB, {MI}, DL, TII->get(Opc), FinalStackPtr) + .addReg(TmpStackPtr) + .addReg(sizeVReg); + } // test rsp size - BuildMI(testMBB, DL, TII->get(X86::PHI), tmpSizeVReg) - .addReg(sizeVReg) - .addMBB(BB) - .addReg(tmpSizeVReg2) - .addMBB(blockMBB); BuildMI(testMBB, DL, - TII->get(TFI.Uses64BitFramePtr ? X86::CMP64ri32 : X86::CMP32ri)) - .addReg(tmpSizeVReg) - .addImm(ProbeSize); + TII->get(TFI.Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr)) + .addReg(physSPReg) + .addReg(FinalStackPtr); BuildMI(testMBB, DL, TII->get(X86::JCC_1)) .addMBB(tailMBB) @@ -31593,14 +31609,7 @@ // allocate a block and touch it BuildMI(blockMBB, DL, - TII->get(TFI.Uses64BitFramePtr ? X86::SUB64ri32 : X86::SUB32ri), - tmpSizeVReg2) - .addReg(tmpSizeVReg) - .addImm(ProbeSize); - - BuildMI(blockMBB, DL, - TII->get(TFI.Uses64BitFramePtr ? X86::SUB64ri32 : X86::SUB32ri), - physSPReg) + TII->get(getSUBriOpcode(TFI.Uses64BitFramePtr, ProbeSize)), physSPReg) .addReg(physSPReg) .addImm(ProbeSize); @@ -31612,19 +31621,14 @@ BuildMI(blockMBB, DL, TII->get(X86::JMP_1)).addMBB(testMBB); blockMBB->addSuccessor(testMBB); - // allocate the tail and continue - BuildMI(tailMBB, DL, - TII->get(TFI.Uses64BitFramePtr ? X86::SUB64rr : X86::SUB32rr), - physSPReg) - .addReg(physSPReg) - .addReg(tmpSizeVReg); + // Replace original instruction by the expected stack ptr BuildMI(tailMBB, DL, TII->get(TargetOpcode::COPY), MI.getOperand(0).getReg()) - .addReg(physSPReg); + .addReg(FinalStackPtr); - tailMBB->splice(tailMBB->end(), BB, - std::next(MachineBasicBlock::iterator(MI)), BB->end()); - tailMBB->transferSuccessorsAndUpdatePHIs(BB); - BB->addSuccessor(testMBB); + tailMBB->splice(tailMBB->end(), MBB, + std::next(MachineBasicBlock::iterator(MI)), MBB->end()); + tailMBB->transferSuccessorsAndUpdatePHIs(MBB); + MBB->addSuccessor(testMBB); // Delete the original pseudo instruction. MI.eraseFromParent(); diff --git a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll --- a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll +++ b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll @@ -13,20 +13,20 @@ ; CHECK-NEXT: .cfi_offset %rbp, -16 ; CHECK-NEXT: movq %rsp, %rbp ; CHECK-NEXT: .cfi_def_cfa_register %rbp -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: leaq 15(,%rax,4), %rax -; CHECK-NEXT: andq $-16, %rax -; CHECK-NEXT: cmpq $4096, %rax # imm = 0x1000 +; CHECK-NEXT: movq %rsp, %rax +; CHECK-NEXT: leaq 15(,%rax,4), %rcx +; CHECK-NEXT: andq $-16, %rcx +; CHECK-NEXT: movq %rsp, %rax +; CHECK-NEXT: subq %rcx, %rax +; CHECK-NEXT: cmpq %rax, %rsp ; CHECK-NEXT: jl .LBB0_3 ; CHECK-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: subq $4096, %rax # imm = 0x1000 ; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 ; CHECK-NEXT: movq $0, (%rsp) -; CHECK-NEXT: cmpq $4096, %rax # imm = 0x1000 +; CHECK-NEXT: cmpq %rax, %rsp ; CHECK-NEXT: jge .LBB0_2 ; CHECK-NEXT: .LBB0_3: -; CHECK-NEXT: subq %rax, %rsp -; CHECK-NEXT: movq %rsp, %rax +; CHECK-NEXT: movq %rax, %rsp ; CHECK-NEXT: movl $1, 4792(%rax) ; CHECK-NEXT: movl (%rax), %eax ; CHECK-NEXT: movq %rbp, %rsp diff --git a/llvm/test/CodeGen/X86/stack-clash-large.ll b/llvm/test/CodeGen/X86/stack-clash-large.ll --- a/llvm/test/CodeGen/X86/stack-clash-large.ll +++ b/llvm/test/CodeGen/X86/stack-clash-large.ll @@ -14,9 +14,9 @@ ; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 ; CHECK-NEXT: movq $0, (%rsp) ; CHECK-NEXT: cmpq %r11, %rsp -; CHECK-NEXT: jne .LBB0_1 +; CHECK-NEXT: jl .LBB0_1 ; CHECK-NEXT:# %bb.2: -; CHECK-NEXT: subq $2248, %rsp # imm = 0x8C8 +; CHECK-NEXT: movq %r11, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 71888 ; CHECK-NEXT: movl $1, 264(%rsp) ; CHECK-NEXT: movl $1, 28664(%rsp)