Index: lib/CodeGen/ShrinkWrap.cpp =================================================================== --- lib/CodeGen/ShrinkWrap.cpp +++ lib/CodeGen/ShrinkWrap.cpp @@ -68,6 +68,7 @@ #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Attributes.h" @@ -157,14 +158,19 @@ const SetOfRegs &getCurrentCSRs(RegScavenger *RS) const { if (CurrentCSRs.empty()) { BitVector SavedRegs; - const TargetFrameLowering *TFI = - MachineFunc->getSubtarget().getFrameLowering(); + const TargetSubtargetInfo &Subtarget = MachineFunc->getSubtarget(); + const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); + const TargetLowering *TLI = Subtarget.getTargetLowering(); TFI->determineCalleeSaves(*MachineFunc, SavedRegs, RS); for (int Reg = SavedRegs.find_first(); Reg != -1; Reg = SavedRegs.find_next(Reg)) CurrentCSRs.insert((unsigned)Reg); + + // Consider the stack pointer as a CSR for the purpose of this pass, since + // calling conventions do not list it among the other CSRs normally . + CurrentCSRs.insert(TLI->getStackPointerRegisterToSaveRestore()); } return CurrentCSRs; } @@ -251,6 +257,12 @@ DEBUG(dbgs() << "Frame instruction: " << MI << '\n'); return true; } + // Ignore tail calls. They shouldn't prevent a block from being a candidate + // for save/restore placement even if they use SP. 
+ if (MI.isCall() && MI.isReturn()) + return false; + + const TargetLowering *TLI = MI.getMF()->getSubtarget().getTargetLowering(); for (const MachineOperand &MO : MI.operands()) { bool UseOrDefCSR = false; if (MO.isReg()) { @@ -262,7 +274,8 @@ continue; assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && "Unallocated register?!"); - UseOrDefCSR = RCI.getLastCalleeSavedAlias(PhysReg); + UseOrDefCSR = PhysReg == TLI->getStackPointerRegisterToSaveRestore() || + RCI.getLastCalleeSavedAlias(PhysReg); } else if (MO.isRegMask()) { // Check if this regmask clobbers any of the CSRs. for (unsigned Reg : getCurrentCSRs(RS)) { Index: test/CodeGen/Generic/shrink-wrapping-vla.ll =================================================================== --- /dev/null +++ test/CodeGen/Generic/shrink-wrapping-vla.ll @@ -0,0 +1,114 @@ +; Test shrink wrapping placement is correct with respect to calls to llvm.{stacksave,stackrestore} + +; void f(int n, int x[]) { +; if (n < 0) +; return; +; +; int a[n]; +; +; for (int i = 0; i < n; i++) +; a[i] = x[n - i - 1]; +; +; for (int i = 0; i < n; i++) +; x[i] = a[i] + 1; +; } +; +; RUN: llc -mtriple aarch64-eabi %s -o - | FileCheck %s --check-prefix=CHECK-ARM +; RUN: llc -mtriple x86_64-linux %s -o - | FileCheck %s --check-prefix=CHECK-X86 + + +define dso_local void @f(i32 %n, i32* nocapture %x) local_unnamed_addr #0 { +entry: + %cmp = icmp slt i32 %n, 0 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %0 = zext i32 %n to i64 + %1 = tail call i8* @llvm.stacksave() + %vla = alloca i32, i64 %0, align 16 + %cmp132 = icmp eq i32 %n, 0 + br i1 %cmp132, label %for.cond.cleanup8, label %for.body.lr.ph + +for.body.lr.ph: ; preds = %if.end + %sub = add i32 %n, -1 + br label %for.body + +for.cond6.preheader: ; preds = %for.body + %cmp730 = icmp sgt i32 %n, 0 + br i1 %cmp730, label %for.body9, label %for.cond.cleanup8 + +for.body: ; preds = %for.body, %for.body.lr.ph + %indvars.iv34 = phi i64 [ 0, %for.body.lr.ph ], [ 
%indvars.iv.next35, %for.body ] + %2 = trunc i64 %indvars.iv34 to i32 + %sub2 = sub i32 %sub, %2 + %idxprom = sext i32 %sub2 to i64 + %arrayidx = getelementptr inbounds i32, i32* %x, i64 %idxprom + %3 = load i32, i32* %arrayidx, align 4 + %arrayidx4 = getelementptr inbounds i32, i32* %vla, i64 %indvars.iv34 + store i32 %3, i32* %arrayidx4, align 4 + %indvars.iv.next35 = add nuw nsw i64 %indvars.iv34, 1 + %exitcond37 = icmp eq i64 %indvars.iv.next35, %0 + br i1 %exitcond37, label %for.cond6.preheader, label %for.body + +for.cond.cleanup8: ; preds = %for.body9, %if.end, %for.cond6.preheader + tail call void @llvm.stackrestore(i8* %1) + br label %return + +for.body9: ; preds = %for.cond6.preheader, %for.body9 + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body9 ], [ 0, %for.cond6.preheader ] + %arrayidx11 = getelementptr inbounds i32, i32* %vla, i64 %indvars.iv + %4 = load i32, i32* %arrayidx11, align 4 + %add = add nsw i32 %4, 1 + %arrayidx13 = getelementptr inbounds i32, i32* %x, i64 %indvars.iv + store i32 %add, i32* %arrayidx13, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, %0 + br i1 %exitcond, label %for.cond.cleanup8, label %for.body9 + +return: ; preds = %entry, %for.cond.cleanup8 + ret void +} + +; Function Attrs: nounwind +declare i8* @llvm.stacksave() #1 + +; Function Attrs: nounwind +declare void @llvm.stackrestore(i8*) #1 + +; Check that llvm.stackrestore() happens before CSRs are popped off the stack + +; CHECK-LABEL: f + +; CHECK-ARM: stp x29, x30, [sp, #-16]! 
+; CHECK-ARM-NEXT: mov x29, sp + +; VLA allocation +; CHECK-ARM: add [[X1:x[0-9]+]], [[X1]], #15 +; CHECK-ARM: mov [[X2:x[0-9]+]], sp +; CHECK-ARM: and [[X1]], [[X1]], #0x7fffffff0 +; Saving the SP via llvm.stacksave() +; CHECK-ARM: mov [[SAVE:x[0-9]+]], sp +; CHECK-ARM: sub [[X2]], [[X2]], [[X1]] + +; The next instruction comes from llvm.stackrestore() +; CHECK-ARM: mov sp, [[SAVE]] +; Epilogue +; CHECK-ARM-NEXT: mov sp, x29 +; CHECK-ARM-NEXT: ldp x29, x30, [sp], #16 + + +; CHECK-X86: pushq %rbp +; CHECK-X86: movq %rsp, %rbp + +; Saving the SP via llvm.stacksave() +; CHECK-X86: movq %rsp, [[SAVE:%r[a-z0-9]+]] +; VLA allocation +; CHECK-X86: movq %rsp, [[TMP:%r[a-z0-9]+]] +; CHECK-X86: subq %r{{[a-z0-9]+}}, [[TMP]] +; CHECK-X86: movq [[TMP]], %rsp + +; The next instruction comes from llvm.stackrestore() +; CHECK-X86: movq [[SAVE]], %rsp +; Epilogue +; CHECK-X86-NEXT: movq %rbp, %rsp +; CHECK-X86-NEXT: popq %rbp