Diff 142745

lib/CodeGen/ShrinkWrap.cpp

Show First 20 Lines • Show All 62 Lines • ▼ Show 20 Lines
#include "llvm/CodeGen/MachineInstr.h"		#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"		#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"		#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePostDominators.h"		#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/RegisterClassInfo.h"		#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"		#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetFrameLowering.h"		#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"		#include "llvm/CodeGen/TargetInstrInfo.h"
		#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"		#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"		#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Attributes.h"		#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"		#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"		#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Pass.h"		#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"		#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"		#include "llvm/Support/Debug.h"
▲ Show 20 Lines • Show All 54 Lines • ▼ Show 20 Lines	class ShrinkWrap : public MachineFunctionPass {
uint64_t EntryFreq;		uint64_t EntryFreq;

/// Current opcode for frame setup.		/// Current opcode for frame setup.
unsigned FrameSetupOpcode;		unsigned FrameSetupOpcode;

/// Current opcode for frame destroy.		/// Current opcode for frame destroy.
unsigned FrameDestroyOpcode;		unsigned FrameDestroyOpcode;

		/// Stack pointer register, used by llvm.{stacksave,stackrestore}
		unsigned SP;

/// Entry block.		/// Entry block.
const MachineBasicBlock *Entry;		const MachineBasicBlock *Entry;

using SetOfRegs = SmallSetVector<unsigned, 16>;		using SetOfRegs = SmallSetVector<unsigned, 16>;

/// Registers that need to be saved for the current function.		/// Registers that need to be saved for the current function.
mutable SetOfRegs CurrentCSRs;		mutable SetOfRegs CurrentCSRs;

Show All 32 Lines	void init(MachineFunction &MF) {
RCI.runOnMachineFunction(MF);		RCI.runOnMachineFunction(MF);
MDT = &getAnalysis<MachineDominatorTree>();		MDT = &getAnalysis<MachineDominatorTree>();
MPDT = &getAnalysis<MachinePostDominatorTree>();		MPDT = &getAnalysis<MachinePostDominatorTree>();
Save = nullptr;		Save = nullptr;
Restore = nullptr;		Restore = nullptr;
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();		MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
MLI = &getAnalysis<MachineLoopInfo>();		MLI = &getAnalysis<MachineLoopInfo>();
EntryFreq = MBFI->getEntryFreq();		EntryFreq = MBFI->getEntryFreq();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();		const TargetSubtargetInfo &Subtarget = MF.getSubtarget();
		const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
FrameSetupOpcode = TII.getCallFrameSetupOpcode();		FrameSetupOpcode = TII.getCallFrameSetupOpcode();
FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();		FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
		SP = Subtarget.getTargetLowering()->getStackPointerRegisterToSaveRestore();
Entry = &MF.front();		Entry = &MF.front();
CurrentCSRs.clear();		CurrentCSRs.clear();
MachineFunc = &MF;		MachineFunc = &MF;

++NumFunc;		++NumFunc;
}		}

/// Check whether or not Save and Restore points are still interesting for		/// Check whether or not Save and Restore points are still interesting for
▲ Show 20 Lines • Show All 48 Lines • ▼ Show 20 Lines	bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI,
RegScavenger *RS) const {		RegScavenger *RS) const {
if (MI.getOpcode() == FrameSetupOpcode \|\|		if (MI.getOpcode() == FrameSetupOpcode \|\|
MI.getOpcode() == FrameDestroyOpcode) {		MI.getOpcode() == FrameDestroyOpcode) {
DEBUG(dbgs() << "Frame instruction: " << MI << '\n');		DEBUG(dbgs() << "Frame instruction: " << MI << '\n');
return true;		return true;
}		}
for (const MachineOperand &MO : MI.operands()) {		for (const MachineOperand &MO : MI.operands()) {
bool UseOrDefCSR = false;		bool UseOrDefCSR = false;
if (MO.isReg()) {		if (MO.isReg()) {
		thegamegUnsubmitted Done Reply Inline Actions I think this is incorrect if the instruction calls a function with a different calling convention, right? In that case some CSRs would be clobbered while otherwise they were expected to be preserved. IIUC, the issue here is that some instructions are marked as `let Uses = [SP]`, like calls and returns, but are safe to skip during the analysis. If I'm not missing anything, to handle all the cases I think it's better to verify all the register operands and reg masks, then only skip the instruction if it only uses SP and `isCall` or something similar. I'm not sure if `isCall` would handle all the cases, but from a quick glance over InstrInfo.td, other instructions that have `Uses = [SP]` should definitely affect the placement of the save/restore blocks. thegameg:* I think this is incorrect if the instruction calls a function with a different calling…
		chillAuthorUnsubmitted Not Done Reply Inline Actions Thanks, indeed. For register masks, if I understand correctly, they are used with call instructions and these should preserve SP, so there is no need to actually check it? chill: Thanks, indeed. For register masks, if I understand correctly, they are used with call…
// Ignore instructions like DBG_VALUE which don't read/def the register.		// Ignore instructions like DBG_VALUE which don't read/def the register.
if (!MO.isDef() && !MO.readsReg())		if (!MO.isDef() && !MO.readsReg())
continue;		continue;
		junbumlUnsubmitted Done Reply Inline Actions Why don't we make it as a member variable? junbuml: Why don't we make it as a member variable?
		chillAuthorUnsubmitted Not Done Reply Inline Actions Right, I made the stack pointer register number a member variable. chill: Right, I made the stack pointer register number a member variable.
unsigned PhysReg = MO.getReg();		unsigned PhysReg = MO.getReg();
if (!PhysReg)		if (!PhysReg)
continue;		continue;
assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) &&		assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
"Unallocated register?!");		"Unallocated register?!");
UseOrDefCSR = RCI.getLastCalleeSavedAlias(PhysReg);		// The stack pointer is not normally described as a callee-saved register
		// in calling convention definitions, so we need to watch for it
		// separately. We can ignore an SP mentioned by a call instruction,
		// though, as it's harmless and we do not want to effectively disable tail
		thegamegUnsubmitted Done Reply Inline Actions Small typo: is -> as? thegameg: Small typo: is -> as?
		// calls by forcing the restore point to post-dominate them.
		UseOrDefCSR = (!MI.isCall() && PhysReg == SP) \|\|
		RCI.getLastCalleeSavedAlias(PhysReg);
		thegamegUnsubmitted Done Reply Inline Actions Can you please add a comment explaining why we skip SP here? thegameg: Can you please add a comment explaining why we skip SP here?
} else if (MO.isRegMask()) {		} else if (MO.isRegMask()) {
// Check if this regmask clobbers any of the CSRs.		// Check if this regmask clobbers any of the CSRs.
for (unsigned Reg : getCurrentCSRs(RS)) {		for (unsigned Reg : getCurrentCSRs(RS)) {
if (MO.clobbersPhysReg(Reg)) {		if (MO.clobbersPhysReg(Reg)) {
UseOrDefCSR = true;		UseOrDefCSR = true;
break;		break;
}		}
}		}
▲ Show 20 Lines • Show All 300 Lines • Show Last 20 Lines

test/CodeGen/Generic/shrink-wrapping-vla.ll

This file was added.

				; Test shrink wrapping placement is correct with respect to calls to llvm.{stacksave,stackrestore}

				; void f(int n, int x[]) {
				; if (n < 0)
				; return;
				;
				; int a[n];
				;
				; for (int i = 0; i < n; i++)
				; a[i] = x[n - i - 1];
				;
				; for (int i = 0; i < n; i++)
				; x[i] = a[i] + 1;
				; }
				;
				; RUN: llc -mtriple aarch64-eabi %s -o - \| FileCheck %s --check-prefix=CHECK-ARM
				; RUN: llc -mtriple x86_64-linux %s -o - \| FileCheck %s --check-prefix=CHECK-X86


				define dso_local void @f(i32 %n, i32* nocapture %x) local_unnamed_addr #0 {
				entry:
				%cmp = icmp slt i32 %n, 0
				br i1 %cmp, label %return, label %if.end

				if.end: ; preds = %entry
				%0 = zext i32 %n to i64
				%1 = tail call i8* @llvm.stacksave()
				%vla = alloca i32, i64 %0, align 16
				%cmp132 = icmp eq i32 %n, 0
				br i1 %cmp132, label %for.cond.cleanup8, label %for.body.lr.ph

				for.body.lr.ph: ; preds = %if.end
				%sub = add i32 %n, -1
				br label %for.body

				for.cond6.preheader: ; preds = %for.body
				%cmp730 = icmp sgt i32 %n, 0
				br i1 %cmp730, label %for.body9, label %for.cond.cleanup8

				for.body: ; preds = %for.body, %for.body.lr.ph
				%indvars.iv34 = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next35, %for.body ]
				%2 = trunc i64 %indvars.iv34 to i32
				%sub2 = sub i32 %sub, %2
				%idxprom = sext i32 %sub2 to i64
				%arrayidx = getelementptr inbounds i32, i32* %x, i64 %idxprom
				%3 = load i32, i32* %arrayidx, align 4
				%arrayidx4 = getelementptr inbounds i32, i32* %vla, i64 %indvars.iv34
				store i32 %3, i32* %arrayidx4, align 4
				%indvars.iv.next35 = add nuw nsw i64 %indvars.iv34, 1
				%exitcond37 = icmp eq i64 %indvars.iv.next35, %0
				br i1 %exitcond37, label %for.cond6.preheader, label %for.body

				for.cond.cleanup8: ; preds = %for.body9, %if.end, %for.cond6.preheader
				tail call void @llvm.stackrestore(i8* %1)
				br label %return

				for.body9: ; preds = %for.cond6.preheader, %for.body9
				%indvars.iv = phi i64 [ %indvars.iv.next, %for.body9 ], [ 0, %for.cond6.preheader ]
				%arrayidx11 = getelementptr inbounds i32, i32* %vla, i64 %indvars.iv
				%4 = load i32, i32* %arrayidx11, align 4
				%add = add nsw i32 %4, 1
				%arrayidx13 = getelementptr inbounds i32, i32* %x, i64 %indvars.iv
				store i32 %add, i32* %arrayidx13, align 4
				%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
				%exitcond = icmp eq i64 %indvars.iv.next, %0
				br i1 %exitcond, label %for.cond.cleanup8, label %for.body9

				return: ; preds = %entry, %for.cond.cleanup8
				ret void
				}

				; Function Attrs: nounwind
				declare i8* @llvm.stacksave() #1

				; Function Attrs: nounwind
				declare void @llvm.stackrestore(i8*) #1

				; Check that llvm.stackrestore() happens before CSRs are popped off the stack

				; CHECK-LABEL: f

				; CHECK-ARM: stp x29, x30, [sp, #-16]!
				; CHECK-ARM-NEXT: mov x29, sp

				; VLA allocation
				; CHECK-ARM: add [[X1:x[0-9]+]], [[X1]], #15
				; CHECK-ARM: mov [[X2:x[0-9]+]], sp
				; CHECK-ARM: and [[X1]], [[X1]], #0x7fffffff0
				; Saving the SP via llvm.stacksave()
				chillAuthorUnsubmitted Done Reply Inline Actions I saw this one :/ chill: I saw this one :/
				; CHECK-ARM: mov [[SAVE:x[0-9]+]], sp
				; CHECK-ARM: sub [[X2]], [[X2]], [[X1]]

				; The next instruction comes from llvm.stackrestore()
				; CHECK-ARM: mov sp, [[SAVE]]
				; Epilogue
				; CHECK-ARM-NEXT: mov sp, x29
				; CHECK-ARM-NEXT: ldp x29, x30, [sp], #16


				; CHECK-X86: pushq %rbp
				; CHECK-X86: movq %rsp, %rbp

				; Saving the SP via llvm.stacksave()
				; CHECK-X86: movq %rsp, [[SAVE:%r[a-z0-9]+]]
				; VLA allocation
				; CHECK-X86: movq %rsp, [[TMP:%r[a-z0-9]+]]
				; CHECK-X86: subq %r{{[a-z0-9]+}}, [[TMP]]
				; CHECK-X86: movq [[TMP]], %rsp

				; The next instruction comes from llvm.stackrestore()
				; CHECK-X86: movq [[SAVE]], %rsp
				; Epilogue
				; CHECK-X86-NEXT: movq %rbp, %rsp
				; CHECK-X86-NEXT: popq %rbp

This is an archive of the discontinued LLVM Phabricator instance.

Fix incorrect choice of callee-saved registers save/restore points
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 142745

lib/CodeGen/ShrinkWrap.cpp

test/CodeGen/Generic/shrink-wrapping-vla.ll

This is an archive of the discontinued LLVM Phabricator instance.

Fix incorrect choice of callee-saved registers save/restore points
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 142745

lib/CodeGen/ShrinkWrap.cpp

test/CodeGen/Generic/shrink-wrapping-vla.ll

Fix incorrect choice of callee-saved registers save/restore points
ClosedPublic