Diff 449359

llvm/lib/CodeGen/Spill2Reg.cpp

Show All 10 Lines
/// replaces spills/reloads to/from the stack with register copies to/from the		/// replaces spills/reloads to/from the stack with register copies to/from the
/// vector register file. This works even on targets where load/stores have		/// vector register file. This works even on targets where load/stores have
/// similar latency to register copies because it can free up memory units which		/// similar latency to register copies because it can free up memory units which
/// helps avoid back-end stalls.		/// helps avoid back-end stalls.
///		///
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#include "AllocationOrder.h"		#include "AllocationOrder.h"
		#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveRegUnits.h"		#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineFrameInfo.h"		#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"		#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"		#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"		#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"		#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"		#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/InitializePasses.h"		#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"		#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"		#include "llvm/Support/Debug.h"

using namespace llvm;		using namespace llvm;

		#define DEBUG_TYPE "Spill2Reg"
		STATISTIC(NumSpill2RegInstrs, "Number of spills/reloads replaced by spill2reg");

namespace {		namespace {

class Spill2Reg : public MachineFunctionPass {		class Spill2Reg : public MachineFunctionPass {
public:		public:
static char ID;		static char ID;
Spill2Reg() : MachineFunctionPass(ID) {		Spill2Reg() : MachineFunctionPass(ID) {
initializeSpill2RegPass(*PassRegistry::getPassRegistry());		initializeSpill2RegPass(*PassRegistry::getPassRegistry());
}		}
▲ Show 20 Lines • Show All 153 Lines • ▼ Show 20 Lines	for (MachineInstr &MI : MBB) {
Entry.Disable = true;		Entry.Disable = true;
}		}
}		}
}		}
}		}
}		}

/// Profitability check for a single spill/reload instruction \p MI. In this diff the old revision (left column) is a stub that always returns true; the new revision (right column) forwards the decision to the target hook TII->isSpill2RegProfitable(MI, TRI, MRI).
bool Spill2Reg::isProfitable(const MachineInstr *MI) const {		bool Spill2Reg::isProfitable(const MachineInstr *MI) const {
// TODO: Unimplemented.		return TII->isSpill2RegProfitable(MI, TRI, MRI);
return true;
}		}

/// Returns true only if isProfitable() holds for the instruction of every entry in Entry.Spills and of every entry in Entry.Reloads.
bool Spill2Reg::allAccessesProfitable(const StackSlotDataEntry &Entry) const {		bool Spill2Reg::allAccessesProfitable(const StackSlotDataEntry &Entry) const {
auto IsProfitable = [this](const auto &MID) { return isProfitable(MID.MI); };		auto IsProfitable = [this](const auto &MID) { return isProfitable(MID.MI); };
return llvm::all_of(Entry.Spills, IsProfitable) &&		return llvm::all_of(Entry.Spills, IsProfitable) &&
llvm::all_of(Entry.Reloads, IsProfitable);		llvm::all_of(Entry.Reloads, IsProfitable);
}		}

/// Walks the order returned by RegClassInfo.getOrder(RegClass) and returns the first register that \p LRU reports as available; returns None when no register in that order is available.
llvm::Optional<MCRegister>		llvm::Optional<MCRegister>
Spill2Reg::tryGetFreePhysicalReg(const TargetRegisterClass *RegClass,		Spill2Reg::tryGetFreePhysicalReg(const TargetRegisterClass *RegClass,
const LiveRegUnits &LRU) {		const LiveRegUnits &LRU) {
auto Order = RegClassInfo.getOrder(RegClass);		auto Order = RegClassInfo.getOrder(RegClass);
for (auto I = Order.begin(), E = Order.end(); I != E; ++I) {		for (auto I = Order.begin(), E = Order.end(); I != E; ++I) {
MCRegister PhysVectorReg = *I;		MCRegister PhysVectorReg = *I;
if (LRU.available(PhysVectorReg))		if (LRU.available(PhysVectorReg))
return PhysVectorReg;		return PhysVectorReg;
}		}
return None;		return None;
}		}

// Replace stack-based spills/reloads with register-based ones.		// Replace stack-based spills/reloads with register-based ones.
void Spill2Reg::replaceStackWithReg(StackSlotDataEntry &Entry,		void Spill2Reg::replaceStackWithReg(StackSlotDataEntry &Entry,
Register VectorReg) {		Register VectorReg) {
// TODO: Unimplemented		for (StackSlotDataEntry::MIData &SpillData : Entry.Spills) {
		MachineInstr *StackSpill = SpillData.MI;
		assert(SpillData.MO->isReg() && "Expected register MO");
		Register OldReg = SpillData.MO->getReg();

		MachineInstr *SpillToVector = TII->spill2RegInsertToVectorReg(
		VectorReg, OldReg, SpillData.SpillBits, StackSpill->getParent(),
		arsenmUnsubmitted Not Done Reply Inline Actions Is this assuming you can only spill one register to one vector register? What if you can place multiple values in different subregisters? arsenm: Is this assuming you can only spill one register to one vector register? What if you can place…
		vporpoAuthorUnsubmitted Done Reply Inline Actions Yes, for now we can only spill one register to the first lane of one vector register. The reason is that spilling to any lane other than the first one on x86 requires the PINSR/PEXTR instructions instead of MOVD, and PINSR/PEXTR have a higher latency and use more uops. But yeah, I think it is still worth extending it to spill to more lanes in the future. Here is the relevant data from Agner Fog's instruction tables:
		Spill-to-reg:
		| Instruction  | Operands    | uops fused domain | uops unfused domain | uops each port | latency | throughput |
		|--------------|-------------|-------------------|---------------------|----------------|---------|------------|
		| MOVD         | mm/x r32/64 | 1                 | 1                   | p5             | 2       | 1          |
		| MOVD         | r32/64 mm/x | 1                 | 1                   | p0             | 2       | 1          |
		| PINSRD/Q     | x,r,i       | 2                 | 2                   | 2p5            | 3       | 2          |
		| PEXTRB/W/D/Q | r,x,i       | 2                 | 2                   | p0 p5          | 3       | 1          |
		vporpo: Yes, for now we can only spill one register to the first lane of one vector register. The…
		arsenmUnsubmitted Not Done Reply Inline Actions The reason I ask is because I'm very interested in using something more like this for AMDGPU. We currently have 2 custom, convoluted mechanisms for handling "spills" to registers. I'm wondering if we could adapt this pass to one of them, but it would require a broader notion of how/where the registers can be spilled (and there might be some additional liveness hazards) arsenm: The reason I ask is because I'm very interested in using something more like this for AMDGPU.
		vporpoAuthorUnsubmitted Done Reply Inline Actions Yeah that would make sense. I think we can make code generation a bit more sophisticated than it is now and have some target specific components decide which register and lane we should use on each spill. Could you point me to the two mechanisms so I can take a look? vporpo: Yeah that would make sense. I think we can make code generation a bit more sophisticated than…
		arsenmUnsubmitted Not Done Reply Inline Actions I don't think either will be much use to you. One of the mechanisms doesn't really use vector registers in the same sense as a subregister, and also relies on reserved registers arsenm: I don't think either will be much use to you. One of the mechanisms doesn't really use vector…
		/*InsertBeforeIt=*/StackSpill->getIterator(), TRI);

		// Spill to stack is no longer needed.
		StackSpill->eraseFromParent();
		assert(OldReg.isPhysical() && "Otherwise we need to removeInterval()");
		}

		for (StackSlotDataEntry::MIData &ReloadData : Entry.Reloads) {
		MachineInstr *StackReload = ReloadData.MI;
		assert(ReloadData.MO->isReg() && "Expected Reg MO");
		Register OldReg = ReloadData.MO->getReg();

		MachineInstr *ReloadFromReg = TII->spill2RegExtractFromVectorReg(
		OldReg, VectorReg, ReloadData.SpillBits, StackReload->getParent(),
		/*InsertBeforeIt=*/StackReload->getIterator(), TRI);

		// Reload from stack is no longer needed.
		StackReload->eraseFromParent();
		assert(OldReg.isPhysical() && "Otherwise we need to removeInterval()");
		}
}		}

/// Placeholder taking a stack-slot \p Entry and a LiveRegUnits \p LRU; the body is still unimplemented in both revisions shown in this diff, so the call currently leaves both arguments untouched.
void Spill2Reg::calculateLiveRegs(StackSlotDataEntry &Entry,		void Spill2Reg::calculateLiveRegs(StackSlotDataEntry &Entry,
LiveRegUnits &LRU) {		LiveRegUnits &LRU) {
// TODO: Unimplemented		// TODO: Unimplemented
}		}

void Spill2Reg::generateCode() {		void Spill2Reg::generateCode() {
Show All 15 Lines	for (auto &Pair : StackSlotData) {
llvm::Optional<MCRegister> PhysVectorRegOpt = tryGetFreePhysicalReg(		llvm::Optional<MCRegister> PhysVectorRegOpt = tryGetFreePhysicalReg(
TII->getVectorRegisterClassForSpill2Reg(TRI, Entry.getSpilledReg()),		TII->getVectorRegisterClassForSpill2Reg(TRI, Entry.getSpilledReg()),
LRU);		LRU);
if (!PhysVectorRegOpt)		if (!PhysVectorRegOpt)
continue;		continue;

// Replace stack accesses with register accesses.		// Replace stack accesses with register accesses.
replaceStackWithReg(Entry, *PhysVectorRegOpt);		replaceStackWithReg(Entry, *PhysVectorRegOpt);

		NumSpill2RegInstrs += Entry.Spills.size() + Entry.Reloads.size();
}		}
}		}

/// Discards all collected stack-slot data by clearing StackSlotData.
void Spill2Reg::cleanup() { StackSlotData.clear(); }		void Spill2Reg::cleanup() { StackSlotData.clear(); }

bool Spill2Reg::run() {		bool Spill2Reg::run() {
// Walk over each instruction in the code keeping track of the processor's		// Walk over each instruction in the code keeping track of the processor's
// port pressure and look for memory unit hot-spots.		// port pressure and look for memory unit hot-spots.
Show All 27 Lines

llvm/test/CodeGen/X86/spill2reg_avoid_vector_instrs.mir

This file was added.

				# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
				# RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+sse4.1 --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 \| FileCheck %s
				# RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+sse4.1 --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 \| FileCheck %s --check-prefix=FORCED

				# Simple test to confirm that spill2reg won't apply if there is a vector
				# instruction nearby.

				--- \|
				@D0 = dso_local local_unnamed_addr global i64 0, align 4
				@U0 = dso_local local_unnamed_addr global i64 0, align 4
				define void @func() { ret void }
				...
				---
				name: func
				alignment: 16
				tracksRegLiveness: true
				tracksDebugUserValues: true
				frameInfo:
				maxAlignment: 4
				stack:
				- { id: 0, type: spill-slot, size: 8, alignment: 4 }
				machineFunctionInfo: {}
				body: \|


				bb.0:
				; CHECK-LABEL: name: func
				; CHECK: $rax = MOV64rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s64) from @D0)
				; CHECK-NEXT: $xmm15 = MOV64toPQIrr $rax
				; CHECK-NEXT: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $rax :: (store (s64) into %stack.0)
				; CHECK-NEXT: $rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0)
				; CHECK-NEXT: MOV64mr $rip, 1, $noreg, @U0, $noreg, killed renamable $rax :: (store (s64) into @U0)
				; CHECK-NEXT: RET 0
				; FORCED-LABEL: name: func
				; FORCED: $rax = MOV64rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s64) from @D0)
				; FORCED-NEXT: $xmm15 = MOV64toPQIrr $rax
				; FORCED-NEXT: $xmm0 = MOV64toPQIrr $rax
				; FORCED-NEXT: $rax = MOVPQIto64rr $xmm0
				; FORCED-NEXT: MOV64mr $rip, 1, $noreg, @U0, $noreg, killed renamable $rax :: (store (s64) into @U0)
				; FORCED-NEXT: RET 0
				$rax = MOV64rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s64) from @D0)
				$xmm15 = MOV64toPQIrr $rax
				MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $rax :: (store (s64) into %stack.0)
				; reload
				$rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0)
				MOV64mr $rip, 1, $noreg, @U0, $noreg, killed renamable $rax :: (store (s64) into @U0)
				RET 0
				...

llvm/test/CodeGen/X86/spill2reg_disable_when_noimplicitfloat.mir

This file was added.

				# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
				# RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+sse4.1 --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 \| FileCheck %s

				# Check that Spill2reg is disabled if the NoImplicitFloat attribute is set.

				--- \|
				@D0 = dso_local local_unnamed_addr global i32 0, align 4
				@U0 = dso_local local_unnamed_addr global i32 0, align 4
				define void @func() #0 { ret void }

				attributes #0 = { noimplicitfloat }
				...
				---
				name: func
				alignment: 16
				tracksRegLiveness: true
				tracksDebugUserValues: true
				frameInfo:
				maxAlignment: 4
				stack:
				- { id: 0, type: spill-slot, size: 4, alignment: 4 }
				machineFunctionInfo: {}
				body: \|
				bb.0:
				; CHECK-LABEL: name: func
				; CHECK: $eax = MOV32rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s32) from @D0)
				; CHECK-NEXT: MOV32mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $eax :: (store (s32) into %stack.0)
				; CHECK-NEXT: $eax = MOV32rm %stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %stack.0)
				; CHECK-NEXT: MOV32mr $rip, 1, $noreg, @U0, $noreg, killed renamable $eax :: (store (s32) into @U0)
				; CHECK-NEXT: RET 0
				$eax = MOV32rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s32) from @D0)
				MOV32mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $eax :: (store (s32) into %stack.0)

				$eax = MOV32rm %stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %stack.0)
				MOV32mr $rip, 1, $noreg, @U0, $noreg, killed renamable $eax :: (store (s32) into @U0)
				RET 0
				...

llvm/test/CodeGen/X86/spill2reg_mask_spills.mir

This file was added.

				# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
				# RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+avx512f --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 \| FileCheck %s

				# Checks that spills reading from $k mask registers are skipped by Spill2Reg.

				--- \|
				@D0 = dso_local local_unnamed_addr global i32 0, align 4
				@U0 = dso_local local_unnamed_addr global i32 0, align 4
				define void @func() { ret void }
				...
				---
				name: func
				alignment: 16
				tracksRegLiveness: true
				tracksDebugUserValues: true
				frameInfo:
				maxAlignment: 4
				stack:
				- { id: 0, type: spill-slot, size: 4, alignment: 4 }
				machineFunctionInfo: {}
				body: \|
				bb.0:
				liveins: $k1
				; CHECK-LABEL: name: func
				; CHECK: liveins: $k1
				; CHECK-NEXT: {{ $}}
				; CHECK-NEXT: KMOVWmk %stack.0, 1, $noreg, 0, $noreg, killed renamable $k1 :: (store (s16) into %stack.0)
				; CHECK-NEXT: renamable $k1 = KMOVWkm %stack.0, 1, $noreg, 0, $noreg :: (load (s16) from %stack.0)
				; CHECK-NEXT: RET 0
				KMOVWmk %stack.0, 1, $noreg, 0, $noreg, killed renamable $k1 :: (store (s16) into %stack.0)
				renamable $k1 = KMOVWkm %stack.0, 1, $noreg, 0, $noreg :: (load (s16) from %stack.0)
				RET 0
				...

llvm/test/CodeGen/X86/spill2reg_simple_1_32bit.mir

This file was added.

				# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
				# RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+sse4.1 --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 \| FileCheck %s
				# RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=-sse4.1 --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 \| FileCheck --check-prefix=NOSSE %s

				# Simple test with a single spill-reload pair (32-bit version):
				# spill stack.0
				# reload stack.0

				--- \|
				@D0 = dso_local local_unnamed_addr global i32 0, align 4
				@U0 = dso_local local_unnamed_addr global i32 0, align 4
				define void @func() { ret void }
				...
				---
				name: func
				alignment: 16
				tracksRegLiveness: true
				tracksDebugUserValues: true
				frameInfo:
				maxAlignment: 4
				stack:
				- { id: 0, type: spill-slot, size: 4, alignment: 4 }
				machineFunctionInfo: {}
				body: \|


				bb.0:
				; spill
				; CHECK-LABEL: name: func
				; CHECK: $eax = MOV32rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s32) from @D0)
				; CHECK-NEXT: $xmm0 = MOVDI2PDIrr $eax
				; CHECK-NEXT: $eax = MOVPDI2DIrr $xmm0
				; CHECK-NEXT: MOV32mr $rip, 1, $noreg, @U0, $noreg, killed renamable $eax :: (store (s32) into @U0)
				; CHECK-NEXT: RET 0
				; NOSSE-LABEL: name: func
				; NOSSE: $eax = MOV32rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s32) from @D0)
				; NOSSE-NEXT: MOV32mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $eax :: (store (s32) into %stack.0)
				; NOSSE-NEXT: $eax = MOV32rm %stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %stack.0)
				; NOSSE-NEXT: MOV32mr $rip, 1, $noreg, @U0, $noreg, killed renamable $eax :: (store (s32) into @U0)
				; NOSSE-NEXT: RET 0
				$eax = MOV32rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s32) from @D0)
				MOV32mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $eax :: (store (s32) into %stack.0)
				; reload
				$eax = MOV32rm %stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %stack.0)
				MOV32mr $rip, 1, $noreg, @U0, $noreg, killed renamable $eax :: (store (s32) into @U0)
				RET 0
				...

llvm/test/CodeGen/X86/spill2reg_simple_1_64bit.mir

This file was added.

				# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
				# RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+sse4.1 --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 \| FileCheck %s
				# RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=-sse4.1 --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 \| FileCheck --check-prefix=NOSSE %s

				# Simple test with a single spill-reload pair (64-bit version):
				# spill stack.0
				# reload stack.0

				--- \|
				@D0 = dso_local local_unnamed_addr global i64 0, align 4
				@U0 = dso_local local_unnamed_addr global i64 0, align 4
				define void @func() { ret void }
				...
				---
				name: func
				alignment: 16
				tracksRegLiveness: true
				tracksDebugUserValues: true
				frameInfo:
				maxAlignment: 4
				stack:
				- { id: 0, type: spill-slot, size: 8, alignment: 4 }
				machineFunctionInfo: {}
				body: \|


				bb.0:
				; spill
				; CHECK-LABEL: name: func
				; CHECK: $rax = MOV64rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s64) from @D0)
				; CHECK-NEXT: $xmm0 = MOV64toPQIrr $rax
				; CHECK-NEXT: $rax = MOVPQIto64rr $xmm0
				; CHECK-NEXT: MOV64mr $rip, 1, $noreg, @U0, $noreg, killed renamable $rax :: (store (s64) into @U0)
				; CHECK-NEXT: RET 0
				; NOSSE-LABEL: name: func
				; NOSSE: $rax = MOV64rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s64) from @D0)
				; NOSSE-NEXT: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $rax :: (store (s64) into %stack.0)
				; NOSSE-NEXT: $rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0)
				; NOSSE-NEXT: MOV64mr $rip, 1, $noreg, @U0, $noreg, killed renamable $rax :: (store (s64) into @U0)
				; NOSSE-NEXT: RET 0
				$rax = MOV64rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s64) from @D0)
				MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $rax :: (store (s64) into %stack.0)
				; reload
				$rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0)
				MOV64mr $rip, 1, $noreg, @U0, $noreg, killed renamable $rax :: (store (s64) into @U0)
				RET 0
				...

llvm/test/CodeGen/X86/spill2reg_simple_2.mir

This file was added.

				# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
				# RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+sse4.1 --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 \| FileCheck %s

				CarrotUnsubmitted Not Done Reply Inline Actions All test cases use the option -spill2reg-mem-instrs=0. That looks to me more like a debugging option; a practical value with a positive performance impact should be larger than 0. Could you add a test case for such a value? Carrot: All test cases have the option -spill2reg-mem-instrs=0. It looks to me more like a debug…
				vporpoAuthorUnsubmitted Not Done Reply Inline Actions Yeah this is basically disabling the heuristic so that we can check the functionality even in small tests. I am actually including some end-to-end tests with -spill2reg-mem-instrs set to default in the followup patch (https://reviews.llvm.org/D118303). But yes, I agree, I should add one more test here to exercise this option. vporpo: Yeah this is basically disabling the heuristic so that we can check the functionality even in…
				# Simple test with two overlapping spill-reload pairs.
				# spill stack.0
				# spill stack.1
				# reload stack.0
				# reload stack.1

				--- \|
				@D0 = dso_local local_unnamed_addr global i32 0, align 4
				@D1 = dso_local local_unnamed_addr global i32 0, align 4
				@U0 = dso_local local_unnamed_addr global i32 0, align 4
				@U1 = dso_local local_unnamed_addr global i32 0, align 4
				define void @func() { ret void }
				...
				---
				name: func
				alignment: 16
				tracksRegLiveness: true
				tracksDebugUserValues: true
				frameInfo:
				maxAlignment: 4
				stack:
				- { id: 0, type: spill-slot, size: 4, alignment: 4 }
				- { id: 1, type: spill-slot, size: 4, alignment: 4 }
				machineFunctionInfo: {}
				body: \|

				bb.0:
				; CHECK-LABEL: name: func
				; CHECK: $eax = MOV32rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s32) from @D0)
				; CHECK-NEXT: $xmm0 = MOVDI2PDIrr $eax
				; CHECK-NEXT: $eax = MOV32rm $rip, 1, $noreg, @D1, $noreg :: (dereferenceable load (s32) from @D1)
				; CHECK-NEXT: $xmm0 = MOVDI2PDIrr $eax
				; CHECK-NEXT: $eax = MOVPDI2DIrr $xmm0
				; CHECK-NEXT: MOV32mr $rip, 1, $noreg, @U0, $noreg, killed renamable $eax :: (store (s32) into @U0)
				; CHECK-NEXT: $eax = MOVPDI2DIrr $xmm0
				; CHECK-NEXT: MOV32mr $rip, 1, $noreg, @U1, $noreg, killed renamable $eax :: (store (s32) into @U1)
				; CHECK-NEXT: RET 0
				$eax = MOV32rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s32) from @D0)
				MOV32mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $eax :: (store (s32) into %stack.0)
				$eax = MOV32rm $rip, 1, $noreg, @D1, $noreg :: (dereferenceable load (s32) from @D1)
				MOV32mr %stack.1, 1, $noreg, 0, $noreg, killed renamable $eax :: (store (s32) into %stack.1)

				$eax = MOV32rm %stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %stack.0)
				MOV32mr $rip, 1, $noreg, @U0, $noreg, killed renamable $eax :: (store (s32) into @U0)
				$eax = MOV32rm %stack.1, 1, $noreg, 0, $noreg :: (load (s32) from %stack.1)
				MOV32mr $rip, 1, $noreg, @U1, $noreg, killed renamable $eax :: (store (s32) into @U1)
				RET 0

				...

llvm/test/CodeGen/X86/spill2reg_simple_3.mir

This file was added.

				# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
				# RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+sse4.1 --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 \| FileCheck %s --check-prefix=MEM0
				# RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+sse4.1 --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=50 -spill2reg-vec-instrs=99999 \| FileCheck %s --check-prefix=MEM50
				# RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+sse4.1 --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=100 -spill2reg-vec-instrs=99999 \| FileCheck %s --check-prefix=MEM100


				# Simple test with several overlapping spill-reload pairs.
				# This tests the -spill2reg-mem-instrs flag.

				# spill stack.0
				# spill stack.1
				# spill stack.2
				# spill stack.3
				# spill stack.4
				# spill stack.5
				# spill stack.6
				# spill stack.7
				# reload stack.0
				# reload stack.1
				# reload stack.2
				# reload stack.3
				# reload stack.4
				# reload stack.5
				# reload stack.6
				# reload stack.7

				--- \|
				@D0 = dso_local local_unnamed_addr global i32 0, align 4
				@D1 = dso_local local_unnamed_addr global i32 0, align 4
				@D2 = dso_local local_unnamed_addr global i32 0, align 4
				@D3 = dso_local local_unnamed_addr global i32 0, align 4
				@D4 = dso_local local_unnamed_addr global i32 0, align 4
				@D5 = dso_local local_unnamed_addr global i32 0, align 4
				@D6 = dso_local local_unnamed_addr global i32 0, align 4
				@D7 = dso_local local_unnamed_addr global i32 0, align 4
				@U0 = dso_local local_unnamed_addr global i32 0, align 4
				@U1 = dso_local local_unnamed_addr global i32 0, align 4
				@U2 = dso_local local_unnamed_addr global i32 0, align 4
				@U3 = dso_local local_unnamed_addr global i32 0, align 4
				@U4 = dso_local local_unnamed_addr global i32 0, align 4
				@U5 = dso_local local_unnamed_addr global i32 0, align 4
				@U6 = dso_local local_unnamed_addr global i32 0, align 4
				@U7 = dso_local local_unnamed_addr global i32 0, align 4
				define void @func() { ret void }
				...
				---
				name: func
				alignment: 16
				tracksRegLiveness: true
				tracksDebugUserValues: true
				frameInfo:
				maxAlignment: 4
				stack:
				- { id: 0, type: spill-slot, size: 4, alignment: 4 }
				- { id: 1, type: spill-slot, size: 4, alignment: 4 }
				- { id: 2, type: spill-slot, size: 4, alignment: 4 }
				- { id: 3, type: spill-slot, size: 4, alignment: 4 }
				- { id: 4, type: spill-slot, size: 4, alignment: 4 }
				- { id: 5, type: spill-slot, size: 4, alignment: 4 }
				- { id: 6, type: spill-slot, size: 4, alignment: 4 }
				- { id: 7, type: spill-slot, size: 4, alignment: 4 }
				machineFunctionInfo: {}
				body: \|

				bb.0:
				; MEM0-LABEL: name: func
				; MEM0: $eax = MOV32rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s32) from @D0)
				; MEM0-NEXT: $xmm0 = MOVDI2PDIrr $eax
				; MEM0-NEXT: $eax = MOV32rm $rip, 1, $noreg, @D1, $noreg :: (dereferenceable load (s32) from @D1)
				; MEM0-NEXT: $xmm0 = MOVDI2PDIrr $eax
				; MEM0-NEXT: $eax = MOV32rm $rip, 1, $noreg, @D2, $noreg :: (dereferenceable load (s32) from @D2)
				; MEM0-NEXT: $xmm0 = MOVDI2PDIrr $eax
				; MEM0-NEXT: $eax = MOV32rm $rip, 1, $noreg, @D3, $noreg :: (dereferenceable load (s32) from @D3)
				; MEM0-NEXT: $xmm0 = MOVDI2PDIrr $eax
				; MEM0-NEXT: $eax = MOV32rm $rip, 1, $noreg, @D4, $noreg :: (dereferenceable load (s32) from @D4)
				; MEM0-NEXT: $xmm0 = MOVDI2PDIrr $eax
				; MEM0-NEXT: $eax = MOV32rm $rip, 1, $noreg, @D5, $noreg :: (dereferenceable load (s32) from @D5)
				; MEM0-NEXT: $xmm0 = MOVDI2PDIrr $eax
				; MEM0-NEXT: $eax = MOV32rm $rip, 1, $noreg, @D6, $noreg :: (dereferenceable load (s32) from @D6)
				; MEM0-NEXT: $xmm0 = MOVDI2PDIrr $eax
				; MEM0-NEXT: $eax = MOV32rm $rip, 1, $noreg, @D7, $noreg :: (dereferenceable load (s32) from @D7)
				; MEM0-NEXT: $xmm0 = MOVDI2PDIrr $eax
				; MEM0-NEXT: $eax = MOVPDI2DIrr $xmm0
				; MEM0-NEXT: MOV32mr $rip, 1, $noreg, @U0, $noreg, killed renamable $eax :: (store (s32) into @U0)
				; MEM0-NEXT: $eax = MOVPDI2DIrr $xmm0
				; MEM0-NEXT: MOV32mr $rip, 1, $noreg, @U1, $noreg, killed renamable $eax :: (store (s32) into @U1)
				; MEM0-NEXT: $eax = MOVPDI2DIrr $xmm0
				; MEM0-NEXT: MOV32mr $rip, 1, $noreg, @U2, $noreg, killed renamable $eax :: (store (s32) into @U2)
				; MEM0-NEXT: $eax = MOVPDI2DIrr $xmm0
				; MEM0-NEXT: MOV32mr $rip, 1, $noreg, @U3, $noreg, killed renamable $eax :: (store (s32) into @U3)
				; MEM0-NEXT: $eax = MOVPDI2DIrr $xmm0
				; MEM0-NEXT: MOV32mr $rip, 1, $noreg, @U4, $noreg, killed renamable $eax :: (store (s32) into @U4)
				; MEM0-NEXT: $eax = MOVPDI2DIrr $xmm0
				; MEM0-NEXT: MOV32mr $rip, 1, $noreg, @U5, $noreg, killed renamable $eax :: (store (s32) into @U5)
				; MEM0-NEXT: $eax = MOVPDI2DIrr $xmm0
				; MEM0-NEXT: MOV32mr $rip, 1, $noreg, @U6, $noreg, killed renamable $eax :: (store (s32) into @U6)
				; MEM0-NEXT: $eax = MOVPDI2DIrr $xmm0
				; MEM0-NEXT: MOV32mr $rip, 1, $noreg, @U7, $noreg, killed renamable $eax :: (store (s32) into @U7)
				; MEM0-NEXT: RET 0
				; MEM50-LABEL: name: func
				; MEM50: $eax = MOV32rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s32) from @D0)
				; MEM50-NEXT: MOV32mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $eax :: (store (s32) into %stack.0)
				; MEM50-NEXT: $eax = MOV32rm $rip, 1, $noreg, @D1, $noreg :: (dereferenceable load (s32) from @D1)
				; MEM50-NEXT: $xmm0 = MOVDI2PDIrr $eax
				; MEM50-NEXT: $eax = MOV32rm $rip, 1, $noreg, @D2, $noreg :: (dereferenceable load (s32) from @D2)
				; MEM50-NEXT: $xmm0 = MOVDI2PDIrr $eax
				; MEM50-NEXT: $eax = MOV32rm $rip, 1, $noreg, @D3, $noreg :: (dereferenceable load (s32) from @D3)
				; MEM50-NEXT: $xmm0 = MOVDI2PDIrr $eax
				; MEM50-NEXT: $eax = MOV32rm $rip, 1, $noreg, @D4, $noreg :: (dereferenceable load (s32) from @D4)
				; MEM50-NEXT: $xmm0 = MOVDI2PDIrr $eax
				; MEM50-NEXT: $eax = MOV32rm $rip, 1, $noreg, @D5, $noreg :: (dereferenceable load (s32) from @D5)
				; MEM50-NEXT: $xmm0 = MOVDI2PDIrr $eax
				; MEM50-NEXT: $eax = MOV32rm $rip, 1, $noreg, @D6, $noreg :: (dereferenceable load (s32) from @D6)
				; MEM50-NEXT: $xmm0 = MOVDI2PDIrr $eax
				; MEM50-NEXT: $eax = MOV32rm $rip, 1, $noreg, @D7, $noreg :: (dereferenceable load (s32) from @D7)
				; MEM50-NEXT: MOV32mr %stack.7, 1, $noreg, 0, $noreg, killed renamable $eax :: (store (s32) into %stack.7)
				; MEM50-NEXT: $eax = MOV32rm %stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %stack.0)
				; MEM50-NEXT: MOV32mr $rip, 1, $noreg, @U0, $noreg, killed renamable $eax :: (store (s32) into @U0)
				; MEM50-NEXT: $eax = MOVPDI2DIrr $xmm0
				; MEM50-NEXT: MOV32mr $rip, 1, $noreg, @U1, $noreg, killed renamable $eax :: (store (s32) into @U1)
				; MEM50-NEXT: $eax = MOVPDI2DIrr $xmm0
				; MEM50-NEXT: MOV32mr $rip, 1, $noreg, @U2, $noreg, killed renamable $eax :: (store (s32) into @U2)
				; MEM50-NEXT: $eax = MOVPDI2DIrr $xmm0
				; MEM50-NEXT: MOV32mr $rip, 1, $noreg, @U3, $noreg, killed renamable $eax :: (store (s32) into @U3)
				; MEM50-NEXT: $eax = MOVPDI2DIrr $xmm0
				; MEM50-NEXT: MOV32mr $rip, 1, $noreg, @U4, $noreg, killed renamable $eax :: (store (s32) into @U4)
				; MEM50-NEXT: $eax = MOVPDI2DIrr $xmm0
				; MEM50-NEXT: MOV32mr $rip, 1, $noreg, @U5, $noreg, killed renamable $eax :: (store (s32) into @U5)
				; MEM50-NEXT: $eax = MOVPDI2DIrr $xmm0
				; MEM50-NEXT: MOV32mr $rip, 1, $noreg, @U6, $noreg, killed renamable $eax :: (store (s32) into @U6)
				; MEM50-NEXT: $eax = MOV32rm %stack.7, 1, $noreg, 0, $noreg :: (load (s32) from %stack.7)
				; MEM50-NEXT: MOV32mr $rip, 1, $noreg, @U7, $noreg, killed renamable $eax :: (store (s32) into @U7)
				; MEM50-NEXT: RET 0
				; MEM100-LABEL: name: func
				; MEM100: $eax = MOV32rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s32) from @D0)
				; MEM100-NEXT: MOV32mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $eax :: (store (s32) into %stack.0)
				; MEM100-NEXT: $eax = MOV32rm $rip, 1, $noreg, @D1, $noreg :: (dereferenceable load (s32) from @D1)
				; MEM100-NEXT: MOV32mr %stack.1, 1, $noreg, 0, $noreg, killed renamable $eax :: (store (s32) into %stack.1)
				; MEM100-NEXT: $eax = MOV32rm $rip, 1, $noreg, @D2, $noreg :: (dereferenceable load (s32) from @D2)
				; MEM100-NEXT: $xmm0 = MOVDI2PDIrr $eax
				; MEM100-NEXT: $eax = MOV32rm $rip, 1, $noreg, @D3, $noreg :: (dereferenceable load (s32) from @D3)
				; MEM100-NEXT: MOV32mr %stack.3, 1, $noreg, 0, $noreg, killed renamable $eax :: (store (s32) into %stack.3)
				; MEM100-NEXT: $eax = MOV32rm $rip, 1, $noreg, @D4, $noreg :: (dereferenceable load (s32) from @D4)
				; MEM100-NEXT: $xmm0 = MOVDI2PDIrr $eax
				; MEM100-NEXT: $eax = MOV32rm $rip, 1, $noreg, @D5, $noreg :: (dereferenceable load (s32) from @D5)
				; MEM100-NEXT: MOV32mr %stack.5, 1, $noreg, 0, $noreg, killed renamable $eax :: (store (s32) into %stack.5)
				; MEM100-NEXT: $eax = MOV32rm $rip, 1, $noreg, @D6, $noreg :: (dereferenceable load (s32) from @D6)
				; MEM100-NEXT: MOV32mr %stack.6, 1, $noreg, 0, $noreg, killed renamable $eax :: (store (s32) into %stack.6)
				; MEM100-NEXT: $eax = MOV32rm $rip, 1, $noreg, @D7, $noreg :: (dereferenceable load (s32) from @D7)
				; MEM100-NEXT: MOV32mr %stack.7, 1, $noreg, 0, $noreg, killed renamable $eax :: (store (s32) into %stack.7)
				; MEM100-NEXT: $eax = MOV32rm %stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %stack.0)
				; MEM100-NEXT: MOV32mr $rip, 1, $noreg, @U0, $noreg, killed renamable $eax :: (store (s32) into @U0)
				; MEM100-NEXT: $eax = MOV32rm %stack.1, 1, $noreg, 0, $noreg :: (load (s32) from %stack.1)
				; MEM100-NEXT: MOV32mr $rip, 1, $noreg, @U1, $noreg, killed renamable $eax :: (store (s32) into @U1)
				; MEM100-NEXT: $eax = MOVPDI2DIrr $xmm0
				; MEM100-NEXT: MOV32mr $rip, 1, $noreg, @U2, $noreg, killed renamable $eax :: (store (s32) into @U2)
				; MEM100-NEXT: $eax = MOV32rm %stack.3, 1, $noreg, 0, $noreg :: (load (s32) from %stack.3)
				; MEM100-NEXT: MOV32mr $rip, 1, $noreg, @U3, $noreg, killed renamable $eax :: (store (s32) into @U3)
				; MEM100-NEXT: $eax = MOVPDI2DIrr $xmm0
				; MEM100-NEXT: MOV32mr $rip, 1, $noreg, @U4, $noreg, killed renamable $eax :: (store (s32) into @U4)
				; MEM100-NEXT: $eax = MOV32rm %stack.5, 1, $noreg, 0, $noreg :: (load (s32) from %stack.5)
				; MEM100-NEXT: MOV32mr $rip, 1, $noreg, @U5, $noreg, killed renamable $eax :: (store (s32) into @U5)
				; MEM100-NEXT: $eax = MOV32rm %stack.6, 1, $noreg, 0, $noreg :: (load (s32) from %stack.6)
				; MEM100-NEXT: MOV32mr $rip, 1, $noreg, @U6, $noreg, killed renamable $eax :: (store (s32) into @U6)
				; MEM100-NEXT: $eax = MOV32rm %stack.7, 1, $noreg, 0, $noreg :: (load (s32) from %stack.7)
				; MEM100-NEXT: MOV32mr $rip, 1, $noreg, @U7, $noreg, killed renamable $eax :: (store (s32) into @U7)
				; MEM100-NEXT: RET 0
				$eax = MOV32rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s32) from @D0)
				MOV32mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $eax :: (store (s32) into %stack.0)
				$eax = MOV32rm $rip, 1, $noreg, @D1, $noreg :: (dereferenceable load (s32) from @D1)
				MOV32mr %stack.1, 1, $noreg, 0, $noreg, killed renamable $eax :: (store (s32) into %stack.1)
				$eax = MOV32rm $rip, 1, $noreg, @D2, $noreg :: (dereferenceable load (s32) from @D2)
				MOV32mr %stack.2, 1, $noreg, 0, $noreg, killed renamable $eax :: (store (s32) into %stack.2)
				$eax = MOV32rm $rip, 1, $noreg, @D3, $noreg :: (dereferenceable load (s32) from @D3)
				MOV32mr %stack.3, 1, $noreg, 0, $noreg, killed renamable $eax :: (store (s32) into %stack.3)
				$eax = MOV32rm $rip, 1, $noreg, @D4, $noreg :: (dereferenceable load (s32) from @D4)
				MOV32mr %stack.4, 1, $noreg, 0, $noreg, killed renamable $eax :: (store (s32) into %stack.4)
				$eax = MOV32rm $rip, 1, $noreg, @D5, $noreg :: (dereferenceable load (s32) from @D5)
				MOV32mr %stack.5, 1, $noreg, 0, $noreg, killed renamable $eax :: (store (s32) into %stack.5)
				$eax = MOV32rm $rip, 1, $noreg, @D6, $noreg :: (dereferenceable load (s32) from @D6)
				MOV32mr %stack.6, 1, $noreg, 0, $noreg, killed renamable $eax :: (store (s32) into %stack.6)
				$eax = MOV32rm $rip, 1, $noreg, @D7, $noreg :: (dereferenceable load (s32) from @D7)
				MOV32mr %stack.7, 1, $noreg, 0, $noreg, killed renamable $eax :: (store (s32) into %stack.7)

				$eax = MOV32rm %stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %stack.0)
				MOV32mr $rip, 1, $noreg, @U0, $noreg, killed renamable $eax :: (store (s32) into @U0)
				$eax = MOV32rm %stack.1, 1, $noreg, 0, $noreg :: (load (s32) from %stack.1)
				MOV32mr $rip, 1, $noreg, @U1, $noreg, killed renamable $eax :: (store (s32) into @U1)
				$eax = MOV32rm %stack.2, 1, $noreg, 0, $noreg :: (load (s32) from %stack.2)
				MOV32mr $rip, 1, $noreg, @U2, $noreg, killed renamable $eax :: (store (s32) into @U2)
				$eax = MOV32rm %stack.3, 1, $noreg, 0, $noreg :: (load (s32) from %stack.3)
				MOV32mr $rip, 1, $noreg, @U3, $noreg, killed renamable $eax :: (store (s32) into @U3)
				$eax = MOV32rm %stack.4, 1, $noreg, 0, $noreg :: (load (s32) from %stack.4)
				MOV32mr $rip, 1, $noreg, @U4, $noreg, killed renamable $eax :: (store (s32) into @U4)
				$eax = MOV32rm %stack.5, 1, $noreg, 0, $noreg :: (load (s32) from %stack.5)
				MOV32mr $rip, 1, $noreg, @U5, $noreg, killed renamable $eax :: (store (s32) into @U5)
				$eax = MOV32rm %stack.6, 1, $noreg, 0, $noreg :: (load (s32) from %stack.6)
				MOV32mr $rip, 1, $noreg, @U6, $noreg, killed renamable $eax :: (store (s32) into @U6)
				$eax = MOV32rm %stack.7, 1, $noreg, 0, $noreg :: (load (s32) from %stack.7)
				MOV32mr $rip, 1, $noreg, @U7, $noreg, killed renamable $eax :: (store (s32) into @U7)
				RET 0

				...

This is an archive of the discontinued LLVM Phabricator instance.

[Spill2Reg][5/9] Code generation part 2.
Needs Review | Public

Details

Diff Detail

Unit Tests: Failed

Event Timeline

Revision Contents

Diff 449359

llvm/lib/CodeGen/Spill2Reg.cpp

llvm/test/CodeGen/X86/spill2reg_avoid_vector_instrs.mir

llvm/test/CodeGen/X86/spill2reg_disable_when_noimplicitfloat.mir

llvm/test/CodeGen/X86/spill2reg_mask_spills.mir

llvm/test/CodeGen/X86/spill2reg_simple_1_32bit.mir

llvm/test/CodeGen/X86/spill2reg_simple_1_64bit.mir

llvm/test/CodeGen/X86/spill2reg_simple_2.mir

llvm/test/CodeGen/X86/spill2reg_simple_3.mir

This is an archive of the discontinued LLVM Phabricator instance.

[Spill2Reg][5/9] Code generation part 2. (Needs Review | Public)

Details

Diff Detail

Unit Tests: Failed

Event Timeline

Revision Contents

Diff 449359

llvm/lib/CodeGen/Spill2Reg.cpp

llvm/test/CodeGen/X86/spill2reg_avoid_vector_instrs.mir

llvm/test/CodeGen/X86/spill2reg_disable_when_noimplicitfloat.mir

llvm/test/CodeGen/X86/spill2reg_mask_spills.mir

llvm/test/CodeGen/X86/spill2reg_simple_1_32bit.mir

llvm/test/CodeGen/X86/spill2reg_simple_1_64bit.mir

llvm/test/CodeGen/X86/spill2reg_simple_2.mir

llvm/test/CodeGen/X86/spill2reg_simple_3.mir

[Spill2Reg][5/9] Code generation part 2.
Needs Review | Public