diff --git a/llvm/lib/CodeGen/Spill2Reg.cpp b/llvm/lib/CodeGen/Spill2Reg.cpp
--- a/llvm/lib/CodeGen/Spill2Reg.cpp
+++ b/llvm/lib/CodeGen/Spill2Reg.cpp
@@ -16,6 +16,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "AllocationOrder.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LiveRegUnits.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -29,6 +30,9 @@
 
 using namespace llvm;
 
+#define DEBUG_TYPE "Spill2Reg"
+STATISTIC(NumSpill2RegInstrs, "Number of spills/reloads replaced by spill2reg");
+
 namespace {
 
 class Spill2Reg : public MachineFunctionPass {
@@ -198,8 +202,7 @@
 }
 
 bool Spill2Reg::isProfitable(const MachineInstr *MI) const {
-  // TODO: Unimplemented.
-  return true;
+  return TII->isSpill2RegProfitable(MI, TRI, MRI);
 }
 
 bool Spill2Reg::allAccessesProfitable(const StackSlotDataEntry &Entry) const {
@@ -223,7 +226,36 @@
-// Replace stack-based spills/reloads with register-based ones.
+// Replace the stack-based spills/reloads of \p Entry with register-based ones
+// that go through \p VectorReg. Every stack access is erased once its
+// replacement has been inserted right before it. The spilled/reloaded
+// registers are expected to be physical.
 void Spill2Reg::replaceStackWithReg(StackSlotDataEntry &Entry,
                                     Register VectorReg) {
-  // TODO: Unimplemented
+  for (StackSlotDataEntry::MIData &SpillData : Entry.Spills) {
+    MachineInstr *StackSpill = SpillData.MI;
+    assert(SpillData.MO->isReg() && "Expected register MO");
+    Register OldReg = SpillData.MO->getReg();
+    assert(OldReg.isPhysical() && "Otherwise we need to removeInterval()");
+
+    // Copy the spilled value into VectorReg right before the stack spill.
+    TII->spill2RegInsertToVectorReg(
+        VectorReg, OldReg, SpillData.SpillBits, StackSpill->getParent(),
+        /*InsertBeforeIt=*/StackSpill->getIterator(), TRI);
+    // Spill to stack is no longer needed.
+    StackSpill->eraseFromParent();
+  }
+
+  for (StackSlotDataEntry::MIData &ReloadData : Entry.Reloads) {
+    MachineInstr *StackReload = ReloadData.MI;
+    assert(ReloadData.MO->isReg() && "Expected Reg MO");
+    Register OldReg = ReloadData.MO->getReg();
+    assert(OldReg.isPhysical() && "Otherwise we need to removeInterval()");
+
+    // Read the value back from VectorReg right before the stack reload.
+    TII->spill2RegExtractFromVectorReg(
+        OldReg, VectorReg, ReloadData.SpillBits, StackReload->getParent(),
+        /*InsertBeforeIt=*/StackReload->getIterator(), TRI);
+    // Reload from stack is no longer needed.
+    StackReload->eraseFromParent();
+  }
 }
 
 void Spill2Reg::calculateLiveRegs(StackSlotDataEntry &Entry,
@@ -255,6 +287,8 @@
 
     // Replace stack accesses with register accesses.
     replaceStackWithReg(Entry, *PhysVectorRegOpt);
+
+    NumSpill2RegInstrs += Entry.Spills.size() + Entry.Reloads.size();
   }
 }
 
diff --git a/llvm/test/CodeGen/X86/spill2reg_avoid_vector_instrs.mir b/llvm/test/CodeGen/X86/spill2reg_avoid_vector_instrs.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/spill2reg_avoid_vector_instrs.mir
@@ -0,0 +1,48 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+sse4.1 --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 | FileCheck %s
+# RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+sse4.1 --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 | FileCheck %s --check-prefix=FORCED
+
+# Confirm that spill2reg is not applied when a vector instruction is nearby,
+# except when -spill2reg-vec-instrs=99999 is passed (the FORCED run).
+
+--- |
+  @D0 = dso_local local_unnamed_addr global i64 0, align 4
+  @U0 = dso_local local_unnamed_addr global i64 0, align 4
+  define void @func() { ret void }
+...
+---
+name: func
+alignment: 16
+tracksRegLiveness: true
+tracksDebugUserValues: true
+frameInfo:
+  maxAlignment: 4
+stack:
+  - { id: 0, type: spill-slot, size: 8, alignment: 4 }
+machineFunctionInfo: {}
+body: |
+
+
+  bb.0:
+    ; CHECK-LABEL: name: func
+    ; CHECK: $rax = MOV64rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s64) from @D0)
+    ; CHECK-NEXT: $xmm15 = MOV64toPQIrr $rax
+    ; CHECK-NEXT: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $rax :: (store (s64) into %stack.0)
+    ; CHECK-NEXT: $rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0)
+    ; CHECK-NEXT: MOV64mr $rip, 1, $noreg, @U0, $noreg, killed renamable $rax :: (store (s64) into @U0)
+    ; CHECK-NEXT: RET 0
+    ; FORCED-LABEL: name: func
+    ; FORCED: $rax = MOV64rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s64) from @D0)
+    ; FORCED-NEXT: $xmm15 = MOV64toPQIrr $rax
+    ; FORCED-NEXT: $xmm0 = MOV64toPQIrr $rax
+    ; FORCED-NEXT: $rax = MOVPQIto64rr $xmm0
+    ; FORCED-NEXT: MOV64mr $rip, 1, $noreg, @U0, $noreg, killed renamable $rax :: (store (s64) into @U0)
+    ; FORCED-NEXT: RET 0
+    $rax = MOV64rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s64) from @D0)
+    $xmm15 = MOV64toPQIrr $rax
+    MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $rax :: (store (s64) into %stack.0)
+    ; reload
+    $rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0)
+    MOV64mr $rip, 1, $noreg, @U0, $noreg, killed renamable $rax :: (store (s64) into @U0)
+    RET 0
+...
diff --git a/llvm/test/CodeGen/X86/spill2reg_disable_when_noimplicitfloat.mir b/llvm/test/CodeGen/X86/spill2reg_disable_when_noimplicitfloat.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/spill2reg_disable_when_noimplicitfloat.mir
@@ -0,0 +1,37 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+sse4.1 --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 | FileCheck %s
+
+# Check that spill2reg is disabled for functions marked noimplicitfloat.
+
+--- |
+  @D0 = dso_local local_unnamed_addr global i32 0, align 4
+  @U0 = dso_local local_unnamed_addr global i32 0, align 4
+  define void @func() #0 { ret void }
+
+  attributes #0 = { noimplicitfloat }
+...
+---
+name: func
+alignment: 16
+tracksRegLiveness: true
+tracksDebugUserValues: true
+frameInfo:
+  maxAlignment: 4
+stack:
+  - { id: 0, type: spill-slot, size: 4, alignment: 4 }
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: func
+    ; CHECK: $eax = MOV32rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s32) from @D0)
+    ; CHECK-NEXT: MOV32mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $eax :: (store (s32) into %stack.0)
+    ; CHECK-NEXT: $eax = MOV32rm %stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %stack.0)
+    ; CHECK-NEXT: MOV32mr $rip, 1, $noreg, @U0, $noreg, killed renamable $eax :: (store (s32) into @U0)
+    ; CHECK-NEXT: RET 0
+    $eax = MOV32rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s32) from @D0)
+    MOV32mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $eax :: (store (s32) into %stack.0)
+
+    $eax = MOV32rm %stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %stack.0)
+    MOV32mr $rip, 1, $noreg, @U0, $noreg, killed renamable $eax :: (store (s32) into @U0)
+    RET 0
+...
diff --git a/llvm/test/CodeGen/X86/spill2reg_mask_spills.mir b/llvm/test/CodeGen/X86/spill2reg_mask_spills.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/spill2reg_mask_spills.mir
@@ -0,0 +1,33 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+avx512f --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 | FileCheck %s
+
+# Check that spills/reloads of $k mask registers are left on the stack.
+
+--- |
+  @D0 = dso_local local_unnamed_addr global i32 0, align 4
+  @U0 = dso_local local_unnamed_addr global i32 0, align 4
+  define void @func() { ret void }
+...
+---
+name: func
+alignment: 16
+tracksRegLiveness: true
+tracksDebugUserValues: true
+frameInfo:
+  maxAlignment: 4
+stack:
+  - { id: 0, type: spill-slot, size: 4, alignment: 4 }
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    liveins: $k1
+    ; CHECK-LABEL: name: func
+    ; CHECK: liveins: $k1
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: KMOVWmk %stack.0, 1, $noreg, 0, $noreg, killed renamable $k1 :: (store (s16) into %stack.0)
+    ; CHECK-NEXT: renamable $k1 = KMOVWkm %stack.0, 1, $noreg, 0, $noreg :: (load (s16) from %stack.0)
+    ; CHECK-NEXT: RET 0
+    KMOVWmk %stack.0, 1, $noreg, 0, $noreg, killed renamable $k1 :: (store (s16) into %stack.0)
+    renamable $k1 = KMOVWkm %stack.0, 1, $noreg, 0, $noreg :: (load (s16) from %stack.0)
+    RET 0
+...
diff --git a/llvm/test/CodeGen/X86/spill2reg_simple_1_32bit.mir b/llvm/test/CodeGen/X86/spill2reg_simple_1_32bit.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/spill2reg_simple_1_32bit.mir
@@ -0,0 +1,47 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+sse4.1 --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 | FileCheck %s
+# RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=-sse4.1 --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 | FileCheck --check-prefix=NOSSE %s
+
+# Single 32-bit spill-reload pair (kept on the stack in the -sse4.1 run):
+#   spill  stack.0
+#   reload stack.0
+
+--- |
+  @D0 = dso_local local_unnamed_addr global i32 0, align 4
+  @U0 = dso_local local_unnamed_addr global i32 0, align 4
+  define void @func() { ret void }
+...
+---
+name: func
+alignment: 16
+tracksRegLiveness: true
+tracksDebugUserValues: true
+frameInfo:
+  maxAlignment: 4
+stack:
+  - { id: 0, type: spill-slot, size: 4, alignment: 4 }
+machineFunctionInfo: {}
+body: |
+
+
+  bb.0:
+    ; spill
+    ; CHECK-LABEL: name: func
+    ; CHECK: $eax = MOV32rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s32) from @D0)
+    ; CHECK-NEXT: $xmm0 = MOVDI2PDIrr $eax
+    ; CHECK-NEXT: $eax = MOVPDI2DIrr $xmm0
+    ; CHECK-NEXT: MOV32mr $rip, 1, $noreg, @U0, $noreg, killed renamable $eax :: (store (s32) into @U0)
+    ; CHECK-NEXT: RET 0
+    ; NOSSE-LABEL: name: func
+    ; NOSSE: $eax = MOV32rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s32) from @D0)
+    ; NOSSE-NEXT: MOV32mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $eax :: (store (s32) into %stack.0)
+    ; NOSSE-NEXT: $eax = MOV32rm %stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %stack.0)
+    ; NOSSE-NEXT: MOV32mr $rip, 1, $noreg, @U0, $noreg, killed renamable $eax :: (store (s32) into @U0)
+    ; NOSSE-NEXT: RET 0
+    $eax = MOV32rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s32) from @D0)
+    MOV32mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $eax :: (store (s32) into %stack.0)
+    ; reload
+    $eax = MOV32rm %stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %stack.0)
+    MOV32mr $rip, 1, $noreg, @U0, $noreg, killed renamable $eax :: (store (s32) into @U0)
+    RET 0
+...
diff --git a/llvm/test/CodeGen/X86/spill2reg_simple_1_64bit.mir b/llvm/test/CodeGen/X86/spill2reg_simple_1_64bit.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/spill2reg_simple_1_64bit.mir
@@ -0,0 +1,47 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+sse4.1 --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 | FileCheck %s
+# RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=-sse4.1 --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 | FileCheck --check-prefix=NOSSE %s
+
+# Single 64-bit spill-reload pair (kept on the stack in the -sse4.1 run):
+#   spill  stack.0
+#   reload stack.0
+
+--- |
+  @D0 = dso_local local_unnamed_addr global i64 0, align 4
+  @U0 = dso_local local_unnamed_addr global i64 0, align 4
+  define void @func() { ret void }
+...
+---
+name: func
+alignment: 16
+tracksRegLiveness: true
+tracksDebugUserValues: true
+frameInfo:
+  maxAlignment: 4
+stack:
+  - { id: 0, type: spill-slot, size: 8, alignment: 4 }
+machineFunctionInfo: {}
+body: |
+
+
+  bb.0:
+    ; spill
+    ; CHECK-LABEL: name: func
+    ; CHECK: $rax = MOV64rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s64) from @D0)
+    ; CHECK-NEXT: $xmm0 = MOV64toPQIrr $rax
+    ; CHECK-NEXT: $rax = MOVPQIto64rr $xmm0
+    ; CHECK-NEXT: MOV64mr $rip, 1, $noreg, @U0, $noreg, killed renamable $rax :: (store (s64) into @U0)
+    ; CHECK-NEXT: RET 0
+    ; NOSSE-LABEL: name: func
+    ; NOSSE: $rax = MOV64rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s64) from @D0)
+    ; NOSSE-NEXT: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $rax :: (store (s64) into %stack.0)
+    ; NOSSE-NEXT: $rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0)
+    ; NOSSE-NEXT: MOV64mr $rip, 1, $noreg, @U0, $noreg, killed renamable $rax :: (store (s64) into @U0)
+    ; NOSSE-NEXT: RET 0
+    $rax = MOV64rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s64) from @D0)
+    MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $rax :: (store (s64) into %stack.0)
+    ; reload
+    $rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0)
+    MOV64mr $rip, 1, $noreg, @U0, $noreg, killed renamable $rax :: (store (s64) into @U0)
+    RET 0
+...
diff --git a/llvm/test/CodeGen/X86/spill2reg_simple_2.mir b/llvm/test/CodeGen/X86/spill2reg_simple_2.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/spill2reg_simple_2.mir
@@ -0,0 +1,52 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+sse4.1 --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 | FileCheck %s
+
+# Two overlapping spill-reload pairs; each slot gets its own vector register:
+#   spill  stack.0
+#   spill  stack.1
+#   reload stack.0
+#   reload stack.1
+
+--- |
+  @D0 = dso_local local_unnamed_addr global i32 0, align 4
+  @D1 = dso_local local_unnamed_addr global i32 0, align 4
+  @U0 = dso_local local_unnamed_addr global i32 0, align 4
+  @U1 = dso_local local_unnamed_addr global i32 0, align 4
+  define void @func() { ret void }
+...
+---
+name: func
+alignment: 16
+tracksRegLiveness: true
+tracksDebugUserValues: true
+frameInfo:
+  maxAlignment: 4
+stack:
+  - { id: 0, type: spill-slot, size: 4, alignment: 4 }
+  - { id: 1, type: spill-slot, size: 4, alignment: 4 }
+machineFunctionInfo: {}
+body: |
+
+  bb.0:
+    ; CHECK-LABEL: name: func
+    ; CHECK: $eax = MOV32rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s32) from @D0)
+    ; CHECK-NEXT: $xmm0 = MOVDI2PDIrr $eax
+    ; CHECK-NEXT: $eax = MOV32rm $rip, 1, $noreg, @D1, $noreg :: (dereferenceable load (s32) from @D1)
+    ; CHECK-NEXT: $xmm1 = MOVDI2PDIrr $eax
+    ; CHECK-NEXT: $eax = MOVPDI2DIrr $xmm0
+    ; CHECK-NEXT: MOV32mr $rip, 1, $noreg, @U0, $noreg, killed renamable $eax :: (store (s32) into @U0)
+    ; CHECK-NEXT: $eax = MOVPDI2DIrr $xmm1
+    ; CHECK-NEXT: MOV32mr $rip, 1, $noreg, @U1, $noreg, killed renamable $eax :: (store (s32) into @U1)
+    ; CHECK-NEXT: RET 0
+    $eax = MOV32rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s32) from @D0)
+    MOV32mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $eax :: (store (s32) into %stack.0)
+    $eax = MOV32rm $rip, 1, $noreg, @D1, $noreg :: (dereferenceable load (s32) from @D1)
+    MOV32mr %stack.1, 1, $noreg, 0, $noreg, killed renamable $eax :: (store (s32) into %stack.1)
+
+    $eax = MOV32rm %stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %stack.0)
+    MOV32mr $rip, 1, $noreg, @U0, $noreg, killed renamable $eax :: (store (s32) into @U0)
+    $eax = MOV32rm %stack.1, 1, $noreg, 0, $noreg :: (load (s32) from %stack.1)
+    MOV32mr $rip, 1, $noreg, @U1, $noreg, killed renamable $eax :: (store (s32) into @U1)
+    RET 0
+
+...