Index: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -46,6 +46,12 @@
 #define GET_INSTRINFO_CTOR_DTOR
 #include "PPCGenInstrInfo.inc"
 
+STATISTIC(NumStoreSPILLVSRRCAsVec,
+          "Number of spillvsrrc spilled to stack as vec");
+STATISTIC(NumStoreSPILLVSRRCAsGpr,
+          "Number of spillvsrrc spilled to stack as gpr");
+STATISTIC(NumGPRtoVSRSpill, "Number of gpr spills to spillvsrrc");
+
 static cl::
 opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,
             cl::desc("Disable analysis for CTR loops"));
@@ -280,6 +286,7 @@
   case PPC::QVLFSXs:
   case PPC::QVLFDXb:
   case PPC::RESTORE_VRSAVE:
+  case PPC::SPILLTOVSR_LD:
     // Check for the operands added by addFrameReference (the immediate is the
     // offset which defaults to 0).
     if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() &&
@@ -333,6 +340,7 @@
   case PPC::QVSTFSXs:
   case PPC::QVSTFDXb:
   case PPC::SPILL_VRSAVE:
+  case PPC::SPILLTOVSR_ST:
     // Check for the operands added by addFrameReference (the immediate is the
     // offset which defaults to 0).
     if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() &&
@@ -917,7 +925,18 @@
     BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg).addReg(SrcReg);
     getKillRegState(KillSrc);
     return;
-  }
+  } else if (PPC::G8RCRegClass.contains(SrcReg) &&
+             PPC::VSFRCRegClass.contains(DestReg)) {
+    BuildMI(MBB, I, DL, get(PPC::MTVSRD), DestReg).addReg(SrcReg);
+    NumGPRtoVSRSpill++;
+    getKillRegState(KillSrc);
+    return;
+  } else if (PPC::VSFRCRegClass.contains(SrcReg) &&
+             PPC::G8RCRegClass.contains(DestReg)) {
+    BuildMI(MBB, I, DL, get(PPC::MFVSRD), DestReg).addReg(SrcReg);
+    getKillRegState(KillSrc);
+    return;
+  }
 
   unsigned Opc;
   if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
@@ -1061,6 +1080,11 @@
                                                getKillRegState(isKill)),
                                        FrameIdx));
     NonRI = true;
+  } else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) {
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILLTOVSR_ST))
+                                           .addReg(SrcReg,
+                                                   getKillRegState(isKill)),
+                                       FrameIdx));
   } else {
     llvm_unreachable("Unknown regclass!");
   }
@@ -1182,6 +1206,9 @@
     NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVLFDXb),
                                                DestReg), FrameIdx));
     NonRI = true;
+  } else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) {
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILLTOVSR_LD),
+                                               DestReg), FrameIdx));
   } else {
     llvm_unreachable("Unknown regclass!");
   }
@@ -1995,6 +2022,48 @@
     MI.setDesc(get(Opcode));
     return true;
   }
+  case PPC::SPILLTOVSR_LD: {
+    unsigned TargetReg = MI.getOperand(0).getReg();
+    if (PPC::VSFRCRegClass.contains(TargetReg)) {
+      MI.setDesc(get(PPC::DFLOADf64));
+      return expandPostRAPseudo(MI);
+    }
+    else
+      MI.setDesc(get(PPC::LD));
+    return true;
+  }
+  case PPC::SPILLTOVSR_ST: {
+    unsigned SrcReg = MI.getOperand(0).getReg();
+    if (PPC::VSFRCRegClass.contains(SrcReg)) {
+      NumStoreSPILLVSRRCAsVec++;
+      MI.setDesc(get(PPC::DFSTOREf64));
+      return expandPostRAPseudo(MI);
+    } else {
+      NumStoreSPILLVSRRCAsGpr++;
+      MI.setDesc(get(PPC::STD));
+    }
+    return true;
+  }
+  case PPC::SPILLTOVSR_LDX: {
+    unsigned TargetReg = MI.getOperand(0).getReg();
+    if (PPC::VSFRCRegClass.contains(TargetReg))
+      MI.setDesc(get(PPC::LXSDX));
+    else
+      MI.setDesc(get(PPC::LDX));
+    return true;
+  }
+  case PPC::SPILLTOVSR_STX: {
+    unsigned SrcReg = MI.getOperand(0).getReg();
+    if (PPC::VSFRCRegClass.contains(SrcReg)) {
+      NumStoreSPILLVSRRCAsVec++;
+      MI.setDesc(get(PPC::STXSDX));
+    } else {
+      NumStoreSPILLVSRRCAsGpr++;
+      MI.setDesc(get(PPC::STDX));
+    }
+    return true;
+  }
   case PPC::CFENCE8: {
     auto Val = MI.getOperand(0).getReg();
     BuildMI(MBB, MI, DL, get(PPC::CMPD), PPC::CR7).addReg(Val).addReg(Val);
Index: llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td
===================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td
@@ -47,6 +47,13 @@
   let ParserMatchClass = PPCRegVSSRCAsmOperand;
 }
 
+def PPCRegSPILLTOVSRRCAsmOperand : AsmOperandClass {
+  let Name = "RegSPILLTOVSRRC"; let PredicateMethod = "isVSRegNumber";
+}
+
+def spilltovsrrc : RegisterOperand<SPILLTOVSRRC> {
+  let ParserMatchClass = PPCRegSPILLTOVSRRCAsmOperand;
+}
 // Little-endian-specific nodes.
 def SDT_PPClxvd2x : SDTypeProfile<1, 1, [
   SDTCisVT<0, v2f64>, SDTCisPtrTy<1>
@@ -2863,6 +2870,23 @@
                               (f32 (DFLOADf32 ixaddr:$src))>;
 } // end HasP9Vector, AddedComplexity
 
+let Predicates = [HasP9Vector] in {
+  let isPseudo = 1 in {
+    let mayStore = 1 in {
+      def SPILLTOVSR_STX : Pseudo<(outs), (ins spilltovsrrc:$XT, memrr:$dst),
+                                  "#SPILLTOVSR_STX", []>;
+      def SPILLTOVSR_ST : Pseudo<(outs), (ins spilltovsrrc:$XT, memrix:$dst),
+                                  "#SPILLTOVSR_ST", []>;
+    }
+    let mayLoad = 1 in {
+      def SPILLTOVSR_LDX : Pseudo<(outs spilltovsrrc:$XT), (ins memrr:$src),
+                                  "#SPILLTOVSR_LDX", []>;
+      def SPILLTOVSR_LD : Pseudo<(outs spilltovsrrc:$XT), (ins memrix:$src),
+                                  "#SPILLTOVSR_LD", []>;
+
+    }
+  }
+}
 // Integer extend helper dags 32 -> 64
 def AnyExts {
   dag A = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32);
Index: llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -21,6 +21,7 @@
 #include "PPCTargetMachine.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -49,6 +50,9 @@
 #define GET_REGINFO_TARGET_DESC
 #include "PPCGenRegisterInfo.inc"
 
+STATISTIC(InflateGPRC, "Number of gprc inputs for getLargestLegalClass");
+STATISTIC(InflateGP8RC, "Number of g8rc inputs for getLargestLegalClass");
+
 static cl::opt<bool>
 EnableBasePointer("ppc-use-base-pointer", cl::Hidden, cl::init(true),
          cl::desc("Enable use of a base pointer for complex stack frames"));
@@ -57,6 +61,10 @@
 AlwaysBasePointer("ppc-always-use-base-pointer", cl::Hidden, cl::init(false),
          cl::desc("Force the use of a base pointer in every function"));
 
+static cl::opt<bool>
+EnableGPRToVecSpills("ppc-enable-gpr-to-vsr-spills", cl::Hidden, cl::init(false),
+                     cl::desc("Enable spills from gpr to vsr rather than stack"));
+
 PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM)
   : PPCGenRegisterInfo(TM.isPPC64() ? PPC::LR8 : PPC::LR,
                        TM.isPPC64() ? 0 : 1,
@@ -82,6 +90,8 @@
   // VSX
   ImmToIdxMap[PPC::DFLOADf32] = PPC::LXSSPX;
   ImmToIdxMap[PPC::DFLOADf64] = PPC::LXSDX;
+  ImmToIdxMap[PPC::SPILLTOVSR_LD] = PPC::SPILLTOVSR_LDX;
+  ImmToIdxMap[PPC::SPILLTOVSR_ST] = PPC::SPILLTOVSR_STX;
   ImmToIdxMap[PPC::DFSTOREf32] = PPC::STXSSPX;
   ImmToIdxMap[PPC::DFSTOREf64] = PPC::STXSDX;
   ImmToIdxMap[PPC::LXV] = PPC::LXVX;
@@ -328,6 +338,18 @@
 
   // With VSX, we can inflate various sub-register classes to the full VSX
   // register set.
+  // For Power9 we allow the user to enable GPR to vector spills.
+  // FIXME: Currently limited to spilling GP8RC. A follow on patch will add
+  // support to spill GPRC.
+  if (TM.isELFv2ABI()) {
+    if (Subtarget.hasP9Vector() && EnableGPRToVecSpills &&
+        RC == &PPC::G8RCRegClass) {
+      InflateGP8RC++;
+      return &PPC::SPILLTOVSRRCRegClass;
+    }
+    if (RC == &PPC::GPRCRegClass && EnableGPRToVecSpills)
+      InflateGPRC++;
+  }
   if (RC == &PPC::F8RCRegClass)
     return &PPC::VSFRCRegClass;
   else if (RC == &PPC::VRRCRegClass)
Index: llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.td
===================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.td
+++ llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -305,6 +305,11 @@
                              VF22, VF21, VF20)>;
 def VSFRC : RegisterClass<"PPC", [f64], 64, (add F8RC, VFRC)>;
 
+// Allow spilling GPR's into caller-saved VSR's.
+def SPILLTOVSRRC : RegisterClass<"PPC", [i64, f64], 64, (add G8RC, (sub VSFRC,
+                                (sequence "VF%u", 31, 20),
+                                (sequence "F%u", 31, 14)))>;
+
 // Register class for single precision scalars in VSX registers
 def VSSRC : RegisterClass<"PPC", [f32], 32, (add VSFRC)>;
 
Index: llvm/trunk/test/CodeGen/PowerPC/gpr-vsr-spill.ll
===================================================================
--- llvm/trunk/test/CodeGen/PowerPC/gpr-vsr-spill.ll
+++ llvm/trunk/test/CodeGen/PowerPC/gpr-vsr-spill.ll
@@ -0,0 +1,24 @@
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-enable-gpr-to-vsr-spills < %s | FileCheck %s
+define signext i32 @foo(i32 signext %a, i32 signext %b) {
+entry:
+  %cmp = icmp slt i32 %a, %b
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %0 = tail call i32 asm "add $0, $1, $2", "=r,r,r,~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29}"(i32 %a, i32 %b)
+  %mul = mul nsw i32 %0, %a
+  %add = add i32 %b, %a
+  %tmp = add i32 %add, %mul
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  %e.0 = phi i32 [ %tmp, %if.then ], [ undef, %entry ]
+  ret i32 %e.0
+; CHECK: @foo
+; CHECK: mr [[NEWREG:[0-9]+]], 3
+; CHECK: mtvsrd [[NEWREG2:[0-9]+]], 4
+; CHECK: mffprd [[REG1:[0-9]+]], [[NEWREG2]]
+; CHECK: add {{[0-9]+}}, [[NEWREG]], [[REG1]]
+; CHECK: mffprd [[REG2:[0-9]+]], [[NEWREG2]]
+; CHECK: add {{[0-9]+}}, [[REG2]], [[NEWREG]]
+}
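
Note on the intended effect (illustrative sketch, not part of the patch): with -ppc-enable-gpr-to-vsr-spills on a Power9 ELFv2 target, a 64-bit GPR that the allocator would otherwise spill to a stack slot can instead be held in one of the caller-saved VSRs admitted by SPILLTOVSRRC. Roughly, and with made-up register numbers and stack offset, the emitted code changes along these lines:

    # default: spill r5 to a stack slot (offset illustrative)
    std 5, 120(1)        # store r5 to the spill slot
    ...
    ld 5, 120(1)         # reload r5

    # with -ppc-enable-gpr-to-vsr-spills: keep r5 in a VSR instead
    mtvsrd 3, 5          # vs3 <- r5, no memory traffic
    ...
    mffprd 5, 3          # r5 <- vs3

The mtvsrd/mffprd pair is what the CHECK lines in gpr-vsr-spill.ll above verify; the specific registers and offset here are for illustration only.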