Index: lib/Target/PowerPC/PPCInstrInfo.cpp =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.cpp +++ lib/Target/PowerPC/PPCInstrInfo.cpp @@ -46,6 +46,11 @@ #define GET_INSTRINFO_CTOR_DTOR #include "PPCGenInstrInfo.inc" + +STATISTIC(StoreVSRSPILLVec, "Number of vector spills to stack of gpfprc"); +STATISTIC(StoreVSRSPILLGpr, "Number of gpr spills to stack of gpfprc"); +STATISTIC(NumMTVSR, "Number of gpr spills to gpfprc"); + static cl::opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden, cl::desc("Disable analysis for CTR loops")); @@ -280,6 +285,7 @@ case PPC::QVLFSXs: case PPC::QVLFDXb: case PPC::RESTORE_VRSAVE: + case PPC::VSRSPILL_LD: // Check for the operands added by addFrameReference (the immediate is the // offset which defaults to 0). if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() && @@ -310,6 +316,7 @@ case PPC::QVSTFSXs: case PPC::QVSTFDXb: case PPC::SPILL_VRSAVE: + case PPC::VSRSPILL_ST: // Check for the operands added by addFrameReference (the immediate is the // offset which defaults to 0). 
if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() && @@ -894,7 +901,18 @@ BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg).addReg(SrcReg); getKillRegState(KillSrc); return; - } + } else if (PPC::G8RCRegClass.contains(SrcReg) && + PPC::VSFRCRegClass.contains(DestReg)) { + BuildMI(MBB, I, DL, get(PPC::MTVSRD), DestReg).addReg(SrcReg); + NumMTVSR++; + getKillRegState(KillSrc); + return; + } else if (PPC::G8RCRegClass.contains(DestReg) && + PPC::VSFRCRegClass.contains(SrcReg)) { + BuildMI(MBB, I, DL, get(PPC::MFVSRD), DestReg).addReg(SrcReg); + getKillRegState(KillSrc); + return; + } unsigned Opc; if (PPC::GPRCRegClass.contains(DestReg, SrcReg)) @@ -1038,6 +1056,11 @@ getKillRegState(isKill)), FrameIdx)); NonRI = true; + } else if (PPC::GPFPRCRegClass.hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::VSRSPILL_ST)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); } else { llvm_unreachable("Unknown regclass!"); } @@ -1159,6 +1182,9 @@ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVLFDXb), DestReg), FrameIdx)); NonRI = true; + } else if (PPC::GPFPRCRegClass.hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::VSRSPILL_LD), + DestReg), FrameIdx)); } else { llvm_unreachable("Unknown regclass!"); } @@ -1962,6 +1988,45 @@ MI.setDesc(get(Opcode)); return true; } + case PPC::VSRSPILL_LD: { + unsigned TargetReg = MI.getOperand(0).getReg(); + if (PPC::VSFRCRegClass.contains(TargetReg)) + MI.setDesc(get(PPC::DFLOADf64)); + else + MI.setDesc(get(PPC::LD)); + return true; + } + case PPC::VSRSPILL_ST: { + unsigned TargetReg = MI.getOperand(0).getReg(); + if (PPC::VSFRCRegClass.contains(TargetReg)) { + StoreVSRSPILLVec++; + MI.setDesc(get(PPC::DFSTOREf64)); + } else { + StoreVSRSPILLGpr++; + MI.setDesc(get(PPC::STD)); + } + return true; + } + case PPC::VSRSPILL_LDX: { + unsigned TargetReg = MI.getOperand(0).getReg(); + if (PPC::VSFRCRegClass.contains(TargetReg)) + 
MI.setDesc(get(PPC::LXSDX)); + else + MI.setDesc(get(PPC::LDX)); + return true; + } + case PPC::VSRSPILL_STX: { + unsigned TargetReg = MI.getOperand(0).getReg(); + if (PPC::VSFRCRegClass.contains(TargetReg)) { + StoreVSRSPILLVec++; + MI.setDesc(get(PPC::STXSDX)); + } else { + StoreVSRSPILLGpr++; + MI.setDesc(get(PPC::STDX)); + } + return true; + } + case PPC::CFENCE8: { auto Val = MI.getOperand(0).getReg(); BuildMI(MBB, MI, DL, get(PPC::CMPW), PPC::CR7).addReg(Val).addReg(Val); Index: lib/Target/PowerPC/PPCInstrVSX.td =================================================================== --- lib/Target/PowerPC/PPCInstrVSX.td +++ lib/Target/PowerPC/PPCInstrVSX.td @@ -47,6 +47,13 @@ let ParserMatchClass = PPCRegVSSRCAsmOperand; } +def PPCRegGPFPRCAsmOperand : AsmOperandClass { + let Name = "RegGPFPRC"; let PredicateMethod = "isVSRegNumber"; +} + +def gpfprc : RegisterOperand<GPFPRC> { + let ParserMatchClass = PPCRegGPFPRCAsmOperand; +} // Little-endian-specific nodes. def SDT_PPClxvd2x : SDTypeProfile<1, 1, [ SDTCisVT<0, v2f64>, SDTCisPtrTy<1> @@ -2703,6 +2710,23 @@ (f32 (DFLOADf32 iaddr:$src))>; } // end HasP9Vector, AddedComplexity +let Predicates = [HasP9Vector] in { + let isPseudo = 1 in { + let mayStore = 1 in { + def VSRSPILL_STX : Pseudo<(outs), (ins gpfprc:$XT, memrr:$dst), + "#VSRSPILL_STX", []>; + def VSRSPILL_ST : Pseudo<(outs), (ins gpfprc:$XT, memrix:$dst), + "#VSRSPILL_ST", []>; + } + let mayLoad = 1 in { + def VSRSPILL_LDX : Pseudo<(outs gpfprc:$XT), (ins memrr:$src), + "#VSRSPILL_LDX", []>; + def VSRSPILL_LD : Pseudo<(outs gpfprc:$XT), (ins memrix:$src), + "#VSRSPILL_LD", []>; + + } + } +} // Integer extend helper dags 32 -> 64 def AnyExts { dag A = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32); Index: lib/Target/PowerPC/PPCRegisterInfo.cpp =================================================================== --- lib/Target/PowerPC/PPCRegisterInfo.cpp +++ lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -57,6 +57,10 @@ AlwaysBasePointer("ppc-always-use-base-pointer", 
cl::Hidden, cl::init(false), cl::desc("Force the use of a base pointer in every function")); +static cl::opt<bool> +EnableGPRToVecSpills("ppc-enable-gpr-to-vsr-spills", cl::Hidden, cl::init(false), + cl::desc("Enable spills from gpr to vsr rather than stack")); + PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM) : PPCGenRegisterInfo(TM.isPPC64() ? PPC::LR8 : PPC::LR, TM.isPPC64() ? 0 : 1, @@ -82,6 +86,8 @@ // VSX ImmToIdxMap[PPC::DFLOADf32] = PPC::LXSSPX; ImmToIdxMap[PPC::DFLOADf64] = PPC::LXSDX; + ImmToIdxMap[PPC::VSRSPILL_LD] = PPC::VSRSPILL_LDX; + ImmToIdxMap[PPC::VSRSPILL_ST] = PPC::VSRSPILL_STX; ImmToIdxMap[PPC::DFSTOREf32] = PPC::STXSSPX; ImmToIdxMap[PPC::DFSTOREf64] = PPC::STXSDX; ImmToIdxMap[PPC::LXV] = PPC::LXVX; @@ -314,6 +320,10 @@ // With VSX, we can inflate various sub-register classes to the full VSX // register set. + // For pwr9 we enable gpr to vector spills + if (Subtarget.hasP9Vector() && EnableGPRToVecSpills && + RC == &PPC::G8RCRegClass) + return &PPC::GPFPRCRegClass; if (RC == &PPC::F8RCRegClass) return &PPC::VSFRCRegClass; else if (RC == &PPC::VRRCRegClass) Index: lib/Target/PowerPC/PPCRegisterInfo.td =================================================================== --- lib/Target/PowerPC/PPCRegisterInfo.td +++ lib/Target/PowerPC/PPCRegisterInfo.td @@ -305,6 +305,8 @@ VF22, VF21, VF20)>; def VSFRC : RegisterClass<"PPC", [f64], 64, (add F8RC, VFRC)>; +def GPFPRC : RegisterClass<"PPC", [i64, f64], 64, (add G8RC, VSFRC)>; + // Register class for single precision scalars in VSX registers def VSSRC : RegisterClass<"PPC", [f32], 32, (add VSFRC)>; Index: test/CodeGen/PowerPC/gpr-vsr-spill.ll =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/gpr-vsr-spill.ll @@ -0,0 +1,24 @@ +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-enable-gpr-to-vsr-spills < %s | FileCheck %s +define signext i32 @foo(i32 signext %a, i32 signext %b) { +entry: + %cmp = icmp slt i32 %a, %b + br i1 %cmp, label 
%if.then, label %if.end + +if.then: ; preds = %entry + %0 = tail call i32 asm "add $0, $1, $2", "=r,r,r,~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29}"(i32 %a, i32 %b) + %mul = mul nsw i32 %0, %a + %add = add i32 %b, %a + %tmp = add i32 %add, %mul + br label %if.end + +if.end: ; preds = %if.then, %entry + %e.0 = phi i32 [ %tmp, %if.then ], [ undef, %entry ] + ret i32 %e.0 +; CHECK: @foo +; CHECK: mr 31, 3 +; CHECK: mtvsrd 0, 4 +; CHECK: mffprd 30, 0 +; CHECK: add 30, 31, 30 +; CHECK: mffprd 3, 0 +; CHECK: add 3, 3, 31 +} Index: test/CodeGen/PowerPC/gpr-vsr-spill2.ll =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/gpr-vsr-spill2.ll @@ -0,0 +1,129 @@ +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-enable-gpr-to-vsr-spills < %s | FileCheck %s + +%struct.move_s = type { i32, i32, i32, i32, i32, i32 } + +@nodes = external local_unnamed_addr global i32, align 4 +@rootnodecount = external local_unnamed_addr global [512 x i32], align 4 +@pv_length = external local_unnamed_addr global [300 x i32], align 4 +@captures = external local_unnamed_addr global i32, align 4 +@cur_score = external local_unnamed_addr global i32, align 4 +@rootlosers = external local_unnamed_addr global [300 x i32], align 4 + +declare void @gen(%struct.move_s*) + +declare zeroext i32 @in_check() + +declare void @make(%struct.move_s*, i32 signext) + +declare zeroext i32 @check_legal(%struct.move_s*, i32 signext, i32 signext) + +declare signext i32 @search(i32 signext, i32 signext, i32 signext, i32 signext) + +declare void @post_thinking(i32 signext) + +define void @search_root(%struct.move_s* noalias nocapture sret %agg.result, i32 signext %originalalpha, i32 signext %originalbeta, i32 signext %depth) { +; CHECK: mr [[NEWREG:[0-9]+]], {{[0-9]+}} +; CHECK: std [[NEWREG]], {{[0-9]+}}(1) # 8-byte 
Folded Spill + +entry: + %moves = alloca [512 x %struct.move_s], align 4 + %move_ordering = alloca [512 x i32], align 4 + %call5 = tail call zeroext i32 @in_check() + store i32 0, i32* undef, align 4, !tbaa !0 + br i1 undef, label %if.then17, label %if.else + +if.then17: ; preds = %entry + call void @gen(%struct.move_s* nonnull undef) + store i32 0, i32* @captures, align 4, !tbaa !0 + call void @make(%struct.move_s* nonnull undef, i32 signext undef) + br label %if.end51 + +if.else: ; preds = %entry + %arrayidx50 = getelementptr inbounds [512 x %struct.move_s], [512 x %struct.move_s]* %moves, i64 0, i64 0 + call void @gen(%struct.move_s* nonnull %arrayidx50) + br label %if.end51 + +if.end51: ; preds = %if.else, %if.then17 + %arrayidx52.pre-phi = phi %struct.move_s* [ %arrayidx50, %if.else ], [ undef, %if.then17 ] + br label %while.cond.outer + +while.cond.outer: ; preds = %if.end248, %if.end51 + %root_score.0.ph = phi i32 [ %root_score.0.ph, %if.end248 ], [ -1000000, %if.end51 ] + %alpha.0.ph = phi i32 [ %root_score.0.ph, %if.end248 ], [ %originalalpha, %if.end51 ] + br label %while.cond + +while.cond: ; preds = %while.body, %while.cond.outer + %arrayidx.i.2 = getelementptr inbounds [512 x i32], [512 x i32]* %move_ordering, i64 0, i64 0 + %0 = load i32, i32* %arrayidx.i.2, align 4, !tbaa !3 + %cmp1.i.2 = icmp sgt i32 %0, 0 + %.best.022.i.2 = select i1 %cmp1.i.2, i32 %0, i32 0 + %indvars.iv.next.i.2 = or i64 0, 3 + %1 = load i32, i32* undef, align 4, !tbaa !3 + %cmp1.i.3 = icmp sgt i32 %1, %.best.022.i.2 + %2 = trunc i64 %indvars.iv.next.i.2 to i32 + %..3 = select i1 %cmp1.i.3, i32 %2, i32 0 + %.best.022.i.3 = select i1 %cmp1.i.3, i32 %1, i32 %.best.022.i.2 + %arrayidx.i.4 = getelementptr inbounds [512 x i32], [512 x i32]* %move_ordering, i64 0, i64 undef + %3 = load i32, i32* %arrayidx.i.4, align 4, !tbaa !3 + %cmp1.i.4 = icmp sgt i32 %3, %.best.022.i.3 + %..4 = select i1 %cmp1.i.4, i32 undef, i32 %..3 + %.best.022.i.4 = select i1 %cmp1.i.4, i32 %3, i32 
%.best.022.i.3 + %indvars.iv.next.i.4 = or i64 0, 5 + %4 = load i32, i32* undef, align 4, !tbaa !3 + %cmp1.i.5 = icmp sgt i32 %4, %.best.022.i.4 + %5 = trunc i64 %indvars.iv.next.i.4 to i32 + %..5 = select i1 %cmp1.i.5, i32 %5, i32 %..4 + %indvars.iv.next.i.5 = or i64 0, 6 + %arrayidx.i.6 = getelementptr inbounds [512 x i32], [512 x i32]* %move_ordering, i64 0, i64 %indvars.iv.next.i.5 + %6 = load i32, i32* %arrayidx.i.6, align 4, !tbaa !3 + %cmp1.i.6 = icmp sgt i32 %6, 0 + %7 = trunc i64 %indvars.iv.next.i.5 to i32 + %..6 = select i1 %cmp1.i.6, i32 %7, i32 %..5 + %.best.022.i.6 = select i1 %cmp1.i.6, i32 %6, i32 0 + %indvars.iv.next.i.6 = or i64 0, 7 + %8 = load i32, i32* undef, align 4, !tbaa !3 + %cmp1.i.7 = icmp sgt i32 %8, %.best.022.i.6 + %9 = trunc i64 %indvars.iv.next.i.6 to i32 + %..7 = select i1 %cmp1.i.7, i32 %9, i32 %..6 + %cmp4.i = icmp sgt i32 %..7, -1000000 + br i1 %cmp4.i, label %while.body, label %while.end + +while.body: ; preds = %while.cond + store i32 -1000000, i32* undef, align 4, !tbaa !3 + %arrayidx60 = getelementptr inbounds [300 x i32], [300 x i32]* @rootlosers, i64 0, i64 undef + %10 = load i32, i32* %arrayidx60, align 4, !tbaa !3 + %tobool61 = icmp eq i32 %10, 0 + %brmerge = or i1 %tobool61, false + br i1 %brmerge, label %if.end66, label %while.cond + +if.end66: ; preds = %while.body + %11 = load i32, i32* @nodes, align 4, !tbaa !3 + %call78 = call zeroext i32 @check_legal(%struct.move_s* nonnull %arrayidx52.pre-phi, i32 signext undef, i32 signext %call5) + %tobool79 = icmp eq i32 %call78, 0 + br i1 %tobool79, label %if.end248, label %if.then80 + +if.then80: ; preds = %if.end66 + %sub100 = sub nsw i32 0, %alpha.0.ph + %call102 = call signext i32 @search(i32 signext undef, i32 signext %sub100, i32 signext undef, i32 signext 0) + unreachable + +if.end248: ; preds = %if.end66 + store i32 %root_score.0.ph, i32* @cur_score, align 4, !tbaa !3 + %arrayidx416655 = getelementptr inbounds [300 x i32], [300 x i32]* @pv_length, i64 0, i64 undef + 
%12 = load i32, i32* %arrayidx416655, align 4, !tbaa !3 + store i32 %12, i32* undef, align 4, !tbaa !3 + call void @post_thinking(i32 signext %root_score.0.ph) + %sub448 = sub nsw i32 0, %11 + %arrayidx450 = getelementptr inbounds [512 x i32], [512 x i32]* @rootnodecount, i64 0, i64 undef + store i32 %sub448, i32* %arrayidx450, align 4, !tbaa !3 + br label %while.cond.outer + +while.end: ; preds = %while.cond + ret void +} + +!0 = !{!1, !1, i64 0} +!1 = !{!"omnipotent char", !2, i64 0} +!2 = !{!"Simple C/C++ TBAA"} +!3 = !{!4, !4, i64 0} +!4 = !{!"int", !1, i64 0}