diff --git a/llvm/lib/Target/PowerPC/PPCCallingConv.td b/llvm/lib/Target/PowerPC/PPCCallingConv.td --- a/llvm/lib/Target/PowerPC/PPCCallingConv.td +++ b/llvm/lib/Target/PowerPC/PPCCallingConv.td @@ -363,3 +363,25 @@ def CSR_64_AllRegs_AIX_Dflt_VSX : CalleeSavedRegs<(add CSR_64_AllRegs_Altivec, (sequence "VSL%u", 0, 19))>; + +def CSR_ALL_VSRP : CalleeSavedRegs<(sequence "VSRp%u", 0, 31)>; + +def CSR_VSRP : + CalleeSavedRegs<(add VSRp26, VSRp27, VSRp28, VSRp29, VSRp30, VSRp31)>; + +def CSR_SVR432_VSRP : CalleeSavedRegs<(add CSR_SVR432_Altivec, CSR_VSRP)>; + +def CSR_SVR464_VSRP : CalleeSavedRegs<(add CSR_PPC64_Altivec, CSR_VSRP)>; + +def CSR_SVR464_R2_VSRP : CalleeSavedRegs<(add CSR_SVR464_VSRP, X2)>; + +def CSR_SVR32_ColdCC_VSRP : CalleeSavedRegs<(add CSR_SVR32_ColdCC_Altivec, + (sub CSR_ALL_VSRP, VSRp17))>; + +def CSR_SVR64_ColdCC_VSRP : CalleeSavedRegs<(add CSR_SVR64_ColdCC, + (sub CSR_ALL_VSRP, VSRp17))>; + +def CSR_SVR64_ColdCC_R2_VSRP : CalleeSavedRegs<(add CSR_SVR64_ColdCC_VSRP, X2)>; + +def CSR_64_AllRegs_VSRP : + CalleeSavedRegs<(add CSR_64_AllRegs_VSX, CSR_ALL_VSRP)>; diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -1974,6 +1974,15 @@ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); + // Do not explicitly save the callee saved VSRp registers. + // The individual VSR subregisters will be saved instead. + SavedRegs.reset(PPC::VSRp26); + SavedRegs.reset(PPC::VSRp27); + SavedRegs.reset(PPC::VSRp28); + SavedRegs.reset(PPC::VSRp29); + SavedRegs.reset(PPC::VSRp30); + SavedRegs.reset(PPC::VSRp31); + // Save and clear the LR state. 
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); unsigned LR = RegInfo->getRARegister(); diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -183,6 +183,8 @@ if (!TM.isPPC64() && Subtarget.isAIXABI()) report_fatal_error("AnyReg unimplemented on 32-bit AIX."); if (Subtarget.hasVSX()) { + if (Subtarget.pairedVectorMemops()) + return CSR_64_AllRegs_VSRP_SaveList; if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) return CSR_64_AllRegs_AIX_Dflt_VSX_SaveList; return CSR_64_AllRegs_VSX_SaveList; @@ -210,6 +212,9 @@ if (Subtarget.isAIXABI()) report_fatal_error("Cold calling unimplemented on AIX."); if (TM.isPPC64()) { + if (Subtarget.pairedVectorMemops()) + return SaveR2 ? CSR_SVR64_ColdCC_R2_VSRP_SaveList + : CSR_SVR64_ColdCC_VSRP_SaveList; if (Subtarget.hasAltivec()) return SaveR2 ? CSR_SVR64_ColdCC_R2_Altivec_SaveList : CSR_SVR64_ColdCC_Altivec_SaveList; @@ -217,7 +222,9 @@ : CSR_SVR64_ColdCC_SaveList; } // 32-bit targets. - if (Subtarget.hasAltivec()) + if (Subtarget.pairedVectorMemops()) + return CSR_SVR32_ColdCC_VSRP_SaveList; + else if (Subtarget.hasAltivec()) return CSR_SVR32_ColdCC_Altivec_SaveList; else if (Subtarget.hasSPE()) return CSR_SVR32_ColdCC_SPE_SaveList; @@ -225,6 +232,8 @@ } // Standard calling convention CSRs. if (TM.isPPC64()) { + if (Subtarget.pairedVectorMemops()) + return SaveR2 ? CSR_SVR464_R2_VSRP_SaveList : CSR_SVR464_VSRP_SaveList; if (Subtarget.hasAltivec() && (!Subtarget.isAIXABI() || TM.getAIXExtendedAltivecABI())) { return SaveR2 ? 
CSR_PPC64_R2_Altivec_SaveList @@ -239,6 +248,8 @@ : CSR_AIX32_SaveList; return CSR_AIX32_SaveList; } + if (Subtarget.pairedVectorMemops()) + return CSR_SVR432_VSRP_SaveList; if (Subtarget.hasAltivec()) return CSR_SVR432_Altivec_SaveList; else if (Subtarget.hasSPE()) @@ -252,6 +263,8 @@ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); if (CC == CallingConv::AnyReg) { if (Subtarget.hasVSX()) { + if (Subtarget.pairedVectorMemops()) + return CSR_64_AllRegs_VSRP_RegMask; if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) return CSR_64_AllRegs_AIX_Dflt_VSX_RegMask; return CSR_64_AllRegs_VSX_RegMask; @@ -275,20 +288,32 @@ } if (CC == CallingConv::Cold) { - return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_SVR64_ColdCC_Altivec_RegMask - : CSR_SVR64_ColdCC_RegMask) - : (Subtarget.hasAltivec() ? CSR_SVR32_ColdCC_Altivec_RegMask - : (Subtarget.hasSPE() - ? CSR_SVR32_ColdCC_SPE_RegMask - : CSR_SVR32_ColdCC_RegMask)); + if (TM.isPPC64()) + return Subtarget.pairedVectorMemops() + ? CSR_SVR64_ColdCC_VSRP_RegMask + : (Subtarget.hasAltivec() ? CSR_SVR64_ColdCC_Altivec_RegMask + : CSR_SVR64_ColdCC_RegMask); + else + return Subtarget.pairedVectorMemops() + ? CSR_SVR32_ColdCC_VSRP_RegMask + : (Subtarget.hasAltivec() + ? CSR_SVR32_ColdCC_Altivec_RegMask + : (Subtarget.hasSPE() ? CSR_SVR32_ColdCC_SPE_RegMask + : CSR_SVR32_ColdCC_RegMask)); } - return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_PPC64_Altivec_RegMask - : CSR_PPC64_RegMask) - : (Subtarget.hasAltivec() - ? CSR_SVR432_Altivec_RegMask - : (Subtarget.hasSPE() ? CSR_SVR432_SPE_RegMask - : CSR_SVR432_RegMask)); + if (TM.isPPC64()) + return Subtarget.pairedVectorMemops() + ? CSR_SVR464_VSRP_RegMask + : (Subtarget.hasAltivec() ? CSR_PPC64_Altivec_RegMask + : CSR_PPC64_RegMask); + else + return Subtarget.pairedVectorMemops() + ? CSR_SVR432_VSRP_RegMask + : (Subtarget.hasAltivec() + ? CSR_SVR432_Altivec_RegMask + : (Subtarget.hasSPE() ? 
CSR_SVR432_SPE_RegMask + : CSR_SVR432_RegMask)); } const uint32_t* diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll b/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll --- a/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll +++ b/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll @@ -13,23 +13,29 @@ ; CHECK-LABEL: intrinsics1: ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -176(r1) ; CHECK-NEXT: .cfi_def_cfa_offset 176 ; CHECK-NEXT: .cfi_offset lr, 16 ; CHECK-NEXT: .cfi_offset r30, -16 -; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r0, 16(r1) -; CHECK-NEXT: stdu r1, -176(r1) -; CHECK-NEXT: # kill: def $v5 killed $v5 killed $vsrp18 def $vsrp18 -; CHECK-NEXT: # kill: def $v4 killed $v4 killed $vsrp18 def $vsrp18 -; CHECK-NEXT: # kill: def $v3 killed $v3 killed $vsrp17 def $vsrp17 -; CHECK-NEXT: # kill: def $v2 killed $v2 killed $vsrp17 def $vsrp17 -; CHECK-NEXT: xxlor vs0, v2, v2 -; CHECK-NEXT: xxlor vs1, v3, v3 -; CHECK-NEXT: stxvp vsp34, 128(r1) # 32-byte Folded Spill -; CHECK-NEXT: xxlor vs2, v4, v4 -; CHECK-NEXT: xxlor vs3, v5, v5 +; CHECK-NEXT: .cfi_offset v28, -80 +; CHECK-NEXT: .cfi_offset v29, -64 +; CHECK-NEXT: .cfi_offset v30, -48 +; CHECK-NEXT: .cfi_offset v31, -32 +; CHECK-NEXT: stxv v28, 96(r1) # 16-byte Folded Spill +; CHECK-NEXT: stxv v29, 112(r1) # 16-byte Folded Spill +; CHECK-NEXT: vmr v29, v3 +; CHECK-NEXT: vmr v28, v2 +; CHECK-NEXT: xxlor vs0, v28, v28 +; CHECK-NEXT: stxv v30, 128(r1) # 16-byte Folded Spill +; CHECK-NEXT: stxv v31, 144(r1) # 16-byte Folded Spill +; CHECK-NEXT: vmr v31, v5 +; CHECK-NEXT: vmr v30, v4 +; CHECK-NEXT: std r30, 160(r1) # 8-byte Folded Spill +; CHECK-NEXT: xxlor vs1, v29, v29 +; CHECK-NEXT: xxlor vs2, v30, v30 +; CHECK-NEXT: xxlor vs3, v31, v31 ; CHECK-NEXT: ld r30, 272(r1) -; CHECK-NEXT: stxvp vsp36, 96(r1) # 32-byte Folded Spill ; CHECK-NEXT: xxmtacc acc0 ; CHECK-NEXT: xvf16ger2pp acc0, v2, v4 ; CHECK-NEXT: xxmfacc acc0 @@ -39,17 +45,19 @@ ; CHECK-NEXT: lxvp 
vsp0, 64(r1) ; CHECK-NEXT: lxvp vsp2, 32(r1) ; CHECK-NEXT: xxmtacc acc0 -; CHECK-NEXT: lxvp vsp34, 128(r1) # 32-byte Folded Reload -; CHECK-NEXT: lxvp vsp36, 96(r1) # 32-byte Folded Reload -; CHECK-NEXT: xvf16ger2pp acc0, v2, v4 +; CHECK-NEXT: xvf16ger2pp acc0, v28, v30 +; CHECK-NEXT: lxv v31, 144(r1) # 16-byte Folded Reload +; CHECK-NEXT: lxv v30, 128(r1) # 16-byte Folded Reload +; CHECK-NEXT: lxv v29, 112(r1) # 16-byte Folded Reload +; CHECK-NEXT: lxv v28, 96(r1) # 16-byte Folded Reload ; CHECK-NEXT: xxmfacc acc0 ; CHECK-NEXT: stxv vs0, 48(r30) ; CHECK-NEXT: stxv vs1, 32(r30) ; CHECK-NEXT: stxv vs2, 16(r30) ; CHECK-NEXT: stxv vs3, 0(r30) +; CHECK-NEXT: ld r30, 160(r1) # 8-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 176 ; CHECK-NEXT: ld r0, 16(r1) -; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr ; @@ -61,17 +69,23 @@ ; CHECK-BE-NEXT: .cfi_def_cfa_offset 256 ; CHECK-BE-NEXT: .cfi_offset lr, 16 ; CHECK-BE-NEXT: .cfi_offset r30, -16 +; CHECK-BE-NEXT: .cfi_offset v28, -80 +; CHECK-BE-NEXT: .cfi_offset v29, -64 +; CHECK-BE-NEXT: .cfi_offset v30, -48 +; CHECK-BE-NEXT: .cfi_offset v31, -32 +; CHECK-BE-NEXT: stxv v28, 176(r1) # 16-byte Folded Spill +; CHECK-BE-NEXT: stxv v29, 192(r1) # 16-byte Folded Spill +; CHECK-BE-NEXT: vmr v29, v3 +; CHECK-BE-NEXT: vmr v28, v2 +; CHECK-BE-NEXT: xxlor vs0, v28, v28 +; CHECK-BE-NEXT: stxv v30, 208(r1) # 16-byte Folded Spill +; CHECK-BE-NEXT: stxv v31, 224(r1) # 16-byte Folded Spill +; CHECK-BE-NEXT: vmr v31, v5 +; CHECK-BE-NEXT: vmr v30, v4 ; CHECK-BE-NEXT: std r30, 240(r1) # 8-byte Folded Spill -; CHECK-BE-NEXT: # kill: def $v5 killed $v5 killed $vsrp18 def $vsrp18 -; CHECK-BE-NEXT: # kill: def $v4 killed $v4 killed $vsrp18 def $vsrp18 -; CHECK-BE-NEXT: # kill: def $v3 killed $v3 killed $vsrp17 def $vsrp17 -; CHECK-BE-NEXT: # kill: def $v2 killed $v2 killed $vsrp17 def $vsrp17 -; CHECK-BE-NEXT: xxlor vs0, v2, v2 -; CHECK-BE-NEXT: xxlor vs1, v3, v3 -; CHECK-BE-NEXT: stxvp vsp34, 208(r1) # 
32-byte Folded Spill -; CHECK-BE-NEXT: xxlor vs2, v4, v4 -; CHECK-BE-NEXT: xxlor vs3, v5, v5 -; CHECK-BE-NEXT: stxvp vsp36, 176(r1) # 32-byte Folded Spill +; CHECK-BE-NEXT: xxlor vs1, v29, v29 +; CHECK-BE-NEXT: xxlor vs2, v30, v30 +; CHECK-BE-NEXT: xxlor vs3, v31, v31 ; CHECK-BE-NEXT: ld r30, 368(r1) ; CHECK-BE-NEXT: xxmtacc acc0 ; CHECK-BE-NEXT: xvf16ger2pp acc0, v2, v4 @@ -83,9 +97,11 @@ ; CHECK-BE-NEXT: lxvp vsp0, 112(r1) ; CHECK-BE-NEXT: lxvp vsp2, 144(r1) ; CHECK-BE-NEXT: xxmtacc acc0 -; CHECK-BE-NEXT: lxvp vsp34, 208(r1) # 32-byte Folded Reload -; CHECK-BE-NEXT: lxvp vsp36, 176(r1) # 32-byte Folded Reload -; CHECK-BE-NEXT: xvf16ger2pp acc0, v2, v4 +; CHECK-BE-NEXT: xvf16ger2pp acc0, v28, v30 +; CHECK-BE-NEXT: lxv v31, 224(r1) # 16-byte Folded Reload +; CHECK-BE-NEXT: lxv v30, 208(r1) # 16-byte Folded Reload +; CHECK-BE-NEXT: lxv v29, 192(r1) # 16-byte Folded Reload +; CHECK-BE-NEXT: lxv v28, 176(r1) # 16-byte Folded Reload ; CHECK-BE-NEXT: xxmfacc acc0 ; CHECK-BE-NEXT: stxv vs1, 16(r30) ; CHECK-BE-NEXT: stxv vs0, 0(r30)