diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -1175,3 +1175,21 @@ Intrinsic<[llvm_double_ty], [llvm_i32_ty], []>; } + +// Paired Single Intrinsics +let TargetPrefix = "ppc" in { +def int_ppc_paired_l: Intrinsic<[llvm_v2f32_ty], [llvm_ptr_ty, llvm_i1_ty, llvm_i8_ty], [IntrReadMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; +def int_ppc_paired_st: Intrinsic<[], [llvm_v2f32_ty, llvm_ptr_ty, llvm_i1_ty, llvm_i8_ty], [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; +def int_ppc_paired_madds0: Intrinsic<[llvm_v2f32_ty], [llvm_v2f32_ty, llvm_v2f32_ty, llvm_v2f32_ty], []>; +def int_ppc_paired_madds1: Intrinsic<[llvm_v2f32_ty], [llvm_v2f32_ty, llvm_v2f32_ty, llvm_v2f32_ty], []>; +def int_ppc_paired_merge00: Intrinsic<[llvm_v2f32_ty], [llvm_v2f32_ty, llvm_v2f32_ty]>; +def int_ppc_paired_merge01: Intrinsic<[llvm_v2f32_ty], [llvm_v2f32_ty, llvm_v2f32_ty]>; +def int_ppc_paired_merge10: Intrinsic<[llvm_v2f32_ty], [llvm_v2f32_ty, llvm_v2f32_ty]>; +def int_ppc_paired_merge11: Intrinsic<[llvm_v2f32_ty], [llvm_v2f32_ty, llvm_v2f32_ty]>; +def int_ppc_paired_muls0: Intrinsic<[llvm_v2f32_ty], [llvm_v2f32_ty, llvm_v2f32_ty], []>; +def int_ppc_paired_muls1: Intrinsic<[llvm_v2f32_ty], [llvm_v2f32_ty, llvm_v2f32_ty], []>; +def int_ppc_paired_sel: Intrinsic<[llvm_v2f32_ty], [llvm_v2f32_ty, llvm_v2f32_ty, llvm_v2f32_ty], []>; +def int_ppc_paired_sum0: Intrinsic<[llvm_v2f32_ty], [llvm_v2f32_ty, llvm_v2f32_ty, llvm_v2f32_ty], []>; +def int_ppc_paired_sum1: Intrinsic<[llvm_v2f32_ty], [llvm_v2f32_ty, llvm_v2f32_ty, llvm_v2f32_ty], []>; +def int_ppc_broadway_dcbz_l : Intrinsic<[], [llvm_ptr_ty], []>; +} diff --git a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp --- a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -467,6 +467,11 @@ 
Inst.addOperand(MCOperand::createReg(FRegs[getReg()])); } + void addRegPSRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(PSRegs[getReg()])); + } + void addRegVFRCOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::createReg(VFRegs[getReg()])); diff --git a/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp --- a/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp +++ b/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -110,6 +110,12 @@ return decodeRegisterClass(Inst, RegNo, FRegs); } +static DecodeStatus DecodePSRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, PSRegs); +} + static DecodeStatus DecodeVFRCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -152,34 +152,27 @@ using llvm::MCPhysReg; -#define DEFINE_PPC_REGCLASSES \ - static const MCPhysReg RRegs[32] = PPC_REGS0_31(PPC::R); \ - static const MCPhysReg XRegs[32] = PPC_REGS0_31(PPC::X); \ - static const MCPhysReg FRegs[32] = PPC_REGS0_31(PPC::F); \ - static const MCPhysReg SPERegs[32] = PPC_REGS0_31(PPC::S); \ - static const MCPhysReg VFRegs[32] = PPC_REGS0_31(PPC::VF); \ - static const MCPhysReg VRegs[32] = PPC_REGS0_31(PPC::V); \ - static const MCPhysReg RRegsNoR0[32] = \ - PPC_REGS_NO0_31(PPC::ZERO, PPC::R); \ - static const MCPhysReg XRegsNoX0[32] = \ - PPC_REGS_NO0_31(PPC::ZERO8, PPC::X); \ - static const MCPhysReg VSRegs[64] = \ - PPC_REGS_LO_HI(PPC::VSL, PPC::V); \ - static const MCPhysReg VSFRegs[64] = \ - 
PPC_REGS_LO_HI(PPC::F, PPC::VF); \ - static const MCPhysReg VSSRegs[64] = \ - PPC_REGS_LO_HI(PPC::F, PPC::VF); \ - static const MCPhysReg CRBITRegs[32] = { \ - PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN, \ - PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN, \ - PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, \ - PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, \ - PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN, \ - PPC::CR5LT, PPC::CR5GT, PPC::CR5EQ, PPC::CR5UN, \ - PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN, \ - PPC::CR7LT, PPC::CR7GT, PPC::CR7EQ, PPC::CR7UN}; \ - static const MCPhysReg CRRegs[8] = { \ - PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3, \ - PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7} +#define DEFINE_PPC_REGCLASSES \ + static const MCPhysReg RRegs[32] = PPC_REGS0_31(PPC::R); \ + static const MCPhysReg XRegs[32] = PPC_REGS0_31(PPC::X); \ + static const MCPhysReg FRegs[32] = PPC_REGS0_31(PPC::F); \ + static const MCPhysReg PSRegs[32] = PPC_REGS0_31(PPC::PSF); \ + static const MCPhysReg SPERegs[32] = PPC_REGS0_31(PPC::S); \ + static const MCPhysReg VFRegs[32] = PPC_REGS0_31(PPC::VF); \ + static const MCPhysReg VRegs[32] = PPC_REGS0_31(PPC::V); \ + static const MCPhysReg RRegsNoR0[32] = PPC_REGS_NO0_31(PPC::ZERO, PPC::R); \ + static const MCPhysReg XRegsNoX0[32] = PPC_REGS_NO0_31(PPC::ZERO8, PPC::X); \ + static const MCPhysReg VSRegs[64] = PPC_REGS_LO_HI(PPC::VSL, PPC::V); \ + static const MCPhysReg VSFRegs[64] = PPC_REGS_LO_HI(PPC::F, PPC::VF); \ + static const MCPhysReg VSSRegs[64] = PPC_REGS_LO_HI(PPC::F, PPC::VF); \ + static const MCPhysReg CRBITRegs[32] = { \ + PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN, PPC::CR1LT, PPC::CR1GT, \ + PPC::CR1EQ, PPC::CR1UN, PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, \ + PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, PPC::CR4LT, PPC::CR4GT, \ + PPC::CR4EQ, PPC::CR4UN, PPC::CR5LT, PPC::CR5GT, PPC::CR5EQ, PPC::CR5UN, \ + PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN, PPC::CR7LT, PPC::CR7GT, \ + PPC::CR7EQ, 
PPC::CR7UN}; \ + static const MCPhysReg CRRegs[8] = {PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3, \ + PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7} #endif // LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCTARGETDESC_H diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td --- a/llvm/lib/Target/PowerPC/PPC.td +++ b/llvm/lib/Target/PowerPC/PPC.td @@ -66,6 +66,9 @@ def FeatureFPU : SubtargetFeature<"fpu","HasFPU","true", "Enable classic FPU instructions", [FeatureHardFloat]>; +def FeaturePaired : SubtargetFeature<"paired","HasPaired","true", + "Enable PPC750CL Paired-Single instructions", + [FeatureFPU]>; def FeatureAltivec : SubtargetFeature<"altivec","HasAltivec", "true", "Enable Altivec instructions", [FeatureFPU]>; @@ -467,6 +470,9 @@ def : Processor<"750", G4Itineraries, [Directive750, FeatureFRES, FeatureFRSQRTE, FeatureMFTB]>; +def : Processor<"750cl", G3Itineraries, [Directive750, + FeatureFRES, FeatureFRSQRTE, + FeatureMFTB, FeaturePaired]>; def : Processor<"g3", G3Itineraries, [Directive750, FeatureFRES, FeatureFRSQRTE, FeatureMFTB]>; diff --git a/llvm/lib/Target/PowerPC/PPCCallingConv.td b/llvm/lib/Target/PowerPC/PPCCallingConv.td --- a/llvm/lib/Target/PowerPC/PPCCallingConv.td +++ b/llvm/lib/Target/PowerPC/PPCCallingConv.td @@ -61,6 +61,7 @@ CCIfType<[f64], CCAssignToReg<[F1]>>, CCIfType<[f128], CCIfSubtarget<"hasP9Vector()", CCAssignToReg<[V2]>>>, + CCIfType<[v2f32], CCIfSubtarget<"hasPaired()", CCAssignToReg<[PSF1]>>>, CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64], CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2]>>> @@ -95,11 +96,13 @@ CCIfSubtarget<"hasP9Vector()", CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>, + CCIfType<[v2f32], CCIfSubtarget<"hasPaired()", + CCAssignToReg<[PSF1, PSF2, PSF3, PSF4, PSF5, PSF6, PSF7, PSF8]>>>, // Vector types returned as "direct" go into V2 .. V9; note that only the // ELFv2 ABI fully utilizes all these registers. 
CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64], CCIfSubtarget<"hasAltivec()", - CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>> + CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>, ]>; // No explicit register is specified for the AnyReg calling convention. The @@ -151,6 +154,9 @@ CCIfType<[f128], CCIfSubtarget<"hasP9Vector()", CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>, + CCIfType<[v2f32], + CCIfSubtarget<"hasPaired()", + CCAssignToReg<[PSF1, PSF2, PSF3, PSF4, PSF5, PSF6, PSF7, PSF8]>>>, CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64], CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>> @@ -231,6 +237,8 @@ // put vector arguments in vector registers before putting them on the stack. let Entry = 1 in def CC_PPC32_SVR4 : CallingConv<[ + CCIfType<[v2f32], CCIfSubtarget<"hasPaired()", + CCAssignToReg<[PSF1, PSF2, PSF3, PSF4, PSF5, PSF6, PSF7, PSF8]>>>, // The first 12 Vector arguments are passed in AltiVec registers. CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64], CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2, V3, V4, V5, V6, V7, @@ -264,6 +272,10 @@ CCCustom<"CC_PPC32_SVR4_Custom_Dummy"> ]>; +def CSR_Paired : CalleeSavedRegs<(add PSF14, PSF15, PSF16, PSF17, PSF18, PSF19, + PSF20, PSF21, PSF22, PSF23, PSF24, PSF25, + PSF26, PSF27, PSF28, PSF29, PSF30, PSF31)>; + def CSR_Altivec : CalleeSavedRegs<(add V20, V21, V22, V23, V24, V25, V26, V27, V28, V29, V30, V31)>; @@ -280,6 +292,8 @@ S23, S24, S25, S26, S27, S28, S29, S30, S31 )>; +def CSR_SVR432_Paired : CalleeSavedRegs<(add CSR_SVR432, CSR_Paired)>; + def CSR_SVR432_Altivec : CalleeSavedRegs<(add CSR_SVR432, CSR_Altivec)>; def CSR_SVR432_SPE : CalleeSavedRegs<(add CSR_SVR432_COMM, CSR_SPE)>; @@ -323,6 +337,8 @@ def CSR_SVR32_ColdCC : CalleeSavedRegs<(add CSR_SVR32_ColdCC_Common, F0, (sequence "F%u", 2, 31))>; +def CSR_SVR32_ColdCC_Paired : CalleeSavedRegs<(add CSR_SVR32_ColdCC, + PSF0, (sequence "PSF%u", 2, 31))>; def CSR_SVR32_ColdCC_Altivec : 
CalleeSavedRegs<(add CSR_SVR32_ColdCC, (sequence "V%u", 0, 1), diff --git a/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/llvm/lib/Target/PowerPC/PPCInstrFormats.td --- a/llvm/lib/Target/PowerPC/PPCInstrFormats.td +++ b/llvm/lib/Target/PowerPC/PPCInstrFormats.td @@ -2111,6 +2111,128 @@ let Inst{31} = RC; } +// Paired-Single instruction formats +class PSForm_xdab xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> D; + bits<5> A; + bits<5> B; + + let Pattern = pattern; + + bit RC = 0; + + let Inst{6-10} = D; + let Inst{11-15} = A; + let Inst{16-20} = B; + let Inst{21-30} = xo; + let Inst{31} = RC; +} + +class PSForm_adabc xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> D; + bits<5> A; + bits<5> B; + bits<5> C; + + let Pattern = pattern; + + bit RC = 0; + + let Inst{6-10} = D; + let Inst{11-15} = A; + let Inst{16-20} = B; + let Inst{21-25} = C; + let Inst{26-30} = xo; + let Inst{31} = RC; +} + +class PSQForm_xw op, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> D; + bits<5> A; + bits<5> B; + bit W; + bits<3> I; + + let Pattern = pattern; + + let Inst{6-10} = D; + let Inst{11-15} = A; + let Inst{16-20} = B; + let Inst{21} = W; + let Inst{22-24} = I; + let Inst{25-30} = op; + let Inst{31} = 0; // no record mode supported +} + +class PSQForm_dw op, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + :I { + bits<5> D; + bits<5> A; + bit W; + bits<3> I; + bits<12> d; + + let Pattern = pattern; + + let Inst{6-10} = D; + let Inst{11-15} = A; + let Inst{16} = W; + let Inst{17-19} = I; + let Inst{20-31} = d; +} + +class PSForm_xcrab pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<3> D; + bits<5> A; + bits<5> B; + + let Inst{6-8} = D; + let Inst{9-10} = 0; + let Inst{11-15} = A; + let Inst{16-20} = B; + let Inst{21-23} = 0; + let Inst{24} = high; + let 
Inst{25} = ordered; + let Inst{26-31} = 0; +} + +class PSForm_x0ab xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : PSForm_xdab { + let D = 0; +} + +class PSForm_xd0b xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : PSForm_xdab { + let A = 0; +} + +class PSForm_adab0 xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : PSForm_adabc { + let C = 0; +} +class PSForm_ada0c xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : PSForm_adabc { + let B = 0; +} +class PSForm_ad0b0 xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : PSForm_adab0 { + let A = 0; +} + //===----------------------------------------------------------------------===// // EmitTimePseudo won't have encoding information for the [MC]CodeEmitter // stuff diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -563,6 +563,12 @@ def f4rc : RegisterOperand { let ParserMatchClass = PPCRegF4RCAsmOperand; } +def PPCRegPSRCAsmOperand : AsmOperandClass { + let Name = "RegPSRC"; let PredicateMethod = "isRegNumber"; +} +def psrc : RegisterOperand { + let ParserMatchClass = PPCRegPSRCAsmOperand; +} def PPCRegVRRCAsmOperand : AsmOperandClass { let Name = "RegVRRC"; let PredicateMethod = "isRegNumber"; } @@ -1016,6 +1022,7 @@ def IsPPC6xx : Predicate<"Subtarget->isPPC6xx()">; def IsE500 : Predicate<"Subtarget->isE500()">; def HasSPE : Predicate<"Subtarget->hasSPE()">; +def HasPaired : Predicate<"Subtarget->hasPaired()">; def HasICBT : Predicate<"Subtarget->hasICBT()">; def HasPartwordAtomics : Predicate<"Subtarget->hasPartwordAtomics()">; def NoNaNsFPMath @@ -3456,6 +3463,7 @@ (FCPSGNS (COPY_TO_REGCLASS $frA, F4RC), $frB)>; } +include "PPCInstrPaired.td" include "PPCInstrAltivec.td" include "PPCInstrSPE.td" include "PPCInstr64Bit.td" diff --git 
a/llvm/lib/Target/PowerPC/PPCInstrPaired.td b/llvm/lib/Target/PowerPC/PPCInstrPaired.td new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/PowerPC/PPCInstrPaired.td @@ -0,0 +1,230 @@ +//===-- PPCInstrPaired.td - The PowerPC Paired Single Extension -*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file describes the Paired Single extension to the PowerPC instruction set. +// +//===----------------------------------------------------------------------===// + +multiclass PSForm_xdabr xo, dag OOL, dag IOL, string asmbase, + string asmstr, InstrItinClass itin, list pattern> { + let BaseName = asmbase in { + def NAME : PSForm_xdab, RecFormRel; + let Defs = [CR1] in + def _rec : PSForm_xdab, isRecordForm, RecFormRel; + } +} + +multiclass PSForm_xd0br xo, dag OOL, dag IOL, string asmbase, + string asmstr, InstrItinClass itin, list pattern> { + let BaseName = asmbase in { + def NAME : PSForm_xd0b, RecFormRel; + let Defs = [CR1] in + def _rec : PSForm_xd0b, isRecordForm, RecFormRel; + } +} + +multiclass PSForm_adabcr xo, dag OOL, dag IOL, string asmbase, + string asmstr, InstrItinClass itin, list pattern> { + let BaseName = asmbase in { + def NAME : PSForm_adabc, RecFormRel; + let Defs = [CR1] in + def _rec : PSForm_adabc, isRecordForm, RecFormRel; + } +} + +multiclass PSForm_ada0cr xo, dag OOL, dag IOL, string asmbase, + string asmstr, InstrItinClass itin, list pattern> { + let BaseName = asmbase in { + def NAME : PSForm_ada0c, RecFormRel; + let Defs = [CR1] in + def _rec : PSForm_ada0c, isRecordForm, RecFormRel; + } +} + +multiclass PSForm_adab0r xo, dag OOL, dag IOL, string asmbase, + string asmstr, InstrItinClass itin, list pattern> { + let BaseName = asmbase in { + def NAME : PSForm_adab0, 
RecFormRel; + let Defs = [CR1] in + def _rec : PSForm_adab0, isRecordForm, RecFormRel; + } +} + +multiclass PSForm_ad0b0r xo, dag OOL, dag IOL, string asmbase, + string asmstr, InstrItinClass itin, list pattern> { + let BaseName = asmbase in { + def NAME : PSForm_ad0b0, RecFormRel; + let Defs = [CR1] in + def _rec : PSForm_ad0b0, isRecordForm, RecFormRel; + } +} + +let Predicates = [HasPaired] in { + // Technically not part of the 750cl since it is a proprietary instruction + // found on the Nintendo versions of the chip. + def DCBZ_L : PSForm_x0ab<1014, (outs), (ins memrr:$dst), "dcbz_l $dst", + IIC_LdStDCBF, [(int_ppc_broadway_dcbz_l xoaddr:$dst)]>; + + def PSQ_L : PSQForm_dw<56, (outs psrc:$fD), (ins memri:$src, u1imm:$W, u3imm:$I), + "psq_l $fD, $src, $W, $I", IIC_LdStLoad, []>; + + def PSQ_LU : PSQForm_dw<57, (outs psrc:$fD, ptr_rc_nor0:$ea_result), + (ins memri:$src, u1imm:$W, u3imm:$I), + "psq_lu $fD, $src, $W, $I", IIC_LdStLoadUpd, []>, + RegConstraint<"$src.reg = $ea_result">, + NoEncode<"$ea_result">; + def PSQ_LX : PSQForm_xw<6, (outs psrc:$fD), (ins memrr:$src, u1imm:$W, u3imm:$I), + "psq_lx $fD, $src, $W, $I", IIC_LdStLoad, []>; + def PSQ_LUX : PSQForm_xw<38, (outs psrc:$fD, ptr_rc_nor0:$ea_result), + (ins memrr:$src, u1imm:$W, u3imm:$I), + "psq_lux $fD, $src, $W, $I", IIC_LdStLoadUpdX, []>, + RegConstraint<"$src.ptrreg = $ea_result">, + NoEncode<"$ea_result">; + + def PSQ_ST : PSQForm_dw<60, (outs), (ins psrc:$fD, memri:$dst, u1imm:$W, u3imm:$I), + "psq_st $fD, $dst, $W, $I", IIC_LdStStore, []>; + + def PSQ_STU : PSQForm_dw<61, (outs ptr_rc_nor0:$ea_result), + (ins psrc:$fD, memri:$dst, u1imm:$W, u3imm:$I), + "psq_stu $fD, $dst, $W, $I", IIC_LdStSTU, []>, + RegConstraint<"$dst.reg = $ea_result">, + NoEncode<"$ea_result">; + + def PSQ_STX : PSQForm_xw<7, (outs), (ins psrc:$fD, memrr:$dst, u1imm:$W, u3imm:$I), + "psq_stx $fD, $dst, $W, $I", IIC_LdStStore, []>; + + def PSQ_STUX : PSQForm_xw<39, (outs ptr_rc_nor0:$ea_result), + (ins psrc:$fD, memrr:$dst, 
u1imm:$W, u3imm:$I), + "psq_stux $fD, $dst, $W, $I", IIC_LdStSTUX, []>, + RegConstraint<"$dst.ptrreg = $ea_result">, + NoEncode<"$ea_result">; + + defm PS_ABS : PSForm_xd0br<264, (outs psrc:$fD), (ins psrc:$fB), + "ps_abs", "$fD, $fB", IIC_FPGeneral, + [(set v2f32:$fD, (fabs v2f32:$fB))]>; + + defm PS_ADD : PSForm_adab0r<21, (outs psrc:$fD), (ins psrc:$fA, psrc:$fB), + "ps_add", "$fD, $fA, $fB", IIC_FPAddSub, + [(set v2f32:$fD, (fadd v2f32:$fA, v2f32:$fB))]>; + + def PS_CMPU0 : PSForm_xcrab<0, 0, (outs crrc:$crD), (ins psrc:$fA, psrc:$fB), + "ps_cmpu0 $crD, $fA, $fB", IIC_FPCompare, []>; + def PS_CMPU1 : PSForm_xcrab<0, 1, (outs crrc:$crD), (ins psrc:$fA, psrc:$fB), + "ps_cmpu1 $crD, $fA, $fB", IIC_FPCompare, []>; + def PS_CMPO0 : PSForm_xcrab<1, 0, (outs crrc:$crD), (ins psrc:$fA, psrc:$fB), + "ps_cmpo0 $crD, $fA, $fB", IIC_FPCompare, []>; + def PS_CMPO1 : PSForm_xcrab<1, 1, (outs crrc:$crD), (ins psrc:$fA, psrc:$fB), + "ps_cmpo1 $crD, $fA, $fB", IIC_FPCompare, []>; + + defm PS_DIV : PSForm_adab0r<18, (outs psrc:$fD), (ins psrc:$fA, psrc:$fB), + "ps_div", "$fD, $fA, $fB", IIC_FPDivS, + [(set v2f32:$fD, (fdiv v2f32:$fA, v2f32:$fB))]>; + + defm PS_MADD : PSForm_adabcr<29, (outs psrc:$fD), (ins psrc:$fA, psrc:$fB, psrc:$fC), + "ps_madd", "$fD, $fA, $fC, $fB", IIC_FPFused, + [(set v2f32:$fD, (fadd (fmul v2f32:$fA, v2f32:$fC), v2f32:$fB))]>; + + defm PS_MADDS0 : PSForm_adabcr<14, (outs psrc:$fD), (ins psrc:$fA, psrc:$fB, psrc:$fC), + "ps_madds0", "$fD, $fA, $fC, $fB", IIC_FPFused, + []>; + + defm PS_MADDS1 : PSForm_adabcr<15, (outs psrc:$fD), (ins psrc:$fA, psrc:$fB, psrc:$fC), + "ps_madds1", "$fD, $fA, $fC, $fB", IIC_FPFused, + []>; + defm PS_MERGE00 : PSForm_xdabr<528, (outs psrc:$fD), (ins psrc:$fA, psrc:$fB), + "ps_merge00", "$fD, $fA, $fB", IIC_FPGeneral, + []>; + defm PS_MERGE01 : PSForm_xdabr<560, (outs psrc:$fD), (ins psrc:$fA, psrc:$fB), + "ps_merge01", "$fD, $fA, $fB", IIC_FPGeneral, + []>; + defm PS_MERGE10 : PSForm_xdabr<592, (outs psrc:$fD), (ins psrc:$fA, 
psrc:$fB), + "ps_merge10", "$fD, $fA, $fB", IIC_FPGeneral, + []>; + defm PS_MERGE11 : PSForm_xdabr<624, (outs psrc:$fD), (ins psrc:$fA, psrc:$fB), + "ps_merge11", "$fD, $fA, $fB", IIC_FPGeneral, + []>; + defm PS_MR : PSForm_xd0br<72, (outs psrc:$fD), (ins psrc:$fB), + "ps_mr", "$fD, $fB", IIC_FPGeneral, + [(set v2f32:$fD, v2f32:$fB)]>; + + defm PS_MSUB : PSForm_adabcr<28, (outs psrc:$fD), (ins psrc:$fA, psrc:$fB, psrc:$fC), + "ps_msub", "$fD, $fA, $fC, $fB", IIC_FPFused, + [(set v2f32:$fD, (fsub (fmul v2f32:$fA, v2f32:$fC), v2f32:$fB))]>; + + defm PS_MUL : PSForm_ada0cr<25, (outs psrc:$fD), (ins psrc:$fA, psrc:$fC), + "ps_mul", "$fD, $fA, $fC", IIC_FPFused, + [(set v2f32:$fD, (fmul v2f32:$fA, v2f32:$fC))]>; + + defm PS_MULS0 : PSForm_ada0cr<12, (outs psrc:$fD), (ins psrc:$fA, psrc:$fC), + "ps_muls0", "$fD, $fA, $fC", IIC_FPFused, []>; + defm PS_MULS1 : PSForm_ada0cr<13, (outs psrc:$fD), (ins psrc:$fA, psrc:$fC), + "ps_muls1", "$fD, $fA, $fC", IIC_FPFused, []>; + + defm PS_NABS : PSForm_xd0br<136, (outs psrc:$fD), (ins psrc:$fB), + "ps_nabs", "$fD, $fB", IIC_FPGeneral, + [(set v2f32:$fD, (fneg (fabs v2f32:$fB)))]>; + + defm PS_NEG : PSForm_xd0br<40, (outs psrc:$fD), (ins psrc:$fB), + "ps_neg", "$fD, $fB", IIC_FPGeneral, + [(set v2f32:$fD, (fneg v2f32:$fB))]>; + + defm PS_NMADD : PSForm_adabcr<31, (outs psrc:$fD), (ins psrc:$fA, psrc:$fB, psrc:$fC), + "ps_nmadd", "$fD, $fA, $fC, $fB", IIC_FPFused, + [(set v2f32:$fD, (fneg (fadd (fmul v2f32:$fA, v2f32:$fC), v2f32:$fB)))]>; + + defm PS_NMSUB : PSForm_adabcr<30, (outs psrc:$fD), (ins psrc:$fA, psrc:$fB, psrc:$fC), + "ps_nmsub", "$fD, $fA, $fC, $fB", IIC_FPFused, + [(set v2f32:$fD, (fneg (fsub (fmul v2f32:$fA, v2f32:$fC), v2f32:$fB)))]>; + + defm PS_RES : PSForm_ad0b0r<24, (outs psrc:$fD), (ins psrc:$fB), + "ps_res", "$fD, $fB", IIC_FPGeneral, + [(set v2f32:$fD, (PPCfre v2f32:$fB))]>; + + defm PS_RSQRTE : PSForm_ad0b0r<26, (outs psrc:$fD), (ins psrc:$fB), // XO 26 (24 is ps_res) + "ps_rsqrte", "$fD, $fB", IIC_FPGeneral, + [(set v2f32:$fD, 
(PPCfrsqrte v2f32:$fB))]>; + + defm PS_SEL : PSForm_adabcr<23, (outs psrc:$fD), (ins psrc:$fA, psrc:$fB, psrc:$fC), + "ps_sel", "$fD, $fA, $fC, $fB", IIC_FPGeneral, + []>; + + defm PS_SUB : PSForm_adab0r<20, (outs psrc:$fD), (ins psrc:$fA, psrc:$fB), + "ps_sub", "$fD, $fA, $fB", IIC_FPAddSub, + [(set v2f32:$fD, (fsub v2f32:$fA, v2f32:$fB))]>; + + defm PS_SUM0 : PSForm_adabcr<10, (outs psrc:$fD), (ins psrc:$fA, psrc:$fB, psrc:$fC), + "ps_sum0", "$fD, $fA, $fC, $fB", IIC_FPAddSub, + []>; + defm PS_SUM1 : PSForm_adabcr<11, (outs psrc:$fD), (ins psrc:$fA, psrc:$fB, psrc:$fC), + "ps_sum1", "$fD, $fA, $fC, $fB", IIC_FPAddSub, + []>; +} \ No newline at end of file diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -175,6 +175,8 @@ // 32-bit targets. if (Subtarget.hasAltivec()) return CSR_SVR32_ColdCC_Altivec_SaveList; + else if (Subtarget.hasPaired()) + return CSR_SVR32_ColdCC_Paired_SaveList; else if (Subtarget.hasSPE()) return CSR_SVR32_ColdCC_SPE_SaveList; return CSR_SVR32_ColdCC_SaveList; @@ -191,6 +193,8 @@ return CSR_AIX32_SaveList; if (Subtarget.hasAltivec()) return CSR_SVR432_Altivec_SaveList; + if (Subtarget.hasPaired()) + return CSR_SVR432_Paired_SaveList; else if (Subtarget.hasSPE()) return CSR_SVR432_SPE_SaveList; return CSR_SVR432_SaveList; @@ -214,20 +218,27 @@ } if (CC == CallingConv::Cold) { - return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_SVR64_ColdCC_Altivec_RegMask - : CSR_SVR64_ColdCC_RegMask) - : (Subtarget.hasAltivec() ? CSR_SVR32_ColdCC_Altivec_RegMask - : (Subtarget.hasSPE() - ? CSR_SVR32_ColdCC_SPE_RegMask - : CSR_SVR32_ColdCC_RegMask)); + return TM.isPPC64() + ? (Subtarget.hasAltivec() ? CSR_SVR64_ColdCC_Altivec_RegMask + : CSR_SVR64_ColdCC_RegMask) + : (Subtarget.hasAltivec() + ? CSR_SVR32_ColdCC_Altivec_RegMask + : (Subtarget.hasPaired() + ? 
CSR_SVR32_ColdCC_Paired_RegMask + : (Subtarget.hasSPE() + ? CSR_SVR32_ColdCC_SPE_RegMask + : CSR_SVR32_ColdCC_RegMask))); } - return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_PPC64_Altivec_RegMask - : CSR_PPC64_RegMask) - : (Subtarget.hasAltivec() - ? CSR_SVR432_Altivec_RegMask - : (Subtarget.hasSPE() ? CSR_SVR432_SPE_RegMask - : CSR_SVR432_RegMask)); + return TM.isPPC64() + ? (Subtarget.hasAltivec() ? CSR_PPC64_Altivec_RegMask + : CSR_PPC64_RegMask) + : (Subtarget.hasAltivec() + ? CSR_SVR432_Altivec_RegMask + : (Subtarget.hasPaired() + ? CSR_SVR432_Paired_RegMask + : (Subtarget.hasSPE() ? CSR_SVR432_SPE_RegMask + : CSR_SVR432_RegMask))); } const uint32_t* @@ -311,6 +322,13 @@ IE = PPC::VRRCRegClass.end(); I != IE; ++I) markSuperRegs(Reserved, *I); + // Reserve Paired Single registers when PS is unavailable. + if (!Subtarget.hasPaired()) + for (TargetRegisterClass::iterator I = PPC::PSRCRegClass.begin(), + IE = PPC::PSRCRegClass.end(); + I != IE; ++I) + markSuperRegs(Reserved, *I); + assert(checkAllSuperRegsMarked(Reserved)); return Reserved; } diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td @@ -54,6 +54,10 @@ let HWEncoding{4-0} = num; } +// PSF - One of the 32 2x32-bit paired single floating-point registers +class PSF num, string n> : PPCReg { + let HWEncoding{4-0} = num; +} // VF - One of the 32 64-bit floating-point subregisters of the vector // registers (used by VSX). class VF num, string n> : PPCReg { @@ -117,6 +121,11 @@ DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>; } +// Paired Single floating-point registers +foreach Index = 0-31 in { + def PSF#Index : PSF; +} + // 64-bit Floating-point subregisters of Altivec registers // Note: the register names are v0-v31 or vs32-vs63 depending on the use. // Custom C++ code is used to produce the correct name and encoding. 
@@ -298,6 +307,9 @@ (sequence "F%u", 31, 14))>; def F4RC : RegisterClass<"PPC", [f32], 32, (add F8RC)>; +def PSRC : RegisterClass<"PPC", [v2f32], 64, (add (sequence "PSF%u", 0, 13), + (sequence "PSF%u", 31, 14))>; + def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v2i64,v1i128,v4f32,v2f64, f128], 128, diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP9.td b/llvm/lib/Target/PowerPC/PPCScheduleP9.td --- a/llvm/lib/Target/PowerPC/PPCScheduleP9.td +++ b/llvm/lib/Target/PowerPC/PPCScheduleP9.td @@ -41,10 +41,10 @@ let CompleteModel = 1; // Do not support SPE (Signal Processing Engine), prefixed instructions on - // Power 9, paired vector mem ops, PC relative mem ops, or instructions - // introduced in ISA 3.1. + // Power 9, paired vector mem ops, PC relative mem ops, instructions + // introduced in ISA 3.1, or the Paired Single extension. let UnsupportedFeatures = [HasSPE, PrefixInstrs, PairedVectorMemops, - PCRelativeMemops, IsISA3_1]; + PCRelativeMemops, IsISA3_1, HasPaired]; } let SchedModel = P9Model in { diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h --- a/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -94,6 +94,7 @@ bool UseCRBits; bool HasHardFloat; bool IsPPC64; + bool HasPaired; bool HasAltivec; bool HasFPU; bool HasSPE; @@ -247,6 +248,7 @@ bool hasLFIWAX() const { return HasLFIWAX; } bool hasFPRND() const { return HasFPRND; } bool hasFPCVT() const { return HasFPCVT; } + bool hasPaired() const { return HasPaired; } bool hasAltivec() const { return HasAltivec; } bool hasSPE() const { return HasSPE; } bool hasFPU() const { return HasFPU; } diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp --- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -63,6 +63,7 @@ Use64BitRegs = false; UseCRBits = false; HasHardFloat = false; + HasPaired = false; HasAltivec = false; HasSPE = false; HasFPU = false; 
@@ -157,6 +158,11 @@ report_fatal_error( "SPE and traditional floating point cannot both be enabled.\n", false); + if (HasPaired && IsPPC64) + report_fatal_error("Paired Single is only supported for 32-bit targets.\n", false); + if (HasPaired && HasAltivec) + report_fatal_error("Paired Single and Altivec cannot both be enabled.\n", false); + // If not SPE, set standard FPU if (!HasSPE) HasFPU = true; diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -749,6 +749,8 @@ unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) const { if (Vector) { if (ST->hasAltivec()) return 128; + if (ST->hasPaired()) + return 64; return 0; }