diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -1227,3 +1227,21 @@ Intrinsic<[llvm_double_ty], [llvm_i32_ty], []>; } + +// Paired Single Intrinsics +let TargetPrefix = "ppc" in { +def int_ppc_paired_l: Intrinsic<[llvm_v2f32_ty], [llvm_ptr_ty, llvm_i1_ty, llvm_i8_ty], [IntrReadMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; +def int_ppc_paired_st: Intrinsic<[], [llvm_v2f32_ty, llvm_ptr_ty, llvm_i1_ty, llvm_i8_ty], [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; +def int_ppc_paired_madds0: Intrinsic<[llvm_v2f32_ty], [llvm_v2f32_ty, llvm_v2f32_ty, llvm_v2f32_ty], []>; +def int_ppc_paired_madds1: Intrinsic<[llvm_v2f32_ty], [llvm_v2f32_ty, llvm_v2f32_ty, llvm_v2f32_ty], []>; +def int_ppc_paired_merge00: Intrinsic<[llvm_v2f32_ty], [llvm_v2f32_ty, llvm_v2f32_ty]>; +def int_ppc_paired_merge01: Intrinsic<[llvm_v2f32_ty], [llvm_v2f32_ty, llvm_v2f32_ty]>; +def int_ppc_paired_merge10: Intrinsic<[llvm_v2f32_ty], [llvm_v2f32_ty, llvm_v2f32_ty]>; +def int_ppc_paired_merge11: Intrinsic<[llvm_v2f32_ty], [llvm_v2f32_ty, llvm_v2f32_ty]>; +def int_ppc_paired_muls0: Intrinsic<[llvm_v2f32_ty], [llvm_v2f32_ty, llvm_v2f32_ty], []>; +def int_ppc_paired_muls1: Intrinsic<[llvm_v2f32_ty], [llvm_v2f32_ty, llvm_v2f32_ty], []>; +def int_ppc_paired_sel: Intrinsic<[llvm_v2f32_ty], [llvm_v2f32_ty, llvm_v2f32_ty, llvm_v2f32_ty], []>; +def int_ppc_paired_sum0: Intrinsic<[llvm_v2f32_ty], [llvm_v2f32_ty, llvm_v2f32_ty, llvm_v2f32_ty], []>; +def int_ppc_paired_sum1: Intrinsic<[llvm_v2f32_ty], [llvm_v2f32_ty, llvm_v2f32_ty, llvm_v2f32_ty], []>; +def int_ppc_broadway_dcbz_l : Intrinsic<[], [llvm_ptr_ty], []>; +} diff --git a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp --- a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -330,6 +330,9 @@ 
bool isU10Imm() const { return Kind == Immediate && isUInt<10>(getImm()); } bool isU12Imm() const { return Kind == Immediate && isUInt<12>(getImm()); } + bool isS12Imm() const { + return Kind == Expression || (Kind == Immediate && isInt<12>(getImm())); + } bool isU16Imm() const { switch (Kind) { case Expression: @@ -467,6 +470,11 @@ Inst.addOperand(MCOperand::createReg(FRegs[getReg()])); } + void addRegPSRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(PSRegs[getReg()])); + } + void addRegVFRCOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::createReg(VFRegs[getReg()])); diff --git a/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp --- a/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp +++ b/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -110,6 +110,12 @@ return decodeRegisterClass(Inst, RegNo, FRegs); } +static DecodeStatus DecodePSRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, PSRegs); +} + static DecodeStatus DecodeVFRCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { @@ -402,5 +408,12 @@ return result; } + if (STI.getFeatureBits()[PPC::FeaturePaired]) { + DecodeStatus result = decodeInstruction(DecoderTablePairedSingle32, MI, + Inst, Address, this, STI); + if (result != MCDisassembler::Fail) + return result; + } + return decodeInstruction(DecoderTable32, MI, Inst, Address, this, STI); } diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -152,34 +152,27 @@ using llvm::MCPhysReg; -#define DEFINE_PPC_REGCLASSES 
\ - static const MCPhysReg RRegs[32] = PPC_REGS0_31(PPC::R); \ - static const MCPhysReg XRegs[32] = PPC_REGS0_31(PPC::X); \ - static const MCPhysReg FRegs[32] = PPC_REGS0_31(PPC::F); \ - static const MCPhysReg SPERegs[32] = PPC_REGS0_31(PPC::S); \ - static const MCPhysReg VFRegs[32] = PPC_REGS0_31(PPC::VF); \ - static const MCPhysReg VRegs[32] = PPC_REGS0_31(PPC::V); \ - static const MCPhysReg RRegsNoR0[32] = \ - PPC_REGS_NO0_31(PPC::ZERO, PPC::R); \ - static const MCPhysReg XRegsNoX0[32] = \ - PPC_REGS_NO0_31(PPC::ZERO8, PPC::X); \ - static const MCPhysReg VSRegs[64] = \ - PPC_REGS_LO_HI(PPC::VSL, PPC::V); \ - static const MCPhysReg VSFRegs[64] = \ - PPC_REGS_LO_HI(PPC::F, PPC::VF); \ - static const MCPhysReg VSSRegs[64] = \ - PPC_REGS_LO_HI(PPC::F, PPC::VF); \ - static const MCPhysReg CRBITRegs[32] = { \ - PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN, \ - PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN, \ - PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, \ - PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, \ - PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN, \ - PPC::CR5LT, PPC::CR5GT, PPC::CR5EQ, PPC::CR5UN, \ - PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN, \ - PPC::CR7LT, PPC::CR7GT, PPC::CR7EQ, PPC::CR7UN}; \ - static const MCPhysReg CRRegs[8] = { \ - PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3, \ - PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7} +#define DEFINE_PPC_REGCLASSES \ + static const MCPhysReg RRegs[32] = PPC_REGS0_31(PPC::R); \ + static const MCPhysReg XRegs[32] = PPC_REGS0_31(PPC::X); \ + static const MCPhysReg FRegs[32] = PPC_REGS0_31(PPC::F); \ + static const MCPhysReg PSRegs[32] = PPC_REGS0_31(PPC::PSF); \ + static const MCPhysReg SPERegs[32] = PPC_REGS0_31(PPC::S); \ + static const MCPhysReg VFRegs[32] = PPC_REGS0_31(PPC::VF); \ + static const MCPhysReg VRegs[32] = PPC_REGS0_31(PPC::V); \ + static const MCPhysReg RRegsNoR0[32] = PPC_REGS_NO0_31(PPC::ZERO, PPC::R); \ + static const MCPhysReg XRegsNoX0[32] = PPC_REGS_NO0_31(PPC::ZERO8, PPC::X); \ + static 
const MCPhysReg VSRegs[64] = PPC_REGS_LO_HI(PPC::VSL, PPC::V); \ + static const MCPhysReg VSFRegs[64] = PPC_REGS_LO_HI(PPC::F, PPC::VF); \ + static const MCPhysReg VSSRegs[64] = PPC_REGS_LO_HI(PPC::F, PPC::VF); \ + static const MCPhysReg CRBITRegs[32] = { \ + PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN, PPC::CR1LT, PPC::CR1GT, \ + PPC::CR1EQ, PPC::CR1UN, PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, \ + PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, PPC::CR4LT, PPC::CR4GT, \ + PPC::CR4EQ, PPC::CR4UN, PPC::CR5LT, PPC::CR5GT, PPC::CR5EQ, PPC::CR5UN, \ + PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN, PPC::CR7LT, PPC::CR7GT, \ + PPC::CR7EQ, PPC::CR7UN}; \ + static const MCPhysReg CRRegs[8] = {PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3, \ + PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7} #endif // LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCTARGETDESC_H diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td --- a/llvm/lib/Target/PowerPC/PPC.td +++ b/llvm/lib/Target/PowerPC/PPC.td @@ -66,6 +66,9 @@ def FeatureFPU : SubtargetFeature<"fpu","HasFPU","true", "Enable classic FPU instructions", [FeatureHardFloat]>; +def FeaturePaired : SubtargetFeature<"paired","HasPaired","true", + "Enable PPC750CL Paired-Single instructions", + [FeatureFPU]>; def FeatureAltivec : SubtargetFeature<"altivec","HasAltivec", "true", "Enable Altivec instructions", [FeatureFPU]>; @@ -472,6 +475,9 @@ def : Processor<"750", G4Itineraries, [Directive750, FeatureFRES, FeatureFRSQRTE, FeatureMFTB]>; +def : Processor<"750cl", G3Itineraries, [Directive750, + FeatureFRES, FeatureFRSQRTE, + FeatureMFTB, FeaturePaired]>; def : Processor<"g3", G3Itineraries, [Directive750, FeatureFRES, FeatureFRSQRTE, FeatureMFTB]>; diff --git a/llvm/lib/Target/PowerPC/PPCCallingConv.td b/llvm/lib/Target/PowerPC/PPCCallingConv.td --- a/llvm/lib/Target/PowerPC/PPCCallingConv.td +++ b/llvm/lib/Target/PowerPC/PPCCallingConv.td @@ -61,6 +61,7 @@ CCIfType<[f64], CCAssignToReg<[F1]>>, CCIfType<[f128], 
CCIfSubtarget<"hasP9Vector()", CCAssignToReg<[V2]>>>, + CCIfType<[v2f32], CCIfSubtarget<"hasPaired()", CCAssignToReg<[PSF1]>>>, CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64], CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2]>>> @@ -95,11 +96,13 @@ CCIfSubtarget<"hasP9Vector()", CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>, + CCIfType<[v2f32], CCIfSubtarget<"hasPaired()", + CCAssignToReg<[PSF1, PSF2, PSF3, PSF4, PSF5, PSF6, PSF7, PSF8]>>>, // Vector types returned as "direct" go into V2 .. V9; note that only the // ELFv2 ABI fully utilizes all these registers. CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64], CCIfSubtarget<"hasAltivec()", - CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>> + CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>, ]>; // No explicit register is specified for the AnyReg calling convention. The @@ -151,6 +154,9 @@ CCIfType<[f128], CCIfSubtarget<"hasP9Vector()", CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>, + CCIfType<[v2f32], + CCIfSubtarget<"hasPaired()", + CCAssignToReg<[PSF1, PSF2, PSF3, PSF4, PSF5, PSF6, PSF7, PSF8]>>>, CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64], CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>> @@ -231,6 +237,8 @@ // put vector arguments in vector registers before putting them on the stack. let Entry = 1 in def CC_PPC32_SVR4 : CallingConv<[ + CCIfType<[v2f32], CCIfSubtarget<"hasPaired()", + CCAssignToReg<[PSF1, PSF2, PSF3, PSF4, PSF5, PSF6, PSF7, PSF8]>>>, // The first 12 Vector arguments are passed in AltiVec registers. 
CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64], CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2, V3, V4, V5, V6, V7, @@ -264,6 +272,10 @@ CCCustom<"CC_PPC32_SVR4_Custom_Dummy"> ]>; +def CSR_Paired : CalleeSavedRegs<(add PSF14, PSF15, PSF16, PSF17, PSF18, PSF19, + PSF20, PSF21, PSF22, PSF23, PSF24, PSF25, + PSF26, PSF27, PSF28, PSF29, PSF30, PSF31)>; + def CSR_Altivec : CalleeSavedRegs<(add V20, V21, V22, V23, V24, V25, V26, V27, V28, V29, V30, V31)>; @@ -280,6 +292,8 @@ S23, S24, S25, S26, S27, S28, S29, S30, S31 )>; +def CSR_SVR432_Paired : CalleeSavedRegs<(add CSR_SVR432, CSR_Paired)>; + def CSR_SVR432_Altivec : CalleeSavedRegs<(add CSR_SVR432, CSR_Altivec)>; def CSR_SVR432_SPE : CalleeSavedRegs<(add CSR_SVR432_COMM, CSR_SPE)>; @@ -323,6 +337,8 @@ def CSR_SVR32_ColdCC : CalleeSavedRegs<(add CSR_SVR32_ColdCC_Common, F0, (sequence "F%u", 2, 31))>; +def CSR_SVR32_ColdCC_Paired : CalleeSavedRegs<(add CSR_SVR32_ColdCC, + PSF0, (sequence "PSF%u", 2, 31))>; def CSR_SVR32_ColdCC_Altivec : CalleeSavedRegs<(add CSR_SVR32_ColdCC, (sequence "V%u", 0, 1), diff --git a/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/llvm/lib/Target/PowerPC/PPCInstrFormats.td --- a/llvm/lib/Target/PowerPC/PPCInstrFormats.td +++ b/llvm/lib/Target/PowerPC/PPCInstrFormats.td @@ -2111,6 +2111,128 @@ let Inst{31} = RC; } +// Paired-Single instruction formats +class PSForm_xdab xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> D; + bits<5> A; + bits<5> B; + + let Pattern = pattern; + + bit RC = 0; + + let Inst{6-10} = D; + let Inst{11-15} = A; + let Inst{16-20} = B; + let Inst{21-30} = xo; + let Inst{31} = RC; +} + +class PSForm_adabc xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> D; + bits<5> A; + bits<5> B; + bits<5> C; + + let Pattern = pattern; + + bit RC = 0; + + let Inst{6-10} = D; + let Inst{11-15} = A; + let Inst{16-20} = B; + let Inst{21-25} = 
C; + let Inst{26-30} = xo; + let Inst{31} = RC; +} + +class PSQForm_xw op, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> D; + bits<5> A; + bits<5> B; + bit W; + bits<3> I; + + let Pattern = pattern; + + let Inst{6-10} = D; + let Inst{11-15} = A; + let Inst{16-20} = B; + let Inst{21} = W; + let Inst{22-24} = I; + let Inst{25-30} = op; + let Inst{31} = 0; // no record mode supported +} + +class PSQForm_dw op, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + :I { + bits<5> D; + bit W; + bits<3> I; + bits<12> d; + bits<5> A; + + let Pattern = pattern; + + let Inst{6-10} = D; + let Inst{11-15} = A; + let Inst{16} = W; + let Inst{17-19} = I; + let Inst{20-31} = d; +} + +class PSForm_xcrab pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<3> D; + bits<5> A; + bits<5> B; + + let Inst{6-8} = D; + let Inst{9-10} = 0; + let Inst{11-15} = A; + let Inst{16-20} = B; + let Inst{21-23} = 0; + let Inst{24} = high; + let Inst{25} = ordered; + let Inst{26-31} = 0; +} + +class PSForm_x0ab xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : PSForm_xdab { + let D = 0; +} + +class PSForm_xd0b xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : PSForm_xdab { + let A = 0; +} + +class PSForm_adab0 xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : PSForm_adabc { + let C = 0; +} +class PSForm_ada0c xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : PSForm_adabc { + let B = 0; +} +class PSForm_ad0b0 xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : PSForm_adab0 { + let A = 0; +} + //===----------------------------------------------------------------------===// // EmitTimePseudo won't have encoding information for the [MC]CodeEmitter // stuff diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td --- 
a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -609,6 +609,12 @@ def f4rc : RegisterOperand { let ParserMatchClass = PPCRegF4RCAsmOperand; } +def PPCRegPSRCAsmOperand : AsmOperandClass { + let Name = "RegPSRC"; let PredicateMethod = "isRegNumber"; +} +def psrc : RegisterOperand { + let ParserMatchClass = PPCRegPSRCAsmOperand; +} def PPCRegVRRCAsmOperand : AsmOperandClass { let Name = "RegVRRC"; let PredicateMethod = "isRegNumber"; } @@ -951,6 +957,13 @@ def dispSPE2 : Operand { let ParserMatchClass = PPCDispSPE2Operand; } +def PPCDispRID12Operand : AsmOperandClass { + let Name = "DispRID12"; let PredicateMethod = "isS12Imm"; + let RenderMethod = "addImmOperands"; +} +def dispRID12 : Operand { + let ParserMatchClass = PPCDispRID12Operand; +} def memri : Operand { let PrintMethod = "printMemRegImm"; @@ -992,6 +1005,10 @@ let EncoderMethod = "getSPE2DisEncoding"; let DecoderMethod = "decodeSPE2Operands"; } +def memrid12 : Operand { // Paired Single displacement where imm is 12 bits wide. + let PrintMethod = "printMemRegImm"; + let MIOperandInfo = (ops dispRID12:$imm, ptr_rc_nor0:$reg); +} // A single-register address. This is used with the SjLj // pseudo-instructions which translates to LD/LWZ. 
These instructions requires @@ -1062,6 +1079,7 @@ def IsPPC6xx : Predicate<"Subtarget->isPPC6xx()">; def IsE500 : Predicate<"Subtarget->isE500()">; def HasSPE : Predicate<"Subtarget->hasSPE()">; +def HasPaired : Predicate<"Subtarget->hasPaired()">; def HasICBT : Predicate<"Subtarget->hasICBT()">; def HasPartwordAtomics : Predicate<"Subtarget->hasPartwordAtomics()">; def NoNaNsFPMath @@ -3508,6 +3526,7 @@ (FCPSGNS (COPY_TO_REGCLASS $frA, F4RC), $frB)>; } +include "PPCInstrPaired.td" include "PPCInstrAltivec.td" include "PPCInstrSPE.td" include "PPCInstr64Bit.td" diff --git a/llvm/lib/Target/PowerPC/PPCInstrPaired.td b/llvm/lib/Target/PowerPC/PPCInstrPaired.td new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/PowerPC/PPCInstrPaired.td @@ -0,0 +1,230 @@ +//===-- PPCInstrPaired.td - The PowerPC Paired Single Extension -*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file describes the Paired Single extension to the PowerPC instruction set. 
+// +//===----------------------------------------------------------------------===// + +multiclass PSForm_xdabr xo, dag OOL, dag IOL, string asmbase, + string asmstr, InstrItinClass itin, list pattern> { + let BaseName = asmbase in { + def NAME : PSForm_xdab, RecFormRel; + let Defs = [CR1] in + def _rec : PSForm_xdab, isRecordForm, RecFormRel; + } +} + +multiclass PSForm_xd0br xo, dag OOL, dag IOL, string asmbase, + string asmstr, InstrItinClass itin, list pattern> { + let BaseName = asmbase in { + def NAME : PSForm_xd0b, RecFormRel; + let Defs = [CR1] in + def _rec : PSForm_xd0b, isRecordForm, RecFormRel; + } +} + +multiclass PSForm_adabcr xo, dag OOL, dag IOL, string asmbase, + string asmstr, InstrItinClass itin, list pattern> { + let BaseName = asmbase in { + def NAME : PSForm_adabc, RecFormRel; + let Defs = [CR1] in + def _rec : PSForm_adabc, isRecordForm, RecFormRel; + } +} + +multiclass PSForm_ada0cr xo, dag OOL, dag IOL, string asmbase, + string asmstr, InstrItinClass itin, list pattern> { + let BaseName = asmbase in { + def NAME : PSForm_ada0c, RecFormRel; + let Defs = [CR1] in + def _rec : PSForm_ada0c, isRecordForm, RecFormRel; + } +} + +multiclass PSForm_adab0r xo, dag OOL, dag IOL, string asmbase, + string asmstr, InstrItinClass itin, list pattern> { + let BaseName = asmbase in { + def NAME : PSForm_adab0, RecFormRel; + let Defs = [CR1] in + def _rec : PSForm_adab0, isRecordForm, RecFormRel; + } +} + +multiclass PSForm_ad0b0r xo, dag OOL, dag IOL, string asmbase, + string asmstr, InstrItinClass itin, list pattern> { + let BaseName = asmbase in { + def NAME : PSForm_ad0b0, RecFormRel; + let Defs = [CR1] in + def _rec : PSForm_ad0b0, isRecordForm, RecFormRel; + } +} + +let DecoderNamespace = "PairedSingle" in { + let Predicates = [HasPaired] in { + def DCBZ_L : PSForm_x0ab<1014, (outs), (ins memrr:$dst), "dcbz_l $dst", + IIC_LdStDCBF, [(int_ppc_broadway_dcbz_l xoaddr:$dst)]>; + + def PSQ_L : PSQForm_dw<56, (outs psrc:$fD), (ins memrid12:$src, u1imm:$W, 
u3imm:$I), + "psq_l $fD, $src, $W, $I", IIC_LdStLoad, []>; + + def PSQ_LU : PSQForm_dw<57, (outs psrc:$fD, ptr_rc_nor0:$ea_result), + (ins memrid12:$src, u1imm:$W, u3imm:$I), + "psq_lu $fD, $src, $W, $I", IIC_LdStLoadUpd, []>, + RegConstraint<"$src.reg = $ea_result">, + NoEncode<"$ea_result">; + def PSQ_LX : PSQForm_xw<6, (outs psrc:$fD), (ins memrr:$src, u1imm:$W, u3imm:$I), + "psq_lx $fD, $src, $W, $I", IIC_LdStLoad, []>; + def PSQ_LUX : PSQForm_xw<38, (outs psrc:$fD, ptr_rc_nor0:$ea_result), + (ins memrr:$src, u1imm:$W, u3imm:$I), + "psq_lux $fD, $src, $W, $I", IIC_LdStLoad, []>, + RegConstraint<"$src.ptrreg = $ea_result">, + NoEncode<"$ea_result">; + + def PSQ_ST : PSQForm_dw<60, (outs), (ins psrc:$fD, memrid12:$dst, u1imm:$W, u3imm:$I), + "psq_st $fD, $dst, $W, $I", IIC_LdStStore, []>; + + def PSQ_STU : PSQForm_dw<61, (outs ptr_rc_nor0:$ea_result), + (ins psrc:$fD, memrid12:$dst, u1imm:$W, u3imm:$I), + "psq_stu $fD, $dst, $W, $I", IIC_LdStSTU, []>, + RegConstraint<"$dst.reg = $ea_result">, + NoEncode<"$ea_result">; + + def PSQ_STX : PSQForm_xw<7, (outs), (ins psrc:$fD, memrr:$dst, u1imm:$W, u3imm:$I), + "psq_stx $fD, $dst, $W, $I", IIC_LdStStore, []>; + + def PSQ_STUX : PSQForm_xw<39, (outs ptr_rc_nor0:$ea_result), + (ins psrc:$fD, memrr:$dst, u1imm:$W, u3imm:$I), + "psq_stux $fD, $dst, $W, $I", IIC_LdStSTUX, []>, + RegConstraint<"$dst.ptrreg = $ea_result">, + NoEncode<"$ea_result">; + + defm PS_ABS : PSForm_xd0br<264, (outs psrc:$fD), (ins psrc:$fB), + "ps_abs", "$fD, $fB", IIC_FPGeneral, + [(set v2f32:$fD, (fabs v2f32:$fB))]>; + + defm PS_ADD : PSForm_adab0r<21, (outs psrc:$fD), (ins psrc:$fA, psrc:$fB), + "ps_add", "$fD, $fA, $fB", IIC_FPAddSub, + [(set v2f32:$fD, (fadd v2f32:$fA, v2f32:$fB))]>; + + def PS_CMPU0 : PSForm_xcrab<0, 0, (outs crrc:$crD), (ins psrc:$fA, psrc:$fB), + "ps_cmpu0 $crD, $fA, $fB", IIC_FPCompare, []>; + def PS_CMPU1 : PSForm_xcrab<0, 1, (outs crrc:$crD), (ins psrc:$fA, psrc:$fB), + "ps_cmpu1 $crD, $fA, $fB", IIC_FPCompare, []>; + def 
PS_CMPO0 : PSForm_xcrab<1, 0, (outs crrc:$crD), (ins psrc:$fA, psrc:$fB), + "ps_cmpo0 $crD, $fA, $fB", IIC_FPCompare, []>; + def PS_CMPO1 : PSForm_xcrab<1, 1, (outs crrc:$crD), (ins psrc:$fA, psrc:$fB), + "ps_cmpo1 $crD, $fA, $fB", IIC_FPCompare, []>; + + defm PS_DIV : PSForm_adab0r<18, (outs psrc:$fD), (ins psrc:$fA, psrc:$fB), + "ps_div", "$fD, $fA, $fB", IIC_FPDivS, + [(set v2f32:$fD, (fdiv v2f32:$fA, v2f32:$fB))]>; + + defm PS_MADD : PSForm_adabcr<29, (outs psrc:$fD), (ins psrc:$fA, psrc:$fB, psrc:$fC), + "ps_madd", "$fD, $fA, $fC, $fB", IIC_FPFused, + [(set v2f32:$fD, (fadd (fmul v2f32:$fA, v2f32:$fC), v2f32:$fB))]>; + + defm PS_MADDS0 : PSForm_adabcr<14, (outs psrc:$fD), (ins psrc:$fA, psrc:$fB, psrc:$fC), + "ps_madds0", "$fD, $fA, $fC, $fB", IIC_FPFused, + []>; + + defm PS_MADDS1 : PSForm_adabcr<15, (outs psrc:$fD), (ins psrc:$fA, psrc:$fB, psrc:$fC), + "ps_madds1", "$fD, $fA, $fC, $fB", IIC_FPFused, + []>; + defm PS_MERGE00 : PSForm_xdabr<528, (outs psrc:$fD), (ins psrc:$fA, psrc:$fB), + "ps_merge00", "$fD, $fA, $fB", IIC_FPGeneral, + []>; + defm PS_MERGE01 : PSForm_xdabr<560, (outs psrc:$fD), (ins psrc:$fA, psrc:$fB), + "ps_merge01", "$fD, $fA, $fB", IIC_FPGeneral, + []>; + defm PS_MERGE10 : PSForm_xdabr<592, (outs psrc:$fD), (ins psrc:$fA, psrc:$fB), + "ps_merge10", "$fD, $fA, $fB", IIC_FPGeneral, + []>; + defm PS_MERGE11 : PSForm_xdabr<624, (outs psrc:$fD), (ins psrc:$fA, psrc:$fB), + "ps_merge11", "$fD, $fA, $fB", IIC_FPGeneral, + []>; + defm PS_MR : PSForm_xd0br<72, (outs psrc:$fD), (ins psrc:$fB), + "ps_mr", "$fD, $fB", IIC_FPGeneral, + [(set v2f32:$fD, v2f32:$fB)]>; + + defm PS_MSUB : PSForm_adabcr<28, (outs psrc:$fD), (ins psrc:$fA, psrc:$fB, psrc:$fC), + "ps_msub", "$fD, $fA, $fC, $fB", IIC_FPFused, + [(set v2f32:$fD, (fsub (fmul v2f32:$fA, v2f32:$fC), v2f32:$fB))]>; + + defm PS_MUL : PSForm_ada0cr<25, (outs psrc:$fD), (ins psrc:$fA, psrc:$fC), + "ps_mul", "$fD, $fA, $fC", IIC_FPFused, + [(set v2f32:$fD, (fmul v2f32:$fA, v2f32:$fC))]>; + + defm 
PS_MULS0 : PSForm_ada0cr<12, (outs psrc:$fD), (ins psrc:$fA, psrc:$fC), + "ps_muls0", "$fD, $fA, $fC", IIC_FPFused, []>; + defm PS_MULS1 : PSForm_ada0cr<13, (outs psrc:$fD), (ins psrc:$fA, psrc:$fC), + "ps_muls1", "$fD, $fA, $fC", IIC_FPFused, []>; + + defm PS_NABS : PSForm_xd0br<136, (outs psrc:$fD), (ins psrc:$fB), + "ps_nabs", "$fD, $fB", IIC_FPGeneral, + [(set v2f32:$fD, (fneg (fabs v2f32:$fB)))]>; + + defm PS_NEG : PSForm_xd0br<40, (outs psrc:$fD), (ins psrc:$fB), + "ps_neg", "$fD, $fB", IIC_FPGeneral, + [(set v2f32:$fD, (fneg v2f32:$fB))]>; + + defm PS_NMADD : PSForm_adabcr<31, (outs psrc:$fD), (ins psrc:$fA, psrc:$fB, psrc:$fC), + "ps_nmadd", "$fD, $fA, $fC, $fB", IIC_FPFused, + [(set v2f32:$fD, (fneg (fadd (fmul v2f32:$fA, v2f32:$fC), v2f32:$fB)))]>; + + defm PS_NMSUB : PSForm_adabcr<30, (outs psrc:$fD), (ins psrc:$fA, psrc:$fB, psrc:$fC), + "ps_nmsub", "$fD, $fA, $fC, $fB", IIC_FPFused, + [(set v2f32:$fD, (fneg (fsub (fmul v2f32:$fA, v2f32:$fC), v2f32:$fB)))]>; + + defm PS_RES : PSForm_ad0b0r<24, (outs psrc:$fD), (ins psrc:$fB), + "ps_res", "$fD, $fB", IIC_FPGeneral, + [(set v2f32:$fD, (PPCfre v2f32:$fB))]>; + + defm PS_RSQRTE : PSForm_ad0b0r<26, (outs psrc:$fD), (ins psrc:$fB), + "ps_rsqrte", "$fD, $fB", IIC_FPGeneral, + [(set v2f32:$fD, (PPCfrsqrte v2f32:$fB))]>; + + defm PS_SEL : PSForm_adabcr<23, (outs psrc:$fD), (ins psrc:$fA, psrc:$fB, psrc:$fC), + "ps_sel", "$fD, $fA, $fC, $fB", IIC_FPGeneral, + []>; + + defm PS_SUB : PSForm_adab0r<20, (outs psrc:$fD), (ins psrc:$fA, psrc:$fB), + "ps_sub", "$fD, $fA, $fB", IIC_FPAddSub, + [(set v2f32:$fD, (fsub v2f32:$fA, v2f32:$fB))]>; + + defm PS_SUM0 : PSForm_adabcr<10, (outs psrc:$fD), (ins psrc:$fA, psrc:$fB, psrc:$fC), + "ps_sum0", "$fD, $fA, $fC, $fB", IIC_FPAddSub, + []>; + defm PS_SUM1 : PSForm_adabcr<11, (outs psrc:$fD), (ins psrc:$fA, psrc:$fB, psrc:$fC), + "ps_sum1", "$fD, $fA, $fC, $fB", IIC_FPAddSub, + []>; + } +} \ No newline at end of file diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp 
b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -175,6 +175,8 @@ // 32-bit targets. if (Subtarget.hasAltivec()) return CSR_SVR32_ColdCC_Altivec_SaveList; + else if (Subtarget.hasPaired()) + return CSR_SVR32_ColdCC_Paired_SaveList; else if (Subtarget.hasSPE()) return CSR_SVR32_ColdCC_SPE_SaveList; return CSR_SVR32_ColdCC_SaveList; @@ -191,6 +193,8 @@ return CSR_AIX32_SaveList; if (Subtarget.hasAltivec()) return CSR_SVR432_Altivec_SaveList; + if (Subtarget.hasPaired()) + return CSR_SVR432_Paired_SaveList; else if (Subtarget.hasSPE()) return CSR_SVR432_SPE_SaveList; return CSR_SVR432_SaveList; @@ -214,20 +218,27 @@ } if (CC == CallingConv::Cold) { - return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_SVR64_ColdCC_Altivec_RegMask - : CSR_SVR64_ColdCC_RegMask) - : (Subtarget.hasAltivec() ? CSR_SVR32_ColdCC_Altivec_RegMask - : (Subtarget.hasSPE() - ? CSR_SVR32_ColdCC_SPE_RegMask - : CSR_SVR32_ColdCC_RegMask)); + return TM.isPPC64() + ? (Subtarget.hasAltivec() ? CSR_SVR64_ColdCC_Altivec_RegMask + : CSR_SVR64_ColdCC_RegMask) + : (Subtarget.hasAltivec() + ? CSR_SVR32_ColdCC_Altivec_RegMask + : (Subtarget.hasPaired() + ? CSR_SVR32_ColdCC_Paired_RegMask + : (Subtarget.hasSPE() + ? CSR_SVR32_ColdCC_SPE_RegMask + : CSR_SVR32_ColdCC_RegMask))); } - return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_PPC64_Altivec_RegMask - : CSR_PPC64_RegMask) - : (Subtarget.hasAltivec() - ? CSR_SVR432_Altivec_RegMask - : (Subtarget.hasSPE() ? CSR_SVR432_SPE_RegMask - : CSR_SVR432_RegMask)); + return TM.isPPC64() + ? (Subtarget.hasAltivec() ? CSR_PPC64_Altivec_RegMask + : CSR_PPC64_RegMask) + : (Subtarget.hasAltivec() + ? CSR_SVR432_Altivec_RegMask + : (Subtarget.hasPaired() + ? CSR_SVR432_Paired_RegMask + : (Subtarget.hasSPE() ? 
CSR_SVR432_SPE_RegMask + : CSR_SVR432_RegMask))); } const uint32_t* @@ -311,6 +322,13 @@ IE = PPC::VRRCRegClass.end(); I != IE; ++I) markSuperRegs(Reserved, *I); + // Reserve Paired Single registers when PS is unavailable. + if (!Subtarget.hasPaired()) + for (TargetRegisterClass::iterator I = PPC::PSRCRegClass.begin(), + IE = PPC::PSRCRegClass.end(); + I != IE; ++I) + markSuperRegs(Reserved, *I); + assert(checkAllSuperRegsMarked(Reserved)); return Reserved; } diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td @@ -54,6 +54,10 @@ let HWEncoding{4-0} = num; } +// PSF - One of the 32 2x32-bit paired single floating-point registers +class PSF num, string n> : PPCReg { + let HWEncoding{4-0} = num; +} // VF - One of the 32 64-bit floating-point subregisters of the vector // registers (used by VSX). class VF num, string n> : PPCReg { @@ -117,6 +121,11 @@ DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>; } +// Paired Single floating-point registers +foreach Index = 0-31 in { + def PSF#Index : PSF; +} + // 64-bit Floating-point subregisters of Altivec registers // Note: the register names are v0-v31 or vs32-vs63 depending on the use. // Custom C++ code is used to produce the correct name and encoding. 
@@ -298,6 +307,9 @@ (sequence "F%u", 31, 14))>; def F4RC : RegisterClass<"PPC", [f32], 32, (add F8RC)>; +def PSRC : RegisterClass<"PPC", [v2f32], 64, (add (sequence "PSF%u", 0, 13), + (sequence "PSF%u", 31, 14))>; + def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v2i64,v1i128,v4f32,v2f64, f128], 128, diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP9.td b/llvm/lib/Target/PowerPC/PPCScheduleP9.td --- a/llvm/lib/Target/PowerPC/PPCScheduleP9.td +++ b/llvm/lib/Target/PowerPC/PPCScheduleP9.td @@ -42,9 +42,9 @@ // Do not support SPE (Signal Processing Engine), prefixed instructions on // Power 9, paired vector mem ops, MMA, PC relative mem ops, or instructions - // introduced in ISA 3.1. + // introduced in ISA 3.1. The Paired Single extension is unsupported as well. let UnsupportedFeatures = [HasSPE, PrefixInstrs, PairedVectorMemops, MMA, - PCRelativeMemops, IsISA3_1]; + PCRelativeMemops, IsISA3_1, HasPaired]; } let SchedModel = P9Model in { diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h --- a/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -94,6 +94,7 @@ bool UseCRBits; bool HasHardFloat; bool IsPPC64; + bool HasPaired; bool HasAltivec; bool HasFPU; bool HasSPE; @@ -248,6 +249,7 @@ bool hasLFIWAX() const { return HasLFIWAX; } bool hasFPRND() const { return HasFPRND; } bool hasFPCVT() const { return HasFPCVT; } + bool hasPaired() const { return HasPaired; } bool hasAltivec() const { return HasAltivec; } bool hasSPE() const { return HasSPE; } bool hasFPU() const { return HasFPU; } diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp --- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -63,6 +63,7 @@ Use64BitRegs = false; UseCRBits = false; HasHardFloat = false; + HasPaired = false; HasAltivec = false; HasSPE = false; HasFPU = false; @@ -158,6 +159,11 @@ report_fatal_error( "SPE and traditional floating point cannot 
both be enabled.\n", false); + if (HasPaired && IsPPC64) + report_fatal_error("Paired Single is only supported for 32-bit targets.\n", false); + if (HasPaired && HasAltivec) + report_fatal_error("Paired Single and Altivec cannot both be enabled.\n", false); + // If not SPE, set standard FPU if (!HasSPE) HasFPU = true; diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -830,6 +830,8 @@ unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) const { if (Vector) { if (ST->hasAltivec()) return 128; + if (ST->hasPaired()) + return 64; return 0; } diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc32-paired.txt b/llvm/test/MC/Disassembler/PowerPC/ppc32-paired.txt new file mode 100644 --- /dev/null +++ b/llvm/test/MC/Disassembler/PowerPC/ppc32-paired.txt @@ -0,0 +1,127 @@ +# RUN: llvm-mc --disassemble %s -triple powerpc-unknown-unknown -mcpu=750cl | FileCheck %s +# CHECK: dcbz_l 0, 3 +0x10 0x00 0x1f 0xec +# CHECK: psq_l 3, 4(13), 1, 0 +0xe0 0x6d 0x80 0x04 +# CHECK: psq_lu 3, 4(13), 1, 0 +0xe4 0x6d 0x80 0x04 +# CHECK: psq_lux 3, 0, 3, 1, 0 +0x10 0x60 0x1c 0x4c +# CHECK: psq_lx 3, 0, 3, 1, 0 +0x10 0x60 0x1c 0x0c +# CHECK: psq_st 3, 4(13), 1, 0 +0xf0 0x6d 0x80 0x04 +# CHECK: psq_stu 3, 4(13), 1, 0 +0xf4 0x6d 0x80 0x04 +# CHECK: psq_stux 3, 0, 3, 1, 0 +0x10 0x60 0x1c 0x4e +# CHECK: psq_stx 3, 0, 3, 1, 0 +0x10 0x60 0x1c 0x0e +# CHECK: ps_abs 3, 3 +0x10 0x60 0x1a 0x10 +# CHECK: ps_abs. 3, 3 +0x10 0x60 0x1a 0x11 +# CHECK: ps_add 3, 3, 4 +0x10 0x63 0x20 0x2a +# CHECK: ps_add. 3, 3, 4 +0x10 0x63 0x20 0x2b +# CHECK: ps_cmpo0 1, 3, 4 +0x10 0x83 0x20 0x40 +# CHECK: ps_cmpo1 1, 3, 4 +0x10 0x83 0x20 0xC0 +# CHECK: ps_cmpu0 1, 3, 4 +0x10 0x83 0x20 0x00 +# CHECK: ps_cmpu1 1, 3, 4 +0x10 0x83 0x20 0x80 +# CHECK: ps_div 3, 3, 4 +0x10 0x63 0x20 0x24 +# CHECK: ps_div. 
3, 3, 4 +0x10 0x63 0x20 0x25 +# CHECK: ps_madd 3, 3, 4, 5 +0x10 0x63 0x29 0x3a +# CHECK: ps_madd. 3, 3, 4, 5 +0x10 0x63 0x29 0x3b +# CHECK: ps_madds0 3, 3, 4, 5 +0x10 0x63 0x29 0x1c +# CHECK: ps_madds0. 3, 3, 4, 5 +0x10 0x63 0x29 0x1d +# CHECK: ps_madds1 3, 3, 4, 5 +0x10 0x63 0x29 0x1e +# CHECK: ps_madds1. 3, 3, 4, 5 +0x10 0x63 0x29 0x1f +# CHECK: ps_merge00 3, 3, 4 +0x10 0x63 0x24 0x20 +# CHECK: ps_merge00. 3, 3, 4 +0x10 0x63 0x24 0x21 +# CHECK: ps_merge01 3, 3, 4 +0x10 0x63 0x24 0x60 +# CHECK: ps_merge01. 3, 3, 4 +0x10 0x63 0x24 0x61 +# CHECK: ps_merge10 3, 3, 4 +0x10 0x63 0x24 0xA0 +# CHECK: ps_merge10. 3, 3, 4 +0x10 0x63 0x24 0xA1 +# CHECK: ps_merge11 3, 3, 4 +0x10 0x63 0x24 0xE0 +# CHECK: ps_merge11. 3, 3, 4 +0x10 0x63 0x24 0xE1 +# CHECK: ps_mr 3, 4 +0x10 0x60 0x20 0x90 +# CHECK: ps_mr. 3, 4 +0x10 0x60 0x20 0x91 +# CHECK: ps_msub 3, 3, 4, 5 +0x10 0x63 0x29 0x38 +# CHECK: ps_msub. 3, 3, 4, 5 +0x10 0x63 0x29 0x39 +# CHECK: ps_mul 3, 3, 4 +0x10 0x63 0x01 0x32 +# CHECK: ps_mul. 3, 3, 4 +0x10 0x63 0x01 0x33 +# CHECK: ps_muls0 3, 3, 4 +0x10 0x63 0x01 0x18 +# CHECK: ps_muls0. 3, 3, 4 +0x10 0x63 0x01 0x19 +# CHECK: ps_muls1 3, 3, 4 +0x10 0x63 0x01 0x1A +# CHECK: ps_muls1. 3, 3, 4 +0x10 0x63 0x01 0x1B +# CHECK: ps_nabs 3, 3 +0x10 0x60 0x19 0x10 +# CHECK: ps_nabs. 3, 3 +0x10 0x60 0x19 0x11 +# CHECK: ps_neg 3, 3 +0x10 0x60 0x18 0x50 +# CHECK: ps_neg. 3, 3 +0x10 0x60 0x18 0x51 +# CHECK: ps_nmadd 3, 3, 4, 5 +0x10 0x63 0x29 0x3e +# CHECK: ps_nmadd. 3, 3, 4, 5 +0x10 0x63 0x29 0x3f +# CHECK: ps_nmsub 3, 3, 4, 5 +0x10 0x63 0x29 0x3c +# CHECK: ps_nmsub. 3, 3, 4, 5 +0x10 0x63 0x29 0x3d +# CHECK: ps_res 3, 3 +0x10 0x60 0x18 0x30 +# CHECK: ps_res. 3, 3 +0x10 0x60 0x18 0x31 +# CHECK: ps_rsqrte 3, 3 +0x10 0x60 0x18 0x34 +# CHECK: ps_rsqrte. 3, 3 +0x10 0x60 0x18 0x35 +# CHECK: ps_sel 3, 3, 4, 5 +0x10 0x63 0x29 0x2e +# CHECK: ps_sel. 3, 3, 4, 5 +0x10 0x63 0x29 0x2f +# CHECK: ps_sub 3, 3, 4 +0x10 0x63 0x20 0x28 +# CHECK: ps_sub. 
3, 3, 4 +0x10 0x63 0x20 0x29 +# CHECK: ps_sum0 3, 3, 4, 5 +0x10 0x63 0x29 0x14 +# CHECK: ps_sum0. 3, 3, 4, 5 +0x10 0x63 0x29 0x15 +# CHECK: ps_sum1 3, 3, 4, 5 +0x10 0x63 0x29 0x16 +# CHECK: ps_sum1. 3, 3, 4, 5 +0x10 0x63 0x29 0x17 \ No newline at end of file