Index: include/llvm/IR/IntrinsicsPowerPC.td =================================================================== --- include/llvm/IR/IntrinsicsPowerPC.td +++ include/llvm/IR/IntrinsicsPowerPC.td @@ -1135,3 +1135,130 @@ def int_ppc_cfence : Intrinsic<[], [llvm_anyint_ty], []>; } + +//===----------------------------------------------------------------------===// +// PowerPC SPE Intrinsic Definitions. + +let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". + +/// PowerPC_SPE_Intrinsic - Base class for all SPE intrinsics. +class PowerPC_SPE_Intrinsic ret_types, + list param_types, + list properties> + : GCCBuiltin, + Intrinsic; + +/// Single argument +class PowerPC_SPE_Vec_Intrinsic21 + : PowerPC_SPE_Intrinsic; + +class PowerPC_SPE_Vec_Intrinsic1 + : PowerPC_SPE_Intrinsic; + +/// Two argument (vectors) +class PowerPC_SPE_Vec_Intrinsic2 + : PowerPC_SPE_Intrinsic; + +/// Single plus immediate +class PowerPC_SPE_Vec_IntrinsicI + : PowerPC_SPE_Intrinsic; + +class PowerPC_SPE_Vec_I1 + : PowerPC_SPE_Vec_Intrinsic1; + +class PowerPC_SPE_Vec_I2 + : PowerPC_SPE_Vec_Intrinsic1; + +class PowerPC_SPE_Vec_II + : PowerPC_SPE_Vec_IntrinsicI; + +class PowerPC_SPE_Vec_F1 + : PowerPC_SPE_Vec_Intrinsic1; + +class PowerPC_SPE_Vec_F2 + : PowerPC_SPE_Vec_Intrinsic1; + + +def int_ppc_spe_brinc : + PowerPC_SPE_Intrinsic<"brinc", [llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; + +def int_ppc_spe_evabs : PowerPC_SPE_Vec_I1<"evabs">; +def int_ppc_spe_evneg : PowerPC_SPE_Vec_I1<"evneg">; +def int_ppc_spe_evaddiw : PowerPC_SPE_Vec_II<"evaddiw">; +def int_ppc_spe_evaddw : PowerPC_SPE_Vec_I2<"evaddw">; +def int_ppc_spe_evsubfiw : PowerPC_SPE_Vec_II<"evsubfiw">; +def int_ppc_spe_evsubfw: PowerPC_SPE_Vec_I2<"evsubfw">; +def int_ppc_spe_evand: PowerPC_SPE_Vec_I2<"evand">; +def int_ppc_spe_evandc: PowerPC_SPE_Vec_I2<"evandc">; +def int_ppc_spe_evnand: PowerPC_SPE_Vec_I2<"evnand">; +def int_ppc_spe_evor: PowerPC_SPE_Vec_I2<"evor">; +def int_ppc_spe_evorc: PowerPC_SPE_Vec_I2<"evorc">; +def 
int_ppc_spe_evnor: PowerPC_SPE_Vec_I2<"evnor">; +def int_ppc_spe_evextsb : PowerPC_SPE_Vec_I1<"evextsb">; +def int_ppc_spe_evextsh : PowerPC_SPE_Vec_I1<"evextsh">; +def int_ppc_spe_evrlw : PowerPC_SPE_Vec_I2<"evrlw">; +def int_ppc_spe_evrlwi : PowerPC_SPE_Vec_II<"evrlwi">; +def int_ppc_spe_evslw : PowerPC_SPE_Vec_I2<"evslw">; +def int_ppc_spe_evslwi : PowerPC_SPE_Vec_II<"evslwi">; +def int_ppc_spe_evsrws : PowerPC_SPE_Vec_I2<"evsrws">; +def int_ppc_spe_evsrwis : PowerPC_SPE_Vec_II<"evsrwis">; +def int_ppc_spe_evsrwu : PowerPC_SPE_Vec_I2<"evsrwu">; +def int_ppc_spe_evsrwiu : PowerPC_SPE_Vec_II<"evsrwiu">; +def int_ppc_spe_evcntlsw : PowerPC_SPE_Vec_I1<"evcntlsw">; +def int_ppc_spe_evcntlzw : PowerPC_SPE_Vec_I1<"evcntlzw">; +def int_ppc_spe_evrndw : PowerPC_SPE_Vec_I1<"evrndw">; +def int_ppc_spe_evmergehi : PowerPC_SPE_Vec_I2<"evmergehi">; +def int_ppc_spe_evmergelo : PowerPC_SPE_Vec_I2<"evmergelo">; +def int_ppc_spe_evmergehilo : PowerPC_SPE_Vec_I2<"evmergehilo">; +def int_ppc_spe_evmergelohi : PowerPC_SPE_Vec_I2<"evmergelohi">; +def int_ppc_spe_evsplati : PowerPC_SPE_Intrinsic<"evsplati", + [llvm_v2i32_ty], [llvm_i32_ty], [IntrNoMem]>; +def int_ppc_spe_evsplatfi : PowerPC_SPE_Intrinsic<"evsplatfi", + [llvm_v2i32_ty], [llvm_i32_ty], [IntrNoMem]>; +def int_ppc_spe_divws : PowerPC_SPE_Vec_I2<"evdivws">; +def int_ppc_spe_divwu : PowerPC_SPE_Vec_I2<"evdivwu">; +def int_ppc_spe_mra : PowerPC_SPE_Vec_I1<"evmra">; + +def int_ppc_spe_evfsabs : PowerPC_SPE_Vec_F1<"evfsabs">; +def int_ppc_spe_evfsnabs : PowerPC_SPE_Vec_F1<"evfsnabs">; +def int_ppc_spe_evfsneg : PowerPC_SPE_Vec_F1<"evfsneg">; +def int_ppc_spe_evfsadd : PowerPC_SPE_Vec_F2<"evfsadd">; +def int_ppc_spe_evfssub : PowerPC_SPE_Vec_F2<"evfssub">; +def int_ppc_spe_evfsmul : PowerPC_SPE_Vec_F2<"evfsmul">; +def int_ppc_spe_evfsdiv : PowerPC_SPE_Vec_F2<"evfsdiv">; +def int_ppc_spe_evfcfui : PowerPC_SPE_Vec_Intrinsic21<"evfscfui", + llvm_v2f32_ty, + llvm_v2i32_ty>; +def int_ppc_spe_evfcfsi : 
PowerPC_SPE_Vec_Intrinsic21<"evfscfsi", + llvm_v2f32_ty, + llvm_v2i32_ty>; +def int_ppc_spe_evfcfuf : PowerPC_SPE_Vec_Intrinsic21<"evfscfuf", + llvm_v2f32_ty, + llvm_v2i32_ty>; +def int_ppc_spe_evfcfsf : PowerPC_SPE_Vec_Intrinsic21<"evfscfsf", + llvm_v2f32_ty, + llvm_v2i32_ty>; + +def int_ppc_spe_evfctui : PowerPC_SPE_Vec_Intrinsic21<"evfsctui", + llvm_v2i32_ty, + llvm_v2f32_ty>; +def int_ppc_spe_evfctsi : PowerPC_SPE_Vec_Intrinsic21<"evfsctsi", + llvm_v2i32_ty, + llvm_v2f32_ty>; +def int_ppc_spe_evfctuf : PowerPC_SPE_Vec_Intrinsic21<"evfsctuf", + llvm_v2i32_ty, + llvm_v2f32_ty>; +def int_ppc_spe_evfctsf : PowerPC_SPE_Vec_Intrinsic21<"evfsctsf", + llvm_v2i32_ty, + llvm_v2f32_ty>; +def int_ppc_spe_evfctuiz : PowerPC_SPE_Vec_Intrinsic21<"evfsctuiz", + llvm_v2i32_ty, + llvm_v2f32_ty>; +def int_ppc_spe_evfctsiz : PowerPC_SPE_Vec_Intrinsic21<"evfsctsiz", + llvm_v2i32_ty, + llvm_v2f32_ty>; +} Index: lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp =================================================================== --- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -83,6 +83,16 @@ PPC::F24, PPC::F25, PPC::F26, PPC::F27, PPC::F28, PPC::F29, PPC::F30, PPC::F31 }; +static const MCPhysReg SPERegs[32] = { + PPC::S0, PPC::S1, PPC::S2, PPC::S3, + PPC::S4, PPC::S5, PPC::S6, PPC::S7, + PPC::S8, PPC::S9, PPC::S10, PPC::S11, + PPC::S12, PPC::S13, PPC::S14, PPC::S15, + PPC::S16, PPC::S17, PPC::S18, PPC::S19, + PPC::S20, PPC::S21, PPC::S22, PPC::S23, + PPC::S24, PPC::S25, PPC::S26, PPC::S27, + PPC::S28, PPC::S29, PPC::S30, PPC::S31 +}; static const MCPhysReg VFRegs[32] = { PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3, PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7, @@ -645,6 +655,16 @@ Inst.addOperand(MCOperand::createReg(QFRegs[getReg()])); } + void addRegSPE4RCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(RRegs[getReg()])); + } + + void addRegSPERCOperands(MCInst 
&Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(SPERegs[getReg()])); + } + void addRegCRBITRCOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::createReg(CRBITRegs[getCRBit()])); Index: lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp =================================================================== --- lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp +++ lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -226,6 +226,17 @@ PPC::QF28, PPC::QF29, PPC::QF30, PPC::QF31 }; +static const unsigned SPERegs[] = { + PPC::S0, PPC::S1, PPC::S2, PPC::S3, + PPC::S4, PPC::S5, PPC::S6, PPC::S7, + PPC::S8, PPC::S9, PPC::S10, PPC::S11, + PPC::S12, PPC::S13, PPC::S14, PPC::S15, + PPC::S16, PPC::S17, PPC::S18, PPC::S19, + PPC::S20, PPC::S21, PPC::S22, PPC::S23, + PPC::S24, PPC::S25, PPC::S26, PPC::S27, + PPC::S28, PPC::S29, PPC::S30, PPC::S31 +}; + template static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo, const unsigned (&Regs)[N]) { @@ -327,6 +338,18 @@ return decodeRegisterClass(Inst, RegNo, QFRegs); } +static DecodeStatus DecodeSPE4RCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, GPRegs); +} + +static DecodeStatus DecodeSPERCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SPERegs); +} + #define DecodeQSRCRegisterClass DecodeQFRCRegisterClass #define DecodeQBRCRegisterClass DecodeQFRCRegisterClass Index: lib/Target/PowerPC/PPC.td =================================================================== --- lib/Target/PowerPC/PPC.td +++ lib/Target/PowerPC/PPC.td @@ -35,6 +35,8 @@ def Directive32 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_32", "">; def Directive64 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_64", "">; def DirectiveA2 : 
SubtargetFeature<"", "DarwinDirective", "PPC::DIR_A2", "">; +def DirectiveE500 : SubtargetFeature<"", "DarwinDirective", + "PPC::DIR_E500", "">; def DirectiveE500mc : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_E500mc", "">; def DirectiveE5500 : SubtargetFeature<"", "DarwinDirective", @@ -356,6 +358,10 @@ FeatureFRES, FeatureFRSQRTE, Feature64Bit /*, Feature64BitRegs */, FeatureMFTB, DeprecatedDST]>; +def : ProcessorModel<"e500", PPCE500Model, + [DirectiveE500, + FeatureSPE, FeatureICBT, FeatureBookE, + FeatureISEL, FeatureMFTB]>; def : ProcessorModel<"e500mc", PPCE500mcModel, [DirectiveE500mc, FeatureSTFIWX, FeatureICBT, FeatureBookE, Index: lib/Target/PowerPC/PPCAsmPrinter.cpp =================================================================== --- lib/Target/PowerPC/PPCAsmPrinter.cpp +++ lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -1458,6 +1458,7 @@ "ppc750", "ppc970", "ppcA2", + "ppce500", "ppce500mc", "ppce5500", "power3", Index: lib/Target/PowerPC/PPCCallingConv.td =================================================================== --- lib/Target/PowerPC/PPCCallingConv.td +++ lib/Target/PowerPC/PPCCallingConv.td @@ -59,8 +59,17 @@ // Floating point types returned as "direct" go into F1 .. F8; note that // only the ELFv2 ABI fully utilizes all these registers. - CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, - CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, + CCIfNotSubtarget<"hasSPE()", + CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>>, + CCIfNotSubtarget<"hasSPE()", + CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>>, + CCIfSubtarget<"hasSPE()", + CCIfType<[f32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>>, + CCIfSubtarget<"hasSPE()", + CCIfType<[f64], CCAssignToReg<[S3, S4, S5, S6, S7, S8, S9, S10]>>>, + CCIfSubtarget<"hasSPE()", + CCIfType<[v2i32,v2f32], + CCAssignToReg<[S3, S4, S5, S6, S7, S8, S9, S10]>>>, // QPX vectors are returned in QF1 and QF2. 
CCIfType<[v4f64, v4f32, v4i1], @@ -156,7 +165,18 @@ CCIfType<[f64], CCIfSplit>>, // FP values are passed in F1 - F8. - CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, + CCIfType<[f32, f64], + CCIfNotSubtarget<"hasSPE()", + CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>>, + CCIfType<[f64], + CCIfSubtarget<"hasSPE()", + CCAssignToReg<[S3, S4, S5, S6, S7, S8, S9, S10]>>>, + CCIfType<[f32], + CCIfSubtarget<"hasSPE()", + CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>>, + CCIfType<[v2i32,v2f32], + CCIfSubtarget<"hasSPE()", + CCAssignToReg<[S3, S4, S5, S6, S7, S8, S9, S10]>>>, // Split arguments have an alignment of 8 bytes on the stack. CCIfType<[i32], CCIfSplit>>, @@ -165,7 +185,10 @@ // Floats are stored in double precision format, thus they have the same // alignment and size as doubles. - CCIfType<[f32,f64], CCAssignToStack<8, 8>>, + CCIfType<[f32,f64], CCIfNotSubtarget<"hasSPE()", CCAssignToStack<8, 8>>>, + CCIfType<[f32], CCIfSubtarget<"hasSPE()", CCAssignToStack<4, 4>>>, + CCIfType<[f64, v2i32, v2f32], + CCIfSubtarget<"hasSPE()", CCAssignToStack<8, 8>>>, // QPX vectors that are stored in double precision need 32-byte alignment. CCIfType<[v4f64, v4i1], CCAssignToStack<32, 32>>, Index: lib/Target/PowerPC/PPCFastISel.cpp =================================================================== --- lib/Target/PowerPC/PPCFastISel.cpp +++ lib/Target/PowerPC/PPCFastISel.cpp @@ -153,7 +153,8 @@ return RC->getID() == PPC::VSSRCRegClassID; } bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value, - bool isZExt, unsigned DestReg); + bool isZExt, unsigned DestReg, + const PPC::Predicate Pred); bool PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, const TargetRegisterClass *RC, bool IsZExt = true, unsigned FP64LoadOpc = PPC::LFD); @@ -464,6 +465,7 @@ bool IsZExt, unsigned FP64LoadOpc) { unsigned Opc; bool UseOffset = true; + bool UseSPE = PPCSubTarget->hasSPE(); // If ResultReg is given, it determines the register class of the load. 
// Otherwise, RC is the register class to use. If the result of the @@ -475,8 +477,8 @@ const TargetRegisterClass *UseRC = (ResultReg ? MRI.getRegClass(ResultReg) : (RC ? RC : - (VT == MVT::f64 ? &PPC::F8RCRegClass : - (VT == MVT::f32 ? &PPC::F4RCRegClass : + (VT == MVT::f64 ? (UseSPE ? &PPC::SPERCRegClass : &PPC::F8RCRegClass) : + (VT == MVT::f32 ? (UseSPE ? &PPC::SPE4RCRegClass : &PPC::F4RCRegClass) : (VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass : &PPC::GPRC_and_GPRC_NOR0RegClass))))); @@ -505,7 +507,7 @@ UseOffset = ((Addr.Offset & 3) == 0); break; case MVT::f32: - Opc = PPC::LFS; + Opc = PPCSubTarget->hasSPE() ? PPC::SPELWZ : PPC::LFS; break; case MVT::f64: Opc = FP64LoadOpc; @@ -576,6 +579,8 @@ case PPC::LD: Opc = PPC::LDX; break; case PPC::LFS: Opc = IsVSSRC ? PPC::LXSSPX : PPC::LFSX; break; case PPC::LFD: Opc = IsVSFRC ? PPC::LXSDX : PPC::LFDX; break; + case PPC::EVLDD: Opc = PPC::EVLDDX; break; + case PPC::SPELWZ: Opc = PPC::SPELWZX; break; } auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), @@ -618,7 +623,8 @@ AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr; unsigned ResultReg = 0; - if (!PPCEmitLoad(VT, ResultReg, Addr, RC)) + if (!PPCEmitLoad(VT, ResultReg, Addr, RC, true, + PPCSubTarget->hasSPE() ? 
PPC::EVLDD : PPC::LFD)) return false; updateValueMap(I, ResultReg); return true; @@ -651,10 +657,16 @@ UseOffset = ((Addr.Offset & 3) == 0); break; case MVT::f32: - Opc = PPC::STFS; + if (PPCSubTarget->hasSPE()) + Opc = PPC::SPESTW; + else + Opc = PPC::STFS; break; case MVT::f64: - Opc = PPC::STFD; + if (PPCSubTarget->hasSPE()) + Opc = PPC::EVSTDD; + else + Opc = PPC::STFD; break; } @@ -719,6 +731,8 @@ case PPC::STD: Opc = PPC::STDX; break; case PPC::STFS: Opc = IsVSSRC ? PPC::STXSSPX : PPC::STFSX; break; case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break; + case PPC::EVSTDD: Opc = PPC::EVSTDDX; break; + case PPC::SPESTW: Opc = PPC::SPESTWX; break; } auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) @@ -792,7 +806,7 @@ unsigned CondReg = createResultReg(&PPC::CRRCRegClass); if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(), - CondReg)) + CondReg, PPCPred)) return false; BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCC)) @@ -820,7 +834,8 @@ // Attempt to emit a compare of the two source values. Signed and unsigned // comparisons are supported. Return false if we can't handle it. bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2, - bool IsZExt, unsigned DestReg) { + bool IsZExt, unsigned DestReg, + const PPC::Predicate Pred) { Type *Ty = SrcValue1->getType(); EVT SrcEVT = TLI.getValueType(DL, Ty, true); if (!SrcEVT.isSimple()) @@ -836,6 +851,7 @@ // similar to ARM in this regard. long Imm = 0; bool UseImm = false; + const bool UseSPE = PPCSubTarget->hasSPE(); // Only 16-bit integer constants can be represented in compares for // PowerPC. Others will be materialized into a register. 
@@ -854,10 +870,38 @@ switch (SrcVT.SimpleTy) { default: return false; case MVT::f32: - CmpOpc = PPC::FCMPUS; + if (UseSPE) { + switch (Pred) { + default: return false; + case PPC::PRED_EQ: + CmpOpc = PPC::EFSCMPEQ; + break; + case PPC::PRED_LT: + CmpOpc = PPC::EFSCMPLT; + break; + case PPC::PRED_GT: + CmpOpc = PPC::EFSCMPGT; + break; + } + } else + CmpOpc = PPC::FCMPUS; break; case MVT::f64: - CmpOpc = PPC::FCMPUD; + if (UseSPE) { + switch (Pred) { + default: return false; + case PPC::PRED_EQ: + CmpOpc = PPC::EFDCMPEQ; + break; + case PPC::PRED_LT: + CmpOpc = PPC::EFDCMPLT; + break; + case PPC::PRED_GT: + CmpOpc = PPC::EFDCMPGT; + break; + } + } else + CmpOpc = PPC::FCMPUD; break; case MVT::i1: case MVT::i8: @@ -945,9 +989,17 @@ return false; // Round the result to single precision. - unsigned DestReg = createResultReg(&PPC::F4RCRegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP), DestReg) - .addReg(SrcReg); + unsigned DestReg; + + if (PPCSubTarget->hasSPE()) { + DestReg = createResultReg(&PPC::SPE4RCRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::EFSCFD), DestReg) + .addReg(SrcReg); + } else { + DestReg = createResultReg(&PPC::F4RCRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP), DestReg) + .addReg(SrcReg); + } updateValueMap(I, DestReg); return true; @@ -1029,6 +1081,23 @@ if (SrcReg == 0) return false; + // Shortcut for SPE. Doesn't need to store/load, since it's all in the GPRs + if (PPCSubTarget->hasSPE()) { + unsigned Opc; + if (DstVT == MVT::f32) + Opc = IsSigned ? PPC::EFSCFSI : PPC::EFSCFUI; + else + Opc = IsSigned ? PPC::EFDCFSI : PPC::EFDCFUI; + + unsigned DestReg = createResultReg(&PPC::SPERCRegClass); + // Generate the convert. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) + .addReg(SrcReg); + // Bind the converted value to I; without this the result is never used. + updateValueMap(I, DestReg); + return true; + } + // We can only lower an unsigned convert if we have the newer // floating-point conversion operations. 
if (!IsSigned && !PPCSubTarget->hasFPCVT()) @@ -1123,8 +1190,9 @@ if (DstVT != MVT::i32 && DstVT != MVT::i64) return false; - // If we don't have FCTIDUZ and we need it, punt to SelectionDAG. - if (DstVT == MVT::i64 && !IsSigned && !PPCSubTarget->hasFPCVT()) + // If we don't have FCTIDUZ, or SPE, and we need it, punt to SelectionDAG. + if (DstVT == MVT::i64 && !IsSigned && + !PPCSubTarget->hasFPCVT() && !PPCSubTarget->hasSPE()) return false; Value *Src = I->getOperand(0); @@ -1152,23 +1220,44 @@ // Determine the opcode for the conversion, which takes place // entirely within FPRs. - unsigned DestReg = createResultReg(&PPC::F8RCRegClass); + unsigned DestReg; unsigned Opc; - if (DstVT == MVT::i32) - if (IsSigned) - Opc = PPC::FCTIWZ; + if (PPCSubTarget->hasSPE()) { + if (DstVT == MVT::i32) { + DestReg = createResultReg(&PPC::SPE4RCRegClass); + if (IsSigned) + Opc = InRC == &PPC::SPE4RCRegClass ? PPC::EFSCTSI : PPC::EFDCTSI; + else + Opc = InRC == &PPC::SPE4RCRegClass ? PPC::EFSCTUI : PPC::EFDCTUI; + } else { + // TODO: Convert single-precision to double if InReg SPE4RC + DestReg = createResultReg(&PPC::SPERCRegClass); + Opc = IsSigned ? PPC::EFDCTSI : PPC::EFDCTUI; + } + } else { + DestReg = createResultReg(&PPC::F8RCRegClass); + if (DstVT == MVT::i32) + if (IsSigned) + Opc = PPC::FCTIWZ; + else + Opc = PPCSubTarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ; else - Opc = PPCSubTarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ; - else - Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ; + Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ; + } // Generate the convert. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) .addReg(SrcReg); // Now move the integer value from a float register to an integer register. 
- unsigned IntReg = PPCMoveToIntReg(I, DstVT, DestReg, IsSigned); + unsigned IntReg; + + if (PPCSubTarget->hasSPE()) + IntReg = DestReg; + else + IntReg = PPCMoveToIntReg(I, DstVT, DestReg, IsSigned); + if (IntReg == 0) return false; @@ -1916,8 +2005,11 @@ unsigned Align = DL.getPrefTypeAlignment(CFP->getType()); assert(Align > 0 && "Unexpectedly missing alignment information!"); unsigned Idx = MCP.getConstantPoolIndex(cast(CFP), Align); + const bool hasSPE = PPCSubTarget->hasSPE(); const TargetRegisterClass *RC = - (VT == MVT::f32) ? &PPC::F4RCRegClass : &PPC::F8RCRegClass; + hasSPE ? + ((VT == MVT::f32) ? &PPC::SPE4RCRegClass : &PPC::SPERCRegClass) : + ((VT == MVT::f32) ? &PPC::F4RCRegClass : &PPC::F8RCRegClass); unsigned DestReg = createResultReg(RC); CodeModel::Model CModel = TM.getCodeModel(); @@ -1925,7 +2017,8 @@ MachinePointerInfo::getConstantPool(*FuncInfo.MF), MachineMemOperand::MOLoad, (VT == MVT::f32) ? 4 : 8, Align); - unsigned Opc = (VT == MVT::f32) ? PPC::LFS : PPC::LFD; + unsigned Opc = (hasSPE) ? ((VT == MVT::f32) ? PPC::SPELWZ : PPC::EVLDD) : + ((VT == MVT::f32) ? PPC::LFS : PPC::LFD); unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass); PPCFuncInfo->setUsesTOCBasePtr(); @@ -2261,7 +2354,8 @@ unsigned ResultReg = MI->getOperand(0).getReg(); - if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt)) + if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt, + PPCSubTarget->hasSPE() ? 
PPC::EVLDD : PPC::LFD)) return false; MI->eraseFromParent(); Index: lib/Target/PowerPC/PPCFrameLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCFrameLowering.cpp +++ lib/Target/PowerPC/PPCFrameLowering.cpp @@ -173,7 +173,27 @@ {PPC::V23, -144}, {PPC::V22, -160}, {PPC::V21, -176}, - {PPC::V20, -192}}; + {PPC::V20, -192}, + + // SPE register save area (overlaps Vector save area). + {PPC::S31, -8}, + {PPC::S30, -16}, + {PPC::S29, -24}, + {PPC::S28, -32}, + {PPC::S27, -40}, + {PPC::S26, -48}, + {PPC::S25, -56}, + {PPC::S24, -64}, + {PPC::S23, -72}, + {PPC::S22, -80}, + {PPC::S21, -88}, + {PPC::S20, -96}, + {PPC::S19, -104}, + {PPC::S18, -112}, + {PPC::S17, -120}, + {PPC::S16, -128}, + {PPC::S15, -136}, + {PPC::S14, -144}}; static const SpillSlot Offsets64[] = { // Floating-point register save area offsets. @@ -1693,7 +1713,8 @@ for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); - if (PPC::GPRCRegClass.contains(Reg)) { + if (PPC::GPRCRegClass.contains(Reg) || + PPC::SPE4RCRegClass.contains(Reg)) { HasGPSaveArea = true; GPRegs.push_back(CSI[i]); @@ -1722,7 +1743,8 @@ ; // do nothing, as we already know whether CRs are spilled } else if (PPC::VRSAVERCRegClass.contains(Reg)) { HasVRSAVESaveArea = true; - } else if (PPC::VRRCRegClass.contains(Reg)) { + } else if (PPC::VRRCRegClass.contains(Reg) || + PPC::SPERCRegClass.contains(Reg)) { HasVRSaveArea = true; VRegs.push_back(CSI[i]); Index: lib/Target/PowerPC/PPCISelDAGToDAG.cpp =================================================================== --- lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -2146,10 +2146,60 @@ Opc = PPC::CMPD; } } else if (LHS.getValueType() == MVT::f32) { - Opc = PPC::FCMPUS; + if (PPCSubTarget->hasSPE()) { + switch (CC) { + default: + case ISD::SETEQ: + case ISD::SETNE: + Opc = PPC::EFSCMPEQ; + break; + case ISD::SETLT: + case ISD::SETGE: + case ISD::SETOLT: + case ISD::SETOGE: 
+ case ISD::SETULT: + case ISD::SETUGE: + Opc = PPC::EFSCMPLT; + break; + case ISD::SETGT: + case ISD::SETLE: + case ISD::SETOGT: + case ISD::SETOLE: + case ISD::SETUGT: + case ISD::SETULE: + Opc = PPC::EFSCMPGT; + break; + } + } else + Opc = PPC::FCMPUS; } else { assert(LHS.getValueType() == MVT::f64 && "Unknown vt!"); - Opc = PPCSubTarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD; + if (PPCSubTarget->hasSPE()) { + switch (CC) { + default: + case ISD::SETEQ: + case ISD::SETNE: + Opc = PPC::EFDCMPEQ; + break; + case ISD::SETLT: + case ISD::SETGE: + case ISD::SETOLT: + case ISD::SETOGE: + case ISD::SETULT: + case ISD::SETUGE: + Opc = PPC::EFDCMPLT; + break; + case ISD::SETGT: + case ISD::SETLE: + case ISD::SETOGT: + case ISD::SETOLE: + case ISD::SETUGT: + case ISD::SETULE: + Opc = PPC::EFDCMPGT; + break; + } + } else + Opc = PPCSubTarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD; } return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0); } @@ -2422,7 +2472,7 @@ // Altivec Vector compare instructions do not set any CR register by default and // vector compare operations return the same type as the operands. - if (LHS.getValueType().isVector()) { + if (LHS.getValueType().isVector() && PPCSubTarget->hasAltivec()) { if (PPCSubTarget->hasQPX()) return false; @@ -2453,6 +2503,12 @@ SDValue CCReg = SelectCC(LHS, RHS, CC, dl); SDValue IntCR; + // SPE e*cmp* instructions only set the 'gt' bit, so hard-code that + // The correct compare instruction is already set by SelectCC() + if (PPCSubTarget->hasSPE() && (LHS.getValueType().isFloatingPoint())) { + Idx = 1; + } + // Force the ccreg into CR7. 
SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32); @@ -2995,11 +3051,15 @@ else if (N->getValueType(0) == MVT::f32) if (PPCSubTarget->hasP8Vector()) SelectCCOp = PPC::SELECT_CC_VSSRC; + else if (PPCSubTarget->hasSPE()) + SelectCCOp = PPC::SELECT_CC_SPE4; else SelectCCOp = PPC::SELECT_CC_F4; else if (N->getValueType(0) == MVT::f64) if (PPCSubTarget->hasVSX()) SelectCCOp = PPC::SELECT_CC_VSFRC; + else if (PPCSubTarget->hasSPE()) + SelectCCOp = PPC::SELECT_CC_SPE; else SelectCCOp = PPC::SELECT_CC_F8; else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f64) Index: lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- lib/Target/PowerPC/PPCISelLowering.h +++ lib/Target/PowerPC/PPCISelLowering.h @@ -560,6 +560,8 @@ bool useSoftFloat() const override; + bool hasSPE() const; + MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { return MVT::i32; } @@ -854,6 +856,13 @@ unsigned JTI, MCContext &Ctx) const override; + unsigned getNumRegistersForCallingConv(LLVMContext &Context, + EVT VT) const override; + + MVT getRegisterTypeForCallingConv(MVT VT) const override; + MVT getRegisterTypeForCallingConv(LLVMContext &Context, + EVT VT) const override; + private: struct ReuseLoadInfo { SDValue Ptr; Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -132,8 +132,15 @@ // Set up the register classes. 
addRegisterClass(MVT::i32, &PPC::GPRCRegClass); if (!useSoftFloat()) { - addRegisterClass(MVT::f32, &PPC::F4RCRegClass); - addRegisterClass(MVT::f64, &PPC::F8RCRegClass); + if (hasSPE()) { + addRegisterClass(MVT::f32, &PPC::SPE4RCRegClass); + addRegisterClass(MVT::f64, &PPC::SPERCRegClass); + addRegisterClass(MVT::v2i32, &PPC::SPERCRegClass); + addRegisterClass(MVT::v2f32, &PPC::SPERCRegClass); + } else { + addRegisterClass(MVT::f32, &PPC::F4RCRegClass); + addRegisterClass(MVT::f64, &PPC::F8RCRegClass); + } } // Match BITREVERSE to customized fast code sequence in the td file. @@ -335,12 +342,19 @@ setOperationAction(ISD::BR_JT, MVT::Other, Expand); - // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores. - setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + if (Subtarget.hasSPE()) { + // SPE has built-in conversions + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal); + } else { + // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores. + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); - // PowerPC does not have [U|S]INT_TO_FP - setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); - setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); + // PowerPC does not have [U|S]INT_TO_FP + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); + } if (Subtarget.hasDirectMove() && isPPC64) { setOperationAction(ISD::BITCAST, MVT::f32, Legal); @@ -465,7 +479,10 @@ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); } else { // PowerPC does not have FP_TO_UINT on 32-bit implementations. - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); + if (Subtarget.hasSPE()) + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal); + else + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); } // With the instructions enabled under FPCVT, we can do everything. 
@@ -499,6 +516,19 @@ setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); } + if (Subtarget.hasSPE()) { + setOperationAction(ISD::ADD, MVT::v2i32, Legal); + setOperationAction(ISD::FADD, MVT::v2f32, Legal); + setOperationAction(ISD::SUB, MVT::v2i32, Legal); + setOperationAction(ISD::FSUB, MVT::v2f32, Legal); + + setOperationAction(ISD::AND, MVT::v2i32, Legal); + setOperationAction(ISD::OR, MVT::v2i32, Legal); + setOperationAction(ISD::XOR, MVT::v2i32, Legal); + + setOperationAction(ISD::SETCC, MVT::v2i32, Legal); + setOperationAction(ISD::SETCC, MVT::v2f32, Legal); + } if (Subtarget.hasAltivec()) { // First set operation action for all vector types to expand. Then we // will selectively turn on ones that can be effectively codegen'd. @@ -1024,6 +1054,7 @@ default: break; case PPC::DIR_970: case PPC::DIR_A2: + case PPC::DIR_E500: case PPC::DIR_E500mc: case PPC::DIR_E5500: case PPC::DIR_PWR4: @@ -1114,10 +1145,34 @@ return Align; } +unsigned PPCTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, + EVT VT) const { + if (Subtarget.hasSPE() && VT == MVT::f64) + return 2; + return PPCTargetLowering::getNumRegisters(Context, VT); +} + +MVT PPCTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, + EVT VT) const { + if (Subtarget.hasSPE() && VT == MVT::f64) + return MVT::i32; + return PPCTargetLowering::getRegisterType(Context, VT); +} + +MVT PPCTargetLowering::getRegisterTypeForCallingConv(MVT VT) const { + if (Subtarget.hasSPE() && VT == MVT::f64) + return MVT::i32; + return PPCTargetLowering::getRegisterType(VT); +} + bool PPCTargetLowering::useSoftFloat() const { return Subtarget.useSoftFloat(); } +bool PPCTargetLowering::hasSPE() const { + return Subtarget.hasSPE(); +} + const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { switch ((PPCISD::NodeType)Opcode) { case PPCISD::FIRST_NUMBER: break; @@ -3271,7 +3326,7 @@ // Reserve space for the linkage area on the stack. 
unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); CCInfo.AllocateStack(LinkageSize, PtrByteSize); - if (useSoftFloat()) + if (useSoftFloat() || hasSPE()) CCInfo.PreAnalyzeFormalArguments(Ins); CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4); @@ -3295,12 +3350,16 @@ case MVT::f32: if (Subtarget.hasP8Vector()) RC = &PPC::VSSRCRegClass; + else if (Subtarget.hasSPE()) + RC = &PPC::SPE4RCRegClass; else RC = &PPC::F4RCRegClass; break; case MVT::f64: if (Subtarget.hasVSX()) RC = &PPC::VSFRCRegClass; + else if (Subtarget.hasSPE()) + RC = &PPC::SPERCRegClass; else RC = &PPC::F8RCRegClass; break; @@ -3322,6 +3381,10 @@ case MVT::v4i1: RC = &PPC::QBRCRegClass; break; + case MVT::v2i32: + case MVT::v2f32: + RC= &PPC::SPERCRegClass; + break; } // Transform the arguments stored in physical registers into virtual ones. @@ -3389,7 +3452,7 @@ }; unsigned NumFPArgRegs = array_lengthof(FPArgRegs); - if (useSoftFloat()) + if (useSoftFloat() || hasSPE()) NumFPArgRegs = 0; FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs)); @@ -8426,6 +8489,11 @@ return DAG.getRegister(PPC::R2, MVT::i32); } + if (IntrinsicID == Intrinsic::ppc_spe_evabs) { + SDValue V = Op.getOperand(1); + return DAG.getNode(ISD::ABS, dl, V.getValueType(), V); + } + // We are looking for absolute values here. 
// The idea is to try to fit one of two patterns: // max (a, (0-a)) OR max ((0-a), a) @@ -9745,10 +9813,14 @@ MI.getOpcode() == PPC::SELECT_CC_VSFRC || MI.getOpcode() == PPC::SELECT_CC_VSSRC || MI.getOpcode() == PPC::SELECT_CC_VSRC || + MI.getOpcode() == PPC::SELECT_CC_SPE4 || + MI.getOpcode() == PPC::SELECT_CC_SPE || MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 || MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 || + MI.getOpcode() == PPC::SELECT_SPE4 || + MI.getOpcode() == PPC::SELECT_SPE || MI.getOpcode() == PPC::SELECT_QFRC || MI.getOpcode() == PPC::SELECT_QSRC || MI.getOpcode() == PPC::SELECT_QBRC || @@ -10336,6 +10408,7 @@ return 3; case PPC::DIR_440: case PPC::DIR_A2: + case PPC::DIR_E500: case PPC::DIR_E500mc: case PPC::DIR_E5500: return 2; @@ -12731,14 +12804,21 @@ // really care overly much here so just give them all the same reg classes. case 'd': case 'f': - if (VT == MVT::f32 || VT == MVT::i32) - return std::make_pair(0U, &PPC::F4RCRegClass); - if (VT == MVT::f64 || VT == MVT::i64) - return std::make_pair(0U, &PPC::F8RCRegClass); - if (VT == MVT::v4f64 && Subtarget.hasQPX()) - return std::make_pair(0U, &PPC::QFRCRegClass); - if (VT == MVT::v4f32 && Subtarget.hasQPX()) - return std::make_pair(0U, &PPC::QSRCRegClass); + if (Subtarget.hasSPE()) { + if (VT == MVT::f32 || VT == MVT::i32) + return std::make_pair(0U, &PPC::SPE4RCRegClass); + if (VT == MVT::f64 || VT == MVT::i64) + return std::make_pair(0U, &PPC::SPERCRegClass); + } else { + if (VT == MVT::f32 || VT == MVT::i32) + return std::make_pair(0U, &PPC::F4RCRegClass); + if (VT == MVT::f64 || VT == MVT::i64) + return std::make_pair(0U, &PPC::F8RCRegClass); + if (VT == MVT::v4f64 && Subtarget.hasQPX()) + return std::make_pair(0U, &PPC::QFRCRegClass); + if (VT == MVT::v4f32 && Subtarget.hasQPX()) + return std::make_pair(0U, &PPC::QSRCRegClass); + } break; case 'v': if (VT == MVT::v4f64 && Subtarget.hasQPX()) Index: lib/Target/PowerPC/PPCInstrInfo.cpp 
=================================================================== --- lib/Target/PowerPC/PPCInstrInfo.cpp +++ lib/Target/PowerPC/PPCInstrInfo.cpp @@ -936,8 +936,19 @@ BuildMI(MBB, I, DL, get(PPC::MFVSRD), DestReg).addReg(SrcReg); getKillRegState(KillSrc); return; + } else if (PPC::SPERCRegClass.contains(SrcReg) && + PPC::SPE4RCRegClass.contains(DestReg)) { + BuildMI(MBB, I, DL, get(PPC::EFSCFD), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } else if (PPC::SPE4RCRegClass.contains(SrcReg) && + PPC::SPERCRegClass.contains(DestReg)) { + BuildMI(MBB, I, DL, get(PPC::EFDCFS), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; } + unsigned Opc; if (PPC::GPRCRegClass.contains(DestReg, SrcReg)) Opc = PPC::OR; @@ -970,6 +981,10 @@ Opc = PPC::QVFMRb; else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg)) Opc = PPC::CROR; + else if (PPC::SPERCRegClass.contains(DestReg, SrcReg)) + Opc = PPC::EVOR; + else if (PPC::SPE4RCRegClass.contains(DestReg, SrcReg)) + Opc = PPC::OR; else llvm_unreachable("Impossible reg-to-reg copy"); @@ -1015,6 +1030,16 @@ .addReg(SrcReg, getKillRegState(isKill)), FrameIdx)); + } else if (PPC::SPERCRegClass.hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::EVSTDD)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + } else if (PPC::SPE4RCRegClass.hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPESTW)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR)) .addReg(SrcReg, @@ -1157,6 +1182,12 @@ } else if (PPC::F4RCRegClass.hasSubClassEq(RC)) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg), FrameIdx)); + } else if (PPC::SPERCRegClass.hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::EVLDD), DestReg), + FrameIdx)); + } else if 
(PPC::SPE4RCRegClass.hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPELWZ), DestReg), + FrameIdx)); } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::RESTORE_CR), DestReg), Index: lib/Target/PowerPC/PPCInstrInfo.td =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.td +++ lib/Target/PowerPC/PPCInstrInfo.td @@ -516,6 +516,19 @@ let ParserMatchClass = PPCRegCRRCAsmOperand; } +def PPCRegSPERCAsmOperand : AsmOperandClass { + let Name = "RegSPERC"; let PredicateMethod = "isRegNumber"; +} +def sperc : RegisterOperand<SPERC> { + let ParserMatchClass = PPCRegSPERCAsmOperand; +} +def PPCRegSPE4RCAsmOperand : AsmOperandClass { + let Name = "RegSPE4RC"; let PredicateMethod = "isRegNumber"; +} +def spe4rc : RegisterOperand<SPE4RC> { + let ParserMatchClass = PPCRegSPE4RCAsmOperand; +} + def PPCU1ImmAsmOperand : AsmOperandClass { let Name = "U1Imm"; let PredicateMethod = "isU1Imm"; let RenderMethod = "addImmOperands"; @@ -855,7 +868,7 @@ def IsPPC4xx : Predicate<"PPCSubTarget->isPPC4xx()">; def IsPPC6xx : Predicate<"PPCSubTarget->isPPC6xx()">; def IsE500 : Predicate<"PPCSubTarget->isE500()">; -def HasSPE : Predicate<"PPCSubTarget->HasSPE()">; +def HasSPE : Predicate<"PPCSubTarget->hasSPE()">; def HasICBT : Predicate<"PPCSubTarget->hasICBT()">; def HasPartwordAtomics : Predicate<"PPCSubTarget->hasPartwordAtomics()">; def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">; @@ -863,6 +876,7 @@ def HasBPERMD : Predicate<"PPCSubTarget->hasBPERMD()">; def HasExtDiv : Predicate<"PPCSubTarget->hasExtDiv()">; def IsISA3_0 : Predicate<"PPCSubTarget->isISA3_0()">; +def HasTraditionalFPU : Predicate<"!PPCSubTarget->hasSPE()">; //===----------------------------------------------------------------------===// // PowerPC Multiclass Definitions. 
@@ -1200,12 +1214,14 @@ def SELECT_I8 : Pseudo<(outs g8rc:$dst), (ins crbitrc:$cond, g8rc_nox0:$T, g8rc_nox0:$F), "#SELECT_I8", [(set i64:$dst, (select i1:$cond, i64:$T, i64:$F))]>; +let Predicates = [HasTraditionalFPU] in { def SELECT_F4 : Pseudo<(outs f4rc:$dst), (ins crbitrc:$cond, f4rc:$T, f4rc:$F), "#SELECT_F4", [(set f32:$dst, (select i1:$cond, f32:$T, f32:$F))]>; def SELECT_F8 : Pseudo<(outs f8rc:$dst), (ins crbitrc:$cond, f8rc:$T, f8rc:$F), "#SELECT_F8", [(set f64:$dst, (select i1:$cond, f64:$T, f64:$F))]>; +} def SELECT_VRRC: Pseudo<(outs vrrc:$dst), (ins crbitrc:$cond, vrrc:$T, vrrc:$F), "#SELECT_VRRC", [(set v4i32:$dst, @@ -1789,12 +1805,14 @@ "lwz $rD, $src", IIC_LdStLoad, [(set i32:$rD, (load iaddr:$src))]>; +let Predicates = [HasTraditionalFPU] in { def LFS : DForm_1<48, (outs f4rc:$rD), (ins memri:$src), "lfs $rD, $src", IIC_LdStLFD, [(set f32:$rD, (load iaddr:$src))]>; def LFD : DForm_1<50, (outs f8rc:$rD), (ins memri:$src), "lfd $rD, $src", IIC_LdStLFD, [(set f64:$rD, (load iaddr:$src))]>; +} // Unindexed (r+i) Loads with Update (preinc). @@ -1819,6 +1837,7 @@ []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; +let Predicates = [HasTraditionalFPU] in { def LFSU : DForm_1<49, (outs f4rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lfsu $rD, $addr", IIC_LdStLFDU, []>, RegConstraint<"$addr.reg = $ea_result">, @@ -1828,6 +1847,7 @@ "lfdu $rD, $addr", IIC_LdStLFDU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; +} // Indexed (r+r) Loads with Update (preinc). @@ -1855,6 +1875,7 @@ []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; +let Predicates = [HasTraditionalFPU] in { def LFSUX : XForm_1<31, 567, (outs f4rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lfsux $rD, $addr", IIC_LdStLFDUX, @@ -1868,6 +1889,7 @@ NoEncode<"$ea_result">; } } +} // Indexed (r+r) Loads. 
// @@ -1892,6 +1914,7 @@ "lwbrx $rD, $src", IIC_LdStLoad, [(set i32:$rD, (PPClbrx xoaddr:$src, i32))]>; +let Predicates = [HasTraditionalFPU] in { def LFSX : XForm_25<31, 535, (outs f4rc:$frD), (ins memrr:$src), "lfsx $frD, $src", IIC_LdStLFD, [(set f32:$frD, (load xaddr:$src))]>; @@ -1906,6 +1929,7 @@ "lfiwzx $frD, $src", IIC_LdStLFD, [(set f64:$frD, (PPClfiwzx xoaddr:$src))]>; } +} // Load Multiple def LMW : DForm_1<46, (outs gprc:$rD), (ins memri:$src), @@ -1926,6 +1950,7 @@ def STW : DForm_1<36, (outs), (ins gprc:$rS, memri:$src), "stw $rS, $src", IIC_LdStStore, [(store i32:$rS, iaddr:$src)]>; +let Predicates = [HasTraditionalFPU] in { def STFS : DForm_1<52, (outs), (ins f4rc:$rS, memri:$dst), "stfs $rS, $dst", IIC_LdStSTFD, [(store f32:$rS, iaddr:$dst)]>; @@ -1933,6 +1958,7 @@ "stfd $rS, $dst", IIC_LdStSTFD, [(store f64:$rS, iaddr:$dst)]>; } +} // Unindexed (r+i) Stores with Update (preinc). let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in { @@ -1945,6 +1971,7 @@ def STWU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst), "stwu $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +let Predicates = [HasTraditionalFPU] in { def STFSU : DForm_1<53, (outs ptr_rc_nor0:$ea_res), (ins f4rc:$rS, memri:$dst), "stfsu $rS, $dst", IIC_LdStSTFDU, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; @@ -1952,6 +1979,7 @@ "stfdu $rS, $dst", IIC_LdStSTFDU, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; } +} // Patterns to match the pre-inc stores. 
We can't put the patterns on // the instruction definitions directly as ISel wants the address base @@ -1991,6 +2019,7 @@ [(PPCstbrx i32:$rS, xoaddr:$dst, i32)]>, PPC970_DGroup_Cracked; +let Predicates = [HasTraditionalFPU] in { def STFIWX: XForm_28<31, 983, (outs), (ins f8rc:$frS, memrr:$dst), "stfiwx $frS, $dst", IIC_LdStSTFD, [(PPCstfiwx f64:$frS, xoaddr:$dst)]>; @@ -2002,6 +2031,7 @@ "stfdx $frS, $dst", IIC_LdStSTFD, [(store f64:$frS, xaddr:$dst)]>; } +} // Indexed (r+r) Stores with Update (preinc). let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in { @@ -2017,6 +2047,7 @@ "stwux $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; +let Predicates = [HasTraditionalFPU] in { def STFSUX: XForm_8<31, 695, (outs ptr_rc_nor0:$ea_res), (ins f4rc:$rS, memrr:$dst), "stfsux $rS, $dst", IIC_LdStSTFDU, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, @@ -2026,6 +2057,7 @@ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; } +} // Patterns to match the pre-inc stores. We can't put the patterns on // the instruction definitions directly as ISel wants the address base @@ -2036,10 +2068,12 @@ (STHUX $rS, $ptrreg, $ptroff)>; def : Pat<(pre_store i32:$rS, iPTR:$ptrreg, iPTR:$ptroff), (STWUX $rS, $ptrreg, $ptroff)>; +let Predicates = [HasTraditionalFPU] in { def : Pat<(pre_store f32:$rS, iPTR:$ptrreg, iPTR:$ptroff), (STFSUX $rS, $ptrreg, $ptroff)>; def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iPTR:$ptroff), (STFDUX $rS, $ptrreg, $ptroff)>; +} // Store Multiple def STMW : DForm_1<47, (outs), (ins gprc:$rS, memri:$dst), @@ -2223,7 +2257,7 @@ "cmplw $crD, $rA, $rB", IIC_IntCompare>; } } -let PPC970_Unit = 3 in { // FPU Operations. +let PPC970_Unit = 3, Predicates = [HasTraditionalFPU] in { // FPU Operations. 
//def FCMPO : XForm_17<63, 32, (outs CRRC:$crD), (ins FPRC:$fA, FPRC:$fB), // "fcmpo $crD, $fA, $fB", IIC_FPCompare>; let isCompare = 1, hasSideEffects = 0 in { @@ -2301,13 +2335,13 @@ /// often coalesced away and we don't want the dispatch group builder to think /// that they will fill slots (which could cause the load of a LSU reject to /// sneak into a d-group with a store). -let hasSideEffects = 0 in +let hasSideEffects = 0, Predicates = [HasTraditionalFPU] in defm FMR : XForm_26r<63, 72, (outs f4rc:$frD), (ins f4rc:$frB), "fmr", "$frD, $frB", IIC_FPGeneral, []>, // (set f32:$frD, f32:$frB) PPC970_Unit_Pseudo; -let PPC970_Unit = 3, hasSideEffects = 0 in { // FPU Operations. +let PPC970_Unit = 3, hasSideEffects = 0, Predicates = [HasTraditionalFPU] in { // FPU Operations. // These are artificially split into two different forms, for 4/8 byte FP. defm FABSS : XForm_26r<63, 264, (outs f4rc:$frD), (ins f4rc:$frB), "fabs", "$frD, $frB", IIC_FPGeneral, @@ -2556,6 +2590,7 @@ "mcrxrx $BF", IIC_BrMCRX>, Requires<[IsISA3_0]>; } // hasSideEffects = 0 +let Predicates = [HasTraditionalFPU] in { // Pseudo instruction to perform FADD in round-to-zero mode. let usesCustomInserter = 1, Uses = [RM] in { def FADDrtz: Pseudo<(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), "", @@ -2615,6 +2650,7 @@ "mffsl $rT", IIC_IntMFFS, []>, PPC970_DGroup_Single, PPC970_Unit_FPU; } +} let Predicates = [IsISA3_0] in { def MODSW : XForm_8<31, 779, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), @@ -2712,7 +2748,7 @@ // A-Form instructions. Most of the instructions executed in the FPU are of // this type. // -let PPC970_Unit = 3, hasSideEffects = 0 in { // FPU Operations. +let PPC970_Unit = 3, hasSideEffects = 0, Predicates = [HasTraditionalFPU] in { // FPU Operations. 
let Uses = [RM] in { let isCommutable = 1 in { defm FMADD : AForm_1r<63, 29, @@ -3038,6 +3074,7 @@ (LHZ iaddr:$src)>; def : Pat<(extloadi16 xaddr:$src), (LHZX xaddr:$src)>; +let Predicates = [HasTraditionalFPU] in { def : Pat<(f64 (extloadf32 iaddr:$src)), (COPY_TO_REGCLASS (LFS iaddr:$src), F8RC)>; def : Pat<(f64 (extloadf32 xaddr:$src)), @@ -3045,6 +3082,7 @@ def : Pat<(f64 (fpextend f32:$src)), (COPY_TO_REGCLASS $src, F8RC)>; +} // Only seq_cst fences require the heavyweight sync (SYNC 0). // All others can use the lightweight sync (SYNC 1). @@ -3056,6 +3094,7 @@ def : Pat<(atomic_fence (imm), (imm)), (SYNC 1)>, Requires<[HasSYNC]>; def : Pat<(atomic_fence (imm), (imm)), (MSYNC)>, Requires<[HasOnlyMSYNC]>; +let Predicates = [HasTraditionalFPU] in { // Additional FNMSUB patterns: -a*c + b == -(a*c - b) def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B), (FNMSUB $A, $C, $B)>; @@ -3071,6 +3110,7 @@ (FCPSGND (COPY_TO_REGCLASS $frA, F8RC), $frB)>; def : Pat<(fcopysign f32:$frB, f64:$frA), (FCPSGNS (COPY_TO_REGCLASS $frA, F4RC), $frB)>; +} include "PPCInstrAltivec.td" include "PPCInstrSPE.td" @@ -3513,6 +3553,7 @@ (EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>; // SETCC for f32. 
+let Predicates = [HasTraditionalFPU] in { def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOLT)), (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>; def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETLT)), @@ -3573,7 +3614,146 @@ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>; defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETO)), (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>; +} +let Predicates = [HasSPE] in { +def SELECT_CC_SPE4 : Pseudo<(outs spe4rc:$dst), + (ins crrc:$cond, spe4rc:$T, spe4rc:$F, + i32imm:$BROPC), "#SELECT_CC_SPE4", + []>; +def SELECT_CC_SPE : Pseudo<(outs sperc:$dst), + (ins crrc:$cond, sperc:$T, sperc:$F, i32imm:$BROPC), + "#SELECT_CC_SPE", + []>; +def SELECT_SPE4 : Pseudo<(outs spe4rc:$dst), (ins crbitrc:$cond, + spe4rc:$T, spe4rc:$F), "#SELECT_SPE4", + [(set f32:$dst, (select i1:$cond, f32:$T, f32:$F))]>; +def SELECT_SPE : Pseudo<(outs sperc:$dst), (ins crbitrc:$cond, + sperc:$T, sperc:$F), "#SELECT_SPE", + [(set f64:$dst, (select i1:$cond, f64:$T, f64:$F))]>; + +// SETCC for f32. +def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOLT)), + (EXTRACT_SUBREG (EFSCMPLT $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETLT)), + (EXTRACT_SUBREG (EFSCMPLT $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOGT)), + (EXTRACT_SUBREG (EFSCMPGT $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETGT)), + (EXTRACT_SUBREG (EFSCMPGT $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOEQ)), + (EXTRACT_SUBREG (EFSCMPEQ $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETEQ)), + (EXTRACT_SUBREG (EFSCMPEQ $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETUO)), + (EXTRACT_SUBREG (EFSCMPEQ $s1, $s2), sub_un)>; + +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUGE)), + (EXTRACT_SUBREG (EFSCMPLT $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETGE)), + (EXTRACT_SUBREG (EFSCMPLT $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETULE)), + (EXTRACT_SUBREG (EFSCMPGT $s1, $s2), sub_gt)>; +defm 
: CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETLE)), + (EXTRACT_SUBREG (EFSCMPGT $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUNE)), + (EXTRACT_SUBREG (EFSCMPEQ $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETNE)), + (EXTRACT_SUBREG (EFSCMPEQ $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETO)), + (EXTRACT_SUBREG (EFSCMPEQ $s1, $s2), sub_gt)>; + +// SETCC for f64. +def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOLT)), + (EXTRACT_SUBREG (EFDCMPLT $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETLT)), + (EXTRACT_SUBREG (EFDCMPLT $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOGT)), + (EXTRACT_SUBREG (EFDCMPGT $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETGT)), + (EXTRACT_SUBREG (EFDCMPGT $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOEQ)), + (EXTRACT_SUBREG (EFDCMPEQ $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETEQ)), + (EXTRACT_SUBREG (EFDCMPEQ $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETUO)), + (EXTRACT_SUBREG (EFDCMPEQ $s1, $s2), sub_un)>; + +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUGE)), + (EXTRACT_SUBREG (EFDCMPLT $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETGE)), + (EXTRACT_SUBREG (EFDCMPLT $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETULE)), + (EXTRACT_SUBREG (EFDCMPGT $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETLE)), + (EXTRACT_SUBREG (EFDCMPGT $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUNE)), + (EXTRACT_SUBREG (EFDCMPEQ $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETNE)), + (EXTRACT_SUBREG (EFDCMPEQ $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETO)), + (EXTRACT_SUBREG (EFDCMPEQ $s1, $s2), sub_gt)>; + +def : Pat<(i1 (setcc v2f32:$s1, v2f32:$s2, SETOLT)), + (EXTRACT_SUBREG (EVFSCMPLT $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc v2f32:$s1, 
v2f32:$s2, SETLT)), + (EXTRACT_SUBREG (EVFSCMPLT $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc v2f32:$s1, v2f32:$s2, SETOGT)), + (EXTRACT_SUBREG (EVFSCMPGT $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc v2f32:$s1, v2f32:$s2, SETGT)), + (EXTRACT_SUBREG (EVFSCMPGT $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc v2f32:$s1, v2f32:$s2, SETOEQ)), + (EXTRACT_SUBREG (EVFSCMPEQ $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc v2f32:$s1, v2f32:$s2, SETEQ)), + (EXTRACT_SUBREG (EVFSCMPEQ $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc v2f32:$s1, v2f32:$s2, SETUO)), + (EXTRACT_SUBREG (EVFSCMPEQ $s1, $s2), sub_un)>; + +defm : CRNotPat<(i1 (setcc v2f32:$s1, v2f32:$s2, SETUGE)), + (EXTRACT_SUBREG (EVFSCMPLT $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc v2f32:$s1, v2f32:$s2, SETGE)), + (EXTRACT_SUBREG (EVFSCMPLT $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc v2f32:$s1, v2f32:$s2, SETULE)), + (EXTRACT_SUBREG (EVFSCMPGT $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc v2f32:$s1, v2f32:$s2, SETLE)), + (EXTRACT_SUBREG (EVFSCMPGT $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc v2f32:$s1, v2f32:$s2, SETUNE)), + (EXTRACT_SUBREG (EVFSCMPEQ $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc v2f32:$s1, v2f32:$s2, SETNE)), + (EXTRACT_SUBREG (EVFSCMPEQ $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc v2f32:$s1, v2f32:$s2, SETO)), + (EXTRACT_SUBREG (EVFSCMPEQ $s1, $s2), sub_gt)>; + +def : Pat<(i1 (setcc v2i32:$s1, v2i32:$s2, SETOLT)), + (EXTRACT_SUBREG (EVCMPLTS $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc v2i32:$s1, v2i32:$s2, SETLT)), + (EXTRACT_SUBREG (EVCMPLTS $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc v2i32:$s1, v2i32:$s2, SETOGT)), + (EXTRACT_SUBREG (EVCMPGTS $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc v2i32:$s1, v2i32:$s2, SETGT)), + (EXTRACT_SUBREG (EVCMPGTS $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc v2i32:$s1, v2i32:$s2, SETOEQ)), + (EXTRACT_SUBREG (EVCMPEQ $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc v2i32:$s1, v2i32:$s2, SETEQ)), + (EXTRACT_SUBREG (EVCMPEQ $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc 
v2i32:$s1, v2i32:$s2, SETUO)), + (EXTRACT_SUBREG (EVCMPEQ $s1, $s2), sub_un)>; + +defm : CRNotPat<(i1 (setcc v2i32:$s1, v2i32:$s2, SETUGE)), + (EXTRACT_SUBREG (EVCMPLTU $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc v2i32:$s1, v2i32:$s2, SETGE)), + (EXTRACT_SUBREG (EVCMPLTS $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc v2i32:$s1, v2i32:$s2, SETULE)), + (EXTRACT_SUBREG (EVCMPGTU $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc v2i32:$s1, v2i32:$s2, SETLE)), + (EXTRACT_SUBREG (EVCMPGTS $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc v2i32:$s1, v2i32:$s2, SETUNE)), + (EXTRACT_SUBREG (EVCMPEQ $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc v2i32:$s1, v2i32:$s2, SETNE)), + (EXTRACT_SUBREG (EVCMPEQ $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc v2i32:$s1, v2i32:$s2, SETO)), + (EXTRACT_SUBREG (EVCMPEQ $s1, $s2), sub_gt)>; +} // match select on i1 variables: def : Pat<(i1 (select i1:$cond, i1:$tval, i1:$fval)), (CROR (CRAND $cond , $tval), @@ -3656,6 +3836,7 @@ def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETNE)), (SELECT_I8 (CRXOR $lhs, $rhs), $tval, $fval)>; +let Predicates = [HasTraditionalFPU] in { def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)), (SELECT_F4 (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULT)), @@ -3676,6 +3857,7 @@ (SELECT_F4 (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)), (SELECT_F4 (CRXOR $lhs, $rhs), $tval, $fval)>; +} def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)), (SELECT_F8 (CRANDC $lhs, $rhs), $tval, $fval)>; @@ -3826,6 +4008,7 @@ def : InstAlias<"mtfsfi $BF, $U", (MTFSFI crrc:$BF, i32imm:$U, 0)>; def : InstAlias<"mtfsfi. 
$BF, $U", (MTFSFIo crrc:$BF, i32imm:$U, 0)>; +let Predicates = [HasTraditionalFPU] in { def MTFSF : XFLForm_1<63, 711, (outs), (ins i32imm:$FLM, f8rc:$FRB, i32imm:$L, i32imm:$W), "mtfsf $FLM, $FRB, $L, $W", IIC_IntMFFS, []>; @@ -3835,6 +4018,7 @@ def : InstAlias<"mtfsf $FLM, $FRB", (MTFSF i32imm:$FLM, f8rc:$FRB, 0, 0)>; def : InstAlias<"mtfsf. $FLM, $FRB", (MTFSFo i32imm:$FLM, f8rc:$FRB, 0, 0)>; +} def SLBIE : XForm_16b<31, 434, (outs), (ins gprc:$RB), "slbie $RB", IIC_SprSLBIE, []>; Index: lib/Target/PowerPC/PPCInstrSPE.td =================================================================== --- lib/Target/PowerPC/PPCInstrSPE.td +++ lib/Target/PowerPC/PPCInstrSPE.td @@ -12,13 +12,61 @@ // //===----------------------------------------------------------------------===// +class EFXForm_1<bits<11> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> : + I<4, OOL, IOL, asmstr, itin> { + bits<5> RT; + bits<5> RA; + bits<5> RB; + + let Pattern = pattern; + + let Inst{6-10} = RT; + let Inst{11-15} = RA; + let Inst{16-20} = RB; + let Inst{21-31} = xo; +} + +def imm32SExt5 : Operand<i32>, ImmLeaf<i32, [{ + return ((int32_t)Imm >= -16 && (int32_t)Imm <= 15); +}]>; + +class EFXForm_2<bits<11> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> : + EFXForm_1<xo, OOL, IOL, asmstr, itin, pattern> { + let RB = 0; +} + +class EFXForm_2a<bits<11> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> : + EFXForm_1<xo, OOL, IOL, asmstr, itin, pattern> { + let RA = 0; +} + +class EFXForm_3<bits<11> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> : + I<4, OOL, IOL, asmstr, itin> { + bits<3> crD; + bits<5> RA; + bits<5> RB; + + let Inst{6-8} = crD; + let Inst{9-10} = 0; + let Inst{11-15} = RA; + let Inst{16-20} = RB; + let Inst{21-31} = xo; +} + class EVXForm_1<bits<11> xo, dag OOL, dag IOL, string asmstr, - InstrItinClass itin> : I<4, OOL, IOL, asmstr, itin> { + InstrItinClass itin, list<dag> pattern> : + I<4, OOL, IOL, asmstr, itin> { bits<5> RT; bits<5> RA; bits<5> RB; - let Pattern = []; + let Pattern = pattern; let Inst{6-10} = RT; let Inst{11-15} = RA; @@ -27,17 
+75,25 @@ } class EVXForm_2<bits<11> xo, dag OOL, dag IOL, string asmstr, - InstrItinClass itin> : EVXForm_1<xo, OOL, IOL, asmstr, itin> { + InstrItinClass itin, list<dag> pattern> : + EVXForm_1<xo, OOL, IOL, asmstr, itin, pattern> { let RB = 0; } +class EVXForm_2a<bits<11> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> : + EVXForm_1<xo, OOL, IOL, asmstr, itin, pattern> { + let RA = 0; +} + class EVXForm_3<bits<11> xo, dag OOL, dag IOL, string asmstr, - InstrItinClass itin> : I<4, OOL, IOL, asmstr, itin> { + InstrItinClass itin, list<dag> pattern> : + I<4, OOL, IOL, asmstr, itin> { bits<3> crD; bits<5> RA; bits<5> RB; - let Pattern = []; + let Pattern = pattern; let Inst{6-8} = crD; let Inst{9-10} = 0; @@ -46,12 +102,30 @@ let Inst{21-31} = xo; } +class EVXForm_4<bits<8> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> : + I<4, OOL, IOL, asmstr, itin> { + bits<3> crD; + bits<5> RA; + bits<5> RB; + bits<5> RT; + + let Pattern = pattern; + + let Inst{6-10} = RT; + let Inst{11-15} = RA; + let Inst{16-20} = RB; + let Inst{21-28} = xo; + let Inst{29-31} = crD; +} + class EVXForm_D<bits<11> xo, dag OOL, dag IOL, string asmstr, - InstrItinClass itin> : I<4, OOL, IOL, asmstr, itin> { + InstrItinClass itin, list<dag> pattern> : + I<4, OOL, IOL, asmstr, itin> { bits<5> RT; bits<21> D; - let Pattern = []; + let Pattern = pattern; let Inst{6-10} = RT; let Inst{20} = D{0}; @@ -68,380 +142,727 @@ let Inst{21-31} = xo; } -let Predicates = [HasSPE], isAsmParserOnly = 1 in { - -def EVLDD : EVXForm_D<769, (outs gprc:$RT), (ins spe8dis:$dst), - "evldd $RT, $dst", IIC_VecFP>; -def EVLDW : EVXForm_D<771, (outs gprc:$RT), (ins spe8dis:$dst), - "evldw $RT, $dst", IIC_VecFP>; -def EVLDH : EVXForm_D<773, (outs gprc:$RT), (ins spe8dis:$dst), - "evldh $RT, $dst", IIC_VecFP>; -def EVLHHESPLAT : EVXForm_D<777, (outs gprc:$RT), (ins spe2dis:$dst), - "evlhhesplat $RT, $dst", IIC_VecFP>; -def EVLHHOUSPLAT : EVXForm_D<781, (outs gprc:$RT), (ins spe2dis:$dst), - "evlhhousplat $RT, $dst", IIC_VecFP>; -def EVLHHOSSPLAT : EVXForm_D<783, (outs gprc:$RT), (ins spe2dis:$dst), - "evlhhossplat $RT, $dst", 
IIC_VecFP>; -def EVLWHE : EVXForm_D<785, (outs gprc:$RT), (ins spe4dis:$dst), - "evlwhe $RT, $dst", IIC_VecFP>; -def EVLWHOU : EVXForm_D<789, (outs gprc:$RT), (ins spe4dis:$dst), - "evlwhou $RT, $dst", IIC_VecFP>; -def EVLWHOS : EVXForm_D<791, (outs gprc:$RT), (ins spe4dis:$dst), - "evlwhos $RT, $dst", IIC_VecFP>; -def EVLWWSPLAT : EVXForm_D<793, (outs gprc:$RT), (ins spe4dis:$dst), - "evlwwsplat $RT, $dst", IIC_VecFP>; -def EVLWHSPLAT : EVXForm_D<797, (outs gprc:$RT), (ins spe4dis:$dst), - "evlwhsplat $RT, $dst", IIC_VecFP>; - -def EVSTDD : EVXForm_D<801, (outs), (ins gprc:$RT, spe8dis:$dst), - "evstdd $RT, $dst", IIC_VecFP>; -def EVSTDH : EVXForm_D<805, (outs), (ins gprc:$RT, spe8dis:$dst), - "evstdh $RT, $dst", IIC_VecFP>; -def EVSTDW : EVXForm_D<803, (outs), (ins gprc:$RT, spe8dis:$dst), - "evstdw $RT, $dst", IIC_VecFP>; -def EVSTWHE : EVXForm_D<817, (outs), (ins gprc:$RT, spe4dis:$dst), - "evstwhe $RT, $dst", IIC_VecFP>; -def EVSTWHO : EVXForm_D<821, (outs), (ins gprc:$RT, spe4dis:$dst), - "evstwho $RT, $dst", IIC_VecFP>; -def EVSTWWE : EVXForm_D<825, (outs), (ins gprc:$RT, spe4dis:$dst), - "evstwwe $RT, $dst", IIC_VecFP>; -def EVSTWWO : EVXForm_D<829, (outs), (ins gprc:$RT, spe4dis:$dst), - "evstwwo $RT, $dst", IIC_VecFP>; - -def EVMRA : EVXForm_1<1220, (outs gprc:$RT), (ins gprc:$RA), - "evmra $RT, $RA", IIC_VecFP> { - let RB = 0; +let DecoderNamespace = "SPE", Predicates = [HasSPE] in { + +def BRINC : EVXForm_1<527, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "brinc $RT, $RA, $RB", IIC_VecFP, []>; + +// Double-precision floating point +def EFDABS : EFXForm_2<740, (outs sperc:$RT), (ins sperc:$RA), + "efdabs $RT, $RA", IIC_FPGeneral, + [(set f64:$RT, (fabs f64:$RA))]>; + +def EFDADD : EFXForm_1<736, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "efdadd $RT, $RA, $RB", IIC_FPAddSub, + [(set f64:$RT, (fadd f64:$RA, f64:$RB))]>; + +def EFDCFS : EFXForm_2a<751, (outs sperc:$RT), (ins spe4rc:$RB), + "efdcfs $RT, $RB", IIC_FPGeneral, + [(set f64:$RT, 
(fpextend f32:$RB))]>; + +def EFDCFSF : EFXForm_2a<755, (outs sperc:$RT), (ins spe4rc:$RB), + "efdcfsf $RT, $RB", IIC_FPGeneral, []>; + +def EFDCFSI : EFXForm_2a<753, (outs sperc:$RT), (ins gprc:$RB), + "efdcfsi $RT, $RB", IIC_FPGeneral, + [(set f64:$RT, (sint_to_fp i32:$RB))]>; + +def EFDCFSID : EFXForm_2a<739, (outs sperc:$RT), (ins gprc:$RB), + "efdcfsid $RT, $RB", IIC_FPGeneral, + []>; + +def EFDCFUF : EFXForm_2a<754, (outs sperc:$RT), (ins spe4rc:$RB), + "efdcfuf $RT, $RB", IIC_FPGeneral, []>; + +def EFDCFUI : EFXForm_2a<752, (outs sperc:$RT), (ins gprc:$RB), + "efdcfui $RT, $RB", IIC_FPGeneral, + [(set f64:$RT, (uint_to_fp i32:$RB))]>; + +def EFDCFUID : EFXForm_2a<738, (outs sperc:$RT), (ins gprc:$RB), + "efdcfuid $RT, $RB", IIC_FPGeneral, + []>; + +let isCompare = 1 in { +def EFDCMPEQ : EFXForm_3<750, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), + "efdcmpeq $crD, $RA, $RB", IIC_FPCompare>; +def EFDCMPGT : EFXForm_3<748, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), + "efdcmpgt $crD, $RA, $RB", IIC_FPCompare>; +def EFDCMPLT : EFXForm_3<749, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), + "efdcmplt $crD, $RA, $RB", IIC_FPCompare>; +} + +def EFDCTSF : EFXForm_2a<759, (outs sperc:$RT), (ins spe4rc:$RB), + "efdctsf $RT, $RB", IIC_FPGeneral, []>; + +def EFDCTSI : EFXForm_2a<757, (outs gprc:$RT), (ins sperc:$RB), + "efdctsi $RT, $RB", IIC_FPGeneral, + []>; + +def EFDCTSIDZ : EFXForm_2a<747, (outs gprc:$RT), (ins sperc:$RB), + "efdctsidz $RT, $RB", IIC_FPGeneral, + []>; + +def EFDCTSIZ : EFXForm_2a<762, (outs gprc:$RT), (ins sperc:$RB), + "efdctsiz $RT, $RB", IIC_FPGeneral, + [(set i32:$RT, (fp_to_sint f64:$RB))]>; + +def EFDCTUF : EFXForm_2a<758, (outs sperc:$RT), (ins spe4rc:$RB), + "efdctuf $RT, $RB", IIC_FPGeneral, []>; + +def EFDCTUI : EFXForm_2a<756, (outs gprc:$RT), (ins sperc:$RB), + "efdctui $RT, $RB", IIC_FPGeneral, + []>; + +def EFDCTUIDZ : EFXForm_2a<746, (outs gprc:$RT), (ins sperc:$RB), + "efdctuidz $RT, $RB", IIC_FPGeneral, + []>; + +def EFDCTUIZ : 
EFXForm_2a<760, (outs gprc:$RT), (ins sperc:$RB), + "efdctuiz $RT, $RB", IIC_FPGeneral, + [(set i32:$RT, (fp_to_uint f64:$RB))]>; + +def EFDDIV : EFXForm_1<745, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "efddiv $RT, $RA, $RB", IIC_FPDivD, + [(set f64:$RT, (fdiv f64:$RA, f64:$RB))]>; + +def EFDMUL : EFXForm_1<744, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "efdmul $RT, $RA, $RB", IIC_FPGeneral, + [(set f64:$RT, (fmul f64:$RA, f64:$RB))]>; + +def EFDNABS : EFXForm_2<741, (outs sperc:$RT), (ins sperc:$RA), + "efdnabs $RT, $RA", IIC_FPGeneral, + [(set f64:$RT, (fneg (fabs f64:$RA)))]>; + +def EFDNEG : EFXForm_2<742, (outs sperc:$RT), (ins sperc:$RA), + "efdneg $RT, $RA", IIC_FPGeneral, + [(set f64:$RT, (fneg f64:$RA))]>; + +def EFDSUB : EFXForm_1<737, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "efdsub $RT, $RA, $RB", IIC_FPAddSub, + [(set f64:$RT, (fsub f64:$RA, f64:$RB))]>; + +let isCompare = 1 in { +def EFDTSTEQ : EFXForm_3<766, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), + "efdtsteq $crD, $RA, $RB", IIC_FPCompare>; +def EFDTSTGT : EFXForm_3<764, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), + "efdtstgt $crD, $RA, $RB", IIC_FPCompare>; +def EFDTSTLT : EFXForm_3<765, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), + "efdtstlt $crD, $RA, $RB", IIC_FPCompare>; +} + +// Single-precision floating point +def EFSABS : EFXForm_2<708, (outs spe4rc:$RT), (ins spe4rc:$RA), + "efsabs $RT, $RA", IIC_FPGeneral, + [(set f32:$RT, (fabs f32:$RA))]>; + +def EFSADD : EFXForm_1<704, (outs spe4rc:$RT), (ins spe4rc:$RA, spe4rc:$RB), + "efsadd $RT, $RA, $RB", IIC_FPAddSub, + [(set f32:$RT, (fadd f32:$RA, f32:$RB))]>; + +def EFSCFD : EFXForm_2a<719, (outs spe4rc:$RT), (ins sperc:$RB), + "efscfd $RT, $RB", IIC_FPGeneral, + [(set f32:$RT, (fpround f64:$RB))]>; + +def EFSCFSF : EFXForm_2a<723, (outs spe4rc:$RT), (ins spe4rc:$RB), + "efscfsf $RT, $RB", IIC_FPGeneral, []>; + +def EFSCFSI : EFXForm_2a<721, (outs spe4rc:$RT), (ins gprc:$RB), + "efscfsi $RT, $RB", IIC_FPGeneral, + 
[(set f32:$RT, (sint_to_fp i32:$RB))]>; + +def EFSCFUF : EFXForm_2a<722, (outs spe4rc:$RT), (ins spe4rc:$RB), + "efscfuf $RT, $RB", IIC_FPGeneral, []>; + +def EFSCFUI : EFXForm_2a<720, (outs spe4rc:$RT), (ins gprc:$RB), + "efscfui $RT, $RB", IIC_FPGeneral, + [(set f32:$RT, (uint_to_fp i32:$RB))]>; + +let isCompare = 1 in { +def EFSCMPEQ : EFXForm_3<718, (outs crrc:$crD), (ins spe4rc:$RA, spe4rc:$RB), + "efscmpeq $crD, $RA, $RB", IIC_FPCompare>; +def EFSCMPGT : EFXForm_3<716, (outs crrc:$crD), (ins spe4rc:$RA, spe4rc:$RB), + "efscmpgt $crD, $RA, $RB", IIC_FPCompare>; +def EFSCMPLT : EFXForm_3<717, (outs crrc:$crD), (ins spe4rc:$RA, spe4rc:$RB), + "efscmplt $crD, $RA, $RB", IIC_FPCompare>; } -def BRINC : EVXForm_1<527, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "brinc $RT, $RA, $RB", IIC_VecFP>; -def EVABS : EVXForm_2<520, (outs gprc:$RT), (ins gprc:$RA), - "evabs $RT, $RA", IIC_VecFP>; - -def EVADDIW : EVXForm_1<514, (outs gprc:$RT), (ins gprc:$RA, u5imm:$RB), - "evaddiw $RT, $RB, $RA", IIC_VecFP>; -def EVADDSMIAAW : EVXForm_2<1225, (outs gprc:$RT), (ins gprc:$RA), - "evaddsmiaaw $RT, $RA", IIC_VecFP>; -def EVADDSSIAAW : EVXForm_2<1217, (outs gprc:$RT), (ins gprc:$RA), - "evaddssiaaw $RT, $RA", IIC_VecFP>; -def EVADDUSIAAW : EVXForm_2<1216, (outs gprc:$RT), (ins gprc:$RA), - "evaddusiaaw $RT, $RA", IIC_VecFP>; -def EVADDUMIAAW : EVXForm_2<1224, (outs gprc:$RT), (ins gprc:$RA), - "evaddumiaaw $RT, $RA", IIC_VecFP>; -def EVADDW : EVXForm_1<512, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evaddw $RT, $RA, $RB", IIC_VecFP>; - -def EVAND : EVXForm_1<529, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evand $RT, $RA, $RB", IIC_VecFP>; -def EVANDC : EVXForm_1<530, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evandc $RT, $RA, $RB", IIC_VecFP>; - -def EVCMPEQ : EVXForm_3<564, (outs crrc:$crD), (ins gprc:$RA, gprc:$RB), - "evcmpeq $crD, $RA, $RB", IIC_VecFP>; -def EVCMPGTS : EVXForm_3<561, (outs crrc:$crD), (ins gprc:$RA, gprc:$RB), - "evcmpgts $crD, $RA, $RB", 
IIC_VecFP>; -def EVCMPGTU : EVXForm_3<560, (outs crrc:$crD), (ins gprc:$RA, gprc:$RB), - "evcmpgtu $crD, $RA, $RB", IIC_VecFP>; -def EVCMPLTS : EVXForm_3<563, (outs crrc:$crD), (ins gprc:$RA, gprc:$RB), - "evcmplts $crD, $RA, $RB", IIC_VecFP>; -def EVCMPLTU : EVXForm_3<562, (outs crrc:$crD), (ins gprc:$RA, gprc:$RB), - "evcmpltu $crD, $RA, $RB", IIC_VecFP>; - -def EVCNTLSW : EVXForm_2<526, (outs gprc:$RT), (ins gprc:$RA), - "evcntlsw $RT, $RA", IIC_VecFP>; -def EVCNTLZW : EVXForm_2<525, (outs gprc:$RT), (ins gprc:$RA), - "evcntlzw $RT, $RA", IIC_VecFP>; - -def EVDIVWS : EVXForm_1<1222, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evdivws $RT, $RA, $RB", IIC_VecFP>; -def EVDIVWU : EVXForm_1<1223, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evdivwu $RT, $RA, $RB", IIC_VecFP>; - -def EVEQV : EVXForm_1<537, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "eveqv $RT, $RA, $RB", IIC_VecFP>; - -def EVEXTSB : EVXForm_2<522, (outs gprc:$RT), (ins gprc:$RA), - "evextsb $RT, $RA", IIC_VecFP>; -def EVEXTSH : EVXForm_2<523, (outs gprc:$RT), (ins gprc:$RA), - "evextsh $RT, $RA", IIC_VecFP>; - -def EVLDDX : EVXForm_1<768, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evlddx $RT, $RA, $RB", IIC_VecFP>; -def EVLDWX : EVXForm_1<770, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evldwx $RT, $RA, $RB", IIC_VecFP>; -def EVLDHX : EVXForm_1<772, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evldhx $RT, $RA, $RB", IIC_VecFP>; -def EVLHHESPLATX : EVXForm_1<776, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evlhhesplatx $RT, $RA, $RB", IIC_VecFP>; -def EVLHHOUSPLATX : EVXForm_1<780, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evlhhousplatx $RT, $RA, $RB", IIC_VecFP>; -def EVLHHOSSPLATX : EVXForm_1<782, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evlhhossplatx $RT, $RA, $RB", IIC_VecFP>; -def EVLWHEX : EVXForm_1<784, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evlwhex $RT, $RA, $RB", IIC_VecFP>; -def EVLWHOUX : EVXForm_1<788, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evlwhoux $RT, 
$RA, $RB", IIC_VecFP>; -def EVLWHOSX : EVXForm_1<790, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evlwhosx $RT, $RA, $RB", IIC_VecFP>; -def EVLWWSPLATX : EVXForm_1<792, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evlwwsplatx $RT, $RA, $RB", IIC_VecFP>; -def EVLWHSPLATX : EVXForm_1<796, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evlwhsplatx $RT, $RA, $RB", IIC_VecFP>; - -def EVMERGEHI : EVXForm_1<556, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmergehi $RT, $RA, $RB", IIC_VecFP>; -def EVMERGELO : EVXForm_1<557, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmergelo $RT, $RA, $RB", IIC_VecFP>; -def EVMERGEHILO : EVXForm_1<558, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmergehilo $RT, $RA, $RB", IIC_VecFP>; -def EVMERGELOHI : EVXForm_1<559, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmergelohi $RT, $RA, $RB", IIC_VecFP>; - -def EVMHEGSMFAA : EVXForm_1<1323, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhegsmfaa $RT, $RA, $RB", IIC_VecFP>; -def EVMHEGSMFAN : EVXForm_1<1451, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhegsmfan $RT, $RA, $RB", IIC_VecFP>; -def EVMHEGSMIAA : EVXForm_1<1321, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhegsmiaa $RT, $RA, $RB", IIC_VecFP>; -def EVMHEGSMIAN : EVXForm_1<1449, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhegsmian $RT, $RA, $RB", IIC_VecFP>; -def EVMHEGUMIAA : EVXForm_1<1320, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhegumiaa $RT, $RA, $RB", IIC_VecFP>; -def EVMHEGUMIAN : EVXForm_1<1448, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhegumian $RT, $RA, $RB", IIC_VecFP>; - -def EVMHESMF : EVXForm_1<1035, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhesmf $RT, $RA, $RB", IIC_VecFP>; -def EVMHESMFA : EVXForm_1<1067, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhesmfa $RT, $RA, $RB", IIC_VecFP>; -def EVMHESMFAAW : EVXForm_1<1291, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhesmfaaw $RT, $RA, $RB", IIC_VecFP>; -def EVMHESMFANW : EVXForm_1<1419, (outs gprc:$RT), (ins 
gprc:$RA, gprc:$RB), - "evmhesmfanw $RT, $RA, $RB", IIC_VecFP>; -def EVMHESMI : EVXForm_1<1033, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhesmi $RT, $RA, $RB", IIC_VecFP>; -def EVMHESMIA : EVXForm_1<1065, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhesmia $RT, $RA, $RB", IIC_VecFP>; -def EVMHESMIAAW : EVXForm_1<1289, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhesmiaaw $RT, $RA, $RB", IIC_VecFP>; -def EVMHESMIANW : EVXForm_1<1417, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhesmianw $RT, $RA, $RB", IIC_VecFP>; -def EVMHESSF : EVXForm_1<1027, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhessf $RT, $RA, $RB", IIC_VecFP>; -def EVMHESSFA : EVXForm_1<1059, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhessfa $RT, $RA, $RB", IIC_VecFP>; -def EVMHESSFAAW : EVXForm_1<1283, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhessfaaw $RT, $RA, $RB", IIC_VecFP>; -def EVMHESSFANW : EVXForm_1<1411, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhessfanw $RT, $RA, $RB", IIC_VecFP>; -def EVMHESSIAAW : EVXForm_1<1281, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhessiaaw $RT, $RA, $RB", IIC_VecFP>; -def EVMHESSIANW : EVXForm_1<1409, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhessianw $RT, $RA, $RB", IIC_VecFP>; -def EVMHEUMI : EVXForm_1<1032, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmheumi $RT, $RA, $RB", IIC_VecFP>; -def EVMHEUMIA : EVXForm_1<1064, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmheumia $RT, $RA, $RB", IIC_VecFP>; -def EVMHEUMIAAW : EVXForm_1<1288, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmheumiaaw $RT, $RA, $RB", IIC_VecFP>; -def EVMHEUMIANW : EVXForm_1<1416, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmheumianw $RT, $RA, $RB", IIC_VecFP>; -def EVMHEUSIAAW : EVXForm_1<1280, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmheusiaaw $RT, $RA, $RB", IIC_VecFP>; -def EVMHEUSIANW : EVXForm_1<1408, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmheusianw $RT, $RA, $RB", IIC_VecFP>; -def EVMHOGSMFAA : 
EVXForm_1<1327, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhogsmfaa $RT, $RA, $RB", IIC_VecFP>; -def EVMHOGSMFAN : EVXForm_1<1455, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhogsmfan $RT, $RA, $RB", IIC_VecFP>; -def EVMHOGSMIAA : EVXForm_1<1325, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhogsmiaa $RT, $RA, $RB", IIC_VecFP>; -def EVMHOGSMIAN : EVXForm_1<1453, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhogsmian $RT, $RA, $RB", IIC_VecFP>; -def EVMHOGUMIAA : EVXForm_1<1324, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhogumiaa $RT, $RA, $RB", IIC_VecFP>; -def EVMHOGUMIAN : EVXForm_1<1452, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhogumian $RT, $RA, $RB", IIC_VecFP>; -def EVMHOSMF : EVXForm_1<1039, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhosmf $RT, $RA, $RB", IIC_VecFP>; -def EVMHOSMFA : EVXForm_1<1071, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhosmfa $RT, $RA, $RB", IIC_VecFP>; -def EVMHOSMFAAW : EVXForm_1<1295, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhosmfaaw $RT, $RA, $RB", IIC_VecFP>; -def EVMHOSMFANW : EVXForm_1<1423, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhosmfanw $RT, $RA, $RB", IIC_VecFP>; -def EVMHOSMI : EVXForm_1<1037, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhosmi $RT, $RA, $RB", IIC_VecFP>; -def EVMHOSMIA : EVXForm_1<1069, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhosmia $RT, $RA, $RB", IIC_VecFP>; -def EVMHOSMIAAW : EVXForm_1<1293, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhosmiaaw $RT, $RA, $RB", IIC_VecFP>; -def EVMHOSMIANW : EVXForm_1<1421, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhosmianw $RT, $RA, $RB", IIC_VecFP>; -def EVMHOSSF : EVXForm_1<1031, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhossf $RT, $RA, $RB", IIC_VecFP>; -def EVMHOSSFA : EVXForm_1<1063, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhossfa $RT, $RA, $RB", IIC_VecFP>; -def EVMHOSSFAAW : EVXForm_1<1287, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhossfaaw $RT, $RA, $RB", 
IIC_VecFP>; -def EVMHOSSFANW : EVXForm_1<1415, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhossfanw $RT, $RA, $RB", IIC_VecFP>; -def EVMHOSSIAAW : EVXForm_1<1285, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhossiaaw $RT, $RA, $RB", IIC_VecFP>; -def EVMHOSSIANW : EVXForm_1<1413, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhossianw $RT, $RA, $RB", IIC_VecFP>; -def EVMHOUMI : EVXForm_1<1036, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhoumi $RT, $RA, $RB", IIC_VecFP>; -def EVMHOUMIA : EVXForm_1<1068, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhoumia $RT, $RA, $RB", IIC_VecFP>; -def EVMHOUMIAAW : EVXForm_1<1292, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhoumiaaw $RT, $RA, $RB", IIC_VecFP>; -def EVMHOUMIANW : EVXForm_1<1420, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhoumianw $RT, $RA, $RB", IIC_VecFP>; -def EVMHOUSIAAW : EVXForm_1<1284, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhousiaaw $RT, $RA, $RB", IIC_VecFP>; -def EVMHOUSIANW : EVXForm_1<1412, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmhousianw $RT, $RA, $RB", IIC_VecFP>; - - -def EVMWHSMF : EVXForm_1<1103, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmwhsmf $RT, $RA, $RB", IIC_VecFP>; -def EVMWHSMFA : EVXForm_1<1135, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmwhsmfa $RT, $RA, $RB", IIC_VecFP>; -def EVMWHSMI : EVXForm_1<1101, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmwhsmi $RT, $RA, $RB", IIC_VecFP>; -def EVMWHSMIA : EVXForm_1<1133, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmwhsmia $RT, $RA, $RB", IIC_VecFP>; -def EVMWHSSF : EVXForm_1<1095, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmwhssf $RT, $RA, $RB", IIC_VecFP>; -def EVMWHSSFA : EVXForm_1<1127, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmwhssfa $RT, $RA, $RB", IIC_VecFP>; -def EVMWHUMI : EVXForm_1<1100, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmwhumi $RT, $RA, $RB", IIC_VecFP>; -def EVMWHUMIA : EVXForm_1<1132, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmwhumia $RT, 
$RA, $RB", IIC_VecFP>; -def EVMWLSMIAAW : EVXForm_1<1353, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmwlsmiaaw $RT, $RA, $RB", IIC_VecFP>; -def EVMWLSMIANW : EVXForm_1<1481, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmwlsmianw $RT, $RA, $RB", IIC_VecFP>; -def EVMWLSSIAAW : EVXForm_1<1345, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmwlssiaaw $RT, $RA, $RB", IIC_VecFP>; -def EVMWLSSIANW : EVXForm_1<1473, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmwlssianw $RT, $RA, $RB", IIC_VecFP>; -def EVMWLUMI : EVXForm_1<1096, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmwlumi $RT, $RA, $RB", IIC_VecFP>; -def EVMWLUMIA : EVXForm_1<1128, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmwlumia $RT, $RA, $RB", IIC_VecFP>; -def EVMWLUMIAAW : EVXForm_1<1352, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmwlumiaaw $RT, $RA, $RB", IIC_VecFP>; -def EVMWLUMIANW : EVXForm_1<1480, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmwlumianw $RT, $RA, $RB", IIC_VecFP>; -def EVMWLUSIAAW : EVXForm_1<1344, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmwlusiaaw $RT, $RA, $RB", IIC_VecFP>; -def EVMWLUSIANW : EVXForm_1<1472, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmwlusianw $RT, $RA, $RB", IIC_VecFP>; -def EVMWSMF : EVXForm_1<1115, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmwsmf $RT, $RA, $RB", IIC_VecFP>; -def EVMWSMFA : EVXForm_1<1147, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmwsmfa $RT, $RA, $RB", IIC_VecFP>; -def EVMWSMFAA : EVXForm_1<1371, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmwsmfaa $RT, $RA, $RB", IIC_VecFP>; -def EVMWSMFAN : EVXForm_1<1499, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmwsmfan $RT, $RA, $RB", IIC_VecFP>; -def EVMWSMI : EVXForm_1<1113, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmwsmi $RT, $RA, $RB", IIC_VecFP>; -def EVMWSMIA : EVXForm_1<1145, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmwsmia $RT, $RA, $RB", IIC_VecFP>; -def EVMWSMIAA : EVXForm_1<1369, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - 
"evmwsmiaa $RT, $RA, $RB", IIC_VecFP>; -def EVMWSMIAN : EVXForm_1<1497, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmwsmian $RT, $RA, $RB", IIC_VecFP>; -def EVMWSSF : EVXForm_1<1107, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmwssf $RT, $RA, $RB", IIC_VecFP>; -def EVMWSSFA : EVXForm_1<1139, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmwssfa $RT, $RA, $RB", IIC_VecFP>; -def EVMWSSFAA : EVXForm_1<1363, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmwssfaa $RT, $RA, $RB", IIC_VecFP>; -def EVMWSSFAN : EVXForm_1<1491, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmwssfan $RT, $RA, $RB", IIC_VecFP>; -def EVMWUMI : EVXForm_1<1112, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmwumi $RT, $RA, $RB", IIC_VecFP>; -def EVMWUMIA : EVXForm_1<1144, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmwumia $RT, $RA, $RB", IIC_VecFP>; -def EVMWUMIAA : EVXForm_1<1368, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmwumiaa $RT, $RA, $RB", IIC_VecFP>; -def EVMWUMIAN : EVXForm_1<1496, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evmwumian $RT, $RA, $RB", IIC_VecFP>; - - -def EVNAND : EVXForm_1<542, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evnand $RT, $RA, $RB", IIC_VecFP>; - -def EVNEG : EVXForm_2<521, (outs gprc:$RT), (ins gprc:$RA), - "evneg $RT, $RA", IIC_VecFP>; - -def EVNOR : EVXForm_1<536, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evnor $RT, $RA, $RB", IIC_VecFP>; -def EVOR : EVXForm_1<535, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evor $RT, $RA, $RB", IIC_VecFP>; -def EVORC : EVXForm_1<539, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evorc $RT, $RA, $RB", IIC_VecFP>; - -def EVRLWI : EVXForm_1<554, (outs gprc:$RT), (ins gprc:$RA, u5imm:$RB), - "evrlwi $RT, $RA, $RB", IIC_VecFP>; -def EVRLW : EVXForm_1<552, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evrlw $RT, $RA, $RB", IIC_VecFP>; - -def EVRNDW : EVXForm_2<524, (outs gprc:$RT), (ins gprc:$RA), - "evrndw $RT, $RA", IIC_VecFP>; - -def EVSLWI : EVXForm_1<550, (outs gprc:$RT), (ins gprc:$RA, 
u5imm:$RB), - "evslwi $RT, $RA, $RB", IIC_VecFP>; -def EVSLW : EVXForm_1<548, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evslw $RT, $RA, $RB", IIC_VecFP>; - -def EVSPLATFI : EVXForm_2<555, (outs gprc:$RT), (ins i32imm:$RA), - "evsplatfi $RT, $RA", IIC_VecFP>; -def EVSPLATI : EVXForm_2<553, (outs gprc:$RT), (ins i32imm:$RA), - "evsplati $RT, $RA", IIC_VecFP>; - -def EVSRWIS : EVXForm_1<547, (outs gprc:$RT), (ins gprc:$RA, u5imm:$RB), - "evsrwis $RT, $RA, $RB", IIC_VecFP>; -def EVSRWIU : EVXForm_1<546, (outs gprc:$RT), (ins gprc:$RA, u5imm:$RB), - "evsrwiu $RT, $RA, $RB", IIC_VecFP>; -def EVSRWS : EVXForm_1<545, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evsrws $RT, $RA, $RB", IIC_VecFP>; -def EVSRWU : EVXForm_1<544, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evsrwu $RT, $RA, $RB", IIC_VecFP>; - -def EVSTDDX : EVXForm_1<800, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB), - "evstddx $RT, $RA, $RB", IIC_VecFP>; -def EVSTDHX : EVXForm_1<804, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB), - "evstdhx $RT, $RA, $RB", IIC_VecFP>; -def EVSTDWX : EVXForm_1<802, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB), - "evstdwx $RT, $RA, $RB", IIC_VecFP>; -def EVSTWHEX : EVXForm_1<816, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB), - "evstwhex $RT, $RA, $RB", IIC_VecFP>; -def EVSTWHOX : EVXForm_1<820, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB), - "evstwhox $RT, $RA, $RB", IIC_VecFP>; -def EVSTWWEX : EVXForm_1<824, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB), - "evstwwex $RT, $RA, $RB", IIC_VecFP>; -def EVSTWWOX : EVXForm_1<828, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB), - "evstwwox $RT, $RA, $RB", IIC_VecFP>; - -def EVSUBFSSIAAW : EVXForm_2<1219, (outs gprc:$RT), (ins gprc:$RA), - "evsubfssiaaw $RT, $RA", IIC_VecFP>; -def EVSUBFSMIAAW : EVXForm_2<1227, (outs gprc:$RT), (ins gprc:$RA), - "evsubfsmiaaw $RT, $RA", IIC_VecFP>; -def EVSUBFUMIAAW : EVXForm_2<1226, (outs gprc:$RT), (ins gprc:$RA), - "evsubfumiaaw $RT, $RA", IIC_VecFP>; -def EVSUBFUSIAAW : EVXForm_2<1218, (outs gprc:$RT), (ins 
gprc:$RA), - "evsubfusiaaw $RT, $RA", IIC_VecFP>; -def EVSUBFW : EVXForm_1<516, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evsubfw $RT, $RA, $RB", IIC_VecFP>; -def EVSUBIFW : EVXForm_1<518, (outs gprc:$RT), (ins u5imm:$RA, gprc:$RB), - "evsubifw $RT, $RA, $RB", IIC_VecFP>; -def EVXOR : EVXForm_1<534, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), - "evxor $RT, $RA, $RB", IIC_VecFP>; +def EFSCTSF : EFXForm_2a<727, (outs spe4rc:$RT), (ins spe4rc:$RB), + "efsctsf $RT, $RB", IIC_FPGeneral, []>; + +def EFSCTSI : EFXForm_2a<725, (outs gprc:$RT), (ins spe4rc:$RB), + "efsctsi $RT, $RB", IIC_FPGeneral, + [(set i32:$RT, (fp_to_sint f32:$RB))]>; + +def EFSCTSIZ : EFXForm_2a<730, (outs gprc:$RT), (ins spe4rc:$RB), + "efsctsiz $RT, $RB", IIC_FPGeneral, + []>; + +def EFSCTUF : EFXForm_2a<726, (outs sperc:$RT), (ins spe4rc:$RB), + "efsctuf $RT, $RB", IIC_FPGeneral, []>; + +def EFSCTUI : EFXForm_2a<724, (outs gprc:$RT), (ins spe4rc:$RB), + "efsctui $RT, $RB", IIC_FPGeneral, + [(set i32:$RT, (fp_to_uint f32:$RB))]>; + +def EFSCTUIZ : EFXForm_2a<728, (outs gprc:$RT), (ins spe4rc:$RB), + "efsctuiz $RT, $RB", IIC_FPGeneral, + []>; + +def EFSDIV : EFXForm_1<713, (outs spe4rc:$RT), (ins spe4rc:$RA, spe4rc:$RB), + "efsdiv $RT, $RA, $RB", IIC_FPDivD, + [(set f32:$RT, (fdiv f32:$RA, f32:$RB))]>; + +def EFSMUL : EFXForm_1<712, (outs spe4rc:$RT), (ins spe4rc:$RA, spe4rc:$RB), + "efsmul $RT, $RA, $RB", IIC_FPGeneral, + [(set f32:$RT, (fmul f32:$RA, f32:$RB))]>; + +def EFSNABS : EFXForm_2<709, (outs spe4rc:$RT), (ins spe4rc:$RA), + "efsnabs $RT, $RA", IIC_FPGeneral, + [(set f32:$RT, (fneg (fabs f32:$RA)))]>; + +def EFSNEG : EFXForm_2<710, (outs spe4rc:$RT), (ins spe4rc:$RA), + "efsneg $RT, $RA", IIC_FPGeneral, + [(set f32:$RT, (fneg f32:$RA))]>; + +def EFSSUB : EFXForm_1<705, (outs spe4rc:$RT), (ins spe4rc:$RA, spe4rc:$RB), + "efssub $RT, $RA, $RB", IIC_FPAddSub, + [(set f32:$RT, (fsub f32:$RA, f32:$RB))]>; + +let isCompare = 1 in { +def EFSTSTEQ : EFXForm_3<734, (outs crrc:$crD), (ins 
sperc:$RA, sperc:$RB), + "efststeq $crD, $RA, $RB", IIC_FPCompare>; +def EFSTSTGT : EFXForm_3<732, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), + "efststgt $crD, $RA, $RB", IIC_FPCompare>; +def EFSTSTLT : EFXForm_3<733, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), + "efststlt $crD, $RA, $RB", IIC_FPCompare>; +} + +// SPE Vector operations + +def EVABS : EVXForm_2<520, (outs sperc:$RT), (ins sperc:$RA), + "evabs $RT, $RA", IIC_VecFP, + [(set v2i32:$RT, (abs v2i32:$RA))]>; + +def EVADDIW : EVXForm_1<514, (outs sperc:$RT), (ins sperc:$RA, u5imm:$RB), + "evaddiw $RT, $RB, $RA", IIC_VecFP, + [(set v2i32:$RT, + (add v2i32:$RA, + (build_vector imm32SExt5:$RB)))]>; +def EVADDSMIAAW : EVXForm_2<1225, (outs sperc:$RT), (ins sperc:$RA), + "evaddsmiaaw $RT, $RA", IIC_VecFP, []>; +def EVADDSSIAAW : EVXForm_2<1217, (outs sperc:$RT), (ins sperc:$RA), + "evaddssiaaw $RT, $RA", IIC_VecFP, []>; +def EVADDUSIAAW : EVXForm_2<1216, (outs sperc:$RT), (ins sperc:$RA), + "evaddusiaaw $RT, $RA", IIC_VecFP, []>; +def EVADDUMIAAW : EVXForm_2<1224, (outs sperc:$RT), (ins sperc:$RA), + "evaddumiaaw $RT, $RA", IIC_VecFP, []>; +def EVADDW : EVXForm_1<512, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evaddw $RT, $RA, $RB", IIC_VecFP, + [(set v2i32:$RT, (add v2i32:$RA, v2i32:$RB))]>; + +def EVAND : EVXForm_1<529, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evand $RT, $RA, $RB", IIC_VecFP, + [(set v2i32:$RT, (and v2i32:$RA, v2i32:$RB))]>; +def EVANDC : EVXForm_1<530, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evandc $RT, $RA, $RB", IIC_VecFP, + [(set v2i32:$RT, (and v2i32:$RA, + (xor (v2i32 immAllOnesV), v2i32:$RB)))]>; + +let isCompare = 1 in { +def EVCMPEQ : EVXForm_3<564, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), + "evcmpeq $crD, $RA, $RB", IIC_VecFP, []>; +def EVCMPGTS : EVXForm_3<561, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), + "evcmpgts $crD, $RA, $RB", IIC_VecFP, []>; +def EVCMPGTU : EVXForm_3<560, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), + "evcmpgtu $crD, $RA, 
$RB", IIC_VecFP, []>; +def EVCMPLTS : EVXForm_3<563, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), + "evcmplts $crD, $RA, $RB", IIC_VecFP, []>; +def EVCMPLTU : EVXForm_3<562, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), + "evcmpltu $crD, $RA, $RB", IIC_VecFP, []>; +} + +def EVCNTLSW : EVXForm_2<526, (outs sperc:$RT), (ins sperc:$RA), + "evcntlsw $RT, $RA", IIC_VecFP, []>; +def EVCNTLZW : EVXForm_2<525, (outs sperc:$RT), (ins sperc:$RA), + "evcntlzw $RT, $RA", IIC_VecFP, + [(set v2i32:$RT, (ctlz v2i32:$RA))]>; + +def EVDIVWS : EVXForm_1<1222, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evdivws $RT, $RA, $RB", IIC_VecFP, + [(set v2i32:$RT, (sdiv v2i32:$RA, v2i32:$RB))]>; +def EVDIVWU : EVXForm_1<1223, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evdivwu $RT, $RA, $RB", IIC_VecFP, + [(set v2i32:$RT, (udiv v2i32:$RA, v2i32:$RB))]>; + +def EVEQV : EVXForm_1<537, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "eveqv $RT, $RA, $RB", IIC_VecFP, + [(set v2i32:$RT, + (not (xor v2i32:$RA, v2i32:$RB)))]>; + +def EVEXTSB : EVXForm_2<522, (outs sperc:$RT), (ins sperc:$RA), + "evextsb $RT, $RA", IIC_VecFP, + []>; +def EVEXTSH : EVXForm_2<523, (outs sperc:$RT), (ins sperc:$RA), + "evextsh $RT, $RA", IIC_VecFP, + []>; + +def EVFSABS : EVXForm_2<644, (outs sperc:$RT), (ins sperc:$RA), + "evfsabs $RT, $RA", IIC_VecFP, + [(set v2f32:$RT, (fabs v2f32:$RA))]>; +def EVFSADD : EVXForm_1<640, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evfsadd $RT, $RA, $RB", IIC_VecFP, + [(set v2f32:$RT, (fadd v2f32:$RA, v2f32:$RB))]>; +def EVFSCFSF : EVXForm_2a<659, (outs sperc:$RT), (ins sperc:$RB), + "evfscfsf $RT, $RB", IIC_VecFP, []>; +def EVFSCFSI : EVXForm_2a<657, (outs sperc:$RT), (ins sperc:$RB), + "evfscfsi $RT, $RB", IIC_VecFP, + [(set v2f32:$RT, (sint_to_fp v2i32:$RB))]>; +def EVFSCFUF : EVXForm_2a<658, (outs sperc:$RT), (ins sperc:$RB), + "evfscfuf $RT, $RB", IIC_VecFP, []>; +def EVFSCFUI : EVXForm_2a<656, (outs sperc:$RT), (ins sperc:$RB), + "evfscfui $RT, $RB", IIC_VecFP, + 
[(set v2f32:$RT, (uint_to_fp v2i32:$RB))]>; +let isCompare = 1 in { +def EVFSCMPEQ : EVXForm_3<654, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), + "evfscmpeq $crD, $RA, $RB", IIC_FPCompare, []>; +def EVFSCMPGT : EVXForm_3<652, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), + "evfscmpgt $crD, $RA, $RB", IIC_FPCompare, []>; +def EVFSCMPLT : EVXForm_3<653, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), + "evfscmplt $crD, $RA, $RB", IIC_FPCompare, []>; +} + +def EVFSCTSF : EVXForm_2a<663, (outs sperc:$RT), (ins sperc:$RB), + "evfsctsf $RT, $RB", IIC_FPGeneral, []>; +def EVFSCTSI : EVXForm_2a<661, (outs sperc:$RT), (ins sperc:$RB), + "evfsctsi $RT, $RB", IIC_FPGeneral, + [(set v2i32:$RT, (fp_to_sint v2f32:$RB))]>; +def EVFSCTSIZ : EVXForm_2a<666, (outs sperc:$RT), (ins sperc:$RB), + "evfsctsiz $RT, $RB", IIC_FPGeneral, + []>; +def EVFSCTUF : EVXForm_2a<662, (outs sperc:$RT), (ins sperc:$RB), + "evfsctuf $RT, $RB", IIC_FPGeneral, []>; +def EVFSCTUI : EVXForm_2a<660, (outs sperc:$RT), (ins sperc:$RB), + "evfsctui $RT, $RB", IIC_FPGeneral, + [(set v2i32:$RT, (fp_to_uint v2f32:$RB))]>; +def EVFSCTUIZ : EVXForm_2a<664, (outs sperc:$RT), (ins sperc:$RB), + "evfsctuiz $RT, $RB", IIC_FPGeneral, + []>; +def EVFSDIV : EVXForm_1<649, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evfsdiv $RT, $RA, $RB", IIC_FPDivD, + [(set v2f32:$RT, (fdiv v2f32:$RA, v2f32:$RB))]>; +def EVFSMUL : EVXForm_1<648, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evfsmul $RT, $RA, $RB", IIC_FPGeneral, + [(set v2f32:$RT, (fmul v2f32:$RA, v2f32:$RB))]>; +def EVFSNABS : EVXForm_2<645, (outs sperc:$RT), (ins sperc:$RA), + "evfsnabs $RT, $RA", IIC_FPGeneral, + [(set v2f32:$RT, (fneg (fabs v2f32:$RA)))]>; +def EVFSNEG : EVXForm_2<646, (outs sperc:$RT), (ins sperc:$RA), + "evfsneg $RT, $RA", IIC_FPGeneral, + [(set v2f32:$RT, (fneg v2f32:$RA))]>; +def EVFSSUB : EVXForm_1<641, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evfssub $RT, $RA, $RB", IIC_FPAddSub, + [(set v2f32:$RT, (fsub v2f32:$RA, 
v2f32:$RB))]>; + +let isCompare = 1 in { +def EVFSTSTEQ : EVXForm_3<670, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), + "evfststeq $crD, $RA, $RB", IIC_FPCompare, []>; +def EVFSTSTGT : EVXForm_3<668, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), + "evfststgt $crD, $RA, $RB", IIC_FPCompare, []>; +def EVFSTSTLT : EVXForm_3<669, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), + "evfststlt $crD, $RA, $RB", IIC_FPCompare, []>; +} + +def EVLDD : EVXForm_D<769, (outs sperc:$RT), (ins spe8dis:$dst), + "evldd $RT, $dst", IIC_VecFP, + [(set f64:$RT, (load ixaddr:$dst))]>; +def EVLDDX : EVXForm_1<768, (outs sperc:$RT), (ins memrr:$src), + "evlddx $RT, $src", IIC_VecFP, + [(set f64:$RT, (load xaddr:$src))]>; +def EVLDH : EVXForm_D<773, (outs sperc:$RT), (ins spe8dis:$dst), + "evldh $RT, $dst", IIC_VecFP, []>; +def EVLDHX : EVXForm_1<772, (outs sperc:$RT), (ins memrr:$src), + "evldhx $RT, $src", IIC_VecFP, []>; +def EVLDW : EVXForm_D<771, (outs sperc:$RT), (ins spe8dis:$dst), + "evldw $RT, $dst", IIC_VecFP, + [(set v2i32:$RT, (load ixaddr:$dst))]>; +def EVLDWX : EVXForm_1<770, (outs sperc:$RT), (ins memrr:$src), + "evldwx $RT, $src", IIC_VecFP, + [(set v2i32:$RT, (load xaddr:$src))]>; +let isAsmParserOnly = 1 in { +def EVLDWFS : EVXForm_D<771, (outs sperc:$RT), (ins spe8dis:$dst), + "evldw $RT, $dst", IIC_VecFP, + [(set v2f32:$RT, (load ixaddr:$dst))]>; +def EVLDWXFS : EVXForm_1<770, (outs sperc:$RT), (ins memrr:$src), + "evldwx $RT, $src", IIC_VecFP, + [(set v2f32:$RT, (load xaddr:$src))]>; +} +def EVLHHESPLAT : EVXForm_D<777, (outs sperc:$RT), (ins spe2dis:$dst), + "evlhhesplat $RT, $dst", IIC_VecFP, []>; +def EVLHHESPLATX : EVXForm_1<776, (outs sperc:$RT), (ins memrr:$src), + "evlhhesplatx $RT, $src", IIC_VecFP, []>; +def EVLHHOUSPLAT : EVXForm_D<781, (outs sperc:$RT), (ins spe2dis:$dst), + "evlhhousplat $RT, $dst", IIC_VecFP, []>; +def EVLHHOUSPLATX : EVXForm_1<780, (outs sperc:$RT), (ins memrr:$src), + "evlhhousplatx $RT, $src", IIC_VecFP, []>; +def EVLHHOSSPLAT : 
EVXForm_D<783, (outs sperc:$RT), (ins spe2dis:$dst), + "evlhhossplat $RT, $dst", IIC_VecFP, []>; +def EVLHHOSSPLATX : EVXForm_1<782, (outs sperc:$RT), (ins memrr:$src), + "evlhhossplatx $RT, $src", IIC_VecFP, []>; +def EVLWHE : EVXForm_D<785, (outs sperc:$RT), (ins spe4dis:$dst), + "evlwhe $RT, $dst", IIC_VecFP, []>; +def EVLWHEX : EVXForm_1<784, (outs sperc:$RT), (ins memrr:$src), + "evlwhex $RT, $src", IIC_VecFP, []>; +def EVLWHOS : EVXForm_D<791, (outs sperc:$RT), (ins spe4dis:$dst), + "evlwhos $RT, $dst", IIC_VecFP, []>; +def EVLWHOSX : EVXForm_1<790, (outs sperc:$RT), (ins memrr:$src), + "evlwhosx $RT, $src", IIC_VecFP, []>; +def EVLWHOU : EVXForm_D<789, (outs sperc:$RT), (ins spe4dis:$dst), + "evlwhou $RT, $dst", IIC_VecFP, []>; +def EVLWHOUX : EVXForm_1<788, (outs sperc:$RT), (ins memrr:$src), + "evlwhoux $RT, $src", IIC_VecFP, []>; +def EVLWHSPLAT : EVXForm_D<797, (outs sperc:$RT), (ins spe4dis:$dst), + "evlwhsplat $RT, $dst", IIC_VecFP, []>; +def EVLWHSPLATX : EVXForm_1<796, (outs sperc:$RT), (ins memrr:$src), + "evlwhsplatx $RT, $src", IIC_VecFP, []>; +def EVLWWSPLAT : EVXForm_D<793, (outs sperc:$RT), (ins spe4dis:$dst), + "evlwwsplat $RT, $dst", IIC_VecFP, []>; +def EVLWWSPLATX : EVXForm_1<792, (outs sperc:$RT), (ins memrr:$src), + "evlwwsplatx $RT, $src", IIC_VecFP, []>; + +def EVMERGEHI : EVXForm_1<556, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmergehi $RT, $RA, $RB", IIC_VecFP, []>; +def EVMERGELO : EVXForm_1<557, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmergelo $RT, $RA, $RB", IIC_VecFP, []>; +def EVMERGEHILO : EVXForm_1<558, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmergehilo $RT, $RA, $RB", IIC_VecFP, []>; +def EVMERGELOHI : EVXForm_1<559, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmergelohi $RT, $RA, $RB", IIC_VecFP, []>; + +def EVMHEGSMFAA : EVXForm_1<1323, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhegsmfaa $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHEGSMFAN : EVXForm_1<1451, (outs sperc:$RT), (ins 
sperc:$RA, sperc:$RB), + "evmhegsmfan $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHEGSMIAA : EVXForm_1<1321, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhegsmiaa $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHEGSMIAN : EVXForm_1<1449, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhegsmian $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHEGUMIAA : EVXForm_1<1320, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhegumiaa $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHEGUMIAN : EVXForm_1<1448, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhegumian $RT, $RA, $RB", IIC_VecFP, []>; + +def EVMHESMF : EVXForm_1<1035, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhesmf $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHESMFA : EVXForm_1<1067, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhesmfa $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHESMFAAW : EVXForm_1<1291, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhesmfaaw $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHESMFANW : EVXForm_1<1419, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhesmfanw $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHESMI : EVXForm_1<1033, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhesmi $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHESMIA : EVXForm_1<1065, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhesmia $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHESMIAAW : EVXForm_1<1289, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhesmiaaw $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHESMIANW : EVXForm_1<1417, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhesmianw $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHESSF : EVXForm_1<1027, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhessf $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHESSFA : EVXForm_1<1059, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhessfa $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHESSFAAW : EVXForm_1<1283, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhessfaaw $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHESSFANW : EVXForm_1<1411, 
(outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhessfanw $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHESSIAAW : EVXForm_1<1281, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhessiaaw $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHESSIANW : EVXForm_1<1409, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhessianw $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHEUMI : EVXForm_1<1032, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmheumi $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHEUMIA : EVXForm_1<1064, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmheumia $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHEUMIAAW : EVXForm_1<1288, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmheumiaaw $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHEUMIANW : EVXForm_1<1416, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmheumianw $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHEUSIAAW : EVXForm_1<1280, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmheusiaaw $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHEUSIANW : EVXForm_1<1408, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmheusianw $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHOGSMFAA : EVXForm_1<1327, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhogsmfaa $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHOGSMFAN : EVXForm_1<1455, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhogsmfan $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHOGSMIAA : EVXForm_1<1325, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhogsmiaa $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHOGSMIAN : EVXForm_1<1453, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhogsmian $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHOGUMIAA : EVXForm_1<1324, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhogumiaa $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHOGUMIAN : EVXForm_1<1452, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhogumian $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHOSMF : EVXForm_1<1039, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhosmf $RT, $RA, $RB", IIC_VecFP, []>; 
+def EVMHOSMFA : EVXForm_1<1071, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhosmfa $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHOSMFAAW : EVXForm_1<1295, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhosmfaaw $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHOSMFANW : EVXForm_1<1423, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhosmfanw $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHOSMI : EVXForm_1<1037, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhosmi $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHOSMIA : EVXForm_1<1069, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhosmia $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHOSMIAAW : EVXForm_1<1293, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhosmiaaw $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHOSMIANW : EVXForm_1<1421, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhosmianw $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHOSSF : EVXForm_1<1031, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhossf $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHOSSFA : EVXForm_1<1063, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhossfa $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHOSSFAAW : EVXForm_1<1287, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhossfaaw $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHOSSFANW : EVXForm_1<1415, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhossfanw $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHOSSIAAW : EVXForm_1<1285, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhossiaaw $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHOSSIANW : EVXForm_1<1413, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhossianw $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHOUMI : EVXForm_1<1036, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhoumi $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHOUMIA : EVXForm_1<1068, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhoumia $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHOUMIAAW : EVXForm_1<1292, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhoumiaaw $RT, $RA, $RB", 
IIC_VecFP, []>; +def EVMHOUMIANW : EVXForm_1<1420, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhoumianw $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHOUSIAAW : EVXForm_1<1284, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhousiaaw $RT, $RA, $RB", IIC_VecFP, []>; +def EVMHOUSIANW : EVXForm_1<1412, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhousianw $RT, $RA, $RB", IIC_VecFP, []>; + +def EVMRA : EVXForm_2<1220, (outs sperc:$RT), (ins sperc:$RA), + "evmra $RT, $RA", IIC_VecFP, []>; + +def EVMWHSMF : EVXForm_1<1103, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwhsmf $RT, $RA, $RB", IIC_VecFP, []>; +def EVMWHSMFA : EVXForm_1<1135, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwhsmfa $RT, $RA, $RB", IIC_VecFP, []>; +def EVMWHSMI : EVXForm_1<1101, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwhsmi $RT, $RA, $RB", IIC_VecFP, []>; +def EVMWHSMIA : EVXForm_1<1133, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwhsmia $RT, $RA, $RB", IIC_VecFP, []>; +def EVMWHSSF : EVXForm_1<1095, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwhssf $RT, $RA, $RB", IIC_VecFP, []>; +def EVMWHSSFA : EVXForm_1<1127, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwhssfa $RT, $RA, $RB", IIC_VecFP, []>; +def EVMWHUMI : EVXForm_1<1100, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwhumi $RT, $RA, $RB", IIC_VecFP, []>; +def EVMWHUMIA : EVXForm_1<1132, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwhumia $RT, $RA, $RB", IIC_VecFP, []>; +def EVMWLSMIAAW : EVXForm_1<1353, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwlsmiaaw $RT, $RA, $RB", IIC_VecFP, []>; +def EVMWLSMIANW : EVXForm_1<1481, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwlsmianw $RT, $RA, $RB", IIC_VecFP, []>; +def EVMWLSSIAAW : EVXForm_1<1345, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwlssiaaw $RT, $RA, $RB", IIC_VecFP, []>; +def EVMWLSSIANW : EVXForm_1<1473, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwlssianw $RT, $RA, $RB", IIC_VecFP, []>; 
+def EVMWLUMI : EVXForm_1<1096, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwlumi $RT, $RA, $RB", IIC_VecFP, + [(set v2i32:$RT, (mul v2i32:$RA, v2i32:$RB))]>; +def EVMWLUMIA : EVXForm_1<1128, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwlumia $RT, $RA, $RB", IIC_VecFP, []>; +def EVMWLUMIAAW : EVXForm_1<1352, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwlumiaaw $RT, $RA, $RB", IIC_VecFP, []>; +def EVMWLUMIANW : EVXForm_1<1480, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwlumianw $RT, $RA, $RB", IIC_VecFP, []>; +def EVMWLUSIAAW : EVXForm_1<1344, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwlusiaaw $RT, $RA, $RB", IIC_VecFP, []>; +def EVMWLUSIANW : EVXForm_1<1472, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwlusianw $RT, $RA, $RB", IIC_VecFP, []>; +def EVMWSMF : EVXForm_1<1115, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwsmf $RT, $RA, $RB", IIC_VecFP, []>; +def EVMWSMFA : EVXForm_1<1147, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwsmfa $RT, $RA, $RB", IIC_VecFP, []>; +def EVMWSMFAA : EVXForm_1<1371, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwsmfaa $RT, $RA, $RB", IIC_VecFP, []>; +def EVMWSMFAN : EVXForm_1<1499, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwsmfan $RT, $RA, $RB", IIC_VecFP, []>; +def EVMWSMI : EVXForm_1<1113, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwsmi $RT, $RA, $RB", IIC_VecFP, []>; +def EVMWSMIA : EVXForm_1<1145, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwsmia $RT, $RA, $RB", IIC_VecFP, []>; +def EVMWSMIAA : EVXForm_1<1369, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwsmiaa $RT, $RA, $RB", IIC_VecFP, []>; +def EVMWSMIAN : EVXForm_1<1497, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwsmian $RT, $RA, $RB", IIC_VecFP, []>; +def EVMWSSF : EVXForm_1<1107, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwssf $RT, $RA, $RB", IIC_VecFP, []>; +def EVMWSSFA : EVXForm_1<1139, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwssfa $RT, 
$RA, $RB", IIC_VecFP, []>; +def EVMWSSFAA : EVXForm_1<1363, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwssfaa $RT, $RA, $RB", IIC_VecFP, []>; +def EVMWSSFAN : EVXForm_1<1491, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwssfan $RT, $RA, $RB", IIC_VecFP, []>; +def EVMWUMI : EVXForm_1<1112, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwumi $RT, $RA, $RB", IIC_VecFP, []>; +def EVMWUMIA : EVXForm_1<1144, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwumia $RT, $RA, $RB", IIC_VecFP, []>; +def EVMWUMIAA : EVXForm_1<1368, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwumiaa $RT, $RA, $RB", IIC_VecFP, []>; +def EVMWUMIAN : EVXForm_1<1496, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwumian $RT, $RA, $RB", IIC_VecFP, []>; + + +def EVNAND : EVXForm_1<542, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evnand $RT, $RA, $RB", IIC_VecFP, + [(set v2i32:$RT, + (xor (v2i32 immAllOnesV), + (and v2i32:$RA, v2i32:$RB)))]>; + +def EVNEG : EVXForm_2<521, (outs sperc:$RT), (ins sperc:$RA), + "evneg $RT, $RA", IIC_VecFP, + [(set v2i32:$RT, + (sub (v2i32 immAllZerosV), v2i32:$RA))]>; + +def EVNOR : EVXForm_1<536, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evnor $RT, $RA, $RB", IIC_VecFP, + [(set v2i32:$RT, + (xor (v2i32 immAllOnesV), + (or v2i32:$RA, v2i32:$RB)))]>; +def EVOR : EVXForm_1<535, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evor $RT, $RA, $RB", IIC_VecFP, + [(set v2i32:$RT, (or v2i32:$RA, v2i32:$RB))]>; +def EVORC : EVXForm_1<539, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evorc $RT, $RA, $RB", IIC_VecFP, + [(set v2i32:$RT, (or v2i32:$RA, + (xor (v2i32 immAllOnesV), v2i32:$RB)))]>; + +def EVRLWI : EVXForm_1<554, (outs sperc:$RT), (ins sperc:$RA, u5imm:$RB), + "evrlwi $RT, $RA, $RB", IIC_VecFP, []>; +def EVRLW : EVXForm_1<552, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evrlw $RT, $RA, $RB", IIC_VecFP, + [(set v2i32:$RT, (rotl v2i32:$RA, v2i32:$RB))]>; + +def EVRNDW : EVXForm_2<524, (outs sperc:$RT), (ins 
sperc:$RA), + "evrndw $RT, $RA", IIC_VecFP, []>; + +def EVSEL : EVXForm_4<79, (outs sperc:$RT), + (ins sperc:$RA, sperc:$RB, crrc:$crD), + "evsel $RT, $RA, $RB, $crD", IIC_VecFP, []>; + +def EVSLWI : EVXForm_1<550, (outs sperc:$RT), (ins sperc:$RA, u5imm:$RB), + "evslwi $RT, $RA, $RB", IIC_VecFP, []>; +def EVSLW : EVXForm_1<548, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evslw $RT, $RA, $RB", IIC_VecFP, + [(set v2i32:$RT, (shl v2i32:$RA, v2i32:$RB))]>; + +def EVSPLATFI : EVXForm_2<555, (outs sperc:$RT), (ins i32imm:$RA), + "evsplatfi $RT, $RA", IIC_VecFP, []>; +def EVSPLATI : EVXForm_2<553, (outs sperc:$RT), (ins s5imm:$RA), + "evsplati $RT, $RA", IIC_VecFP, + [(set v2i32:$RT, (build_vector imm32SExt5:$RA))]>; + +def EVSRWIS : EVXForm_1<547, (outs sperc:$RT), (ins sperc:$RA, u5imm:$RB), + "evsrwis $RT, $RA, $RB", IIC_VecFP, []>; +def EVSRWIU : EVXForm_1<546, (outs sperc:$RT), (ins sperc:$RA, u5imm:$RB), + "evsrwiu $RT, $RA, $RB", IIC_VecFP, []>; +def EVSRWS : EVXForm_1<545, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evsrws $RT, $RA, $RB", IIC_VecFP, + [(set v2i32:$RT, (sra v2i32:$RA, v2i32:$RB))]>; +def EVSRWU : EVXForm_1<544, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evsrwu $RT, $RA, $RB", IIC_VecFP, + [(set v2i32:$RT, (srl v2i32:$RA, v2i32:$RB))]>; + +def EVSTDD : EVXForm_D<801, (outs), (ins sperc:$RT, spe8dis:$dst), + "evstdd $RT, $dst", IIC_VecFP, + [(store f64:$RT, ixaddr:$dst)]>; +def EVSTDDX : EVXForm_1<800, (outs), (ins sperc:$RT, memrr:$dst), + "evstddx $RT, $dst", IIC_VecFP, + [(store f64:$RT, xaddr:$dst)]>; +def EVSTDH : EVXForm_D<805, (outs), (ins sperc:$RT, spe8dis:$dst), + "evstdh $RT, $dst", IIC_VecFP, []>; +def EVSTDHX : EVXForm_1<804, (outs), (ins sperc:$RT, memrr:$dst), + "evstdhx $RT, $dst", IIC_VecFP, []>; +def EVSTDW : EVXForm_D<803, (outs), (ins sperc:$RT, spe8dis:$dst), + "evstdw $RT, $dst", IIC_VecFP, + [(store v2i32:$RT, ixaddr:$dst)]>; +def EVSTDWX : EVXForm_1<802, (outs), (ins sperc:$RT, memrr:$dst), + "evstdwx $RT, $dst", 
IIC_VecFP, + [(store v2i32:$RT, xaddr:$dst)]>; +let isAsmParserOnly = 1 in { +def EVSTDWFS : EVXForm_D<803, (outs), (ins sperc:$RT, spe8dis:$dst), + "evstdw $RT, $dst", IIC_VecFP, + [(store v2f32:$RT, ixaddr:$dst)]>; +def EVSTDWXFS : EVXForm_1<802, (outs), (ins sperc:$RT, memrr:$dst), + "evstdwx $RT, $dst", IIC_VecFP, + [(store v2f32:$RT, xaddr:$dst)]>; +} +def EVSTWHE : EVXForm_D<817, (outs), (ins sperc:$RT, spe4dis:$dst), + "evstwhe $RT, $dst", IIC_VecFP, []>; +def EVSTWHEX : EVXForm_1<816, (outs), (ins sperc:$RT, memrr:$dst), + "evstwhex $RT, $dst", IIC_VecFP, []>; +def EVSTWHO : EVXForm_D<821, (outs), (ins sperc:$RT, spe4dis:$dst), + "evstwho $RT, $dst", IIC_VecFP, []>; +def EVSTWHOX : EVXForm_1<820, (outs), (ins sperc:$RT, memrr:$dst), + "evstwhox $RT, $dst", IIC_VecFP, []>; +def EVSTWWE : EVXForm_D<825, (outs), (ins sperc:$RT, spe4dis:$dst), + "evstwwe $RT, $dst", IIC_VecFP, []>; +def EVSTWWEX : EVXForm_1<824, (outs), (ins sperc:$RT, memrr:$dst), + "evstwwex $RT, $dst", IIC_VecFP, []>; +def EVSTWWO : EVXForm_D<829, (outs), (ins sperc:$RT, spe4dis:$dst), + "evstwwo $RT, $dst", IIC_VecFP, []>; +def EVSTWWOX : EVXForm_1<828, (outs), (ins sperc:$RT, memrr:$dst), + "evstwwox $RT, $dst", IIC_VecFP, []>; + +def EVSUBFSSIAAW : EVXForm_2<1219, (outs sperc:$RT), (ins sperc:$RA), + "evsubfssiaaw $RT, $RA", IIC_VecFP, []>; +def EVSUBFSMIAAW : EVXForm_2<1227, (outs sperc:$RT), (ins sperc:$RA), + "evsubfsmiaaw $RT, $RA", IIC_VecFP, []>; +def EVSUBFUMIAAW : EVXForm_2<1226, (outs sperc:$RT), (ins sperc:$RA), + "evsubfumiaaw $RT, $RA", IIC_VecFP, []>; +def EVSUBFUSIAAW : EVXForm_2<1218, (outs sperc:$RT), (ins sperc:$RA), + "evsubfusiaaw $RT, $RA", IIC_VecFP, []>; +def EVSUBFW : EVXForm_1<516, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evsubfw $RT, $RA, $RB", IIC_VecFP, + [(set v2i32:$RT, (sub v2i32:$RB, v2i32:$RA))]>; +def EVSUBIFW : EVXForm_1<518, (outs sperc:$RT), (ins u5imm:$RA, sperc:$RB), + "evsubifw $RT, $RA, $RB", IIC_VecFP, + []>; 
+def EVXOR : EVXForm_1<534, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evxor $RT, $RA, $RB", IIC_VecFP, + [(set v2i32:$RT, (xor v2i32:$RA, v2i32:$RB))]>; + +let isAsmParserOnly = 1 in { +// Identical to the integer Load/Stores, but to handle floats +def SPELWZ : DForm_1<32, (outs spe4rc:$rD), (ins memri:$src), + "lwz $rD, $src", IIC_LdStLoad, + [(set f32:$rD, (load iaddr:$src))]>; +def SPELWZX : XForm_1<31, 23, (outs spe4rc:$rD), (ins memrr:$src), + "lwzx $rD, $src", IIC_LdStLoad, + [(set f32:$rD, (load xaddr:$src))]>; +def SPESTW : DForm_1<36, (outs), (ins spe4rc:$rS, memri:$src), + "stw $rS, $src", IIC_LdStStore, + [(store f32:$rS, iaddr:$src)]>; +def SPESTWX : XForm_8<31, 151, (outs), (ins spe4rc:$rS, memrr:$dst), + "stwx $rS, $dst", IIC_LdStStore, + [(store f32:$rS, xaddr:$dst)]>; +} } // HasSPE + +let Predicates = [HasSPE] in { +def : Pat<(f64 (extloadf32 iaddr:$src)), + (COPY_TO_REGCLASS (SPELWZ iaddr:$src), SPERC)>; +def : Pat<(f64 (extloadf32 xaddr:$src)), + (COPY_TO_REGCLASS (SPELWZX xaddr:$src), SPERC)>; + +def : Pat<(f64 (fpextend f32:$src)), + (COPY_TO_REGCLASS $src, SPERC)>; +} Index: lib/Target/PowerPC/PPCRegisterInfo.cpp =================================================================== --- lib/Target/PowerPC/PPCRegisterInfo.cpp +++ lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -100,6 +100,12 @@ ImmToIdxMap[PPC::STXV] = PPC::STXVX; ImmToIdxMap[PPC::STXSD] = PPC::STXSDX; ImmToIdxMap[PPC::STXSSP] = PPC::STXSSPX; + + // SPE + ImmToIdxMap[PPC::EVLDD] = PPC::EVLDDX; + ImmToIdxMap[PPC::EVSTDD] = PPC::EVSTDDX; + ImmToIdxMap[PPC::SPESTW] = PPC::SPESTWX; + ImmToIdxMap[PPC::SPELWZ] = PPC::SPELWZX; } /// getPointerRegClass - Return the register class to use to hold pointers. @@ -307,6 +313,8 @@ return 0; case PPC::G8RC_NOX0RegClassID: case PPC::GPRC_NOR0RegClassID: + case PPC::SPERCRegClassID: + case PPC::SPE4RCRegClassID: case PPC::G8RCRegClassID: case PPC::GPRCRegClassID: { unsigned FP = TFI->hasFP(MF) ? 
1 : 0; Index: lib/Target/PowerPC/PPCRegisterInfo.td =================================================================== --- lib/Target/PowerPC/PPCRegisterInfo.td +++ lib/Target/PowerPC/PPCRegisterInfo.td @@ -38,6 +38,13 @@ let SubRegIndices = [sub_32]; } +// SPE - One of the 32 64-bit general-purpose registers (SPE) +class SPE<GPR SubReg, string n> : PPCReg<n> { + let HWEncoding = SubReg.HWEncoding; + let SubRegs = [SubReg]; + let SubRegIndices = [sub_32]; +} + // SPR - One of the 32-bit special-purpose registers class SPR<bits<10> num, string n> : PPCReg<n> { let HWEncoding{9-0} = num; @@ -100,6 +107,12 @@ DwarfRegNum<[Index, -2]>; } +// SPE registers +foreach Index = 0-31 in { + def S#Index : SPE<!cast<GPR>("R"#Index), "r"#Index>, + DwarfRegNum<[!add(Index, 1200), !add(Index, 1200)]>; +} + // Floating-point registers foreach Index = 0-31 in { def F#Index : FPR<Index, "f"#Index>, @@ -208,6 +221,9 @@ // VRsave register def VRSAVE: SPR<256, "vrsave">, DwarfRegNum<[109]>; +// SPE extra registers +def SPEACC: DwarfRegNum<[99, 111]>; +def SPEFSCR: SPR<512, "spefscr">, DwarfRegNum<[612, 112]>; // Carry bit. In the architecture this is really bit 0 of the XER register // (which really is SPR register 1); this is the only bit interesting to a // compiler. @@ -272,6 +288,20 @@ }]; } +def SPERC : RegisterClass<"PPC", [f64,v2i32,v2f32], 64, (add (sequence "S%u", 2, 12), + (sequence "S%u", 30, 13), + S31, S0, S1)> { + // Alternate order, selected only for PPC64 SVR4 subtargets: S2 (whose sub_32 + // subregister is R2) is moved to the end of the allocation list. + let AltOrders = [(add (sub SPERC, S2), S2)]; + let AltOrderSelect = [{ + const PPCSubtarget &S = MF.getSubtarget<PPCSubtarget>(); + return S.isPPC64() && S.isSVR4ABI(); + }]; +} + +def SPE4RC : RegisterClass<"PPC", [f32], 32, (add GPRC)>; + // Allocate volatiles first, then non-volatiles in reverse order. 
With the SVR4 // ABI the size of the Floating-point register save area is determined by the // allocated non-volatile register with the lowest register number, as FP Index: lib/Target/PowerPC/PPCSchedule.td =================================================================== --- lib/Target/PowerPC/PPCSchedule.td +++ lib/Target/PowerPC/PPCSchedule.td @@ -133,5 +133,6 @@ include "PPCScheduleP8.td" include "PPCScheduleP9.td" include "PPCScheduleA2.td" +include "PPCScheduleE500.td" include "PPCScheduleE500mc.td" include "PPCScheduleE5500.td" Index: lib/Target/PowerPC/PPCScheduleE500.td =================================================================== --- lib/Target/PowerPC/PPCScheduleE500.td +++ lib/Target/PowerPC/PPCScheduleE500.td @@ -1,4 +1,4 @@ -//===-- PPCScheduleE500mc.td - e500mc Scheduling Defs ------*- tablegen -*-===// +//===-- PPCScheduleE500.td - e500 Scheduling Defs ------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -7,14 +7,14 @@ // //===----------------------------------------------------------------------===// // -// This file defines the itinerary class data for the Freescale e500mc 32-bit +// This file defines the itinerary class data for the Freescale e500 32-bit // Power processor. // -// All information is derived from the "e500mc Core Reference Manual", +// All information is derived from the "e500 Core Reference Manual", // Freescale Document Number E500MCRM, Rev. 1, 03/2012. // //===----------------------------------------------------------------------===// -// Relevant functional units in the Freescale e500mc core: +// Relevant functional units in the Freescale e500 core: // // * Decode & Dispatch // Can dispatch up to 2 instructions per clock cycle to either the GPR Issue @@ -23,89 +23,75 @@ def E500_DIS1 : FuncUnit; // Dispatch stage - insn 2 // * Execute -// 6 pipelined execution units: SFX0, SFX1, BU, FPU, LSU, CFX. -// Some instructions can only execute in SFX0 but not SFX1. 
-// The CFX has a bypass path, allowing non-divide instructions to execute -// while a divide instruction is executed. -def E500_SFX0 : FuncUnit; // Simple unit 0 -def E500_SFX1 : FuncUnit; // Simple unit 1 +// 5 pipelined execution units: SU0, SU1, BU, LSU, MU. +// Some instructions can only execute in SU0 but not SU1. +def E500_SU0 : FuncUnit; // Simple unit 0 +def E500_SU1 : FuncUnit; // Simple unit 1 def E500_BU : FuncUnit; // Branch unit -def E500_CFX_DivBypass - : FuncUnit; // CFX divide bypass path -def E500_CFX_0 : FuncUnit; // CFX pipeline +def E500_MU : FuncUnit; // MU pipeline def E500_LSU_0 : FuncUnit; // LSU pipeline -def E500_FPU_0 : FuncUnit; // FPU pipeline def E500_GPR_Bypass : Bypass; -def E500_FPR_Bypass : Bypass; def E500_CR_Bypass : Bypass; -def PPCE500mcItineraries : ProcessorItineraries< - [E500_DIS0, E500_DIS1, E500_SFX0, E500_SFX1, E500_BU, E500_CFX_DivBypass, - E500_CFX_0, E500_LSU_0, E500_FPU_0], - [E500_CR_Bypass, E500_GPR_Bypass, E500_FPR_Bypass], [ +def PPCE500Itineraries : ProcessorItineraries< + [E500_DIS0, E500_DIS1, E500_SU0, E500_SU1, E500_BU, + E500_MU, E500_LSU_0], + [E500_CR_Bypass, E500_GPR_Bypass], [ InstrItinData, - InstrStage<1, [E500_SFX0, E500_SFX1]>], + InstrStage<1, [E500_SU0, E500_SU1]>], [4, 1, 1], // Latency = 1 [E500_GPR_Bypass, E500_GPR_Bypass, E500_GPR_Bypass]>, InstrItinData, - InstrStage<1, [E500_SFX0, E500_SFX1]>], + InstrStage<1, [E500_SU0, E500_SU1]>], [4, 1, 1], // Latency = 1 [E500_GPR_Bypass, E500_GPR_Bypass, E500_GPR_Bypass]>, InstrItinData, - InstrStage<1, [E500_SFX0, E500_SFX1]>], + InstrStage<1, [E500_SU0, E500_SU1]>], [4, 1, 1, 1], // Latency = 1 [E500_GPR_Bypass, E500_GPR_Bypass, E500_GPR_Bypass, E500_CR_Bypass]>, InstrItinData, - InstrStage<1, [E500_SFX0, E500_SFX1]>], + InstrStage<1, [E500_SU0, E500_SU1]>], [5, 1, 1], // Latency = 1 or 2 [E500_CR_Bypass, E500_GPR_Bypass, E500_GPR_Bypass]>, InstrItinData, - InstrStage<1, [E500_CFX_0], 0>, - InstrStage<14, [E500_CFX_DivBypass]>], + InstrStage<1, 
[E500_MU], 0>, + InstrStage<14, [E500_MU]>], [17, 1, 1], // Latency=4..35, Repeat= 4..35 [E500_GPR_Bypass, E500_GPR_Bypass, E500_GPR_Bypass]>, - InstrItinData, - InstrStage<8, [E500_FPU_0]>], - [11], // Latency = 8 - [E500_FPR_Bypass]>, - InstrItinData, - InstrStage<8, [E500_FPU_0]>], - [11, 1, 1], // Latency = 8 - [NoBypass, NoBypass, NoBypass]>, InstrItinData, - InstrStage<1, [E500_CFX_0]>], + InstrStage<1, [E500_MU]>], [7, 1, 1], // Latency = 4, Repeat rate = 1 [E500_GPR_Bypass, E500_GPR_Bypass, E500_GPR_Bypass]>, InstrItinData, - InstrStage<1, [E500_CFX_0]>], + InstrStage<1, [E500_MU]>], [7, 1, 1], // Latency = 4, Repeat rate = 1 [E500_GPR_Bypass, E500_GPR_Bypass, E500_GPR_Bypass]>, InstrItinData, - InstrStage<1, [E500_CFX_0]>], + InstrStage<1, [E500_MU]>], [7, 1, 1], // Latency = 4, Repeat rate = 1 [E500_GPR_Bypass, E500_GPR_Bypass, E500_GPR_Bypass]>, InstrItinData, - InstrStage<1, [E500_SFX0, E500_SFX1]>], + InstrStage<1, [E500_SU0, E500_SU1]>], [4, 1, 1], // Latency = 1 [E500_GPR_Bypass, E500_GPR_Bypass, E500_GPR_Bypass]>, InstrItinData, - InstrStage<1, [E500_SFX0, E500_SFX1]>], + InstrStage<1, [E500_SU0, E500_SU1]>], [4, 1, 1], // Latency = 1 [E500_GPR_Bypass, E500_GPR_Bypass, E500_GPR_Bypass]>, InstrItinData, - InstrStage<2, [E500_SFX0]>], + InstrStage<2, [E500_SU0]>], [5, 1], // Latency = 2, Repeat rate = 2 [E500_GPR_Bypass, E500_GPR_Bypass]>, InstrItinData, @@ -122,7 +108,7 @@ [4, 1], // Latency = 1 [E500_CR_Bypass, E500_CR_Bypass]>, InstrItinData, - InstrStage<1, [E500_SFX0, E500_SFX1]>], + InstrStage<1, [E500_SU0, E500_SU1]>], [4, 1, 1], // Latency = 1 [E500_CR_Bypass, E500_GPR_Bypass]>, InstrItinData, @@ -142,13 +128,13 @@ [6, 1], // Latency = 3 [E500_GPR_Bypass, E500_GPR_Bypass]>, InstrItinData, - InstrStage<1, [E500_SFX0, E500_SFX1], 0>, + InstrStage<1, [E500_SU0, E500_SU1], 0>, InstrStage<1, [E500_LSU_0]>], [6, 1], // Latency = 3 [E500_GPR_Bypass, E500_GPR_Bypass], 2>, // 2 micro-ops InstrItinData, - InstrStage<1, [E500_SFX0, E500_SFX1], 0>, + 
InstrStage<1, [E500_SU0, E500_SU1], 0>, InstrStage<1, [E500_LSU_0]>], [6, 1], // Latency = 3 [E500_GPR_Bypass, E500_GPR_Bypass], @@ -158,7 +144,7 @@ [6, 1], // Latency = 3 [NoBypass, E500_GPR_Bypass]>, InstrItinData, - InstrStage<1, [E500_SFX0, E500_SFX1], 0>, + InstrStage<1, [E500_SU0, E500_SU1], 0>, InstrStage<1, [E500_LSU_0]>], [6, 1], // Latency = 3 [NoBypass, E500_GPR_Bypass], @@ -167,48 +153,17 @@ InstrStage<1, [E500_LSU_0]>], [6, 1], // Latency = 3 [NoBypass, E500_GPR_Bypass]>, - InstrItinData, - InstrStage<1, [E500_LSU_0]>], - [6, 1, 1], // Latency = 3 - [E500_GPR_Bypass, - E500_GPR_Bypass, E500_GPR_Bypass]>, - InstrItinData, - InstrStage<1, [E500_SFX0, E500_SFX1], 0>, - InstrStage<1, [E500_LSU_0]>], - [6, 1, 1], // Latency = 3 - [E500_GPR_Bypass, - E500_GPR_Bypass, E500_GPR_Bypass], - 2>, // 2 micro-ops - InstrItinData, - InstrStage<1, [E500_LSU_0]>], - [7, 1, 1], // Latency = 4 - [E500_FPR_Bypass, - E500_GPR_Bypass, E500_GPR_Bypass]>, - InstrItinData, - InstrStage<1, [E500_SFX0, E500_SFX1], 0>, - InstrStage<1, [E500_LSU_0]>], - [7, 1, 1], // Latency = 4 - [E500_FPR_Bypass, - E500_GPR_Bypass, E500_GPR_Bypass], - 2>, // 2 micro-ops - InstrItinData, - InstrStage<1, [E500_SFX0, E500_SFX1], 0>, - InstrStage<1, [E500_LSU_0]>], - [7, 1, 1], // Latency = 4 - [E500_FPR_Bypass, - E500_GPR_Bypass, E500_GPR_Bypass], - 2>, // 2 micro-ops InstrItinData, InstrStage<1, [E500_LSU_0]>], [6, 1], // Latency = 3 [E500_GPR_Bypass, E500_GPR_Bypass]>, InstrItinData, - InstrStage<1, [E500_SFX0, E500_SFX1], 0>, + InstrStage<1, [E500_SU0, E500_SU1], 0>, InstrStage<1, [E500_LSU_0]>], [6, 1], // Latency = 3 [E500_GPR_Bypass, E500_GPR_Bypass]>, InstrItinData, - InstrStage<1, [E500_SFX0, E500_SFX1], 0>, + InstrStage<1, [E500_SU0, E500_SU1], 0>, InstrStage<1, [E500_LSU_0]>], [6, 1], // Latency = 3 [E500_GPR_Bypass, E500_GPR_Bypass]>, @@ -228,96 +183,61 @@ InstrItinData, InstrStage<1, [E500_LSU_0]>]>, InstrItinData, - InstrStage<4, [E500_SFX0]>], + InstrStage<4, [E500_SU0]>], [7, 1], 
[E500_GPR_Bypass, E500_GPR_Bypass]>, InstrItinData, - InstrStage<2, [E500_SFX0, E500_SFX1]>], + InstrStage<2, [E500_SU0, E500_SU1]>], [5, 1], // Latency = 2, Repeat rate = 4 [E500_GPR_Bypass, E500_GPR_Bypass]>, InstrItinData, - InstrStage<1, [E500_SFX0]>], + InstrStage<1, [E500_SU0]>], [5, 1], [NoBypass, E500_GPR_Bypass]>, InstrItinData, InstrStage<1, [E500_LSU_0], 0>]>, InstrItinData, - InstrStage<5, [E500_SFX0]>], + InstrStage<5, [E500_SU0]>], [8, 1], [E500_GPR_Bypass, E500_CR_Bypass]>, InstrItinData, - InstrStage<5, [E500_SFX0]>], + InstrStage<5, [E500_SU0]>], [8, 1], [E500_GPR_Bypass, E500_CR_Bypass]>, InstrItinData, - InstrStage<4, [E500_SFX0]>], + InstrStage<4, [E500_SU0]>], [7, 1], // Latency = 4, Repeat rate = 4 [E500_GPR_Bypass, E500_GPR_Bypass]>, InstrItinData, - InstrStage<4, [E500_SFX0]>], + InstrStage<4, [E500_SU0]>], [7, 1], // Latency = 4, Repeat rate = 4 [E500_GPR_Bypass, E500_GPR_Bypass]>, InstrItinData, - InstrStage<1, [E500_SFX0, E500_SFX1]>], + InstrStage<1, [E500_SU0, E500_SU1]>], [4, 1], // Latency = 1, Repeat rate = 1 [E500_GPR_Bypass, E500_CR_Bypass]>, InstrItinData, - InstrStage<1, [E500_SFX0]>], + InstrStage<1, [E500_SU0]>], [4, 1], // Latency = 1, Repeat rate = 1 [E500_CR_Bypass, E500_GPR_Bypass]>, InstrItinData, - InstrStage<4, [E500_SFX0]>], + InstrStage<4, [E500_SU0]>], [7, 1], // Latency = 4, Repeat rate = 4 [NoBypass, E500_GPR_Bypass]>, InstrItinData, - InstrStage<1, [E500_SFX0, E500_SFX1]>], + InstrStage<1, [E500_SU0, E500_SU1]>], [4, 1], // Latency = 1, Repeat rate = 1 [E500_CR_Bypass, E500_GPR_Bypass]>, InstrItinData, - InstrStage<1, [E500_SFX0]>], + InstrStage<1, [E500_SU0]>], [4, 1], - [NoBypass, E500_GPR_Bypass]>, - InstrItinData, - InstrStage<2, [E500_FPU_0]>], - [11, 1, 1], // Latency = 8, Repeat rate = 2 - [E500_FPR_Bypass, - E500_FPR_Bypass, E500_FPR_Bypass]>, - InstrItinData, - InstrStage<4, [E500_FPU_0]>], - [13, 1, 1], // Latency = 10, Repeat rate = 4 - [E500_FPR_Bypass, - E500_FPR_Bypass, E500_FPR_Bypass]>, - 
InstrItinData, - InstrStage<2, [E500_FPU_0]>], - [11, 1, 1], // Latency = 8, Repeat rate = 2 - [E500_CR_Bypass, - E500_FPR_Bypass, E500_FPR_Bypass]>, - InstrItinData, - InstrStage<68, [E500_FPU_0]>], - [71, 1, 1], // Latency = 68, Repeat rate = 68 - [E500_FPR_Bypass, - E500_FPR_Bypass, E500_FPR_Bypass]>, - InstrItinData, - InstrStage<38, [E500_FPU_0]>], - [41, 1, 1], // Latency = 38, Repeat rate = 38 - [E500_FPR_Bypass, - E500_FPR_Bypass, E500_FPR_Bypass]>, - InstrItinData, - InstrStage<4, [E500_FPU_0]>], - [13, 1, 1, 1], // Latency = 10, Repeat rate = 4 - [E500_FPR_Bypass, - E500_FPR_Bypass, E500_FPR_Bypass, - E500_FPR_Bypass]>, - InstrItinData, - InstrStage<38, [E500_FPU_0]>], - [41, 1], // Latency = 38, Repeat rate = 38 - [E500_FPR_Bypass, E500_FPR_Bypass]> + [NoBypass, E500_GPR_Bypass]> ]>; // ===---------------------------------------------------------------------===// -// e500mc machine model for scheduling and other instruction cost heuristics. +// e500 machine model for scheduling and other instruction cost heuristics. -def PPCE500mcModel : SchedMachineModel { +def PPCE500Model : SchedMachineModel { let IssueWidth = 2; // 2 micro-ops are dispatched per cycle. let LoadLatency = 5; // Optimistic load latency assuming bypass. // This is overriden by OperandCycles if the @@ -325,5 +245,5 @@ let CompleteModel = 0; - let Itineraries = PPCE500mcItineraries; + let Itineraries = PPCE500Itineraries; } Index: lib/Target/PowerPC/PPCScheduleE500mc.td =================================================================== --- lib/Target/PowerPC/PPCScheduleE500mc.td +++ lib/Target/PowerPC/PPCScheduleE500mc.td @@ -19,299 +19,299 @@ // * Decode & Dispatch // Can dispatch up to 2 instructions per clock cycle to either the GPR Issue // queues (GIQx), FP Issue Queue (FIQ), or Branch issue queue (BIQ). 
-def E500_DIS0 : FuncUnit; // Dispatch stage - insn 1 -def E500_DIS1 : FuncUnit; // Dispatch stage - insn 2 +def E500mc_DIS0 : FuncUnit; // Dispatch stage - insn 1 +def E500mc_DIS1 : FuncUnit; // Dispatch stage - insn 2 // * Execute // 6 pipelined execution units: SFX0, SFX1, BU, FPU, LSU, CFX. // Some instructions can only execute in SFX0 but not SFX1. // The CFX has a bypass path, allowing non-divide instructions to execute // while a divide instruction is executed. -def E500_SFX0 : FuncUnit; // Simple unit 0 -def E500_SFX1 : FuncUnit; // Simple unit 1 -def E500_BU : FuncUnit; // Branch unit -def E500_CFX_DivBypass +def E500mc_SFX0 : FuncUnit; // Simple unit 0 +def E500mc_SFX1 : FuncUnit; // Simple unit 1 +def E500mc_BU : FuncUnit; // Branch unit +def E500mc_CFX_DivBypass : FuncUnit; // CFX divide bypass path -def E500_CFX_0 : FuncUnit; // CFX pipeline -def E500_LSU_0 : FuncUnit; // LSU pipeline -def E500_FPU_0 : FuncUnit; // FPU pipeline +def E500mc_CFX_0 : FuncUnit; // CFX pipeline +def E500mc_LSU_0 : FuncUnit; // LSU pipeline +def E500mc_FPU_0 : FuncUnit; // FPU pipeline -def E500_GPR_Bypass : Bypass; -def E500_FPR_Bypass : Bypass; -def E500_CR_Bypass : Bypass; +def E500mc_GPR_Bypass : Bypass; +def E500mc_FPR_Bypass : Bypass; +def E500mc_CR_Bypass : Bypass; def PPCE500mcItineraries : ProcessorItineraries< - [E500_DIS0, E500_DIS1, E500_SFX0, E500_SFX1, E500_BU, E500_CFX_DivBypass, - E500_CFX_0, E500_LSU_0, E500_FPU_0], - [E500_CR_Bypass, E500_GPR_Bypass, E500_FPR_Bypass], [ - InstrItinData, - InstrStage<1, [E500_SFX0, E500_SFX1]>], + [E500mc_DIS0, E500mc_DIS1, E500mc_SFX0, E500mc_SFX1, E500mc_BU, E500mc_CFX_DivBypass, + E500mc_CFX_0, E500mc_LSU_0, E500mc_FPU_0], + [E500mc_CR_Bypass, E500mc_GPR_Bypass, E500mc_FPR_Bypass], [ + InstrItinData, + InstrStage<1, [E500mc_SFX0, E500mc_SFX1]>], [4, 1, 1], // Latency = 1 - [E500_GPR_Bypass, - E500_GPR_Bypass, E500_GPR_Bypass]>, - InstrItinData, - InstrStage<1, [E500_SFX0, E500_SFX1]>], + [E500mc_GPR_Bypass, + 
E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500mc_SFX0, E500mc_SFX1]>], [4, 1, 1], // Latency = 1 - [E500_GPR_Bypass, - E500_GPR_Bypass, E500_GPR_Bypass]>, - InstrItinData, - InstrStage<1, [E500_SFX0, E500_SFX1]>], + [E500mc_GPR_Bypass, + E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500mc_SFX0, E500mc_SFX1]>], [4, 1, 1, 1], // Latency = 1 - [E500_GPR_Bypass, - E500_GPR_Bypass, E500_GPR_Bypass, - E500_CR_Bypass]>, - InstrItinData, - InstrStage<1, [E500_SFX0, E500_SFX1]>], + [E500mc_GPR_Bypass, + E500mc_GPR_Bypass, E500mc_GPR_Bypass, + E500mc_CR_Bypass]>, + InstrItinData, + InstrStage<1, [E500mc_SFX0, E500mc_SFX1]>], [5, 1, 1], // Latency = 1 or 2 - [E500_CR_Bypass, - E500_GPR_Bypass, E500_GPR_Bypass]>, - InstrItinData, - InstrStage<1, [E500_CFX_0], 0>, - InstrStage<14, [E500_CFX_DivBypass]>], + [E500mc_CR_Bypass, + E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500mc_CFX_0], 0>, + InstrStage<14, [E500mc_CFX_DivBypass]>], [17, 1, 1], // Latency=4..35, Repeat= 4..35 - [E500_GPR_Bypass, - E500_GPR_Bypass, E500_GPR_Bypass]>, - InstrItinData, - InstrStage<8, [E500_FPU_0]>], + [E500mc_GPR_Bypass, + E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData, + InstrStage<8, [E500mc_FPU_0]>], [11], // Latency = 8 - [E500_FPR_Bypass]>, - InstrItinData, - InstrStage<8, [E500_FPU_0]>], + [E500mc_FPR_Bypass]>, + InstrItinData, + InstrStage<8, [E500mc_FPU_0]>], [11, 1, 1], // Latency = 8 [NoBypass, NoBypass, NoBypass]>, - InstrItinData, - InstrStage<1, [E500_CFX_0]>], + InstrItinData, + InstrStage<1, [E500mc_CFX_0]>], [7, 1, 1], // Latency = 4, Repeat rate = 1 - [E500_GPR_Bypass, - E500_GPR_Bypass, E500_GPR_Bypass]>, - InstrItinData, - InstrStage<1, [E500_CFX_0]>], + [E500mc_GPR_Bypass, + E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500mc_CFX_0]>], [7, 1, 1], // Latency = 4, Repeat rate = 1 - [E500_GPR_Bypass, - E500_GPR_Bypass, E500_GPR_Bypass]>, - InstrItinData, 
- InstrStage<1, [E500_CFX_0]>], + [E500mc_GPR_Bypass, + E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500mc_CFX_0]>], [7, 1, 1], // Latency = 4, Repeat rate = 1 - [E500_GPR_Bypass, - E500_GPR_Bypass, E500_GPR_Bypass]>, - InstrItinData, - InstrStage<1, [E500_SFX0, E500_SFX1]>], + [E500mc_GPR_Bypass, + E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500mc_SFX0, E500mc_SFX1]>], [4, 1, 1], // Latency = 1 - [E500_GPR_Bypass, - E500_GPR_Bypass, E500_GPR_Bypass]>, - InstrItinData, - InstrStage<1, [E500_SFX0, E500_SFX1]>], + [E500mc_GPR_Bypass, + E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500mc_SFX0, E500mc_SFX1]>], [4, 1, 1], // Latency = 1 - [E500_GPR_Bypass, - E500_GPR_Bypass, E500_GPR_Bypass]>, - InstrItinData, - InstrStage<2, [E500_SFX0]>], + [E500mc_GPR_Bypass, + E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData, + InstrStage<2, [E500mc_SFX0]>], [5, 1], // Latency = 2, Repeat rate = 2 - [E500_GPR_Bypass, E500_GPR_Bypass]>, - InstrItinData, - InstrStage<1, [E500_BU]>], + [E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500mc_BU]>], [4, 1], // Latency = 1 - [NoBypass, E500_GPR_Bypass]>, - InstrItinData, - InstrStage<1, [E500_BU]>], + [NoBypass, E500mc_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500mc_BU]>], [4, 1, 1], // Latency = 1 - [E500_CR_Bypass, - E500_CR_Bypass, E500_CR_Bypass]>, - InstrItinData, - InstrStage<1, [E500_BU]>], + [E500mc_CR_Bypass, + E500mc_CR_Bypass, E500mc_CR_Bypass]>, + InstrItinData, + InstrStage<1, [E500mc_BU]>], [4, 1], // Latency = 1 - [E500_CR_Bypass, E500_CR_Bypass]>, - InstrItinData, - InstrStage<1, [E500_SFX0, E500_SFX1]>], + [E500mc_CR_Bypass, E500mc_CR_Bypass]>, + InstrItinData, + InstrStage<1, [E500mc_SFX0, E500mc_SFX1]>], [4, 1, 1], // Latency = 1 - [E500_CR_Bypass, E500_GPR_Bypass]>, - InstrItinData, - InstrStage<1, [E500_LSU_0]>], + [E500mc_CR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData, + InstrStage<1, 
[E500mc_LSU_0]>], [6, 1], // Latency = 3, Repeat rate = 1 - [E500_GPR_Bypass, E500_GPR_Bypass]>, - InstrItinData, - InstrStage<1, [E500_LSU_0]>], + [E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500mc_LSU_0]>], [6, 1], // Latency = 3 - [E500_GPR_Bypass, E500_GPR_Bypass]>, - InstrItinData, - InstrStage<1, [E500_LSU_0]>], + [E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500mc_LSU_0]>], [6, 1], // Latency = 3 - [E500_GPR_Bypass, E500_GPR_Bypass]>, - InstrItinData, - InstrStage<1, [E500_LSU_0]>], + [E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500mc_LSU_0]>], [6, 1], // Latency = 3 - [E500_GPR_Bypass, E500_GPR_Bypass]>, - InstrItinData, - InstrStage<1, [E500_SFX0, E500_SFX1], 0>, - InstrStage<1, [E500_LSU_0]>], + [E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500mc_SFX0, E500mc_SFX1], 0>, + InstrStage<1, [E500mc_LSU_0]>], [6, 1], // Latency = 3 - [E500_GPR_Bypass, E500_GPR_Bypass], + [E500mc_GPR_Bypass, E500mc_GPR_Bypass], 2>, // 2 micro-ops - InstrItinData, - InstrStage<1, [E500_SFX0, E500_SFX1], 0>, - InstrStage<1, [E500_LSU_0]>], + InstrItinData, + InstrStage<1, [E500mc_SFX0, E500mc_SFX1], 0>, + InstrStage<1, [E500mc_LSU_0]>], [6, 1], // Latency = 3 - [E500_GPR_Bypass, E500_GPR_Bypass], + [E500mc_GPR_Bypass, E500mc_GPR_Bypass], 2>, // 2 micro-ops - InstrItinData, - InstrStage<1, [E500_LSU_0]>], + InstrItinData, + InstrStage<1, [E500mc_LSU_0]>], [6, 1], // Latency = 3 - [NoBypass, E500_GPR_Bypass]>, - InstrItinData, - InstrStage<1, [E500_SFX0, E500_SFX1], 0>, - InstrStage<1, [E500_LSU_0]>], + [NoBypass, E500mc_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500mc_SFX0, E500mc_SFX1], 0>, + InstrStage<1, [E500mc_LSU_0]>], [6, 1], // Latency = 3 - [NoBypass, E500_GPR_Bypass], + [NoBypass, E500mc_GPR_Bypass], 2>, // 2 micro-ops - InstrItinData, - InstrStage<1, [E500_LSU_0]>], + InstrItinData, + InstrStage<1, [E500mc_LSU_0]>], [6, 1], // Latency = 3 - 
[NoBypass, E500_GPR_Bypass]>, - InstrItinData, - InstrStage<1, [E500_LSU_0]>], + [NoBypass, E500mc_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500mc_LSU_0]>], [6, 1, 1], // Latency = 3 - [E500_GPR_Bypass, - E500_GPR_Bypass, E500_GPR_Bypass]>, - InstrItinData, - InstrStage<1, [E500_SFX0, E500_SFX1], 0>, - InstrStage<1, [E500_LSU_0]>], + [E500mc_GPR_Bypass, + E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500mc_SFX0, E500mc_SFX1], 0>, + InstrStage<1, [E500mc_LSU_0]>], [6, 1, 1], // Latency = 3 - [E500_GPR_Bypass, - E500_GPR_Bypass, E500_GPR_Bypass], + [E500mc_GPR_Bypass, + E500mc_GPR_Bypass, E500mc_GPR_Bypass], 2>, // 2 micro-ops - InstrItinData, - InstrStage<1, [E500_LSU_0]>], + InstrItinData, + InstrStage<1, [E500mc_LSU_0]>], [7, 1, 1], // Latency = 4 - [E500_FPR_Bypass, - E500_GPR_Bypass, E500_GPR_Bypass]>, - InstrItinData, - InstrStage<1, [E500_SFX0, E500_SFX1], 0>, - InstrStage<1, [E500_LSU_0]>], + [E500mc_FPR_Bypass, + E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500mc_SFX0, E500mc_SFX1], 0>, + InstrStage<1, [E500mc_LSU_0]>], [7, 1, 1], // Latency = 4 - [E500_FPR_Bypass, - E500_GPR_Bypass, E500_GPR_Bypass], + [E500mc_FPR_Bypass, + E500mc_GPR_Bypass, E500mc_GPR_Bypass], 2>, // 2 micro-ops - InstrItinData, - InstrStage<1, [E500_SFX0, E500_SFX1], 0>, - InstrStage<1, [E500_LSU_0]>], + InstrItinData, + InstrStage<1, [E500mc_SFX0, E500mc_SFX1], 0>, + InstrStage<1, [E500mc_LSU_0]>], [7, 1, 1], // Latency = 4 - [E500_FPR_Bypass, - E500_GPR_Bypass, E500_GPR_Bypass], + [E500mc_FPR_Bypass, + E500mc_GPR_Bypass, E500mc_GPR_Bypass], 2>, // 2 micro-ops - InstrItinData, - InstrStage<1, [E500_LSU_0]>], + InstrItinData, + InstrStage<1, [E500mc_LSU_0]>], [6, 1], // Latency = 3 - [E500_GPR_Bypass, E500_GPR_Bypass]>, - InstrItinData, - InstrStage<1, [E500_SFX0, E500_SFX1], 0>, - InstrStage<1, [E500_LSU_0]>], + [E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500mc_SFX0, E500mc_SFX1], 0>, + 
InstrStage<1, [E500mc_LSU_0]>], [6, 1], // Latency = 3 - [E500_GPR_Bypass, E500_GPR_Bypass]>, - InstrItinData, - InstrStage<1, [E500_SFX0, E500_SFX1], 0>, - InstrStage<1, [E500_LSU_0]>], + [E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500mc_SFX0, E500mc_SFX1], 0>, + InstrStage<1, [E500mc_LSU_0]>], [6, 1], // Latency = 3 - [E500_GPR_Bypass, E500_GPR_Bypass]>, - InstrItinData, - InstrStage<1, [E500_LSU_0]>], + [E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500mc_LSU_0]>], [7, 1], // Latency = r+3 - [NoBypass, E500_GPR_Bypass]>, - InstrItinData, - InstrStage<3, [E500_LSU_0]>], + [NoBypass, E500mc_GPR_Bypass]>, + InstrItinData, + InstrStage<3, [E500mc_LSU_0]>], [6, 1, 1], // Latency = 3, Repeat rate = 3 - [E500_GPR_Bypass, - E500_GPR_Bypass, E500_GPR_Bypass]>, - InstrItinData, - InstrStage<1, [E500_LSU_0]>], + [E500mc_GPR_Bypass, + E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500mc_LSU_0]>], [6, 1], // Latency = 3 - [NoBypass, E500_GPR_Bypass]>, - InstrItinData, - InstrStage<1, [E500_LSU_0]>]>, - InstrItinData, - InstrStage<4, [E500_SFX0]>], + [NoBypass, E500mc_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500mc_LSU_0]>]>, + InstrItinData, + InstrStage<4, [E500mc_SFX0]>], [7, 1], - [E500_GPR_Bypass, E500_GPR_Bypass]>, - InstrItinData, - InstrStage<2, [E500_SFX0, E500_SFX1]>], + [E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData, + InstrStage<2, [E500mc_SFX0, E500mc_SFX1]>], [5, 1], // Latency = 2, Repeat rate = 4 - [E500_GPR_Bypass, E500_GPR_Bypass]>, - InstrItinData, - InstrStage<1, [E500_SFX0]>], + [E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500mc_SFX0]>], [5, 1], - [NoBypass, E500_GPR_Bypass]>, - InstrItinData, - InstrStage<1, [E500_LSU_0], 0>]>, - InstrItinData, - InstrStage<5, [E500_SFX0]>], + [NoBypass, E500mc_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500mc_LSU_0], 0>]>, + InstrItinData, + InstrStage<5, [E500mc_SFX0]>], [8, 1], 
- [E500_GPR_Bypass, E500_CR_Bypass]>, - InstrItinData, - InstrStage<5, [E500_SFX0]>], + [E500mc_GPR_Bypass, E500mc_CR_Bypass]>, + InstrItinData, + InstrStage<5, [E500mc_SFX0]>], [8, 1], - [E500_GPR_Bypass, E500_CR_Bypass]>, - InstrItinData, - InstrStage<4, [E500_SFX0]>], + [E500mc_GPR_Bypass, E500mc_CR_Bypass]>, + InstrItinData, + InstrStage<4, [E500mc_SFX0]>], [7, 1], // Latency = 4, Repeat rate = 4 - [E500_GPR_Bypass, E500_GPR_Bypass]>, - InstrItinData, - InstrStage<4, [E500_SFX0]>], + [E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData, + InstrStage<4, [E500mc_SFX0]>], [7, 1], // Latency = 4, Repeat rate = 4 - [E500_GPR_Bypass, E500_GPR_Bypass]>, - InstrItinData, - InstrStage<1, [E500_SFX0, E500_SFX1]>], + [E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500mc_SFX0, E500mc_SFX1]>], [4, 1], // Latency = 1, Repeat rate = 1 - [E500_GPR_Bypass, E500_CR_Bypass]>, - InstrItinData, - InstrStage<1, [E500_SFX0]>], + [E500mc_GPR_Bypass, E500mc_CR_Bypass]>, + InstrItinData, + InstrStage<1, [E500mc_SFX0]>], [4, 1], // Latency = 1, Repeat rate = 1 - [E500_CR_Bypass, E500_GPR_Bypass]>, - InstrItinData, - InstrStage<4, [E500_SFX0]>], + [E500mc_CR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData, + InstrStage<4, [E500mc_SFX0]>], [7, 1], // Latency = 4, Repeat rate = 4 - [NoBypass, E500_GPR_Bypass]>, - InstrItinData, - InstrStage<1, [E500_SFX0, E500_SFX1]>], + [NoBypass, E500mc_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500mc_SFX0, E500mc_SFX1]>], [4, 1], // Latency = 1, Repeat rate = 1 - [E500_CR_Bypass, E500_GPR_Bypass]>, - InstrItinData, - InstrStage<1, [E500_SFX0]>], + [E500mc_CR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500mc_SFX0]>], [4, 1], - [NoBypass, E500_GPR_Bypass]>, - InstrItinData, - InstrStage<2, [E500_FPU_0]>], + [NoBypass, E500mc_GPR_Bypass]>, + InstrItinData, + InstrStage<2, [E500mc_FPU_0]>], [11, 1, 1], // Latency = 8, Repeat rate = 2 - [E500_FPR_Bypass, - E500_FPR_Bypass, E500_FPR_Bypass]>, - 
InstrItinData, - InstrStage<4, [E500_FPU_0]>], + [E500mc_FPR_Bypass, + E500mc_FPR_Bypass, E500mc_FPR_Bypass]>, + InstrItinData, + InstrStage<4, [E500mc_FPU_0]>], [13, 1, 1], // Latency = 10, Repeat rate = 4 - [E500_FPR_Bypass, - E500_FPR_Bypass, E500_FPR_Bypass]>, - InstrItinData, - InstrStage<2, [E500_FPU_0]>], + [E500mc_FPR_Bypass, + E500mc_FPR_Bypass, E500mc_FPR_Bypass]>, + InstrItinData, + InstrStage<2, [E500mc_FPU_0]>], [11, 1, 1], // Latency = 8, Repeat rate = 2 - [E500_CR_Bypass, - E500_FPR_Bypass, E500_FPR_Bypass]>, - InstrItinData, - InstrStage<68, [E500_FPU_0]>], + [E500mc_CR_Bypass, + E500mc_FPR_Bypass, E500mc_FPR_Bypass]>, + InstrItinData, + InstrStage<68, [E500mc_FPU_0]>], [71, 1, 1], // Latency = 68, Repeat rate = 68 - [E500_FPR_Bypass, - E500_FPR_Bypass, E500_FPR_Bypass]>, - InstrItinData, - InstrStage<38, [E500_FPU_0]>], + [E500mc_FPR_Bypass, + E500mc_FPR_Bypass, E500mc_FPR_Bypass]>, + InstrItinData, + InstrStage<38, [E500mc_FPU_0]>], [41, 1, 1], // Latency = 38, Repeat rate = 38 - [E500_FPR_Bypass, - E500_FPR_Bypass, E500_FPR_Bypass]>, - InstrItinData, - InstrStage<4, [E500_FPU_0]>], + [E500mc_FPR_Bypass, + E500mc_FPR_Bypass, E500mc_FPR_Bypass]>, + InstrItinData, + InstrStage<4, [E500mc_FPU_0]>], [13, 1, 1, 1], // Latency = 10, Repeat rate = 4 - [E500_FPR_Bypass, - E500_FPR_Bypass, E500_FPR_Bypass, - E500_FPR_Bypass]>, - InstrItinData, - InstrStage<38, [E500_FPU_0]>], + [E500mc_FPR_Bypass, + E500mc_FPR_Bypass, E500mc_FPR_Bypass, + E500mc_FPR_Bypass]>, + InstrItinData, + InstrStage<38, [E500mc_FPU_0]>], [41, 1], // Latency = 38, Repeat rate = 38 - [E500_FPR_Bypass, E500_FPR_Bypass]> + [E500mc_FPR_Bypass, E500mc_FPR_Bypass]> ]>; // ===---------------------------------------------------------------------===// Index: lib/Target/PowerPC/PPCSubtarget.h =================================================================== --- lib/Target/PowerPC/PPCSubtarget.h +++ lib/Target/PowerPC/PPCSubtarget.h @@ -46,6 +46,7 @@ DIR_750, DIR_970, DIR_A2, + DIR_E500, 
DIR_E500mc, DIR_E5500, DIR_PWR3,
Index: test/CodeGen/PowerPC/spe.ll
===================================================================
--- /dev/null
+++ test/CodeGen/PowerPC/spe.ll
@@ -0,0 +1,515 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc-unknown-linux-gnu \
+; RUN:          -mattr=+spe | FileCheck %s
+; Scalar float tests
+declare float @llvm.fabs.f32(float)
+define float @test_float_abs(float %a) #0 {
+; CHECK-LABEL: test_float_abs
+  entry:
+    %0 = tail call float @llvm.fabs.f32(float %a)
+    ret float %0
+}
+; CHECK: efsabs 3, 3
+; CHECK: blr
+
+define float @test_fnabs(float %a) #0 {
+  entry:
+    %0 = tail call float @llvm.fabs.f32(float %a)
+    %sub = fsub float -0.000000e+00, %0
+    ret float %sub
+}
+; CHECK-LABEL: @test_fnabs
+; CHECK-NOT: efsnabs
+; CHECK: blr
+
+define float @test_fdiv(float %a, float %b) {
+entry:
+  %v = fdiv float %a, %b
+  ret float %v
+
+; CHECK-LABEL: test_fdiv
+; CHECK: efsdiv
+; CHECK: blr
+}
+
+define float @test_fmul(float %a, float %b) {
+  entry:
+    %v = fmul float %a, %b
+    ret float %v
+; CHECK-LABEL: @test_fmul
+; CHECK: efsmul
+; CHECK: blr
+}
+
+define float @test_fadd(float %a, float %b) {
+  entry:
+    %v = fadd float %a, %b
+    ret float %v
+; CHECK-LABEL: @test_fadd
+; CHECK: efsadd
+; CHECK: blr
+}
+
+define float @test_fsub(float %a, float %b) {
+  entry:
+    %v = fsub float %a, %b
+    ret float %v
+; CHECK-LABEL: @test_fsub
+; CHECK: efssub
+; CHECK: blr
+}
+
+define float @test_fneg(float %a) {
+  entry:
+    %v = fsub float -0.0, %a
+    ret float %v
+
+; CHECK-LABEL: @test_fneg
+; CHECK: efsneg
+; CHECK: blr
+}
+
+define float @test_dtos(double %a) {
+  entry:
+    %v = fptrunc double %a to float
+    ret float %v
+; CHECK-LABEL: test_dtos
+; CHECK: efscfd
+; CHECK: blr
+}
+; The scalar float compares and conversions below carry no FileCheck directives; they are only compiled under -verify-machineinstrs.
+define i1 @test_fcmpgt(float %a, float %b) {
+  entry:
+    %r = fcmp ogt float %a, %b
+    ret i1 %r
+}
+
+define i1 @test_fcmpeq(float %a, float %b) {
+  entry:
+    %r = fcmp oeq float %a, %b
+    ret i1 %r
+}
+
+define i1 @test_fcmplt(float %a, float %b) {
+  entry:
+    %r = fcmp olt float %a, %b
+    ret i1 %r
+}
+
+define i32 @test_ftoui(float %a) {
+  %v = fptoui float %a to i32
+  ret i32 %v
+}
+
+define i32 @test_ftosi(float %a) {
+  %v = fptosi float %a to i32
+  ret i32 %v
+}
+
+define float @test_ffromui(i32 %a) {
+  %v = uitofp i32 %a to float
+  ret float %v
+}
+
+define float @test_ffromsi(i32 %a) {
+  %v = sitofp i32 %a to float
+  ret float %v
+}
+
+; Double tests
+
+define double @test_double_abs(double %aa) #0 {
+
+; CHECK-LABEL: test_double_abs
+
+  entry:
+    %0 = tail call double @llvm.fabs.f64(double %aa) #2
+    ret double %0
+}
+; CHECK: blr
+
+; Function Attrs: nounwind readnone
+declare double @llvm.fabs.f64(double) #1
+
+define double @test_dnabs(double %aa) #0 {
+  entry:
+    %0 = tail call double @llvm.fabs.f64(double %aa) #2
+    %sub = fsub double -0.000000e+00, %0
+    ;%add2 = fadd double %aa, %sub
+    ;ret double %add2
+    %add = fadd double %sub, 1.0
+    ret double %add
+}
+; CHECK-LABEL: @test_dnabs
+; CHECK-NOT: efdnabs
+; CHECK: blr
+
+define double @test_ddiv(double %a, double %b) {
+entry:
+  %v = fdiv double %a, %b
+  ret double %v
+
+; CHECK-LABEL: test_ddiv
+; CHECK: efddiv
+; CHECK: blr
+}
+
+define double @test_dmul(double %a, double %b) {
+  entry:
+    %v = fmul double %a, %b
+    ret double %v
+; CHECK-LABEL: @test_dmul
+; CHECK: efdmul
+; CHECK: blr
+}
+
+define double @test_dadd(double %a, double %b) {
+  entry:
+    %v = fadd double %a, %b
+    ret double %v
+; CHECK-LABEL: @test_dadd
+; CHECK: efdadd
+; CHECK: blr
+}
+
+define double @test_dsub(double %a, double %b) {
+  entry:
+    %v = fsub double %a, %b
+    ret double %v
+; CHECK-LABEL: @test_dsub
+; CHECK: efdsub
+; CHECK: blr
+}
+
+define double @test_dneg(double %a) {
+  entry:
+    %v = fsub double -0.0, %a
+    ret double %v
+
+; CHECK-LABEL: @test_dneg
+; CHECK: blr
+}
+
+define double @test_stod(float %a) {
+  entry:
+    %v = fpext float %a to double
+    ret double %v
+; CHECK-LABEL: test_stod
+; CHECK: efdcfs
+; CHECK: blr
+}
+; The double compares and conversions below carry no FileCheck directives; they are only compiled under -verify-machineinstrs.
+define i1 @test_dcmpgt(double %a, double %b) {
+  entry:
+    %r = fcmp ogt double %a, %b
+    ret i1 %r
+}
+
+define i1 @test_dcmpeq(double %a, double %b) {
+  entry:
+    %r = fcmp oeq double %a, %b
+    ret i1 %r
+}
+
+define i1 @test_dcmplt(double %a, double %b) {
+  entry:
+    %r = fcmp olt double %a, %b
+    ret i1 %r
+}
+
+define i32 @test_dtoui(double %a) {
+  %v = fptoui double %a to i32
+  ret i32 %v
+}
+
+define i32 @test_dtosi(double %a) {
+  %v = fptosi double %a to i32
+  ret i32 %v
+}
+
+define double @test_dfromui(i32 %a) {
+  %v = uitofp i32 %a to double
+  ret double %v
+}
+
+define double @test_dfromsi(i32 %a) {
+  %v = sitofp i32 %a to double
+  ret double %v
+}
+
+; Vector float tests
+
+define <2 x float> @test_float_abs_v(<2 x float> %aa) #0 {
+
+; CHECK-LABEL: test_float_abs_v
+
+  entry:
+    %0 = tail call <2 x float> @llvm.fabs.v2f32(<2 x float> %aa) #2
+    ret <2 x float> %0
+}
+; Function Attrs: nounwind readnone
+declare <2 x float> @llvm.fabs.v2f32(<2 x float>) #1
+
+; CHECK: evfsabs 3, 3
+; CHECK: blr
+
+define <2 x float> @test2_float_abs_v(<2 x float> %aa) #0 {
+
+; CHECK-LABEL: test2_float_abs_v
+
+  entry:
+    %0 = tail call <2 x float> @llvm.fabs.v2f32(<2 x float> %aa) #2
+    %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %0
+    ret <2 x float> %sub
+}
+
+; CHECK: evfsnabs 3, 3
+; CHECK: blr
+
+define <2 x float> @test_fneg_v(<2 x float> %a) {
+  entry:
+    %v = fsub <2 x float> <float -0.0, float -0.0>, %a
+    ret <2 x float> %v
+
+; CHECK-LABEL: @test_fneg_v
+; CHECK: evfsneg
+; CHECK: blr
+}
+
+define <2 x float> @test_fdiv_v(<2 x float> %a, <2 x float> %b) {
+entry:
+  %v = fdiv <2 x float> %a, %b
+  ret <2 x float> %v
+
+; CHECK-LABEL: test_fdiv_v
+; CHECK: evfsdiv
+; CHECK: blr
+}
+
+define <2 x float> @test_fmul_v(<2 x float> %a, <2 x float> %b) {
+  entry:
+    %v = fmul <2 x float> %a, %b
+    ret <2 x float> %v
+; CHECK-LABEL: @test_fmul_v
+; CHECK: evfsmul
+; CHECK: blr
+}
+
+define <2 x float> @test_fadd_v(<2 x float> %a, <2 x float> %b) {
+  entry:
+    %v = fadd <2 x float> %a, %b
+    ret <2 x float> %v
+; CHECK-LABEL: @test_fadd_v
+; CHECK: evfsadd
+; CHECK: blr
+}
+
+define <2 x float> @test_fsub_v(<2 x float> %a, <2 x float> %b) {
+  entry:
+    %v = fsub <2 x float> %a, %b
+    ret <2 x float> %v
+; CHECK-LABEL: @test_fsub_v
+; CHECK: evfssub
+; CHECK: blr
+}
+
+;define <2 x i1> @test_fcmpgt_v(<2 x float> %a, <2 x float> %b) {
+;  entry:
+;    %r = fcmp ogt <2 x float> %a, %b
+;    ret <2 x i1> %r
+;}
+;
+;define <2 x i1> @test_fcmpeq_v(<2 x float> %a, <2 x float> %b) {
+;  entry:
+;    %r = fcmp oeq <2 x float> %a, %b
+;    ret <2 x i1> %r
+;}
+;
+;define <2 x i1> @test_fcmplt_v(<2 x float> %a, <2 x float> %b) {
+;  entry:
+;    %r = fcmp olt <2 x float> %a, %b
+;    ret <2 x i1> %r
+;}
+; The vector float conversions below carry no FileCheck directives; they are only compiled under -verify-machineinstrs.
+define <2 x i32> @test_ftoui_v(<2 x float> %a) {
+  %v = fptoui <2 x float> %a to <2 x i32>
+  ret <2 x i32> %v
+}
+
+define <2 x i32> @test_ftosi_v(<2 x float> %a) {
+  %v = fptosi <2 x float> %a to <2 x i32>
+  ret <2 x i32> %v
+}
+
+define <2 x float> @test_ffromui_v(<2 x i32> %a) {
+  %v = uitofp <2 x i32> %a to <2 x float>
+  ret <2 x float> %v
+}
+
+define <2 x float> @test_ffromsi_v(<2 x i32> %a) {
+  %v = sitofp <2 x i32> %a to <2 x float>
+  ret <2 x float> %v
+}
+
+; Vector int tests
+
+define <2 x i32> @test_i32_abs_v(<2 x i32> %aa) #0 {
+
+; CHECK-LABEL: test_i32_abs_v
+
+  entry:
+    %0 = tail call <2 x i32> @llvm.ppc.spe.evabs(<2 x i32> %aa) #2
+    ret <2 x i32> %0
+}
+
+declare <2 x i32> @llvm.ppc.spe.evabs(<2 x i32>) #1
+
+; CHECK: evabs 3, 3
+; CHECK: blr
+
+define <2 x i32> @test_neg_v(<2 x i32> %a) {
+  entry:
+    %v = sub <2 x i32> zeroinitializer, %a
+    ret <2 x i32> %v
+
+; CHECK-LABEL: @test_neg_v
+; CHECK: evneg
+; CHECK: blr
+}
+
+define <2 x i32> @test_nor_v(<2 x i32> %a, <2 x i32> %b) {
+  entry:
+    %v = or <2 x i32> %a, %b
+    %r = xor <2 x i32> %v, <i32 -1, i32 -1>
+    ret <2 x i32> %r
+
+; CHECK-LABEL: @test_nor_v
+; CHECK: evnor
+; CHECK: blr
+}
+
+define <2 x i32> @test_orc_v(<2 x i32> %a, <2 x i32> %b) {
+  entry:
+    %v = xor <2 x i32> %b, <i32 -1, i32 -1>
+    %r = or <2 x i32> %a, %v
+    ret <2 x i32> %r
+
+; CHECK-LABEL: @test_orc_v
+; CHECK: evorc
+; CHECK: blr
+}
+
+define <2 x i32> @test_nand_v(<2 x i32> %a, <2 x i32> %b) {
+  entry:
+    %v = and <2 x i32> %a, %b
+    %r = xor <2 x i32> %v, <i32 -1, i32 -1>
+    ret <2 x i32> %r
+
+; CHECK-LABEL: @test_nand_v
+; CHECK: evnand
+; CHECK: blr
+}
+
+define <2 x i32> @test_andc_v(<2 x i32> %a, <2 x i32> %b) {
+  entry:
+    %v = xor <2 x i32> %b, <i32 -1, i32 -1>
+    %r = and <2 x i32> %a, %v
+    ret <2 x i32> %r
+
+; CHECK-LABEL: @test_andc_v
+; CHECK: evandc
+; CHECK: blr
+}
+
+define <2 x i32> @test_xor_v(<2 x i32> %a, <2 x i32> %b) {
+  entry:
+    %v = xor <2 x i32> %a, %b
+    ret <2 x i32> %v
+
+; CHECK-LABEL: @test_xor_v
+; CHECK: evxor
+; CHECK: blr
+}
+; The vector shifts below carry no FileCheck directives; they are only compiled under -verify-machineinstrs.
+define <2 x i32> @test_slw_v(<2 x i32> %a, <2 x i32> %b) {
+  entry:
+    %v = shl <2 x i32> %a, %b
+    ret <2 x i32> %v
+}
+
+define <2 x i32> @test_srws_v(<2 x i32> %a, <2 x i32> %b) {
+  entry:
+    %v = ashr <2 x i32> %a, %b
+    ret <2 x i32> %v
+}
+
+define <2 x i32> @test_srwu_v(<2 x i32> %a, <2 x i32> %b) {
+  entry:
+    %v = lshr <2 x i32> %a, %b
+    ret <2 x i32> %v
+}
+
+define <2 x i32> @test_divs_v(<2 x i32> %a, <2 x i32> %b) {
+entry:
+  %v = sdiv <2 x i32> %a, %b
+  ret <2 x i32> %v
+
+; CHECK-LABEL: test_divs_v
+; CHECK: evdivws
+; CHECK: blr
+}
+
+define <2 x i32> @test_divu_v(<2 x i32> %a, <2 x i32> %b) {
+entry:
+  %v = udiv <2 x i32> %a, %b
+  ret <2 x i32> %v
+
+; CHECK-LABEL: test_divu_v
+; CHECK: evdivwu
+; CHECK: blr
+}
+
+define <2 x i32> @test_mul_v(<2 x i32> %a, <2 x i32> %b) {
+  entry:
+    %v = mul <2 x i32> %a, %b
+    ret <2 x i32> %v
+; CHECK-LABEL: @test_mul_v
+; CHECK: evmwlumi
+; CHECK: blr
+}
+
+define <2 x i32> @test_add_v(<2 x i32> %a, <2 x i32> %b) {
+  entry:
+    %v = add <2 x i32> %a, %b
+    ret <2 x i32> %v
+; CHECK-LABEL: @test_add_v
+; CHECK: evadd
+; CHECK: blr
+}
+
+define <2 x i32> @test_sub_v(<2 x i32> %a, <2 x i32> %b) {
+  entry:
+    %v = sub <2 x i32> %a, %b
+    ret <2 x i32> %v
+; CHECK-LABEL: @test_sub_v
+; CHECK: evsubf
+; CHECK: blr
+}
+
+;define <2 x i1> @test_cmpgt_v(<2 x i32> %a, <2 x i32> %b) {
+;  entry:
+;    %r = cmp ogt <2 x i32> %a, %b
+;    ret <2 x i1> %r
+;}
+;
+;define <2 x i1> @test_cmpeq_v(<2 x i32> %a, <2 x i32> %b) {
+;  entry:
+;
%r = cmp oeq <2 x i32> %a, %b +; ret <2 x i1> %r +;} +; +;define <2 x i1> @test_cmplt_v(<2 x i32> %a, <2 x i32> %b) { +; entry: +; %r = cmp olt <2 x i32> %a, %b +; ret <2 x i1> %r +;} +; Index: test/MC/PowerPC/ppc64-encoding-spe.s =================================================================== --- test/MC/PowerPC/ppc64-encoding-spe.s +++ test/MC/PowerPC/ppc64-encoding-spe.s @@ -1,5 +1,5 @@ -# RUN: llvm-mc -triple powerpc64-unknown-unknown --show-encoding %s | FileCheck -check-prefix=CHECK-BE %s -# RUN: llvm-mc -triple powerpc64le-unknown-unknown --show-encoding %s | FileCheck -check-prefix=CHECK-LE %s +# RUN: llvm-mc -triple powerpc64-unknown-unknown --show-encoding %s | FileCheck -check-prefix=CHECK-BE %s +# RUN: llvm-mc -triple powerpc64le-unknown-unknown --show-encoding %s | FileCheck -check-prefix=CHECK-LE %s # Instructions from the Signal Processing Engine extension: @@ -620,3 +620,157 @@ # CHECK-BE: evstwwo 14, 124(9) # encoding: [0x11,0xc9,0xfb,0x3d] # CHECK-LE: evstwwo 14, 124(9) # encoding: [0x3d,0xfb,0xc9,0x11] evstwwo %r14, 124(%r9) + +# CHECK-BE: efdabs 3, 4 # encoding: [0x10,0x64,0x02,0xe4] +# CHECK-LE: efdabs 3, 4 # encoding: [0xe4,0x02,0x64,0x10] + efdabs %r3, %r4 +# CHECK-BE: efdadd 3, 4, 5 # encoding: [0x10,0x64,0x2a,0xe0] +# CHECK-LE: efdadd 3, 4, 5 # encoding: [0xe0,0x2a,0x64,0x10] + efdadd %r3, %r4, %r5 +# CHECK-BE: efdcfs 3, 4 # encoding: [0x10,0x60,0x22,0xef] +# CHECK-LE: efdcfs 3, 4 # encoding: [0xef,0x22,0x60,0x10] + efdcfs %r3, %r4 +# CHECK-BE: efdcfsf 5, 6 # encoding: [0x10,0xa0,0x32,0xf3] +# CHECK-LE: efdcfsf 5, 6 # encoding: [0xf3,0x32,0xa0,0x10] + efdcfsf %r5, %r6 +# CHECK-BE: efdcfsi 5, 6 # encoding: [0x10,0xa0,0x32,0xf1] +# CHECK-LE: efdcfsi 5, 6 # encoding: [0xf1,0x32,0xa0,0x10] + efdcfsi %r5, %r6 +# CHECK-BE: efdcfsid 10, 14 # encoding: [0x11,0x40,0x72,0xe3] +# CHECK-LE: efdcfsid 10, 14 # encoding: [0xe3,0x72,0x40,0x11] + efdcfsid %r10, %r14 +# CHECK-BE: efdcfuf 5, 8 # encoding: [0x10,0xa0,0x42,0xf2] +# CHECK-LE: efdcfuf 5, 8 # 
encoding: [0xf2,0x42,0xa0,0x10] + efdcfuf %r5, %r8 +# CHECK-BE: efdcfui 6, 9 # encoding: [0x10,0xc0,0x4a,0xf0] +# CHECK-LE: efdcfui 6, 9 # encoding: [0xf0,0x4a,0xc0,0x10] + efdcfui %r6, %r9 +# CHECK-BE: efdcfuid 7, 10 # encoding: [0x10,0xe0,0x52,0xe2] +# CHECK-LE: efdcfuid 7, 10 # encoding: [0xe2,0x52,0xe0,0x10] + efdcfuid %r7, %r10 +# CHECK-BE: efdcmpeq 3, 3, 8 # encoding: [0x11,0x83,0x42,0xee] +# CHECK-LE: efdcmpeq 3, 3, 8 # encoding: [0xee,0x42,0x83,0x11] + efdcmpeq %cr3, %r3, %r8 +# CHECK-BE: efdcmpgt 4, 7, 3 # encoding: [0x12,0x07,0x1a,0xec] +# CHECK-LE: efdcmpgt 4, 7, 3 # encoding: [0xec,0x1a,0x07,0x12] + efdcmpgt %cr4, %r7, %r3 +# CHECK-BE: efdcmplt 2, 3, 4 # encoding: [0x11,0x03,0x22,0xed] +# CHECK-LE: efdcmplt 2, 3, 4 # encoding: [0xed,0x22,0x03,0x11] + efdcmplt %cr2, %r3, %r4 +# CHECK-BE: efdctsf 5, 3 # encoding: [0x10,0xa0,0x1a,0xf7] +# CHECK-LE: efdctsf 5, 3 # encoding: [0xf7,0x1a,0xa0,0x10] + efdctsf %r5, %r3 +# CHECK-BE: efdctsi 6, 4 # encoding: [0x10,0xc0,0x22,0xf5] +# CHECK-LE: efdctsi 6, 4 # encoding: [0xf5,0x22,0xc0,0x10] + efdctsi %r6, %r4 +# CHECK-BE: efdctsidz 3, 4 # encoding: [0x10,0x60,0x22,0xeb] +# CHECK-LE: efdctsidz 3, 4 # encoding: [0xeb,0x22,0x60,0x10] + efdctsidz %r3, %r4 +# CHECK-BE: efdctsiz 3, 4 # encoding: [0x10,0x60,0x22,0xfa] +# CHECK-LE: efdctsiz 3, 4 # encoding: [0xfa,0x22,0x60,0x10] + efdctsiz %r3, %r4 +# CHECK-BE: efdctuf 5, 8 # encoding: [0x10,0xa0,0x42,0xf6] +# CHECK-LE: efdctuf 5, 8 # encoding: [0xf6,0x42,0xa0,0x10] + efdctuf %r5, %r8 +# CHECK-BE: efdctui 9, 10 # encoding: [0x11,0x20,0x52,0xf4] +# CHECK-LE: efdctui 9, 10 # encoding: [0xf4,0x52,0x20,0x11] + efdctui %r9, %r10 +# CHECK-BE: efdctuidz 3, 8 # encoding: [0x10,0x60,0x42,0xea] +# CHECK-LE: efdctuidz 3, 8 # encoding: [0xea,0x42,0x60,0x10] + efdctuidz %r3, %r8 +# CHECK-BE: efdctuiz 5, 17 # encoding: [0x10,0xa0,0x8a,0xf8] +# CHECK-LE: efdctuiz 5, 17 # encoding: [0xf8,0x8a,0xa0,0x10] + efdctuiz %r5, %r17 +# CHECK-BE: efddiv 3, 4, 5 # encoding: [0x10,0x64,0x2a,0xe9] +# 
CHECK-LE: efddiv 3, 4, 5 # encoding: [0xe9,0x2a,0x64,0x10] + efddiv %r3, %r4, %r5 +# CHECK-BE: efdmul 0, 3, 8 # encoding: [0x10,0x03,0x42,0xe8] +# CHECK-LE: efdmul 0, 3, 8 # encoding: [0xe8,0x42,0x03,0x10] + efdmul %r0, %r3, %r8 +# CHECK-BE: efdnabs 3, 23 # encoding: [0x10,0x77,0x02,0xe5] +# CHECK-LE: efdnabs 3, 23 # encoding: [0xe5,0x02,0x77,0x10] + efdnabs %r3, %r23 +# CHECK-BE: efdneg 3, 22 # encoding: [0x10,0x76,0x02,0xe6] +# CHECK-LE: efdneg 3, 22 # encoding: [0xe6,0x02,0x76,0x10] + efdneg %r3, %r22 +# CHECK-BE: efdsub 3, 4, 6 # encoding: [0x10,0x64,0x32,0xe1] +# CHECK-LE: efdsub 3, 4, 6 # encoding: [0xe1,0x32,0x64,0x10] + efdsub %r3, %r4, %r6 +# CHECK-BE: efdtsteq 3, 4, 5 # encoding: [0x11,0x84,0x2a,0xfe] +# CHECK-LE: efdtsteq 3, 4, 5 # encoding: [0xfe,0x2a,0x84,0x11] + efdtsteq %cr3, %r4, %r5 +# CHECK-BE: efdtstgt 3, 3, 6 # encoding: [0x11,0x83,0x32,0xfc] +# CHECK-LE: efdtstgt 3, 3, 6 # encoding: [0xfc,0x32,0x83,0x11] + efdtstgt %cr3, %r3, %r6 +# CHECK-BE: efdtstlt 4, 0, 3 # encoding: [0x12,0x00,0x1a,0xfd] +# CHECK-LE: efdtstlt 4, 0, 3 # encoding: [0xfd,0x1a,0x00,0x12] + efdtstlt %cr4, %r0, %r3 +# CHECK-BE: efsabs 3, 4 # encoding: [0x10,0x64,0x02,0xc4] +# CHECK-LE: efsabs 3, 4 # encoding: [0xc4,0x02,0x64,0x10] + efsabs %r3, %r4 +# CHECK-BE: efsadd 3, 4, 5 # encoding: [0x10,0x64,0x2a,0xc0] +# CHECK-LE: efsadd 3, 4, 5 # encoding: [0xc0,0x2a,0x64,0x10] + efsadd %r3, %r4, %r5 +# CHECK-BE: efscfsf 5, 6 # encoding: [0x10,0xa0,0x32,0xd3] +# CHECK-LE: efscfsf 5, 6 # encoding: [0xd3,0x32,0xa0,0x10] + efscfsf %r5, %r6 +# CHECK-BE: efscfsi 5, 6 # encoding: [0x10,0xa0,0x32,0xd1] +# CHECK-LE: efscfsi 5, 6 # encoding: [0xd1,0x32,0xa0,0x10] + efscfsi %r5, %r6 +# CHECK-BE: efscfuf 5, 8 # encoding: [0x10,0xa0,0x42,0xd2] +# CHECK-LE: efscfuf 5, 8 # encoding: [0xd2,0x42,0xa0,0x10] + efscfuf %r5, %r8 +# CHECK-BE: efscfui 6, 9 # encoding: [0x10,0xc0,0x4a,0xd0] +# CHECK-LE: efscfui 6, 9 # encoding: [0xd0,0x4a,0xc0,0x10] + efscfui %r6, %r9 +# CHECK-BE: efscmpeq 3, 3, 8 # encoding: 
[0x11,0x83,0x42,0xce] +# CHECK-LE: efscmpeq 3, 3, 8 # encoding: [0xce,0x42,0x83,0x11] + efscmpeq %cr3, %r3, %r8 +# CHECK-BE: efscmpgt 4, 7, 3 # encoding: [0x12,0x07,0x1a,0xcc] +# CHECK-LE: efscmpgt 4, 7, 3 # encoding: [0xcc,0x1a,0x07,0x12] + efscmpgt %cr4, %r7, %r3 +# CHECK-BE: efscmplt 2, 3, 4 # encoding: [0x11,0x03,0x22,0xcd] +# CHECK-LE: efscmplt 2, 3, 4 # encoding: [0xcd,0x22,0x03,0x11] + efscmplt %cr2, %r3, %r4 +# CHECK-BE: efsctsf 5, 3 # encoding: [0x10,0xa0,0x1a,0xd7] +# CHECK-LE: efsctsf 5, 3 # encoding: [0xd7,0x1a,0xa0,0x10] + efsctsf %r5, %r3 +# CHECK-BE: efsctsi 6, 4 # encoding: [0x10,0xc0,0x22,0xd5] +# CHECK-LE: efsctsi 6, 4 # encoding: [0xd5,0x22,0xc0,0x10] + efsctsi %r6, %r4 +# CHECK-BE: efsctsiz 3, 4 # encoding: [0x10,0x60,0x22,0xda] +# CHECK-LE: efsctsiz 3, 4 # encoding: [0xda,0x22,0x60,0x10] + efsctsiz %r3, %r4 +# CHECK-BE: efsctuf 5, 8 # encoding: [0x10,0xa0,0x42,0xd6] +# CHECK-LE: efsctuf 5, 8 # encoding: [0xd6,0x42,0xa0,0x10] + efsctuf %r5, %r8 +# CHECK-BE: efsctui 9, 10 # encoding: [0x11,0x20,0x52,0xd4] +# CHECK-LE: efsctui 9, 10 # encoding: [0xd4,0x52,0x20,0x11] + efsctui %r9, %r10 +# CHECK-BE: efsctuiz 5, 17 # encoding: [0x10,0xa0,0x8a,0xd8] +# CHECK-LE: efsctuiz 5, 17 # encoding: [0xd8,0x8a,0xa0,0x10] + efsctuiz %r5, %r17 +# CHECK-BE: efsdiv 3, 4, 5 # encoding: [0x10,0x64,0x2a,0xc9] +# CHECK-LE: efsdiv 3, 4, 5 # encoding: [0xc9,0x2a,0x64,0x10] + efsdiv %r3, %r4, %r5 +# CHECK-BE: efsmul 0, 3, 8 # encoding: [0x10,0x03,0x42,0xc8] +# CHECK-LE: efsmul 0, 3, 8 # encoding: [0xc8,0x42,0x03,0x10] + efsmul %r0, %r3, %r8 +# CHECK-BE: efsnabs 3, 23 # encoding: [0x10,0x77,0x02,0xc5] +# CHECK-LE: efsnabs 3, 23 # encoding: [0xc5,0x02,0x77,0x10] + efsnabs %r3, %r23 +# CHECK-BE: efsneg 3, 22 # encoding: [0x10,0x76,0x02,0xc6] +# CHECK-LE: efsneg 3, 22 # encoding: [0xc6,0x02,0x76,0x10] + efsneg %r3, %r22 +# CHECK-BE: efssub 3, 4, 6 # encoding: [0x10,0x64,0x32,0xc1] +# CHECK-LE: efssub 3, 4, 6 # encoding: [0xc1,0x32,0x64,0x10] + efssub %r3, %r4, %r6 +# 
CHECK-BE: efststeq 3, 4, 5 # encoding: [0x11,0x84,0x2a,0xde] +# CHECK-LE: efststeq 3, 4, 5 # encoding: [0xde,0x2a,0x84,0x11] + efststeq %cr3, %r4, %r5 +# CHECK-BE: efststgt 3, 3, 6 # encoding: [0x11,0x83,0x32,0xdc] +# CHECK-LE: efststgt 3, 3, 6 # encoding: [0xdc,0x32,0x83,0x11] + efststgt %cr3, %r3, %r6 +# CHECK-BE: efststlt 4, 0, 3 # encoding: [0x12,0x00,0x1a,0xdd] +# CHECK-LE: efststlt 4, 0, 3 # encoding: [0xdd,0x1a,0x00,0x12] + efststlt %cr4, %r0, %r3