Index: lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp =================================================================== --- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -83,6 +83,16 @@ PPC::F24, PPC::F25, PPC::F26, PPC::F27, PPC::F28, PPC::F29, PPC::F30, PPC::F31 }; +static const MCPhysReg VFRegs[32] = { + PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3, + PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7, + PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11, + PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15, + PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19, + PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23, + PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27, + PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31 +}; static const MCPhysReg VRegs[32] = { PPC::V0, PPC::V1, PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, @@ -103,14 +113,14 @@ PPC::VSL24, PPC::VSL25, PPC::VSL26, PPC::VSL27, PPC::VSL28, PPC::VSL29, PPC::VSL30, PPC::VSL31, - PPC::VSH0, PPC::VSH1, PPC::VSH2, PPC::VSH3, - PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, - PPC::VSH8, PPC::VSH9, PPC::VSH10, PPC::VSH11, - PPC::VSH12, PPC::VSH13, PPC::VSH14, PPC::VSH15, - PPC::VSH16, PPC::VSH17, PPC::VSH18, PPC::VSH19, - PPC::VSH20, PPC::VSH21, PPC::VSH22, PPC::VSH23, - PPC::VSH24, PPC::VSH25, PPC::VSH26, PPC::VSH27, - PPC::VSH28, PPC::VSH29, PPC::VSH30, PPC::VSH31 + PPC::V0, PPC::V1, PPC::V2, PPC::V3, + PPC::V4, PPC::V5, PPC::V6, PPC::V7, + PPC::V8, PPC::V9, PPC::V10, PPC::V11, + PPC::V12, PPC::V13, PPC::V14, PPC::V15, + PPC::V16, PPC::V17, PPC::V18, PPC::V19, + PPC::V20, PPC::V21, PPC::V22, PPC::V23, + PPC::V24, PPC::V25, PPC::V26, PPC::V27, + PPC::V28, PPC::V29, PPC::V30, PPC::V31 }; static const MCPhysReg VSFRegs[64] = { PPC::F0, PPC::F1, PPC::F2, PPC::F3, @@ -590,6 +600,11 @@ Inst.addOperand(MCOperand::createReg(FRegs[getReg()])); } + void addRegVFRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(VFRegs[getReg()])); + } + void addRegVRRCOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::createReg(VRegs[getReg()])); Index: lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp =================================================================== --- lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp +++ lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -89,6 +89,17 @@ PPC::F28, PPC::F29, PPC::F30, PPC::F31 }; +static const unsigned VFRegs[] = { + PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3, + PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7, + PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11, + PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15, + PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19, + PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23, + PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27, + PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31 +}; + static const unsigned VRegs[] = { PPC::V0, PPC::V1, PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, @@ -110,14 +121,14 @@ PPC::VSL24, PPC::VSL25, PPC::VSL26, PPC::VSL27, PPC::VSL28, PPC::VSL29, PPC::VSL30, PPC::VSL31, - PPC::VSH0, PPC::VSH1, PPC::VSH2, PPC::VSH3, - PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, - PPC::VSH8, PPC::VSH9, PPC::VSH10, PPC::VSH11, - PPC::VSH12, PPC::VSH13, PPC::VSH14, PPC::VSH15, - PPC::VSH16, PPC::VSH17, PPC::VSH18, PPC::VSH19, - PPC::VSH20, PPC::VSH21, PPC::VSH22, PPC::VSH23, - PPC::VSH24, PPC::VSH25, PPC::VSH26, PPC::VSH27, - PPC::VSH28, PPC::VSH29, PPC::VSH30, PPC::VSH31 + PPC::V0, PPC::V1, PPC::V2, PPC::V3, + PPC::V4, PPC::V5, PPC::V6, PPC::V7, + PPC::V8, PPC::V9, PPC::V10, PPC::V11, + PPC::V12, PPC::V13, PPC::V14, PPC::V15, + PPC::V16, PPC::V17, PPC::V18, PPC::V19, + PPC::V20, PPC::V21, PPC::V22, PPC::V23, + PPC::V24, PPC::V25, PPC::V26, PPC::V27, + PPC::V28, PPC::V29, PPC::V30, PPC::V31 }; static const unsigned VSFRegs[] = { @@ -242,6 +253,12 @@ return decodeRegisterClass(Inst, RegNo, FRegs); } +static DecodeStatus DecodeVFRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, VFRegs); +} + static DecodeStatus DecodeVRRCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { Index: lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp =================================================================== --- lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "PPCInstPrinter.h" +#include "PPCInstrInfo.h" #include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCPredicates.h" #include "llvm/MC/MCExpr.h" @@ -429,11 +430,32 @@ return RegName; } +static bool isVFRegister(unsigned Reg) { + return Reg >= PPC::VF0 && Reg <= PPC::VF31; +} + +static bool isVRRegister(unsigned Reg) { + return Reg >= PPC::V0 && Reg <= PPC::V31; +} + void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); if (Op.isReg()) { - const char *RegName = getRegisterName(Op.getReg()); + unsigned Reg = Op.getReg(); + + // If this is a VSX instruction that uses vs32-vs63 register, the register + // will be V0-V31 (128-bit altivec) or VF0-VF31 (64-bit altivec), and their + // names are "v0-v31", so we need to map "v0-v31" to "vs32-vs63" + // (Please synchronize with PPCAsmPrinter::printOperand) + if ((MII.get(MI->getOpcode()).TSFlags & PPCII::UseVSXReg)) { + if (isVRRegister(Reg)) + Reg = PPC::VSX32 + (Reg - PPC::V0); + else if (isVFRegister(Reg)) + Reg = PPC::VSX32 + (Reg - PPC::VF0); + } + + const char *RegName = getRegisterName(Reg); // The linux and AIX assembler does not take register prefixes. if (!isDarwinSyntax()) RegName = stripRegisterPrefix(RegName); Index: lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp =================================================================== --- lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "PPCInstrInfo.h" #include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCFixupKinds.h" #include "llvm/ADT/Statistic.h" @@ -350,6 +351,9 @@ return 0x80 >> CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); } +static bool isVRRegister(unsigned Reg) { + return Reg >= PPC::V0 && Reg <= PPC::V31; +} unsigned PPCMCCodeEmitter:: getMachineOpValue(const MCInst &MI, const MCOperand &MO, @@ -361,7 +365,14 @@ assert((MI.getOpcode() != PPC::MTOCRF && MI.getOpcode() != PPC::MTOCRF8 && MI.getOpcode() != PPC::MFOCRF && MI.getOpcode() != PPC::MFOCRF8) || MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7); - return CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); + unsigned Reg = MO.getReg(); + unsigned Encode = CTX.getRegisterInfo()->getEncodingValue(Reg); + + if ((MCII.get(MI.getOpcode()).TSFlags & PPCII::UseVSXReg)) + if (isVRRegister(Reg)) + Encode += 32; + + return Encode; } assert(MO.isImm() && Index: lib/Target/PowerPC/PPCAsmPrinter.cpp =================================================================== --- lib/Target/PowerPC/PPCAsmPrinter.cpp +++ lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -164,6 +164,14 @@ return RegName; } +static bool isVFRegister(unsigned Reg) { + return Reg >= PPC::VF0 && Reg <= PPC::VF31; +} + +static bool isVRRegister(unsigned Reg) { + return Reg >= PPC::V0 && Reg <= PPC::V31; +} + void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { const DataLayout &DL = getDataLayout(); @@ -171,7 +179,20 @@ switch (MO.getType()) { case MachineOperand::MO_Register: { - const char *RegName = PPCInstPrinter::getRegisterName(MO.getReg()); + unsigned Reg = MO.getReg(); + + // If this is a VSX instruction that uses vs32-vs63 register, the register + // will be V0-V31 (128-bit altivec) or VF0-VF31 (64-bit altivec), and their + // names are "v0-v31", so we need to map "v0-v31" to "vs32-vs63" + // (Please synchronize with PPCInstPrinter::printOperand) + if (MI->getDesc().TSFlags & PPCII::UseVSXReg) { + if (isVRRegister(Reg)) + Reg = PPC::VSX32 + (Reg - PPC::V0); + else if (isVFRegister(Reg)) + Reg = PPC::VSX32 + (Reg - PPC::VF0); + } + const char *RegName = PPCInstPrinter::getRegisterName(Reg); + // Linux assembler (Others?) does not take register mnemonics. // FIXME - What about special registers used in mfspr/mtspr? if (!Subtarget->isDarwin()) Index: lib/Target/PowerPC/PPCCallingConv.td =================================================================== --- lib/Target/PowerPC/PPCCallingConv.td +++ lib/Target/PowerPC/PPCCallingConv.td @@ -65,11 +65,9 @@ // Vector types returned as "direct" go into V2 .. V9; note that only the // ELFv2 ABI fully utilizes all these registers. - CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32], + CCIfType<[v16i8, v8i16, v4i32, v2f64, v2i64, v1i128, v4f32], CCIfSubtarget<"hasAltivec()", - CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>, - CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()", - CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>> + CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>> ]>; // No explicit register is specified for the AnyReg calling convention. The @@ -118,11 +116,9 @@ CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, CCIfType<[v4f64, v4f32, v4i1], CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>, - CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32], + CCIfType<[v16i8, v8i16, v4i32, v2f64, v2i64, v1i128, v4f32], CCIfSubtarget<"hasAltivec()", - CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>, - CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()", - CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>> + CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>> ]>; //===----------------------------------------------------------------------===// @@ -187,12 +183,9 @@ CCAssignToReg<[QF1, QF2, QF3, QF4, QF5, QF6, QF7, QF8]>>>, // The first 12 Vector arguments are passed in AltiVec registers. - CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32], + CCIfType<[v16i8, v8i16, v4i32, v2f64, v2i64, v1i128, v4f32], CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13]>>>, - CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()", - CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9, - VSH10, VSH11, VSH12, VSH13]>>>, CCDelegateTo ]>; @@ -281,6 +274,5 @@ (sequence "V%u", 0, 31))>; def CSR_64_AllRegs_VSX : CalleeSavedRegs<(add CSR_64_AllRegs_Altivec, - (sequence "VSL%u", 0, 31), - (sequence "VSH%u", 0, 31))>; + (sequence "VSL%u", 0, 31))>; Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -2894,7 +2894,7 @@ break; case MVT::v2f64: case MVT::v2i64: - RC = &PPC::VSHRCRegClass; + RC = &PPC::VRRCRegClass; break; case MVT::v4f64: RC = &PPC::QFRCRegClass; @@ -3082,10 +3082,6 @@ PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; - static const MCPhysReg VSRH[] = { - PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8, - PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13 - }; const unsigned Num_GPR_Regs = array_lengthof(GPR); const unsigned Num_FPR_Regs = 13; @@ -3364,9 +3360,7 @@ // passed directly. The latter are used to implement ELFv2 homogenous // vector aggregates. if (VR_idx != Num_VR_Regs) { - unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ? - MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) : - MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); + unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); ++VR_idx; } else { @@ -5006,10 +5000,6 @@ PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; - static const MCPhysReg VSRH[] = { - PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8, - PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13 - }; const unsigned NumGPRs = array_lengthof(GPR); const unsigned NumFPRs = 13; @@ -5441,13 +5431,7 @@ MachinePointerInfo(), false, false, false, 0); MemOpChains.push_back(Load.getValue(1)); - - unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 || - Arg.getSimpleValueType() == MVT::v2i64) ? - VSRH[VR_idx] : VR[VR_idx]; - ++VR_idx; - - RegsToPass.push_back(std::make_pair(VReg, Load)); + RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); } ArgOffset += 16; for (unsigned i=0; i<16; i+=PtrByteSize) { @@ -5465,12 +5449,7 @@ // Non-varargs Altivec params go into VRs or on the stack. if (VR_idx != NumVRs) { - unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 || - Arg.getSimpleValueType() == MVT::v2i64) ? - VSRH[VR_idx] : VR[VR_idx]; - ++VR_idx; - - RegsToPass.push_back(std::make_pair(VReg, Arg)); + RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg)); } else { if (CallConv == CallingConv::Fast) ComputePtrOff(); Index: lib/Target/PowerPC/PPCInstrFormats.td =================================================================== --- lib/Target/PowerPC/PPCInstrFormats.td +++ lib/Target/PowerPC/PPCInstrFormats.td @@ -38,6 +38,14 @@ let TSFlags{2} = PPC970_Cracked; let TSFlags{5-3} = PPC970_Unit; + /// Indicate the (VSX) instruction that uses VSX register (vs0-vs63), + /// instead of VMX register (v0-v31). + /// Because VSX can use both vs32-vs63 (before ISA3.0) and v0-v31 (since + /// ISA3.0), and we don't define new register class for this new VMX usage, + /// so we use this flag to distinguish it. + bits<1> UseVSXReg = 0; + let TSFlags{6} = UseVSXReg; + // Fields used for relation models. string BaseName = ""; @@ -62,6 +70,8 @@ class PPC970_Unit_VPERM { bits<3> PPC970_Unit = 6; } class PPC970_Unit_BRU { bits<3> PPC970_Unit = 7; } +class UseVSXReg { bits<1> UseVSXReg = 1; } + // Two joined instructions; used to emit two adjacent instructions as one. // The itinerary from the first instruction is used for scheduling and // classification. Index: lib/Target/PowerPC/PPCInstrInfo.h =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.h +++ lib/Target/PowerPC/PPCInstrInfo.h @@ -61,6 +61,15 @@ PPC970_VPERM = 6 << PPC970_Shift, // Vector Permute Unit PPC970_BRU = 7 << PPC970_Shift // Branch Unit }; + +enum { + /// Shift count to bypass PPC970 flags + NewDef_Shift = 6, + + /// The VSX instruction that uses VSX register (vs0-vs63), instead of VMX + /// register (v0-v31). + UseVSXReg = 0x1 << NewDef_Shift +}; } // end namespace PPCII class PPCSubtarget; Index: lib/Target/PowerPC/PPCInstrInfo.cpp =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.cpp +++ lib/Target/PowerPC/PPCInstrInfo.cpp @@ -853,15 +853,6 @@ llvm_unreachable("nop VSX copy"); DestReg = SuperReg; - } else if (PPC::VRRCRegClass.contains(DestReg) && - PPC::VSRCRegClass.contains(SrcReg)) { - unsigned SuperReg = - TRI->getMatchingSuperReg(DestReg, PPC::sub_128, &PPC::VSRCRegClass); - - if (VSXSelfCopyCrash && SrcReg == SuperReg) - llvm_unreachable("nop VSX copy"); - - DestReg = SuperReg; } else if (PPC::F8RCRegClass.contains(SrcReg) && PPC::VSRCRegClass.contains(DestReg)) { unsigned SuperReg = @@ -871,15 +862,6 @@ llvm_unreachable("nop VSX copy"); SrcReg = SuperReg; - } else if (PPC::VRRCRegClass.contains(SrcReg) && - PPC::VSRCRegClass.contains(DestReg)) { - unsigned SuperReg = - TRI->getMatchingSuperReg(SrcReg, PPC::sub_128, &PPC::VSRCRegClass); - - if (VSXSelfCopyCrash && DestReg == SuperReg) - llvm_unreachable("nop VSX copy"); - - SrcReg = SuperReg; } // Different class register copy @@ -1066,6 +1048,13 @@ PPCFunctionInfo *FuncInfo = MF.getInfo(); FuncInfo->setHasSpills(); + // Because VRRC and VSRC use incompatible store/load instruction, and backend + // can spill VRRC to a frame then reload it by using VSRC, we have to make + // sure this is not going to happen, so when target has VSX, we just + // spill/reload VRRC by VSRC. + if (Subtarget.hasVSX() && RC == &PPC::VRRCRegClass) + RC = &PPC::VSRCRegClass; + bool NonRI = false, SpillsVRS = false; if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs, NonRI, SpillsVRS)) @@ -1178,6 +1167,13 @@ PPCFunctionInfo *FuncInfo = MF.getInfo(); FuncInfo->setHasSpills(); + // Because VRRC and VSRC use incompatible store/load instruction, and backend + // can spill VRRC to a frame then reload it by using VSRC, we have to make + // sure this is not going to happen, so when target has VSX, we just + // spill/reload VRRC by VSRC. + if (Subtarget.hasVSX() && RC == &PPC::VRRCRegClass) + RC = &PPC::VSRCRegClass; + bool NonRI = false, SpillsVRS = false; if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs, NonRI, SpillsVRS)) Index: lib/Target/PowerPC/PPCInstrVSX.td =================================================================== --- lib/Target/PowerPC/PPCInstrVSX.td +++ lib/Target/PowerPC/PPCInstrVSX.td @@ -47,6 +47,13 @@ let ParserMatchClass = PPCRegVSSRCAsmOperand; } +def PPCRegVFRCAsmOperand : AsmOperandClass { + let Name = "RegVFRC"; let PredicateMethod = "isRegNumber"; +} +def vfrc : RegisterOperand { + let ParserMatchClass = PPCRegVFRCAsmOperand; +} + // Little-endian-specific nodes. def SDT_PPClxvd2x : SDTypeProfile<1, 1, [ SDTCisVT<0, v2f64>, SDTCisPtrTy<1> @@ -89,6 +96,7 @@ let Predicates = [HasVSX] in { let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. +let UseVSXReg = 1 in { let hasSideEffects = 0 in { // VSX instructions don't have side effects. let Uses = [RM] in { @@ -779,6 +787,7 @@ [(set v4i32:$XT, (PPCxxsplt v4i32:$XB, imm32SExt16:$UIM))]>; } // hasSideEffects +} // UseVSXReg = 1 // SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after // instruction selection into a branch sequence. @@ -1021,7 +1030,7 @@ def HasDirectMove : Predicate<"PPCSubTarget->hasDirectMove()">; let Predicates = [HasP8Vector] in { let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. - let isCommutable = 1 in { + let isCommutable = 1, UseVSXReg = 1 in { def XXLEQV : XX3Form<60, 186, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xxleqv $XT, $XA, $XB", IIC_VecGeneral, @@ -1031,11 +1040,12 @@ "xxlnand $XT, $XA, $XB", IIC_VecGeneral, [(set v4i32:$XT, (vnot_ppc (and v4i32:$XA, v4i32:$XB)))]>; - } // isCommutable + } // isCommutable, UseVSXReg def : Pat<(int_ppc_vsx_xxleqv v4i32:$A, v4i32:$B), (XXLEQV $A, $B)>; + let UseVSXReg = 1 in { def XXLORC : XX3Form<60, 170, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xxlorc $XT, $XA, $XB", IIC_VecGeneral, @@ -1063,6 +1073,7 @@ "stxsiwx $XT, $dst", IIC_LdStSTFD, [(PPCstfiwx f64:$XT, xoaddr:$dst)]>; } // mayStore + } // UseVSXReg = 1 def : Pat<(f64 (extloadf32 xoaddr:$src)), (COPY_TO_REGCLASS (LXSSPX xoaddr:$src), VSFRC)>; @@ -1090,6 +1101,7 @@ def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)), (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>; + let UseVSXReg = 1 in { // VSX Elementary Scalar FP arithmetic (SP) let isCommutable = 1 in { def XSADDSP : XX3Form<60, 0, @@ -1215,9 +1227,11 @@ def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB), "xscvspdpn $XT, $XB", IIC_VecFP, []>; + } // UseVSXReg = 1 } // AddedComplexity = 400 } // HasP8Vector +let UseVSXReg = 1 in { let Predicates = [HasDirectMove] in { // VSX direct move instructions def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsfrc:$XT), @@ -1253,6 +1267,7 @@ []>, Requires<[In64BitMode]>; } // IsISA3_0, HasDirectMove +} // UseVSXReg = 1 /* Direct moves of various widths from GPR's into VSR's. Each move lines the value up into element 0 (both BE and LE). Namely, entities smaller than @@ -1823,6 +1838,7 @@ : X_RD5_XO5_RS5; + let UseVSXReg = 1 in { // [PO T XO B XO BX /] class XX2_RT5_XO5_XB6 opcode, bits<5> xo2, bits<9> xo, string opc, list pattern> @@ -1841,6 +1857,7 @@ InstrItinClass itin, list pattern> : XX3Form; + } // end UseVSXReg = 1 // [PO VRT VRA VRB XO /] class X_VT5_VA5_VB5 opcode, bits<10> xo, string opc, @@ -1909,7 +1926,8 @@ // DP/QP Compare Exponents def XSCMPEXPDP : XX3Form_1<60, 59, (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB), - "xscmpexpdp $crD, $XA, $XB", IIC_FPCompare, []>; + "xscmpexpdp $crD, $XA, $XB", IIC_FPCompare, []>, + UseVSXReg; def XSCMPEXPQP : X_BF3_VA5_VB5<63, 164, "xscmpexpqp", []>; // DP Compare ==, >=, >, != @@ -1923,6 +1941,7 @@ IIC_FPCompare, []>; def XSCMPNEDP : XX3_XT5_XA5_XB5<60, 27, "xscmpnedp", vsrc, vsfrc, vsfrc, IIC_FPCompare, []>; + let UseVSXReg = 1 in { // Vector Compare Not Equal def XVCMPNEDP : XX3Form_Rc<60, 123, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), @@ -1940,12 +1959,13 @@ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvcmpnesp. $XT, $XA, $XB", IIC_VecFPCompare, []>, isDOT; + } // end UseVSXReg = 1 //===--------------------------------------------------------------------===// // Quad-Precision Floating-Point Conversion Instructions: // Convert DP -> QP - def XSCVDPQP : X_VT5_XO5_VB5_TyVB<63, 22, 836, "xscvdpqp", vsfrc, []>; + def XSCVDPQP : X_VT5_XO5_VB5_TyVB<63, 22, 836, "xscvdpqp", vfrc, []>; // Round & Convert QP -> DP (dword[1] is set to zero) def XSCVQPDP : X_VT5_XO5_VB5 <63, 20, 836, "xscvqpdp" , []>; @@ -1958,8 +1978,8 @@ def XSCVQPUWZ : X_VT5_XO5_VB5<63, 1, 836, "xscvqpuwz", []>; // Convert (Un)Signed DWord -> QP - def XSCVSDQP : X_VT5_XO5_VB5_TyVB<63, 10, 836, "xscvsdqp", vsfrc, []>; - def XSCVUDQP : X_VT5_XO5_VB5_TyVB<63, 2, 836, "xscvudqp", vsfrc, []>; + def XSCVSDQP : X_VT5_XO5_VB5_TyVB<63, 10, 836, "xscvsdqp", vfrc, []>; + def XSCVUDQP : X_VT5_XO5_VB5_TyVB<63, 2, 836, "xscvudqp", vfrc, []>; //===--------------------------------------------------------------------===// // Round to Floating-Point Integer Instructions @@ -1996,7 +2016,7 @@ // Insert Exponent DP/QP // XT NOTE: XT.dword[1] = 0xUUUU_UUUU_UUUU_UUUU def XSIEXPDP : XX1Form <60, 918, (outs vsrc:$XT), (ins g8rc:$rA, g8rc:$rB), - "xsiexpdp $XT, $rA, $rB", IIC_VecFP, []>; + "xsiexpdp $XT, $rA, $rB", IIC_VecFP, []>, UseVSXReg; // vB NOTE: only vB.dword[0] is used, that's why we don't use // X_VT5_VA5_VB5 form def XSIEXPQP : XForm_18<63, 868, (outs vrrc:$vT), (ins vrrc:$vA, vsfrc:$vB), @@ -2005,10 +2025,12 @@ // Extract Exponent/Significand DP/QP def XSXEXPDP : XX2_RT5_XO5_XB6<60, 0, 347, "xsxexpdp", []>; def XSXSIGDP : XX2_RT5_XO5_XB6<60, 1, 347, "xsxsigdp", []>; + def XSXEXPQP : X_VT5_XO5_VB5 <63, 2, 804, "xsxexpqp", []>; def XSXSIGQP : X_VT5_XO5_VB5 <63, 18, 804, "xsxsigqp", []>; // Vector Insert Word + let UseVSXReg = 1 in { // XB NOTE: Only XB.dword[1] is used, but we use vsrc on XB. def XXINSERTW : XX2_RD6_UIM5_RS6<60, 181, (outs vsrc:$XT), (ins u4imm:$UIMM, vsrc:$XB), @@ -2018,6 +2040,7 @@ def XXEXTRACTUW : XX2_RD6_UIM5_RS6<60, 165, (outs vsrc:$XT), (ins u4imm:$UIMM, vsrc:$XB), "xxextractuw $XT, $XB, $UIMM", IIC_VecFP, []>; + } // end UseVSXReg = 1 // Vector Insert Exponent DP/SP def XVIEXPDP : XX3_XT5_XA5_XB5<60, 248, "xviexpdp", vsrc, vsrc, vsrc, @@ -2034,23 +2057,27 @@ //===--------------------------------------------------------------------===// // Test Data Class SP/DP/QP + let UseVSXReg = 1 in { def XSTSTDCSP : XX2_BF3_DCMX7_RS6<60, 298, (outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB), "xststdcsp $BF, $XB, $DCMX", IIC_VecFP, []>; def XSTSTDCDP : XX2_BF3_DCMX7_RS6<60, 362, (outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB), "xststdcdp $BF, $XB, $DCMX", IIC_VecFP, []>; + } // UseVSXReg = 1 def XSTSTDCQP : X_BF3_DCMX7_RS5 <63, 708, (outs crrc:$BF), (ins u7imm:$DCMX, vrrc:$vB), "xststdcqp $BF, $vB, $DCMX", IIC_VecFP, []>; // Vector Test Data Class SP/DP + let UseVSXReg = 1 in { def XVTSTDCSP : XX2_RD6_DCMX7_RS6<60, 13, 5, (outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB), "xvtstdcsp $XT, $XB, $DCMX", IIC_VecFP, []>; def XVTSTDCDP : XX2_RD6_DCMX7_RS6<60, 15, 5, (outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB), "xvtstdcdp $XT, $XB, $DCMX", IIC_VecFP, []>; + } // UseVSXReg = 1 //===--------------------------------------------------------------------===// @@ -2081,7 +2108,7 @@ // Vector Splat Immediate Byte def XXSPLTIB : X_RD6_IMM8<60, 360, (outs vsrc:$XT), (ins u8imm:$IMM8), - "xxspltib $XT, $IMM8", IIC_VecPerm, []>; + "xxspltib $XT, $IMM8", IIC_VecPerm, []>, UseVSXReg; //===--------------------------------------------------------------------===// // Vector/Scalar Load/Store Instructions @@ -2089,12 +2116,12 @@ let mayLoad = 1 in { // Load Vector def LXV : DQ_RD6_RS5_DQ12<61, 1, (outs vsrc:$XT), (ins memrix16:$src), - "lxv $XT, $src", IIC_LdStLFD, []>; + "lxv $XT, $src", IIC_LdStLFD, []>, UseVSXReg; // Load DWord - def LXSD : DSForm_1<57, 2, (outs vrrc:$vD), (ins memrix:$src), + def LXSD : DSForm_1<57, 2, (outs vfrc:$vD), (ins memrix:$src), "lxsd $vD, $src", IIC_LdStLFD, []>; // Load SP from src, convert it to DP, and place in dword[0] - def LXSSP : DSForm_1<57, 3, (outs vrrc:$vD), (ins memrix:$src), + def LXSSP : DSForm_1<57, 3, (outs vfrc:$vD), (ins memrix:$src), "lxssp $vD, $src", IIC_LdStLFD, []>; // [PO T RA RB XO TX] almost equal to [PO S RA RB XO SX], but has different @@ -2102,7 +2129,7 @@ class X_XT6_RA5_RB5 opcode, bits<10> xo, string opc, RegisterOperand vtype, list pattern> : XX1Form; + !strconcat(opc, " $XT, $src"), IIC_LdStLFD, pattern>, UseVSXReg; // Load as Integer Byte/Halfword & Zero Indexed def LXSIBZX : X_XT6_RA5_RB5<31, 781, "lxsibzx", vsfrc, []>; @@ -2126,19 +2153,19 @@ let mayStore = 1 in { // Store Vector def STXV : DQ_RD6_RS5_DQ12<61, 5, (outs), (ins vsrc:$XT, memrix16:$dst), - "stxv $XT, $dst", IIC_LdStSTFD, []>; + "stxv $XT, $dst", IIC_LdStSTFD, []>, UseVSXReg; // Store DWord - def STXSD : DSForm_1<61, 2, (outs), (ins vrrc:$vS, memrix:$dst), + def STXSD : DSForm_1<61, 2, (outs), (ins vfrc:$vS, memrix:$dst), "stxsd $vS, $dst", IIC_LdStSTFD, []>; // Convert DP of dword[0] to SP, and Store to dst - def STXSSP : DSForm_1<61, 3, (outs), (ins vrrc:$vS, memrix:$dst), + def STXSSP : DSForm_1<61, 3, (outs), (ins vfrc:$vS, memrix:$dst), "stxssp $vS, $dst", IIC_LdStSTFD, []>; // [PO S RA RB XO SX] class X_XS6_RA5_RB5 opcode, bits<10> xo, string opc, RegisterOperand vtype, list pattern> : XX1Form; + !strconcat(opc, " $XT, $dst"), IIC_LdStSTFD, pattern>, UseVSXReg; // Store as Integer Byte/Halfword Indexed def STXSIBX : X_XS6_RA5_RB5<31, 909, "stxsibx" , vsfrc, []>; @@ -2155,4 +2182,16 @@ def STXVL : X_XS6_RA5_RB5<31, 397, "stxvl" , vsrc, []>; def STXVLL : X_XS6_RA5_RB5<31, 429, "stxvll" , vsrc, []>; } // end mayStore + + // Prefer Power9 (aka Power v.3 instructions) + let AddedComplexity = 500 in { + def : Pat<(f64 (load iaddr:$src)), (LXSD iaddr:$src)>; + def : Pat<(f32 (load iaddr:$src)), + (COPY_TO_REGCLASS (LXSSP iaddr:$src), VFRC)>; + def : Pat<(f64 (extloadf32 iaddr:$src)), + (COPY_TO_REGCLASS (LXSSP iaddr:$src), VFRC)>; + def : Pat<(store f64:$vS, iaddr:$dst), (STXSD $vS, iaddr:$dst)>; + def : Pat<(store f32:$vS, iaddr:$dst), + (STXSSP (COPY_TO_REGCLASS $vS, VFRC), iaddr:$dst)>; + } } // end HasP9Vector Index: lib/Target/PowerPC/PPCRegisterInfo.cpp =================================================================== --- lib/Target/PowerPC/PPCRegisterInfo.cpp +++ lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -304,7 +304,6 @@ case PPC::VRRCRegClassID: case PPC::VFRCRegClassID: case PPC::VSLRCRegClassID: - case PPC::VSHRCRegClassID: return 32 - DefaultSafety; case PPC::VSRCRegClassID: case PPC::VSFRCRegClassID: Index: lib/Target/PowerPC/PPCRegisterInfo.td =================================================================== --- lib/Target/PowerPC/PPCRegisterInfo.td +++ lib/Target/PowerPC/PPCRegisterInfo.td @@ -17,7 +17,6 @@ def sub_un : SubRegIndex<1, 3>; def sub_32 : SubRegIndex<32>; def sub_64 : SubRegIndex<64>; -def sub_128 : SubRegIndex<128>; } @@ -79,15 +78,6 @@ let SubRegIndices = [sub_64]; } -// VSRH - One of the 32 128-bit VSX registers that overlap with the vector -// registers. -class VSRH : PPCReg { - let HWEncoding{4-0} = SubReg.HWEncoding{4-0}; - let HWEncoding{5} = 1; - let SubRegs = [SubReg]; - let SubRegIndices = [sub_128]; -} - // CR - One of the 8 4-bit condition registers class CR num, string n, list subregs> : PPCReg { let HWEncoding{2-0} = num; @@ -116,9 +106,14 @@ DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>; } -// Floating-point vector subregisters (for VSX) +// 64-bit Floating-point vector subregisters (for VSX) +// Note: the register name is v0-v31, this is alias of vs32-vs63. Because VSX +// can use both v0-v31 (since ISA3.0) and vs32-vs63 (before ISA3.0). We only +// define one register class for both usage, and use custom c++ code to adjust +// asm printing and operand encoding. foreach Index = 0-31 in { - def VF#Index : VF; + def VF#Index : VF, + DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>; } // QPX Floating-point registers @@ -138,9 +133,11 @@ def VSL#Index : VSRL("F"#Index), "vs"#Index>, DwarfRegAlias("F"#Index)>; } -foreach Index = 0-31 in { - def VSH#Index : VSRH("V"#Index), "vs" # !add(Index, 32)>, - DwarfRegAlias("V"#Index)>; + +// Dummy VSX registers, this defines string: "vs32"-"vs63", and is only used for +// asm printing. +foreach Index = 32-63 in { + def VSX#Index : PPCReg<"vs"#Index>; } // The reprsentation of r0 when treated as the constant 0. @@ -288,7 +285,7 @@ (sequence "F%u", 31, 14))>; def F4RC : RegisterClass<"PPC", [f32], 32, (add F8RC)>; -def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v2i64,v1i128,v4f32], 128, +def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v2i64,v2f64,v1i128,v4f32], 128, (add V2, V3, V4, V5, V0, V1, V6, V7, V8, V9, V10, V11, V12, V13, V14, V15, V16, V17, V18, V19, V31, V30, V29, V28, V27, V26, V25, V24, V23, V22, V21, V20)>; @@ -298,14 +295,8 @@ def VSLRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128, (add (sequence "VSL%u", 0, 13), (sequence "VSL%u", 31, 14))>; -def VSHRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128, - (add VSH2, VSH3, VSH4, VSH5, VSH0, VSH1, VSH6, VSH7, - VSH8, VSH9, VSH10, VSH11, VSH12, VSH13, VSH14, - VSH15, VSH16, VSH17, VSH18, VSH19, VSH31, VSH30, - VSH29, VSH28, VSH27, VSH26, VSH25, VSH24, VSH23, - VSH22, VSH21, VSH20)>; def VSRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128, - (add VSLRC, VSHRC)>; + (add VSLRC, VRRC)>; // Register classes for the 64-bit "scalar" VSX subregisters. def VFRC : RegisterClass<"PPC", [f64], 64, @@ -314,6 +305,7 @@ VF15, VF16, VF17, VF18, VF19, VF31, VF30, VF29, VF28, VF27, VF26, VF25, VF24, VF23, VF22, VF21, VF20)>; + def VSFRC : RegisterClass<"PPC", [f64], 64, (add F8RC, VFRC)>; // Register class for single precision scalars in VSX registers Index: lib/Target/PowerPC/PPCVSXCopy.cpp =================================================================== --- lib/Target/PowerPC/PPCVSXCopy.cpp +++ lib/Target/PowerPC/PPCVSXCopy.cpp @@ -103,11 +103,8 @@ // This is a copy *to* a VSX register from a non-VSX register. Changed = true; - const TargetRegisterClass *SrcRC = - IsVRReg(SrcMO.getReg(), MRI) ? &PPC::VSHRCRegClass : - &PPC::VSLRCRegClass; + const TargetRegisterClass *SrcRC = &PPC::VSLRCRegClass; assert((IsF8Reg(SrcMO.getReg(), MRI) || - IsVRReg(SrcMO.getReg(), MRI) || IsVSSReg(SrcMO.getReg(), MRI) || IsVSFReg(SrcMO.getReg(), MRI)) && "Unknown source for a VSX copy"); @@ -118,8 +115,7 @@ .addImm(1) // add 1, not 0, because there is no implicit clearing // of the high bits. .addOperand(SrcMO) - .addImm(IsVRReg(SrcMO.getReg(), MRI) ? PPC::sub_128 : - PPC::sub_64); + .addImm(PPC::sub_64); // The source of the original copy is now the new virtual register. SrcMO.setReg(NewVReg); @@ -128,13 +124,10 @@ // This is a copy *from* a VSX register to a non-VSX register. Changed = true; - const TargetRegisterClass *DstRC = - IsVRReg(DstMO.getReg(), MRI) ? &PPC::VSHRCRegClass : - &PPC::VSLRCRegClass; + const TargetRegisterClass *DstRC = &PPC::VSLRCRegClass; assert((IsF8Reg(DstMO.getReg(), MRI) || IsVSFReg(DstMO.getReg(), MRI) || - IsVSSReg(DstMO.getReg(), MRI) || - IsVRReg(DstMO.getReg(), MRI)) && + IsVSSReg(DstMO.getReg(), MRI)) && "Unknown destination for a VSX copy"); // Copy the VSX value into a new VSX register of the correct subclass. @@ -145,8 +138,7 @@ // Transform the original copy into a subregister extraction copy. SrcMO.setReg(NewVReg); - SrcMO.setSubReg(IsVRReg(DstMO.getReg(), MRI) ? PPC::sub_128 : - PPC::sub_64); + SrcMO.setSubReg(PPC::sub_64); } } Index: test/CodeGen/PowerPC/dform-test.ll =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/dform-test.ll @@ -0,0 +1,203 @@ +; RUN: llc < %s -march=ppc64 -mcpu=pwr9 -verify-machineinstrs | \ +; RUN: FileCheck %s -check-prefix=PWR9 -check-prefix=CHECK +; RUN: llc < %s -march=ppc64 -mcpu=pwr8 -verify-machineinstrs | \ +; RUN: FileCheck %s -check-prefix=PWR8 -check-prefix=CHECK + +; CHECK-LABEL: LXSD: +define void @LXSD(i32 zeroext %N) { +entry: + %cmp17 = icmp eq i32 %N, 0 + br i1 %cmp17, label %for.cond.cleanup, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.cond.cleanup.loopexit: ; preds = %for.body + br label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry + ret void + +for.body: ; preds = %for.body.preheader, %for.body +; v0 = vsx32 +; PWR9-DAG: lxsd 0 +; PWR9-DAG: lxsd 2 +; PWR9-DAG: lxsd 3 +; PWR9-DAG: lxsd 4 +; PWR9-DAG: lxssp 5 +; PWR9-DAG: xxmrghd 34, 37, 34 +; PWR9-DAG: xxmrghd 35, 37, 35 +; PWR9-DAG: xxmrghd 36, 37, 36 +; PWR9-DAG: xxmrghd 37, 37, 32 +; PWR8-NOT: lxsd{{[^x]}} +; PWR8-NOT: lxssp{{[^x]}} + %i.018 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] + %call = tail call double* @getDoublePtr() + %0 = load double, double* %call, align 8 + %vecinit = insertelement <2 x double> , double %0, i32 1 + %arrayidx1 = getelementptr inbounds double, double* %call, i64 1 + %1 = load double, double* %arrayidx1, align 8 + %vecinit2 = insertelement <2 x double> , double %1, i32 1 + %arrayidx3 = getelementptr inbounds double, double* %call, i64 2 + %2 = load double, double* %arrayidx3, align 8 + %vecinit4 = insertelement <2 x double> , double %2, i32 1 + %arrayidx5 = getelementptr inbounds double, double* %call, i64 3 + %3 = load double, double* %arrayidx5, align 8 + %vecinit6 = insertelement <2 x double> , double %3, i32 1 + tail call void @passVSX(<2 x double> %vecinit, <2 x double> %vecinit2, <2 x double> %vecinit4, <2 x double> %vecinit6) + %inc = add nuw nsw i32 %i.018, 1 + %exitcond = icmp eq i32 %inc, %N + br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body +} + +declare double* @getDoublePtr() + +; CHECK-LABEL: LXSSP: +define void @LXSSP(i32 zeroext %N) #0 { +entry: + %cmp20 = icmp eq i32 %N, 0 + br i1 %cmp20, label %for.cond.cleanup, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.cond.cleanup.loopexit: ; preds = %for.body + br label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry + ret void + +for.body: ; preds = %for.body.preheader, %for.body + +; PWR9-DAG: lxssp 0 +; PWR9-DAG: lxssp 2 +; PWR9-DAG: lxssp 3 +; PWR9-DAG: lxssp 4 +; PWR9-DAG: lxssp 5 +; PWR9-DAG: xxmrghd 34, 37, 34 +; PWR9-DAG: xxmrghd 35, 37, 35 +; PWR9-DAG: xxmrghd 36, 37, 36 +; PWR9-DAG: xxmrghd 37, 37, 32 +; PWR8-NOT: lxssp{{[^x]}} + %i.021 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] + %call = tail call float* @getFloatPtr() + %0 = load float, float* %call, align 4 + %conv = fpext float %0 to double + %vecinit = insertelement <2 x double> , double %conv, i32 1 + %arrayidx1 = getelementptr inbounds float, float* %call, i64 1 + %1 = load float, float* %arrayidx1, align 4 + %conv2 = fpext float %1 to double + %vecinit3 = insertelement <2 x double> , double %conv2, i32 1 + %arrayidx4 = getelementptr inbounds float, float* %call, i64 2 + %2 = load float, float* %arrayidx4, align 4 + %conv5 = fpext float %2 to double + %vecinit6 = insertelement <2 x double> , double %conv5, i32 1 + %arrayidx7 = getelementptr inbounds float, float* %call, i64 3 + %3 = load float, float* %arrayidx7, align 4 + %conv8 = fpext float %3 to double + %vecinit9 = insertelement <2 x double> , double %conv8, i32 1 + tail call void @passVSX(<2 x double> %vecinit, <2 x double> %vecinit3, <2 x double> %vecinit6, <2 x double> %vecinit9) + %inc = add nuw nsw i32 %i.021, 1 + %exitcond = icmp eq i32 %inc, %N + br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body +} + +declare float* @getFloatPtr() + +; CHECK-LABEL: STXSD +define void @STXSD(i32 zeroext %N) { +entry: + %cmp17 = icmp eq i32 %N, 0 + br i1 %cmp17, label %for.cond.cleanup, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.cond.cleanup.loopexit: ; preds = %for.body + br label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry + ret void + +for.body: ; preds = %for.body.preheader, %for.body +; PWR9: stxsd{{[^x]}} +; PWR9: stxsd{{[^x]}} +; PWR9: stxsd{{[^x]}} +; PWR9: stxsd{{[^x]}} +; PWR8-NOT: stxsd{{[^x]}} + %i.018 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] + %call = tail call double* @getDoublePtr() + %call1 = tail call <2 x double> @getVSX() + %vecext = extractelement <2 x double> %call1, i32 0 + store double %vecext, double* %call, align 8 + %call2 = tail call <2 x double> @getVSX() + %vecext3 = extractelement <2 x double> %call2, i32 0 + %arrayidx4 = getelementptr inbounds double, double* %call, i64 1 + store double %vecext3, double* %arrayidx4, align 8 + %call5 = tail call <2 x double> @getVSX() + %vecext6 = extractelement <2 x double> %call5, i32 0 + %arrayidx7 = getelementptr inbounds double, double* %call, i64 2 + store double %vecext6, double* %arrayidx7, align 8 + %call8 = tail call <2 x double> @getVSX() + %vecext9 = extractelement <2 x double> %call8, i32 0 + %arrayidx10 = getelementptr inbounds double, double* %call, i64 3 + store double %vecext9, double* %arrayidx10, align 8 + %inc = add nuw nsw i32 %i.018, 1 + %exitcond = icmp eq i32 %inc, %N + br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body +} + +declare <2 x double> @getVSX() + +define void @STXSSP(i32 zeroext %N) { +entry: + %cmp20 = icmp eq i32 %N, 0 + br i1 %cmp20, label %for.cond.cleanup, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.cond.cleanup.loopexit: ; preds = %for.body + br label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry + ret void + +for.body: ; preds = %for.body.preheader, %for.body +; v19 = vsx51 +; PWR9: xxlor 51 +; PWR9: stxssp 19 +; PWR9: xxlor 51 +; PWR9: stxssp 19 +; PWR9: xxlor 51 +; PWR9: stxssp 19 +; PWR9: xxlor 51 +; PWR9: stxssp 19 +; PWR8-NOT: stxssp{{[^x]}} + %i.021 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] + %call = tail call float* @getFloatPtr() + %call1 = tail call <2 x double> @getVSX() + %vecext = extractelement <2 x double> %call1, i32 0 + %conv = fptrunc double %vecext to float + store float %conv, float* %call, align 4 + %call2 = tail call <2 x double> @getVSX() + %vecext3 = extractelement <2 x double> %call2, i32 0 + %conv4 = fptrunc double %vecext3 to float + %arrayidx5 = getelementptr inbounds float, float* %call, i64 1 + store float %conv4, float* %arrayidx5, align 4 + %call6 = tail call <2 x double> @getVSX() + %vecext7 = extractelement <2 x double> %call6, i32 0 + %conv8 = fptrunc double %vecext7 to float + %arrayidx9 = getelementptr inbounds float, float* %call, i64 2 + store float %conv8, float* %arrayidx9, align 4 + %call10 = tail call <2 x double> @getVSX() + %vecext11 = extractelement <2 x double> %call10, i32 0 + %conv12 = fptrunc double %vecext11 to float + %arrayidx13 = getelementptr inbounds float, float* %call, i64 3 + store float %conv12, float* %arrayidx13, align 4 + %inc = add nuw nsw i32 %i.021, 1 + %exitcond = icmp eq i32 %inc, %N + br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body +} + +declare void @passVSX(<2 x double>, <2 x double>, <2 x double>, <2 x double>) Index: test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll =================================================================== --- test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll +++ test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll @@ -63,7 +63,7 @@ ret <2 x i64> %splat.splat ; CHECK: mtvsrd {{[0-9]+}}, 3 ; CHECK-LE: mtvsrd [[REG1:[0-9]+]], 3 -; CHECK-LE: xxspltd [[REG1]], [[REG1]], 0 +; CHECK-LE: xxspltd 34, [[REG1]], 0 } ; Function Attrs: nounwind Index: test/CodeGen/PowerPC/select-i1-vs-i1.ll =================================================================== --- test/CodeGen/PowerPC/select-i1-vs-i1.ll +++ test/CodeGen/PowerPC/select-i1-vs-i1.ll @@ -714,18 +714,12 @@ %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2 ret <4 x float> %cond -; FIXME: This test (and the other v4f32 tests) should use the same bclr -; technique as the v2f64 tests below. - ; CHECK-LABEL: @testv4floatslt ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34 -; CHECK-DAG: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: xxlor [[REG2]], 35, 35 -; CHECK: .LBB[[BB]]: -; CHECK: xxlor 34, [[REG2]], [[REG2]] +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: vor 2, 3, 3 ; CHECK: blr } @@ -740,12 +734,9 @@ ; CHECK-LABEL: @testv4floatult ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34 -; CHECK-DAG: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: xxlor [[REG2]], 35, 35 -; CHECK: .LBB[[BB]]: -; CHECK: xxlor 34, [[REG2]], [[REG2]] +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: vor 2, 3, 3 ; CHECK: blr } @@ -760,12 +751,9 @@ ; CHECK-LABEL: @testv4floatsle ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34 -; CHECK-DAG: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: xxlor [[REG2]], 35, 35 -; CHECK: .LBB[[BB]]: -; CHECK: xxlor 34, [[REG2]], [[REG2]] +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: vor 2, 3, 3 ; CHECK: blr } @@ -780,12 +768,9 @@ ; CHECK-LABEL: @testv4floatule ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34 -; CHECK-DAG: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: xxlor [[REG2]], 35, 35 -; CHECK: .LBB[[BB]]: -; CHECK: xxlor 34, [[REG2]], [[REG2]] +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: vor 2, 3, 3 ; CHECK: blr } @@ -800,12 +785,9 @@ ; CHECK-LABEL: @testv4floateq ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34 -; CHECK-DAG: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: xxlor [[REG2]], 35, 35 -; CHECK: .LBB[[BB]]: -; CHECK: xxlor 34, [[REG2]], [[REG2]] +; CHECK: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: vor 2, 3, 3 ; CHECK: blr } @@ -820,12 +802,9 @@ ; CHECK-LABEL: @testv4floatsge ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34 -; CHECK-DAG: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: xxlor [[REG2]], 35, 35 -; CHECK: .LBB[[BB]]: -; CHECK: xxlor 34, [[REG2]], [[REG2]] +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: vor 2, 3, 3 ; CHECK: blr } @@ -840,12 +819,9 @@ ; CHECK-LABEL: @testv4floatuge ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34 -; CHECK-DAG: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: xxlor [[REG2]], 35, 35 -; CHECK: .LBB[[BB]]: -; CHECK: xxlor 34, [[REG2]], [[REG2]] +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: vor 2, 3, 3 ; CHECK: blr } @@ -860,12 +836,9 @@ ; CHECK-LABEL: @testv4floatsgt ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34 -; CHECK-DAG: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: xxlor [[REG2]], 35, 35 -; CHECK: .LBB[[BB]]: -; CHECK: xxlor 34, [[REG2]], [[REG2]] +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: vor 2, 3, 3 ; CHECK: blr } @@ -880,12 +853,9 @@ ; CHECK-LABEL: @testv4floatugt ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34 -; CHECK-DAG: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: xxlor [[REG2]], 35, 35 -; CHECK: .LBB[[BB]]: -; CHECK: xxlor 34, [[REG2]], [[REG2]] +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: vor 2, 3, 3 ; CHECK: blr } @@ -900,12 +870,9 @@ ; CHECK-LABEL: @testv4floatne ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34 -; CHECK-DAG: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: xxlor [[REG2]], 35, 35 -; CHECK: .LBB[[BB]]: -; CHECK: xxlor 34, [[REG2]], [[REG2]] +; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: vor 2, 3, 3 ; CHECK: blr } Index: test/CodeGen/PowerPC/sjlj.ll =================================================================== --- test/CodeGen/PowerPC/sjlj.ll +++ test/CodeGen/PowerPC/sjlj.ll @@ -66,7 +66,7 @@ ; CHECK-NOT: mfspr ; CHECK-DAG: stfd -; CHECK-DAG: stvx +; CHECK-DAG: stxvd2x ; CHECK-DAG: addis [[REG:[0-9]+]], 2, env_sigill@toc@ha ; CHECK-DAG: std 31, env_sigill@toc@l([[REG]]) @@ -82,7 +82,7 @@ ; CHECK: .LBB1_4: ; CHECK: lfd -; CHECK: lvx +; CHECK: lxvd2x ; CHECK: ld ; CHECK: blr @@ -93,11 +93,11 @@ ; CHECK: li 3, 0 ; CHECK-NOAV: @main -; CHECK-NOAV-NOT: stvx +; CHECK-NOAV-NOT: stxvd2x ; CHECK-NOAV: bcl ; CHECK-NOAV: mflr ; CHECK-NOAV: bl foo -; CHECK-NOAV-NOT: lvx +; CHECK-NOAV-NOT: lxvd2x ; CHECK-NOAV: blr } Index: test/CodeGen/PowerPC/vsx-args.ll =================================================================== --- test/CodeGen/PowerPC/vsx-args.ll +++ test/CodeGen/PowerPC/vsx-args.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mcpu=pwr7 -mattr=+vsx | FileCheck %s -; RUN: llc < %s -mcpu=pwr7 -mattr=+vsx -fast-isel -O0 | FileCheck %s +; RUN: llc < %s -mcpu=pwr7 -mattr=+vsx -fast-isel -O0 | FileCheck -check-prefix=CHECK-FISL %s target datalayout = "E-m:e-i64:64-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -13,13 +13,23 @@ ; CHECK-LABEL: @main ; CHECK-DAG: vor [[V:[0-9]+]], 2, 2 -; CHECK-DAG: xxlor 34, 35, 35 -; CHECK-DAG: xxlor 35, 36, 36 +; CHECK-DAG: vor 2, 3, 3 +; CHECK-DAG: vor 3, 4, 4 ; CHECK-DAG: vor 4, [[V]], [[V]] -; CHECK-DAG: bl sv -; CHECK-DAG: lxvd2x [[VC:[0-9]+]], +; CHECK: bl sv +; CHECK: lxvd2x [[VC:[0-9]+]], ; CHECK: xvadddp 34, 34, [[VC]] ; CHECK: blr + +; CHECK-FISL-LABEL: @main +; CHECK-FISL: stxvd2x 34, 1, 3 +; CHECK-FISL: vor 2, 3, 3 +; CHECK-FISL: vor 3, 4, 4 +; CHECK-FISL: lxvd2x 36, 1, 3 +; CHECK-FISL: bl sv +; CHECK-FISL: lxvd2x [[VC:[0-9]+]], +; CHECK-FISL: xvadddp 34, 34, [[VC]] +; CHECK-FISL: blr } attributes #0 = { noinline nounwind readnone } Index: test/CodeGen/PowerPC/vsx-infl-copy1.ll =================================================================== --- test/CodeGen/PowerPC/vsx-infl-copy1.ll +++ test/CodeGen/PowerPC/vsx-infl-copy1.ll @@ -11,7 +11,15 @@ br label %vector.body ; CHECK-LABEL: @_Z8example9Pj -; CHECK: xxlor +; CHECK: vor +; CHECK: vor +; CHECK: vor +; CHECK: vor +; CHECK: vor +; CHECK: vor +; CHECK: vor +; CHECK: vor +; CHECK: vor vector.body: ; preds = %vector.body, %entry %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] Index: test/CodeGen/PowerPC/vsx-p8.ll =================================================================== --- test/CodeGen/PowerPC/vsx-p8.ll +++ test/CodeGen/PowerPC/vsx-p8.ll @@ -34,8 +34,7 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test32u -; CHECK-FISL: lxvw4x 0, 0, 3 -; CHECK-FISL: xxlor 34, 0, 0 +; CHECK-FISL: lxvw4x 34, 0, 3 ; CHECK-FISL: blr } @@ -48,8 +47,7 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test33u -; CHECK-FISL: vor 3, 2, 2 -; CHECK-FISL: stxvw4x 35, 0, 3 +; CHECK-FISL: stxvw4x 34, 0, 3 ; CHECK-FISL: blr } Index: test/CodeGen/PowerPC/vsx-spill-norwstore.ll =================================================================== --- test/CodeGen/PowerPC/vsx-spill-norwstore.ll +++ test/CodeGen/PowerPC/vsx-spill-norwstore.ll @@ -4,6 +4,7 @@ @.str1 = external unnamed_addr constant [5 x i8], align 1 @.str10 = external unnamed_addr constant [9 x i8], align 1 +@.v2f64 = external unnamed_addr constant <2 x double>, align 16 ; Function Attrs: nounwind define void @main() #0 { @@ -12,6 +13,7 @@ ; CHECK: stxvd2x entry: + %val = load <2 x double>, <2 x double>* @.v2f64, align 16 %0 = tail call <8 x i16> @llvm.ppc.altivec.vupkhsb(<16 x i8> ) #0 %1 = tail call <8 x i16> @llvm.ppc.altivec.vupklsb(<16 x i8> ) #0 br i1 false, label %if.then.i68.i, label %check.exit69.i @@ -23,7 +25,7 @@ br i1 undef, label %if.then.i63.i, label %check.exit64.i if.then.i63.i: ; preds = %check.exit69.i - tail call void (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str10, i64 0, i64 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str1, i64 0, i64 0)) #0 + tail call void (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str10, i64 0, i64 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str1, i64 0, i64 0), <2 x double> %val) #0 br label %check.exit64.i check.exit64.i: ; preds = %if.then.i63.i, %check.exit69.i Index: test/CodeGen/PowerPC/vsx-vec-spill.ll =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/vsx-vec-spill.ll @@ -0,0 +1,34 @@ +; RUN: llc < %s -march=ppc64 -mattr=+vsx -verify-machineinstrs | \ +; RUN: FileCheck %s --check-prefix=VSX +; RUN: llc < %s -march=ppc64 -mattr=-vsx -verify-machineinstrs | \ +; RUN: FileCheck %s --check-prefix=NOVSX + +define <2 x double> @interleaving_VSX_VMX( + <2 x double> %a, <2 x double> %b, <2 x double> %c, + <2 x double> %d, <2 x double> %e, <2 x double> %f) { +entry: + tail call void asm sideeffect "# clobbers", + "~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() nounwind + tail call void @goo(<2 x double> %a) nounwind + %add = fadd <2 x double> %a, %b + %sub = fsub <2 x double> %a, %b + %mul = fmul <2 x double> %add, %sub + %add1 = fadd <2 x double> %c, %d + %sub2 = fsub <2 x double> %c, %d + %mul3 = fmul <2 x double> %add1, %sub2 + %add4 = fadd <2 x double> %mul, %mul3 + %add5 = fadd <2 x double> %e, %f + %sub6 = fsub <2 x double> %e, %f + %mul7 = fmul <2 x double> %add5, %sub6 + %add8 = fadd <2 x double> %add4, %mul7 + ret <2 x double> %add8 +; VSX-LABEL: interleaving_VSX_VMX +; VSX-NOT: stvx +; VSX-NOT: lvx + +; NOVSX-LABEL: interleaving_VSX_VMX +; NOVSX-NOT: stxvd2x +; NOVSX-NOT: lxvd2x +} + +declare void @goo(<2 x double>) Index: test/CodeGen/PowerPC/vsx.ll =================================================================== --- test/CodeGen/PowerPC/vsx.ll +++ test/CodeGen/PowerPC/vsx.ll @@ -70,10 +70,7 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test5 -; CHECK-FISL: vor -; CHECK-FISL: vor -; CHECK-FISL: xxlxor -; CHECK-FISL: vor 2 +; CHECK-FISL: xxlxor 34, 34, 35 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test5 @@ -91,10 +88,7 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test6 -; CHECK-FISL: vor 4, 2, 2 -; CHECK-FISL: vor 5, 3, 3 -; CHECK-FISL: xxlxor 36, 36, 37 -; CHECK-FISL: vor 2, 4, 4 +; CHECK-FISL: xxlxor 34, 34, 35 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test6 @@ -112,10 +106,7 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test7 -; CHECK-FISL: vor 4, 2, 2 -; CHECK-FISL: vor 5, 3, 3 -; CHECK-FISL: xxlxor 36, 36, 37 -; CHECK-FISL: vor 2, 4, 4 +; CHECK-FISL: xxlxor 34, 34, 35 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test7 @@ -133,10 +124,7 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test8 -; CHECK-FISL: vor -; CHECK-FISL: vor -; CHECK-FISL: xxlor -; CHECK-FISL: vor 2 +; CHECK-FISL: xxlor 34, 34, 35 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test8 @@ -154,10 +142,7 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test9 -; CHECK-FISL: vor 4, 2, 2 -; CHECK-FISL: vor 5, 3, 3 -; CHECK-FISL: xxlor 36, 36, 37 -; CHECK-FISL: vor 2, 4, 4 +; CHECK-FISL: xxlor 34, 34, 35 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test9 @@ -175,10 +160,7 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test10 -; CHECK-FISL: vor 4, 2, 2 -; CHECK-FISL: vor 5, 3, 3 -; CHECK-FISL: xxlor 36, 36, 37 -; CHECK-FISL: vor 2, 4, 4 +; CHECK-FISL: xxlor 34, 34, 35 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test10 @@ -196,10 +178,7 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test11 -; CHECK-FISL: vor -; CHECK-FISL: vor -; CHECK-FISL: xxland -; CHECK-FISL: vor 2 +; CHECK-FISL: xxland 34, 34, 35 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test11 @@ -217,10 +196,7 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test12 -; CHECK-FISL: vor 4, 2, 2 -; CHECK-FISL: vor 5, 3, 3 -; CHECK-FISL: xxland 36, 36, 37 -; CHECK-FISL: vor 2, 4, 4 +; CHECK-FISL: xxland 34, 34, 35 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test12 @@ -238,10 +214,7 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test13 -; CHECK-FISL: vor 4, 2, 2 -; CHECK-FISL: vor 5, 3, 3 -; CHECK-FISL: xxland 36, 36, 37 -; CHECK-FISL: vor 2, 4, 4 +; CHECK-FISL: xxland 34, 34, 35 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test13 @@ -260,11 +233,8 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test14 -; CHECK-FISL: vor 4, 3, 3 -; CHECK-FISL: vor 5, 2, 2 -; CHECK-FISL: xxlor 0, 37, 36 -; CHECK-FISL: xxlnor 36, 37, 36 -; CHECK-FISL: vor 2, 4, 4 +; CHECK-FISL: xxlor 0, 34, 35 +; CHECK-FISL: xxlnor 34, 34, 35 ; CHECK-FISL: lis 0, -1 ; CHECK-FISL: ori 0, 0, 65520 ; CHECK-FISL: stxvd2x 0, 1, 0 @@ -286,17 +256,13 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test15 -; CHECK-FISL: vor 4, 2, 2 -; CHECK-FISL: vor 5, 3, 3 -; CHECK-FISL: xxlor 36, 36, 37 -; CHECK-FISL: vor 0, 4, 4 -; CHECK-FISL: vor 4, 2, 2 -; CHECK-FISL: vor 5, 3, 3 -; CHECK-FISL: xxlnor 36, 36, 37 -; CHECK-FISL: vor 2, 4, 4 +; CHECK-FISL: xxlor 0, 34, 35 +; CHECK-FISL: xxlor 36, 0, 0 +; CHECK-FISL: xxlnor 0, 34, 35 +; CHECK-FISL: xxlor 34, 0, 0 ; CHECK-FISL: lis 0, -1 ; CHECK-FISL: ori 0, 0, 65520 -; CHECK-FISL: stvx 0, 1, 0 +; CHECK-FISL: stxvd2x 36, 1, 0 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test15 @@ -315,17 +281,13 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test16 -; CHECK-FISL: vor 4, 2, 2 -; CHECK-FISL: vor 5, 3, 3 -; CHECK-FISL: xxlor 36, 36, 37 -; CHECK-FISL: vor 0, 4, 4 -; CHECK-FISL: vor 4, 2, 2 -; CHECK-FISL: vor 5, 3, 3 -; CHECK-FISL: xxlnor 36, 36, 37 -; CHECK-FISL: vor 2, 4, 4 +; CHECK-FISL: xxlor 0, 34, 35 +; CHECK-FISL: xxlor 36, 0, 0 +; CHECK-FISL: xxlnor 0, 34, 35 +; CHECK-FISL: xxlor 34, 0, 0 ; CHECK-FISL: lis 0, -1 ; CHECK-FISL: ori 0, 0, 65520 -; CHECK-FISL: stvx 0, 1, 0 +; CHECK-FISL: stxvd2x 36, 1, 0 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test16 @@ -344,13 +306,9 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test17 -; CHECK-FISL: vor 4, 3, 3 -; CHECK-FISL: vor 5, 2, 2 -; CHECK-FISL: vspltisb 2, -1 -; CHECK-FISL: vor 0, 2, 2 -; CHECK-FISL: xxlxor 36, 36, 32 -; CHECK-FISL: xxland 36, 37, 36 -; CHECK-FISL: vor 2, 4, 4 +; CHECK-FISL: vspltisb 4, -1 +; CHECK-FISL: xxlxor 35, 35, 36 +; CHECK-FISL: xxland 34, 34, 35 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test17 @@ -370,17 +328,11 @@ ; CHECK-FISL-LABEL: @test18 ; CHECK-FISL: vspltisb 4, -1 -; CHECK-FISL: vor 5, 3, 3 -; CHECK-FISL: vor 0, 4, 4 -; CHECK-FISL: xxlxor 37, 37, 32 -; CHECK-FISL: vor 4, 5, 5 -; CHECK-FISL: vor 5, 2, 2 -; CHECK-FISL: vor 0, 3, 3 -; CHECK-FISL: xxlandc 37, 37, 32 -; CHECK-FISL: vor 2, 5, 5 +; CHECK-FISL: xxlxor 36, 35, 36 +; CHECK-FISL: xxlandc 34, 34, 35 ; CHECK-FISL: lis 0, -1 ; CHECK-FISL: ori 0, 0, 65520 -; CHECK-FISL: stvx 4, 1, 0 +; CHECK-FISL: stxvd2x 36, 1, 0 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test18 @@ -400,17 +352,11 @@ ; CHECK-FISL-LABEL: @test19 ; CHECK-FISL: vspltisb 4, -1 -; CHECK-FISL: vor 5, 3, 3 -; CHECK-FISL: vor 0, 4, 4 -; CHECK-FISL: xxlxor 37, 37, 32 -; CHECK-FISL: vor 4, 5, 5 -; CHECK-FISL: vor 5, 2, 2 -; CHECK-FISL: vor 0, 3, 3 -; CHECK-FISL: xxlandc 37, 37, 32 -; CHECK-FISL: vor 2, 5, 5 +; CHECK-FISL: xxlxor 36, 35, 36 +; CHECK-FISL: xxlandc 34, 34, 35 ; CHECK-FISL: lis 0, -1 ; CHECK-FISL: ori 0, 0, 65520 -; CHECK-FISL: stvx 4, 1, 0 +; CHECK-FISL: stxvd2x 36, 1, 0 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test19 @@ -429,19 +375,9 @@ ; CHECK-REG: xxsel 34, 35, 34, {{[0-9]+}} ; CHECK-REG: blr -; FIXME: The fast-isel code is pretty miserable for this one. - ; CHECK-FISL-LABEL: @test20 -; CHECK-FISL: vor 0, 5, 5 -; CHECK-FISL: vor 1, 4, 4 -; CHECK-FISL: vor 6, 3, 3 -; CHECK-FISL: vor 7, 2, 2 -; CHECK-FISL: vor 2, 1, 1 -; CHECK-FISL: vor 3, 0, 0 -; CHECK-FISL: vcmpequw 2, 2, 3 -; CHECK-FISL: vor 0, 2, 2 -; CHECK-FISL: xxsel 32, 38, 39, 32 -; CHECK-FISL: vor 2, 0, 0 +; CHECK-FISL: vcmpequw {{[0-9]+}}, 4, 5 +; CHECK-FISL: xxsel 34, 35, 34, {{[0-9]+}} ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test20 @@ -462,13 +398,8 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test21 -; CHECK-FISL: vor 0, 5, 5 -; CHECK-FISL: vor 1, 4, 4 -; CHECK-FISL: vor 6, 3, 3 -; CHECK-FISL: vor 7, 2, 2 -; CHECK-FISL: xvcmpeqsp 32, 33, 32 -; CHECK-FISL: xxsel 32, 38, 39, 32 -; CHECK-FISL: vor 2, 0, 0 +; CHECK-FISL: xvcmpeqsp [[V1:[0-9]+]], 36, 37 +; CHECK-FISL: xxsel 34, 35, 34, [[V1]] ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test21 @@ -495,14 +426,14 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test22 -; CHECK-FISL-DAG: xvcmpeqsp {{[0-9]+}}, 33, 32 -; CHECK-FISL-DAG: xvcmpeqsp {{[0-9]+}}, 32, 32 -; CHECK-FISL-DAG: xvcmpeqsp {{[0-9]+}}, 33, 33 +; CHECK-FISL-DAG: xvcmpeqsp {{[0-9]+}}, 37, 37 +; CHECK-FISL-DAG: xvcmpeqsp {{[0-9]+}}, 36, 36 +; CHECK-FISL-DAG: xvcmpeqsp {{[0-9]+}}, 36, 37 ; CHECK-FISL-DAG: xxlnor ; CHECK-FISL-DAG: xxlnor ; CHECK-FISL-DAG: xxlor ; CHECK-FISL-DAG: xxlor -; CHECK-FISL: xxsel 0, 38, 39, {{[0-9]+}} +; CHECK-FISL: xxsel 34, 35, 34, {{[0-9]+}} ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test22 @@ -530,11 +461,7 @@ ; CHECK-FISL-LABEL: @test23 ; CHECK-FISL: vcmpequh 4, 4, 5 -; CHECK-FISL: vor 0, 3, 3 -; CHECK-FISL: vor 1, 2, 2 -; CHECK-FISL: vor 6, 4, 4 -; CHECK-FISL: xxsel 32, 32, 33, 38 -; CHECK-FISL: vor 2, 0, +; CHECK-FISL: xxsel 34, 35, 34, 36 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test23 @@ -556,11 +483,7 @@ ; CHECK-FISL-LABEL: @test24 ; CHECK-FISL: vcmpequb 4, 4, 5 -; CHECK-FISL: vor 0, 3, 3 -; CHECK-FISL: vor 1, 2, 2 -; CHECK-FISL: vor 6, 4, 4 -; CHECK-FISL: xxsel 32, 32, 33, 38 -; CHECK-FISL: vor 2, 0, 0 +; CHECK-FISL: xxsel 34, 35, 34, 36 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test24 @@ -686,8 +609,6 @@ ; CHECK-FISL-LABEL: @test30 ; CHECK-FISL: lxvd2x 0, 0, 3 ; CHECK-FISL: xxlor 34, 0, 0 -; CHECK-FISL: vor 3, 2, 2 -; CHECK-FISL: vor 2, 3, 3 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test30 @@ -719,8 +640,7 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test32 -; CHECK-FISL: lxvw4x 0, 0, 3 -; CHECK-FISL: xxlor 34, 0, 0 +; CHECK-FISL: lxvw4x 34, 0, 3 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test32 @@ -738,8 +658,7 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test33 -; CHECK-FISL: vor 3, 2, 2 -; CHECK-FISL: stxvw4x 35, 0, 3 +; CHECK-FISL: stxvw4x 34, 0, 3 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test33 @@ -774,8 +693,7 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test33u -; CHECK-FISL: vor 3, 2, 2 -; CHECK-FISL: stxvw4x 35, 0, 3 +; CHECK-FISL: stxvw4x 34, 0, 3 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test33u @@ -793,8 +711,7 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test34 -; CHECK-FISL: lxvw4x 0, 0, 3 -; CHECK-FISL: xxlor 34, 0, 0 +; CHECK-FISL: lxvw4x 34, 0, 3 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test34 @@ -812,8 +729,7 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test35 -; CHECK-FISL: vor 3, 2, 2 -; CHECK-FISL: stxvw4x 35, 0, 3 +; CHECK-FISL: stxvw4x 34, 0, 3 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test35 @@ -1090,10 +1006,7 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test65 -; CHECK-FISL: vor 4, 3, 3 -; CHECK-FISL: vor 5, 2, 2 -; CHECK-FISL: vcmpequw 4, 5, 4 -; CHECK-FISL: vor 2, 4, 4 +; CHECK-FISL: vcmpequw 2, 2, 3 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test65 @@ -1111,8 +1024,8 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test66 -; CHECK-FISL: vcmpequw {{[0-9]+}}, 5, 4 -; CHECK-FISL: xxlnor 34, {{[0-9]+}}, {{[0-9]+}} +; CHECK-FISL: vcmpequw 2, 2, 3 +; CHECK-FISL: xxlnor 34, 34, 34 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test66