Index: lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp =================================================================== --- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -83,6 +83,16 @@ PPC::F24, PPC::F25, PPC::F26, PPC::F27, PPC::F28, PPC::F29, PPC::F30, PPC::F31 }; +static const MCPhysReg VFRegs[32] = { + PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3, + PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7, + PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11, + PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15, + PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19, + PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23, + PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27, + PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31 +}; static const MCPhysReg VRegs[32] = { PPC::V0, PPC::V1, PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, @@ -103,14 +113,14 @@ PPC::VSL24, PPC::VSL25, PPC::VSL26, PPC::VSL27, PPC::VSL28, PPC::VSL29, PPC::VSL30, PPC::VSL31, - PPC::VSH0, PPC::VSH1, PPC::VSH2, PPC::VSH3, - PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, - PPC::VSH8, PPC::VSH9, PPC::VSH10, PPC::VSH11, - PPC::VSH12, PPC::VSH13, PPC::VSH14, PPC::VSH15, - PPC::VSH16, PPC::VSH17, PPC::VSH18, PPC::VSH19, - PPC::VSH20, PPC::VSH21, PPC::VSH22, PPC::VSH23, - PPC::VSH24, PPC::VSH25, PPC::VSH26, PPC::VSH27, - PPC::VSH28, PPC::VSH29, PPC::VSH30, PPC::VSH31 + PPC::V0, PPC::V1, PPC::V2, PPC::V3, + PPC::V4, PPC::V5, PPC::V6, PPC::V7, + PPC::V8, PPC::V9, PPC::V10, PPC::V11, + PPC::V12, PPC::V13, PPC::V14, PPC::V15, + PPC::V16, PPC::V17, PPC::V18, PPC::V19, + PPC::V20, PPC::V21, PPC::V22, PPC::V23, + PPC::V24, PPC::V25, PPC::V26, PPC::V27, + PPC::V28, PPC::V29, PPC::V30, PPC::V31 }; static const MCPhysReg VSFRegs[64] = { PPC::F0, PPC::F1, PPC::F2, PPC::F3, @@ -597,6 +607,11 @@ Inst.addOperand(MCOperand::createReg(FRegs[getReg()])); } + void addRegVFRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(VFRegs[getReg()])); + } + 
void addRegVRRCOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::createReg(VRegs[getReg()])); Index: lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp =================================================================== --- lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp +++ lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -89,6 +89,17 @@ PPC::F28, PPC::F29, PPC::F30, PPC::F31 }; +static const unsigned VFRegs[] = { + PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3, + PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7, + PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11, + PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15, + PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19, + PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23, + PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27, + PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31 +}; + static const unsigned VRegs[] = { PPC::V0, PPC::V1, PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, @@ -110,14 +121,14 @@ PPC::VSL24, PPC::VSL25, PPC::VSL26, PPC::VSL27, PPC::VSL28, PPC::VSL29, PPC::VSL30, PPC::VSL31, - PPC::VSH0, PPC::VSH1, PPC::VSH2, PPC::VSH3, - PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, - PPC::VSH8, PPC::VSH9, PPC::VSH10, PPC::VSH11, - PPC::VSH12, PPC::VSH13, PPC::VSH14, PPC::VSH15, - PPC::VSH16, PPC::VSH17, PPC::VSH18, PPC::VSH19, - PPC::VSH20, PPC::VSH21, PPC::VSH22, PPC::VSH23, - PPC::VSH24, PPC::VSH25, PPC::VSH26, PPC::VSH27, - PPC::VSH28, PPC::VSH29, PPC::VSH30, PPC::VSH31 + PPC::V0, PPC::V1, PPC::V2, PPC::V3, + PPC::V4, PPC::V5, PPC::V6, PPC::V7, + PPC::V8, PPC::V9, PPC::V10, PPC::V11, + PPC::V12, PPC::V13, PPC::V14, PPC::V15, + PPC::V16, PPC::V17, PPC::V18, PPC::V19, + PPC::V20, PPC::V21, PPC::V22, PPC::V23, + PPC::V24, PPC::V25, PPC::V26, PPC::V27, + PPC::V28, PPC::V29, PPC::V30, PPC::V31 }; static const unsigned VSFRegs[] = { @@ -242,6 +253,12 @@ return decodeRegisterClass(Inst, RegNo, FRegs); } +static DecodeStatus DecodeVFRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t 
Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, VFRegs); +} + static DecodeStatus DecodeVRRCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { Index: lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp =================================================================== --- lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "PPCInstPrinter.h" +#include "PPCInstrInfo.h" #include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCPredicates.h" #include "llvm/MC/MCExpr.h" @@ -447,7 +448,7 @@ /// stripRegisterPrefix - This method strips the character prefix from a /// register name so that only the number is left. Used by for linux asm. static const char *stripRegisterPrefix(const char *RegName) { - if (FullRegNames) + if (FullRegNames || ShowVSRNumsAsVR) return RegName; switch (RegName[0]) { @@ -468,15 +469,26 @@ raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); if (Op.isReg()) { - const char *RegName = getRegisterName(Op.getReg()); - if (ShowVSRNumsAsVR) { - unsigned RegNum = Op.getReg(); - if (RegNum >= PPC::VSH0 && RegNum <= PPC::VSH31) - O << 'v' << RegNum - PPC::VSH0; - else - O << RegName; - return; + unsigned Reg = Op.getReg(); + + /* There are VSX instructions that use VSX register numbering (vs0 - vs63) + as well as those that use VMX register numbering (v0 - v31 which + correspond to vs32 - vs63). If we have an instruction that uses VSX + numbering, we need to convert the VMX registers to VSX registers. + Namely, we print 32-63 when the instruction operates on one of the + VMX registers. We previously had the VSHRC register class for the + upper 32 VSX registers. 
+ (Please synchronize with PPCAsmPrinter::printOperand) + */ + if ((MII.get(MI->getOpcode()).TSFlags & PPCII::UseVSXReg) && + !ShowVSRNumsAsVR) { + if (PPCInstrInfo::isVRRegister(Reg)) + Reg = PPC::VSX32 + (Reg - PPC::V0); + else if (PPCInstrInfo::isVFRegister(Reg)) + Reg = PPC::VSX32 + (Reg - PPC::VF0); } + + const char *RegName = getRegisterName(Reg); // The linux and AIX assembler does not take register prefixes. if (!isDarwinSyntax()) RegName = stripRegisterPrefix(RegName); Index: lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp =================================================================== --- lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "PPCInstrInfo.h" #include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCFixupKinds.h" #include "llvm/ADT/Statistic.h" @@ -350,7 +351,6 @@ return 0x80 >> CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); } - unsigned PPCMCCodeEmitter:: getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl &Fixups, @@ -361,7 +361,14 @@ assert((MI.getOpcode() != PPC::MTOCRF && MI.getOpcode() != PPC::MTOCRF8 && MI.getOpcode() != PPC::MFOCRF && MI.getOpcode() != PPC::MFOCRF8) || MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7); - return CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); + unsigned Reg = MO.getReg(); + unsigned Encode = CTX.getRegisterInfo()->getEncodingValue(Reg); + + if ((MCII.get(MI.getOpcode()).TSFlags & PPCII::UseVSXReg)) + if (PPCInstrInfo::isVRRegister(Reg)) + Encode += 32; + + return Encode; } assert(MO.isImm() && Index: lib/Target/PowerPC/PPCAsmPrinter.cpp =================================================================== --- lib/Target/PowerPC/PPCAsmPrinter.cpp +++ lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -169,7 +169,25 @@ switch (MO.getType()) { case MachineOperand::MO_Register: { - const char 
*RegName = PPCInstPrinter::getRegisterName(MO.getReg()); + unsigned Reg = MO.getReg(); + + /* There are VSX instructions that use VSX register numbering (vs0 - vs63) + as well as those that use VMX register numbering (v0 - v31 which + correspond to vs32 - vs63). If we have an instruction that uses VSX + numbering, we need to convert the VMX registers to VSX registers. + Namely, we print 32-63 when the instruction operates on one of the + VMX registers. We previously had the VSHRC register class for the + upper 32 VSX registers. + (Please synchronize with PPCInstPrinter::printOperand) + */ + if (MI->getDesc().TSFlags & PPCII::UseVSXReg) { + if (PPCInstrInfo::isVRRegister(Reg)) + Reg = PPC::VSX32 + (Reg - PPC::V0); + else if (PPCInstrInfo::isVFRegister(Reg)) + Reg = PPC::VSX32 + (Reg - PPC::VF0); + } + const char *RegName = PPCInstPrinter::getRegisterName(Reg); + // Linux assembler (Others?) does not take register mnemonics. // FIXME - What about special registers used in mfspr/mtspr? if (!Subtarget->isDarwin()) Index: lib/Target/PowerPC/PPCCallingConv.td =================================================================== --- lib/Target/PowerPC/PPCCallingConv.td +++ lib/Target/PowerPC/PPCCallingConv.td @@ -68,11 +68,9 @@ // Vector types returned as "direct" go into V2 .. V9; note that only the // ELFv2 ABI fully utilizes all these registers. - CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32], + CCIfType<[v16i8, v8i16, v4i32, v2f64, v2i64, v1i128, v4f32], CCIfSubtarget<"hasAltivec()", - CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>, - CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()", - CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>> + CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>> ]>; // No explicit register is specified for the AnyReg calling convention. 
The @@ -121,11 +119,9 @@ CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, CCIfType<[v4f64, v4f32, v4i1], CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>, - CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32], + CCIfType<[v16i8, v8i16, v4i32, v2f64, v2i64, v1i128, v4f32], CCIfSubtarget<"hasAltivec()", - CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>, - CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()", - CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>> + CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>> ]>; //===----------------------------------------------------------------------===// @@ -193,12 +189,9 @@ CCAssignToReg<[QF1, QF2, QF3, QF4, QF5, QF6, QF7, QF8]>>>, // The first 12 Vector arguments are passed in AltiVec registers. - CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32], + CCIfType<[v16i8, v8i16, v4i32, v2f64, v2i64, v1i128, v4f32], CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13]>>>, - CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()", - CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9, - VSH10, VSH11, VSH12, VSH13]>>>, CCDelegateTo ]>; @@ -287,6 +280,5 @@ (sequence "V%u", 0, 31))>; def CSR_64_AllRegs_VSX : CalleeSavedRegs<(add CSR_64_AllRegs_Altivec, - (sequence "VSL%u", 0, 31), - (sequence "VSH%u", 0, 31))>; + (sequence "VSL%u", 0, 31))>; Index: lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- lib/Target/PowerPC/PPCISelLowering.h +++ lib/Target/PowerPC/PPCISelLowering.h @@ -50,6 +50,10 @@ /// unsigned integers. FCTIDUZ, FCTIWUZ, + /// VEXTS, ByteWidth - takes an input in VSFRC and produces an output in + /// VSFRC that is sign-extended from ByteWidth to a 64-bit integer. + VEXTS, + /// Reciprocal estimate instructions (unary FP ops). FRE, FRSQRTE, @@ -365,6 +369,16 @@ /// destination 64-bit register. 
LFIWZX, + /// VSFRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an + /// integer smaller than 64 bits into a VSR. The integer is zero-extended. + /// This can be used for converting loaded integers to floating point. + LXSIZX, + + /// STXSIX - The STXSI[bh]X instruction. The first operand is an input + /// chain, then an f64 value to store, then an address to store it to, + /// followed by a byte-width for the store. + STXSIX, + /// VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian. /// Maps directly to an lxvd2x instruction that will be followed by /// an xxswapd. Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -685,7 +685,7 @@ } if (Subtarget.isISA3_0() && Subtarget.hasDirectMove()) - setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Legal); + setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); } if (Subtarget.hasQPX()) { @@ -1075,6 +1075,9 @@ case PPCISD::STBRX: return "PPCISD::STBRX"; case PPCISD::LFIWAX: return "PPCISD::LFIWAX"; case PPCISD::LFIWZX: return "PPCISD::LFIWZX"; + case PPCISD::LXSIZX: return "PPCISD::LXSIZX"; + case PPCISD::STXSIX: return "PPCISD::STXSIX"; + case PPCISD::VEXTS: return "PPCISD::VEXTS"; case PPCISD::LXVD2X: return "PPCISD::LXVD2X"; case PPCISD::STXVD2X: return "PPCISD::STXVD2X"; case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; @@ -2986,7 +2989,7 @@ break; case MVT::v2f64: case MVT::v2i64: - RC = &PPC::VSHRCRegClass; + RC = &PPC::VRRCRegClass; break; case MVT::v4f64: RC = &PPC::QFRCRegClass; @@ -3169,10 +3172,6 @@ PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; - static const MCPhysReg VSRH[] = { - PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8, - PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13 - }; const unsigned Num_GPR_Regs = 
array_lengthof(GPR); const unsigned Num_FPR_Regs = 13; @@ -3448,9 +3447,7 @@ // passed directly. The latter are used to implement ELFv2 homogenous // vector aggregates. if (VR_idx != Num_VR_Regs) { - unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ? - MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) : - MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); + unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); ++VR_idx; } else { @@ -5056,10 +5053,6 @@ PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; - static const MCPhysReg VSRH[] = { - PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8, - PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13 - }; const unsigned NumGPRs = array_lengthof(GPR); const unsigned NumFPRs = 13; @@ -5486,13 +5479,7 @@ SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo()); MemOpChains.push_back(Load.getValue(1)); - - unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 || - Arg.getSimpleValueType() == MVT::v2i64) ? - VSRH[VR_idx] : VR[VR_idx]; - ++VR_idx; - - RegsToPass.push_back(std::make_pair(VReg, Load)); + RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); } ArgOffset += 16; for (unsigned i=0; i<16; i+=PtrByteSize) { @@ -5510,12 +5497,7 @@ // Non-varargs Altivec params go into VRs or on the stack. if (VR_idx != NumVRs) { - unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 || - Arg.getSimpleValueType() == MVT::v2i64) ? 
- VSRH[VR_idx] : VR[VR_idx]; - ++VR_idx; - - RegsToPass.push_back(std::make_pair(VReg, Arg)); + RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg)); } else { if (CallConv == CallingConv::Fast) ComputePtrOff(); @@ -7094,6 +7076,8 @@ } static bool isNonConstSplatBV(BuildVectorSDNode *BVN, EVT Type) { + if (BVN->isConstant()) + return false; if (BVN->getValueType(0) != Type) return false; auto OpZero = BVN->getOperand(0); @@ -7230,8 +7214,9 @@ auto OpZero = BVN->getOperand(0); bool CanLoadAndSplat = OpZero.getOpcode() == ISD::LOAD && BVN->isOnlyUserOf(OpZero.getNode()); - if (Subtarget.isISA3_0() && - isNonConstSplatBV(BVN, MVT::v4i32) && !CanLoadAndSplat) + if (Subtarget.isISA3_0() && !CanLoadAndSplat && + (isNonConstSplatBV(BVN, MVT::v4i32) || + isNonConstSplatBV(BVN, MVT::v2i64))) return Op; return SDValue(); } @@ -10571,6 +10556,35 @@ SDLoc dl(N); SDValue Op(N, 0); + SDValue FirstOperand(Op.getOperand(0)); + bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD && + (FirstOperand.getValueType() == MVT::i8 || + FirstOperand.getValueType() == MVT::i16); + if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) { + int ByteWidth = FirstOperand.getValueType() == MVT::i8 ? 1 : 2; + bool Signed = N->getOpcode() == ISD::SINT_TO_FP; + bool DstDouble = Op.getValueType() == MVT::f64; + unsigned ConvOp = Signed ? + (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) : + (DstDouble ? 
PPCISD::FCFIDU : PPCISD::FCFIDUS); + SDValue WidthConst = DAG.getIntPtrConstant(ByteWidth, dl, false); + LoadSDNode *LDN = cast(FirstOperand.getNode()); + SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst }; + SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl, + DAG.getVTList(MVT::f64, MVT::Other), + Ops, MVT::i8, LDN->getMemOperand()); + + SDValue FP; + // For signed conversion, we need to sign-extend the value in the VSR + if (Signed) { + SDValue ExtOps[] = { Ld, WidthConst }; + SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps); + FP = DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext); + } else + FP = DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld); + return FP; + } + // Don't handle ppc_fp128 here or i1 conversions. if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) return SDValue(); @@ -10783,10 +10797,14 @@ case ISD::UINT_TO_FP: return combineFPToIntToFP(N, DCI); case ISD::STORE: { + EVT Op1VT = N->getOperand(1).getValueType(); + bool ValidTypeForStoreFltAsInt = (Op1VT == MVT::i32) || + (Subtarget.hasP9Vector() && (Op1VT == MVT::i8 || Op1VT == MVT::i16)); + // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)). 
if (Subtarget.hasSTFIWX() && !cast(N)->isTruncatingStore() && N->getOperand(1).getOpcode() == ISD::FP_TO_SINT && - N->getOperand(1).getValueType() == MVT::i32 && + ValidTypeForStoreFltAsInt && N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) { SDValue Val = N->getOperand(1).getOperand(0); if (Val.getValueType() == MVT::f32) { @@ -10796,15 +10814,30 @@ Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val); DCI.AddToWorklist(Val.getNode()); - SDValue Ops[] = { - N->getOperand(0), Val, N->getOperand(2), - DAG.getValueType(N->getOperand(1).getValueType()) - }; + if (Op1VT == MVT::i32) { + SDValue Ops[] = { + N->getOperand(0), Val, N->getOperand(2), + DAG.getValueType(N->getOperand(1).getValueType()) + }; + + Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, + DAG.getVTList(MVT::Other), Ops, + cast(N)->getMemoryVT(), + cast(N)->getMemOperand()); + } else { + unsigned ByteWidth = N->getOperand(1).getValueType() == MVT::i8 ? 1 : 2; + SDValue WidthConst = DAG.getIntPtrConstant(ByteWidth, dl, false); + + SDValue Ops[] = { + N->getOperand(0), Val, N->getOperand(2), WidthConst, + DAG.getValueType(N->getOperand(1).getValueType()) + }; + Val = DAG.getMemIntrinsicNode(PPCISD::STXSIX, dl, + DAG.getVTList(MVT::Other), Ops, + cast(N)->getMemoryVT(), + cast(N)->getMemOperand()); + } - Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, - DAG.getVTList(MVT::Other), Ops, - cast(N)->getMemoryVT(), - cast(N)->getMemOperand()); DCI.AddToWorklist(Val.getNode()); return Val; } Index: lib/Target/PowerPC/PPCInstrAltivec.td =================================================================== --- lib/Target/PowerPC/PPCInstrAltivec.td +++ lib/Target/PowerPC/PPCInstrAltivec.td @@ -706,6 +706,12 @@ "vspltw $vD, $vB, $UIMM", IIC_VecPerm, [(set v16i8:$vD, (vspltw_shuffle:$UIMM v16i8:$vB, (undef)))]>; +let isCodeGenOnly = 1 in { + def VSPLTBs : VXForm_1<524, (outs vrrc:$vD), (ins u5imm:$UIMM, vfrc:$vB), + "vspltb $vD, $vB, $UIMM", IIC_VecPerm, []>; + def VSPLTHs : VXForm_1<588, (outs 
vrrc:$vD), (ins u5imm:$UIMM, vfrc:$vB), + "vsplth $vD, $vB, $UIMM", IIC_VecPerm, []>; +} def VSR : VX1_Int_Ty< 708, "vsr" , int_ppc_altivec_vsr, v4i32>; def VSRO : VX1_Int_Ty<1100, "vsro" , int_ppc_altivec_vsro, v4i32>; @@ -1270,6 +1276,9 @@ class VX_VT5_EO5_VB5 xo, bits<5> eo, string opc, list pattern> : VXForm_RD5_XO5_RS5; +class VX_VT5_EO5_VB5s xo, bits<5> eo, string opc, list pattern> + : VXForm_RD5_XO5_RS5; // Vector Count Leading/Trailing Zero LSB. Result is placed into GPR[rD] def VCLZLSBB : VXForm_RD5_XO5_RS5<1538, 0, (outs g8rc:$rD), (ins vrrc:$vB), @@ -1292,6 +1301,13 @@ def VEXTSB2D : VX_VT5_EO5_VB5<1538, 24, "vextsb2d", []>; def VEXTSH2D : VX_VT5_EO5_VB5<1538, 25, "vextsh2d", []>; def VEXTSW2D : VX_VT5_EO5_VB5<1538, 26, "vextsw2d", []>; +let isCodeGenOnly = 1 in { + def VEXTSB2Ws : VX_VT5_EO5_VB5s<1538, 16, "vextsb2w", []>; + def VEXTSH2Ws : VX_VT5_EO5_VB5s<1538, 17, "vextsh2w", []>; + def VEXTSB2Ds : VX_VT5_EO5_VB5s<1538, 24, "vextsb2d", []>; + def VEXTSH2Ds : VX_VT5_EO5_VB5s<1538, 25, "vextsh2d", []>; + def VEXTSW2Ds : VX_VT5_EO5_VB5s<1538, 26, "vextsw2d", []>; +} // Vector Integer Negate def VNEGW : VX_VT5_EO5_VB5<1538, 6, "vnegw", []>; Index: lib/Target/PowerPC/PPCInstrFormats.td =================================================================== --- lib/Target/PowerPC/PPCInstrFormats.td +++ lib/Target/PowerPC/PPCInstrFormats.td @@ -38,6 +38,14 @@ let TSFlags{2} = PPC970_Cracked; let TSFlags{5-3} = PPC970_Unit; + /// Indicate that the VSX instruction is to use VSX numbering/encoding. + /// Since ISA 3.0, there are scalar instructions that use the upper + /// half of the VSX register set only. Rather than adding further complexity + /// to the register class set, the VSX registers just include the Altivec + /// registers and this flag decides the numbering to be used for them. + bits<1> UseVSXReg = 0; + let TSFlags{6} = UseVSXReg; + // Fields used for relation models. 
string BaseName = ""; @@ -62,6 +70,8 @@ class PPC970_Unit_VPERM { bits<3> PPC970_Unit = 6; } class PPC970_Unit_BRU { bits<3> PPC970_Unit = 7; } +class UseVSXReg { bits<1> UseVSXReg = 1; } + // Two joined instructions; used to emit two adjacent instructions as one. // The itinerary from the first instruction is used for scheduling and // classification. Index: lib/Target/PowerPC/PPCInstrInfo.h =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.h +++ lib/Target/PowerPC/PPCInstrInfo.h @@ -61,6 +61,15 @@ PPC970_VPERM = 6 << PPC970_Shift, // Vector Permute Unit PPC970_BRU = 7 << PPC970_Shift // Branch Unit }; + +enum { + /// Shift count to bypass PPC970 flags + NewDef_Shift = 6, + + /// The VSX instruction that uses VSX register (vs0-vs63), instead of VMX + /// register (v0-v31). + UseVSXReg = 0x1 << NewDef_Shift +}; } // end namespace PPCII class PPCSubtarget; @@ -273,6 +282,13 @@ // Lower pseudo instructions after register allocation. bool expandPostRAPseudo(MachineInstr &MI) const override; + + static bool isVFRegister(unsigned Reg) { + return Reg >= PPC::VF0 && Reg <= PPC::VF31; + } + static bool isVRRegister(unsigned Reg) { + return Reg >= PPC::V0 && Reg <= PPC::V31; + } }; } Index: lib/Target/PowerPC/PPCInstrInfo.cpp =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.cpp +++ lib/Target/PowerPC/PPCInstrInfo.cpp @@ -859,15 +859,6 @@ llvm_unreachable("nop VSX copy"); DestReg = SuperReg; - } else if (PPC::VRRCRegClass.contains(DestReg) && - PPC::VSRCRegClass.contains(SrcReg)) { - unsigned SuperReg = - TRI->getMatchingSuperReg(DestReg, PPC::sub_128, &PPC::VSRCRegClass); - - if (VSXSelfCopyCrash && SrcReg == SuperReg) - llvm_unreachable("nop VSX copy"); - - DestReg = SuperReg; } else if (PPC::F8RCRegClass.contains(SrcReg) && PPC::VSRCRegClass.contains(DestReg)) { unsigned SuperReg = @@ -877,15 +868,6 @@ llvm_unreachable("nop VSX copy"); SrcReg = SuperReg; - } else 
if (PPC::VRRCRegClass.contains(SrcReg) && - PPC::VSRCRegClass.contains(DestReg)) { - unsigned SuperReg = - TRI->getMatchingSuperReg(SrcReg, PPC::sub_128, &PPC::VSRCRegClass); - - if (VSXSelfCopyCrash && DestReg == SuperReg) - llvm_unreachable("nop VSX copy"); - - SrcReg = SuperReg; } // Different class register copy @@ -1073,6 +1055,13 @@ PPCFunctionInfo *FuncInfo = MF.getInfo(); FuncInfo->setHasSpills(); + // Because VRRC and VSRC use incompatible store/load instruction, and backend + // can spill VRRC to a frame then reload it by using VSRC, we have to make + // sure this is not going to happen, so when target has VSX, we just + // spill/reload VRRC by VSRC. + if (Subtarget.hasVSX() && RC == &PPC::VRRCRegClass) + RC = &PPC::VSRCRegClass; + bool NonRI = false, SpillsVRS = false; if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs, NonRI, SpillsVRS)) @@ -1185,6 +1174,13 @@ PPCFunctionInfo *FuncInfo = MF.getInfo(); FuncInfo->setHasSpills(); + // Because VRRC and VSRC use incompatible store/load instruction, and backend + // can spill VRRC to a frame then reload it by using VSRC, we have to make + // sure this is not going to happen, so when target has VSX, we just + // spill/reload VRRC by VSRC. 
+ if (Subtarget.hasVSX() && RC == &PPC::VRRCRegClass) + RC = &PPC::VSRCRegClass; + bool NonRI = false, SpillsVRS = false; if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs, NonRI, SpillsVRS)) Index: lib/Target/PowerPC/PPCInstrInfo.td =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.td +++ lib/Target/PowerPC/PPCInstrInfo.td @@ -23,6 +23,15 @@ def SDT_PPClfiwx : SDTypeProfile<1, 1, [ // lfiw[az]x SDTCisVT<0, f64>, SDTCisPtrTy<1> ]>; +def SDT_PPCLxsizx : SDTypeProfile<1, 2, [ + SDTCisVT<0, f64>, SDTCisPtrTy<1>, SDTCisPtrTy<2> +]>; +def SDT_PPCstxsix : SDTypeProfile<0, 3, [ + SDTCisVT<0, f64>, SDTCisPtrTy<1>, SDTCisPtrTy<2> +]>; +def SDT_PPCVexts : SDTypeProfile<1, 2, [ + SDTCisVT<0, f64>, SDTCisVT<1, f64>, SDTCisPtrTy<2> +]>; def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; def SDT_PPCCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, @@ -108,6 +117,11 @@ [SDNPHasChain, SDNPMayLoad]>; def PPClfiwzx : SDNode<"PPCISD::LFIWZX", SDT_PPClfiwx, [SDNPHasChain, SDNPMayLoad]>; +def PPClxsizx : SDNode<"PPCISD::LXSIZX", SDT_PPCLxsizx, + [SDNPHasChain, SDNPMayLoad]>; +def PPCstxsix : SDNode<"PPCISD::STXSIX", SDT_PPCstxsix, + [SDNPHasChain, SDNPMayStore]>; +def PPCVexts : SDNode<"PPCISD::VEXTS", SDT_PPCVexts, []>; // Extract FPSCR (not modeled at the DAG level). 
def PPCmffs : SDNode<"PPCISD::MFFS", @@ -445,6 +459,12 @@ def vrrc : RegisterOperand { let ParserMatchClass = PPCRegVRRCAsmOperand; } +def PPCRegVFRCAsmOperand : AsmOperandClass { + let Name = "RegVFRC"; let PredicateMethod = "isRegNumber"; +} +def vfrc : RegisterOperand { + let ParserMatchClass = PPCRegVFRCAsmOperand; +} def PPCRegCRBITRCAsmOperand : AsmOperandClass { let Name = "RegCRBITRC"; let PredicateMethod = "isCRBitNumber"; } Index: lib/Target/PowerPC/PPCInstrVSX.td =================================================================== --- lib/Target/PowerPC/PPCInstrVSX.td +++ lib/Target/PowerPC/PPCInstrVSX.td @@ -89,6 +89,17 @@ } } +// Instruction form with a single input register for instructions such as +// XXPERMDI. The reason for defining this is that specifying multiple chained +// operands (such as loads) to an instruction will perform both chained +// operations rather than coalescing them into a single register. This simply +// forces the instruction to use the same register for both inputs. +class XX3Form_2s opcode, bits<5> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : XX3Form_2 { + let XB = XA; +} + def HasVSX : Predicate<"PPCSubTarget->hasVSX()">; def IsLittleEndian : Predicate<"PPCSubTarget->isLittleEndian()">; def IsBigEndian : Predicate<"!PPCSubTarget->isLittleEndian()">; @@ -96,6 +107,7 @@ let Predicates = [HasVSX] in { let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. +let UseVSXReg = 1 in { let hasSideEffects = 0 in { // VSX instructions don't have side effects. 
let Uses = [RM] in { @@ -783,6 +795,9 @@ def XXPERMDI : XX3Form_2<60, 10, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$DM), "xxpermdi $XT, $XA, $XB, $DM", IIC_VecPerm, []>; + let isCodeGenOnly = 1 in + def XXPERMDIs : XX3Form_2s<60, 10, (outs vsrc:$XT), (ins vfrc:$XA, u2imm:$DM), + "xxpermdi $XT, $XA, $XA, $DM", IIC_VecPerm, []>; def XXSEL : XX4Form<60, 3, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, vsrc:$XC), "xxsel $XT, $XA, $XB, $XC", IIC_VecPerm, []>; @@ -797,7 +812,12 @@ "xxspltw $XT, $XB, $UIM", IIC_VecPerm, [(set v4i32:$XT, (PPCxxsplt v4i32:$XB, imm32SExt16:$UIM))]>; + let isCodeGenOnly = 1 in + def XXSPLTWs : XX2Form_2<60, 164, + (outs vsrc:$XT), (ins vfrc:$XB, u2imm:$UIM), + "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>; } // hasSideEffects +} // UseVSXReg = 1 // SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after // instruction selection into a branch sequence. @@ -849,6 +869,12 @@ (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 3)>; def : InstAlias<"xxswapd $XT, $XB", (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 2)>; +def : InstAlias<"xxspltd $XT, $XB, 0", + (XXPERMDIs vsrc:$XT, vfrc:$XB, 0)>; +def : InstAlias<"xxspltd $XT, $XB, 1", + (XXPERMDIs vsrc:$XT, vfrc:$XB, 3)>; +def : InstAlias<"xxswapd $XT, $XB", + (XXPERMDIs vsrc:$XT, vfrc:$XB, 2)>; let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. 
@@ -1071,6 +1097,22 @@ } // AddedComplexity } // HasVSX +def ScalarLoads { + dag Li8 = (i32 (extloadi8 xoaddr:$src)); + dag ZELi8 = (i32 (zextloadi8 xoaddr:$src)); + dag ZELi8i64 = (i64 (zextloadi8 xoaddr:$src)); + dag SELi8 = (i32 (sext_inreg (extloadi8 xoaddr:$src), i8)); + dag SELi8i64 = (i64 (sext_inreg (extloadi8 xoaddr:$src), i8)); + + dag Li16 = (i32 (extloadi16 xoaddr:$src)); + dag ZELi16 = (i32 (zextloadi16 xoaddr:$src)); + dag ZELi16i64 = (i64 (zextloadi16 xoaddr:$src)); + dag SELi16 = (i32 (sextloadi16 xoaddr:$src)); + dag SELi16i64 = (i64 (sextloadi16 xoaddr:$src)); + + dag Li32 = (i32 (load xoaddr:$src)); +} + // The following VSX instructions were introduced in Power ISA 2.07 /* FIXME: if the operands are v2i64, these patterns will not match. we should define new patterns or otherwise match the same patterns @@ -1080,7 +1122,7 @@ def HasDirectMove : Predicate<"PPCSubTarget->hasDirectMove()">; let Predicates = [HasP8Vector] in { let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. 
- let isCommutable = 1 in { + let isCommutable = 1, UseVSXReg = 1 in { def XXLEQV : XX3Form<60, 186, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xxleqv $XT, $XA, $XB", IIC_VecGeneral, @@ -1090,11 +1132,12 @@ "xxlnand $XT, $XA, $XB", IIC_VecGeneral, [(set v4i32:$XT, (vnot_ppc (and v4i32:$XA, v4i32:$XB)))]>; - } // isCommutable + } // isCommutable, UseVSXReg def : Pat<(int_ppc_vsx_xxleqv v4i32:$A, v4i32:$B), (XXLEQV $A, $B)>; + let UseVSXReg = 1 in { def XXLORC : XX3Form<60, 170, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xxlorc $XT, $XA, $XB", IIC_VecGeneral, @@ -1122,6 +1165,7 @@ "stxsiwx $XT, $dst", IIC_LdStSTFD, [(PPCstfiwx f64:$XT, xoaddr:$dst)]>; } // mayStore + } // UseVSXReg = 1 def : Pat<(f64 (extloadf32 xoaddr:$src)), (COPY_TO_REGCLASS (LXSSPX xoaddr:$src), VSFRC)>; @@ -1149,6 +1193,7 @@ def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)), (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>; + let UseVSXReg = 1 in { // VSX Elementary Scalar FP arithmetic (SP) let isCommutable = 1 in { def XSADDSP : XX3Form<60, 0, @@ -1273,6 +1318,7 @@ "xscvdpspn $XT, $XB", IIC_VecFP, []>; def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB), "xscvspdpn $XT, $XB", IIC_VecFP, []>; + } // UseVSXReg = 1 let Predicates = [IsLittleEndian] in { def : Pat<(f32 (PPCfcfids (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))), @@ -1295,9 +1341,12 @@ def : Pat<(f32 (PPCfcfidus (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))), (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; } + def : Pat<(v4i32 (scalar_to_vector ScalarLoads.Li32)), + (v4i32 (XXSPLTWs (LXSIWAX xoaddr:$src), 1))>; } // AddedComplexity = 400 } // HasP8Vector +let UseVSXReg = 1 in { let Predicates = [HasDirectMove] in { // VSX direct move instructions def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsfrc:$XT), @@ -1332,6 +1381,7 @@ []>, Requires<[In64BitMode]>; } // IsISA3_0, HasDirectMove +} // UseVSXReg = 1 /* Direct moves of various widths from GPR's into VSR's. 
Each move lines the value up into element 0 (both BE and LE). Namely, entities smaller than @@ -1911,6 +1961,7 @@ : X_RD5_XO5_RS5; + let UseVSXReg = 1 in { // [PO T XO B XO BX /] class XX2_RT5_XO5_XB6 opcode, bits<5> xo2, bits<9> xo, string opc, list pattern> @@ -1929,6 +1980,7 @@ InstrItinClass itin, list pattern> : XX3Form; + } // UseVSXReg = 1 // [PO VRT VRA VRB XO /] class X_VT5_VA5_VB5 opcode, bits<10> xo, string opc, @@ -1997,7 +2049,8 @@ // DP/QP Compare Exponents def XSCMPEXPDP : XX3Form_1<60, 59, (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB), - "xscmpexpdp $crD, $XA, $XB", IIC_FPCompare, []>; + "xscmpexpdp $crD, $XA, $XB", IIC_FPCompare, []>, + UseVSXReg; def XSCMPEXPQP : X_BF3_VA5_VB5<63, 164, "xscmpexpqp", []>; // DP Compare ==, >=, >, != @@ -2011,6 +2064,7 @@ IIC_FPCompare, []>; def XSCMPNEDP : XX3_XT5_XA5_XB5<60, 27, "xscmpnedp", vsrc, vsfrc, vsfrc, IIC_FPCompare, []>; + let UseVSXReg = 1 in { // Vector Compare Not Equal def XVCMPNEDP : XX3Form_Rc<60, 123, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), @@ -2028,12 +2082,13 @@ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvcmpnesp. 
$XT, $XA, $XB", IIC_VecFPCompare, []>, isDOT; + } // UseVSXReg = 1 //===--------------------------------------------------------------------===// // Quad-Precision Floating-Point Conversion Instructions: // Convert DP -> QP - def XSCVDPQP : X_VT5_XO5_VB5_TyVB<63, 22, 836, "xscvdpqp", vsfrc, []>; + def XSCVDPQP : X_VT5_XO5_VB5_TyVB<63, 22, 836, "xscvdpqp", vfrc, []>; // Round & Convert QP -> DP (dword[1] is set to zero) def XSCVQPDP : X_VT5_XO5_VB5 <63, 20, 836, "xscvqpdp" , []>; @@ -2046,8 +2101,8 @@ def XSCVQPUWZ : X_VT5_XO5_VB5<63, 1, 836, "xscvqpuwz", []>; // Convert (Un)Signed DWord -> QP - def XSCVSDQP : X_VT5_XO5_VB5_TyVB<63, 10, 836, "xscvsdqp", vsfrc, []>; - def XSCVUDQP : X_VT5_XO5_VB5_TyVB<63, 2, 836, "xscvudqp", vsfrc, []>; + def XSCVSDQP : X_VT5_XO5_VB5_TyVB<63, 10, 836, "xscvsdqp", vfrc, []>; + def XSCVUDQP : X_VT5_XO5_VB5_TyVB<63, 2, 836, "xscvudqp", vfrc, []>; //===--------------------------------------------------------------------===// // Round to Floating-Point Integer Instructions @@ -2084,7 +2139,7 @@ // Insert Exponent DP/QP // XT NOTE: XT.dword[1] = 0xUUUU_UUUU_UUUU_UUUU def XSIEXPDP : XX1Form <60, 918, (outs vsrc:$XT), (ins g8rc:$rA, g8rc:$rB), - "xsiexpdp $XT, $rA, $rB", IIC_VecFP, []>; + "xsiexpdp $XT, $rA, $rB", IIC_VecFP, []>, UseVSXReg; // vB NOTE: only vB.dword[0] is used, that's why we don't use // X_VT5_VA5_VB5 form def XSIEXPQP : XForm_18<63, 868, (outs vrrc:$vT), (ins vrrc:$vA, vsfrc:$vB), @@ -2093,10 +2148,12 @@ // Extract Exponent/Significand DP/QP def XSXEXPDP : XX2_RT5_XO5_XB6<60, 0, 347, "xsxexpdp", []>; def XSXSIGDP : XX2_RT5_XO5_XB6<60, 1, 347, "xsxsigdp", []>; + def XSXEXPQP : X_VT5_XO5_VB5 <63, 2, 804, "xsxexpqp", []>; def XSXSIGQP : X_VT5_XO5_VB5 <63, 18, 804, "xsxsigqp", []>; // Vector Insert Word + let UseVSXReg = 1 in { // XB NOTE: Only XB.dword[1] is used, but we use vsrc on XB. 
def XXINSERTW : XX2_RD6_UIM5_RS6<60, 181, (outs vsrc:$XT), @@ -2110,6 +2167,7 @@ def XXEXTRACTUW : XX2_RD6_UIM5_RS6<60, 165, (outs vsfrc:$XT), (ins vsrc:$XB, u4imm:$UIMM), "xxextractuw $XT, $XB, $UIMM", IIC_VecFP, []>; + } // UseVSXReg = 1 // Vector Insert Exponent DP/SP def XVIEXPDP : XX3_XT5_XA5_XB5<60, 248, "xviexpdp", vsrc, vsrc, vsrc, @@ -2126,23 +2184,27 @@ //===--------------------------------------------------------------------===// // Test Data Class SP/DP/QP + let UseVSXReg = 1 in { def XSTSTDCSP : XX2_BF3_DCMX7_RS6<60, 298, (outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB), "xststdcsp $BF, $XB, $DCMX", IIC_VecFP, []>; def XSTSTDCDP : XX2_BF3_DCMX7_RS6<60, 362, (outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB), "xststdcdp $BF, $XB, $DCMX", IIC_VecFP, []>; + } // UseVSXReg = 1 def XSTSTDCQP : X_BF3_DCMX7_RS5 <63, 708, (outs crrc:$BF), (ins u7imm:$DCMX, vrrc:$vB), "xststdcqp $BF, $vB, $DCMX", IIC_VecFP, []>; // Vector Test Data Class SP/DP + let UseVSXReg = 1 in { def XVTSTDCSP : XX2_RD6_DCMX7_RS6<60, 13, 5, (outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB), "xvtstdcsp $XT, $XB, $DCMX", IIC_VecFP, []>; def XVTSTDCDP : XX2_RD6_DCMX7_RS6<60, 15, 5, (outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB), "xvtstdcdp $XT, $XB, $DCMX", IIC_VecFP, []>; + } // UseVSXReg = 1 //===--------------------------------------------------------------------===// @@ -2173,7 +2235,7 @@ // Vector Splat Immediate Byte def XXSPLTIB : X_RD6_IMM8<60, 360, (outs vsrc:$XT), (ins u8imm:$IMM8), - "xxspltib $XT, $IMM8", IIC_VecPerm, []>; + "xxspltib $XT, $IMM8", IIC_VecPerm, []>, UseVSXReg; //===--------------------------------------------------------------------===// // Vector/Scalar Load/Store Instructions @@ -2181,12 +2243,12 @@ let mayLoad = 1 in { // Load Vector def LXV : DQ_RD6_RS5_DQ12<61, 1, (outs vsrc:$XT), (ins memrix16:$src), - "lxv $XT, $src", IIC_LdStLFD, []>; + "lxv $XT, $src", IIC_LdStLFD, []>, UseVSXReg; // Load DWord - def LXSD : DSForm_1<57, 2, (outs vrrc:$vD), (ins memrix:$src), + def LXSD : 
DSForm_1<57, 2, (outs vfrc:$vD), (ins memrix:$src), "lxsd $vD, $src", IIC_LdStLFD, []>; // Load SP from src, convert it to DP, and place in dword[0] - def LXSSP : DSForm_1<57, 3, (outs vrrc:$vD), (ins memrix:$src), + def LXSSP : DSForm_1<57, 3, (outs vfrc:$vD), (ins memrix:$src), "lxssp $vD, $src", IIC_LdStLFD, []>; // [PO T RA RB XO TX] almost equal to [PO S RA RB XO SX], but has different @@ -2194,11 +2256,13 @@ class X_XT6_RA5_RB5 opcode, bits<10> xo, string opc, RegisterOperand vtype, list pattern> : XX1Form; + !strconcat(opc, " $XT, $src"), IIC_LdStLFD, pattern>, UseVSXReg; // Load as Integer Byte/Halfword & Zero Indexed - def LXSIBZX : X_XT6_RA5_RB5<31, 781, "lxsibzx", vsfrc, []>; - def LXSIHZX : X_XT6_RA5_RB5<31, 813, "lxsihzx", vsfrc, []>; + def LXSIBZX : X_XT6_RA5_RB5<31, 781, "lxsibzx", vsfrc, + [(set f64:$XT, (PPClxsizx xoaddr:$src, 1))]>; + def LXSIHZX : X_XT6_RA5_RB5<31, 813, "lxsihzx", vsfrc, + [(set f64:$XT, (PPClxsizx xoaddr:$src, 2))]>; // Load Vector Halfword*8/Byte*16 Indexed def LXVH8X : X_XT6_RA5_RB5<31, 812, "lxvh8x" , vsrc, []>; @@ -2214,28 +2278,34 @@ // Load Vector Word & Splat Indexed def LXVWSX : X_XT6_RA5_RB5<31, 364, "lxvwsx" , vsrc, []>; - } // end mayLoad + } // mayLoad let mayStore = 1 in { // Store Vector def STXV : DQ_RD6_RS5_DQ12<61, 5, (outs), (ins vsrc:$XT, memrix16:$dst), - "stxv $XT, $dst", IIC_LdStSTFD, []>; + "stxv $XT, $dst", IIC_LdStSTFD, []>, UseVSXReg; // Store DWord - def STXSD : DSForm_1<61, 2, (outs), (ins vrrc:$vS, memrix:$dst), + def STXSD : DSForm_1<61, 2, (outs), (ins vfrc:$vS, memrix:$dst), "stxsd $vS, $dst", IIC_LdStSTFD, []>; // Convert DP of dword[0] to SP, and Store to dst - def STXSSP : DSForm_1<61, 3, (outs), (ins vrrc:$vS, memrix:$dst), + def STXSSP : DSForm_1<61, 3, (outs), (ins vfrc:$vS, memrix:$dst), "stxssp $vS, $dst", IIC_LdStSTFD, []>; // [PO S RA RB XO SX] class X_XS6_RA5_RB5 opcode, bits<10> xo, string opc, RegisterOperand vtype, list pattern> : XX1Form; + !strconcat(opc, " $XT, $dst"), 
IIC_LdStSTFD, pattern>, UseVSXReg; // Store as Integer Byte/Halfword Indexed - def STXSIBX : X_XS6_RA5_RB5<31, 909, "stxsibx" , vsfrc, []>; - def STXSIHX : X_XS6_RA5_RB5<31, 941, "stxsihx" , vsfrc, []>; + def STXSIBX : X_XS6_RA5_RB5<31, 909, "stxsibx" , vsfrc, + [(PPCstxsix f64:$XT, xoaddr:$dst, 1)]>; + def STXSIHX : X_XS6_RA5_RB5<31, 941, "stxsihx" , vsfrc, + [(PPCstxsix f64:$XT, xoaddr:$dst, 2)]>; + let isCodeGenOnly = 1 in { + def STXSIBXv : X_XS6_RA5_RB5<31, 909, "stxsibx" , vrrc, []>; + def STXSIHXv : X_XS6_RA5_RB5<31, 941, "stxsihx" , vrrc, []>; + } // Store Vector Halfword*8/Byte*16 Indexed def STXVH8X : X_XS6_RA5_RB5<31, 940, "stxvh8x" , vsrc, []>; @@ -2248,7 +2318,7 @@ // Store Vector (Left-justified) with Length def STXVL : X_XS6_RA5_RB5<31, 397, "stxvl" , vsrc, []>; def STXVLL : X_XS6_RA5_RB5<31, 429, "stxvll" , vsrc, []>; - } // end mayStore + } // mayStore // Patterns for which instructions from ISA 3.0 are a better match let Predicates = [IsLittleEndian, HasP9Vector] in { @@ -2341,6 +2411,146 @@ (v4i32 (XXSPLTIB 255))>; def : Pat<(v2i64 immAllOnesV), (v2i64 (XXSPLTIB 255))>; + + // Build vectors from i8 loads + def : Pat<(v16i8 (scalar_to_vector ScalarLoads.Li8)), + (v16i8 (VSPLTBs 7, (LXSIBZX xoaddr:$src)))>; + def : Pat<(v8i16 (scalar_to_vector ScalarLoads.ZELi8)), + (v8i16 (VSPLTHs 3, (LXSIBZX xoaddr:$src)))>; + def : Pat<(v4i32 (scalar_to_vector ScalarLoads.ZELi8)), + (v4i32 (XXSPLTWs (LXSIBZX xoaddr:$src), 1))>; + def : Pat<(v2i64 (scalar_to_vector ScalarLoads.ZELi8i64)), + (v2i64 (XXPERMDIs (LXSIBZX xoaddr:$src), 0))>; + def : Pat<(v4i32 (scalar_to_vector ScalarLoads.SELi8)), + (v4i32 (XXSPLTWs (VEXTSB2Ws (LXSIBZX xoaddr:$src)), 1))>; + def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi8i64)), + (v2i64 (XXPERMDIs (VEXTSB2Ds (LXSIBZX xoaddr:$src)), 0))>; + + // Build vectors from i16 loads + def : Pat<(v8i16 (scalar_to_vector ScalarLoads.Li16)), + (v8i16 (VSPLTHs 3, (LXSIHZX xoaddr:$src)))>; + def : Pat<(v4i32 (scalar_to_vector 
ScalarLoads.ZELi16)), + (v4i32 (XXSPLTWs (LXSIHZX xoaddr:$src), 1))>; + def : Pat<(v2i64 (scalar_to_vector ScalarLoads.ZELi16i64)), + (v2i64 (XXPERMDIs (LXSIHZX xoaddr:$src), 0))>; + def : Pat<(v4i32 (scalar_to_vector ScalarLoads.SELi16)), + (v4i32 (XXSPLTWs (VEXTSH2Ws (LXSIHZX xoaddr:$src)), 1))>; + def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi16i64)), + (v2i64 (XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0))>; + + let Predicates = [IsBigEndian, HasP9Vector] in { + // Scalar stores of i8 + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst), + (STXSIBXv (VSLDOI $S, $S, 9), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst), + (STXSIBXv (VSLDOI $S, $S, 10), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst), + (STXSIBXv (VSLDOI $S, $S, 11), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst), + (STXSIBXv (VSLDOI $S, $S, 12), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst), + (STXSIBXv (VSLDOI $S, $S, 13), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst), + (STXSIBXv (VSLDOI $S, $S, 14), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst), + (STXSIBXv (VSLDOI $S, $S, 15), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst), + (STXSIBXv $S, xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst), + (STXSIBXv (VSLDOI $S, $S, 1), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst), + (STXSIBXv (VSLDOI $S, $S, 2), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst), + (STXSIBXv (VSLDOI $S, $S, 3), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst), + (STXSIBXv (VSLDOI $S, $S, 4), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 
(vector_extract v16i8:$S, 12)), xoaddr:$dst), + (STXSIBXv (VSLDOI $S, $S, 5), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst), + (STXSIBXv (VSLDOI $S, $S, 6), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst), + (STXSIBXv (VSLDOI $S, $S, 7), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst), + (STXSIBXv (VSLDOI $S, $S, 8), xoaddr:$dst)>; + + // Scalar stores of i16 + def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst), + (STXSIHXv (VSLDOI $S, $S, 10), xoaddr:$dst)>; + def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst), + (STXSIHXv (VSLDOI $S, $S, 12), xoaddr:$dst)>; + def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst), + (STXSIHXv (VSLDOI $S, $S, 14), xoaddr:$dst)>; + def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst), + (STXSIHXv $S, xoaddr:$dst)>; + def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst), + (STXSIHXv (VSLDOI $S, $S, 2), xoaddr:$dst)>; + def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst), + (STXSIHXv (VSLDOI $S, $S, 4), xoaddr:$dst)>; + def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst), + (STXSIHXv (VSLDOI $S, $S, 6), xoaddr:$dst)>; + def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst), + (STXSIHXv (VSLDOI $S, $S, 8), xoaddr:$dst)>; + } // IsBigEndian, HasP9Vector + + let Predicates = [IsLittleEndian, HasP9Vector] in { + // Scalar stores of i8 + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst), + (STXSIBXv (VSLDOI $S, $S, 8), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst), + (STXSIBXv (VSLDOI $S, $S, 7), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst), + (STXSIBXv (VSLDOI $S, $S, 6), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 
(vector_extract v16i8:$S, 3)), xoaddr:$dst), + (STXSIBXv (VSLDOI $S, $S, 5), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst), + (STXSIBXv (VSLDOI $S, $S, 4), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst), + (STXSIBXv (VSLDOI $S, $S, 3), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst), + (STXSIBXv (VSLDOI $S, $S, 2), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst), + (STXSIBXv (VSLDOI $S, $S, 1), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst), + (STXSIBXv $S, xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst), + (STXSIBXv (VSLDOI $S, $S, 15), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst), + (STXSIBXv (VSLDOI $S, $S, 14), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst), + (STXSIBXv (VSLDOI $S, $S, 13), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst), + (STXSIBXv (VSLDOI $S, $S, 12), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst), + (STXSIBXv (VSLDOI $S, $S, 11), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst), + (STXSIBXv (VSLDOI $S, $S, 10), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst), + (STXSIBXv (VSLDOI $S, $S, 9), xoaddr:$dst)>; + + // Scalar stores of i16 + def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst), + (STXSIHXv (VSLDOI $S, $S, 8), xoaddr:$dst)>; + def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst), + (STXSIHXv (VSLDOI $S, $S, 6), xoaddr:$dst)>; + def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst), + (STXSIHXv (VSLDOI $S, $S, 4), xoaddr:$dst)>; + def : Pat<(truncstorei16 (i32 
(vector_extract v8i16:$S, 3)), xoaddr:$dst), + (STXSIHXv (VSLDOI $S, $S, 2), xoaddr:$dst)>; + def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst), + (STXSIHXv $S, xoaddr:$dst)>; + def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst), + (STXSIHXv (VSLDOI $S, $S, 14), xoaddr:$dst)>; + def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst), + (STXSIHXv (VSLDOI $S, $S, 12), xoaddr:$dst)>; + def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst), + (STXSIHXv (VSLDOI $S, $S, 10), xoaddr:$dst)>; + } // IsLittleEndian, HasP9Vector + + // Vector sign extensions + def : Pat<(f64 (PPCVexts f64:$A, 1)), + (f64 (COPY_TO_REGCLASS (VEXTSB2Ds $A), VSFRC))>; + def : Pat<(f64 (PPCVexts f64:$A, 2)), + (f64 (COPY_TO_REGCLASS (VEXTSH2Ds $A), VSFRC))>; } // end HasP9Vector, AddedComplexity let Predicates = [IsISA3_0, HasDirectMove, IsLittleEndian] in { Index: lib/Target/PowerPC/PPCMIPeephole.cpp =================================================================== --- lib/Target/PowerPC/PPCMIPeephole.cpp +++ lib/Target/PowerPC/PPCMIPeephole.cpp @@ -170,11 +170,68 @@ ToErase = &MI; Simplified = true; } + } else if ((Immed == 0 || Immed == 3) && + DefMI && DefMI->getOpcode() == PPC::XXPERMDIs) { + // Splat fed by another splat - switch the output of the first + // and remove the second. + DefMI->getOperand(0).setReg(MI.getOperand(0).getReg()); + ToErase = &MI; + Simplified = true; + DEBUG(dbgs() << "Removing redundant splat: "); + DEBUG(MI.dump()); } } } break; } + case PPC::VSPLTB: + case PPC::VSPLTH: + case PPC::XXSPLTW: { + unsigned MyOpcode = MI.getOpcode(); + unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 
1 : 2; + unsigned TrueReg = lookThruCopyLike(MI.getOperand(OpNo).getReg()); + MachineInstr *DefMI = MRI->getVRegDef(TrueReg); + if (!DefMI) + break; + unsigned DefOpcode = DefMI->getOpcode(); + bool SameOpcode = (MyOpcode == DefOpcode) || + (MyOpcode == PPC::VSPLTB && DefOpcode == PPC::VSPLTBs) || + (MyOpcode == PPC::VSPLTH && DefOpcode == PPC::VSPLTHs) || + (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs); + // Splat fed by another splat - switch the output of the first + // and remove the second. + if (SameOpcode) { + DefMI->getOperand(0).setReg(MI.getOperand(0).getReg()); + ToErase = &MI; + Simplified = true; + DEBUG(dbgs() << "Removing redundant splat: "); + DEBUG(MI.dump()); + } + // Splat fed by a shift. Usually when we align value to splat into + // vector element zero. + if (DefOpcode == PPC::XXSLDWI) { + unsigned ShiftRes = DefMI->getOperand(0).getReg(); + unsigned ShiftOp1 = DefMI->getOperand(1).getReg(); + unsigned ShiftOp2 = DefMI->getOperand(2).getReg(); + unsigned ShiftImm = DefMI->getOperand(3).getImm(); + unsigned SplatImm = MI.getOperand(2).getImm(); + if (ShiftOp1 == ShiftOp2) { + unsigned NewElem = (SplatImm + ShiftImm) & 0x3; + if (MRI->hasOneNonDBGUse(ShiftRes)) { + DEBUG(dbgs() << "Removing redundant shift: "); + DEBUG(DefMI->dump()); + ToErase = DefMI; + } + Simplified = true; + DEBUG(dbgs() << "Changing splat immediate from " << SplatImm << + " to " << NewElem << " in instruction: "); + DEBUG(MI.dump()); + MI.getOperand(1).setReg(ShiftOp1); + MI.getOperand(2).setImm(NewElem); + } + } + break; + } } } Index: lib/Target/PowerPC/PPCRegisterInfo.cpp =================================================================== --- lib/Target/PowerPC/PPCRegisterInfo.cpp +++ lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -303,7 +303,6 @@ case PPC::VRRCRegClassID: case PPC::VFRCRegClassID: case PPC::VSLRCRegClassID: - case PPC::VSHRCRegClassID: return 32 - DefaultSafety; case PPC::VSRCRegClassID: case PPC::VSFRCRegClassID: Index: 
lib/Target/PowerPC/PPCRegisterInfo.td =================================================================== --- lib/Target/PowerPC/PPCRegisterInfo.td +++ lib/Target/PowerPC/PPCRegisterInfo.td @@ -17,7 +17,6 @@ def sub_un : SubRegIndex<1, 3>; def sub_32 : SubRegIndex<32>; def sub_64 : SubRegIndex<64>; -def sub_128 : SubRegIndex<128>; } @@ -79,15 +78,6 @@ let SubRegIndices = [sub_64]; } -// VSRH - One of the 32 128-bit VSX registers that overlap with the vector -// registers. -class VSRH : PPCReg { - let HWEncoding{4-0} = SubReg.HWEncoding{4-0}; - let HWEncoding{5} = 1; - let SubRegs = [SubReg]; - let SubRegIndices = [sub_128]; -} - // CR - One of the 8 4-bit condition registers class CR num, string n, list subregs> : PPCReg { let HWEncoding{2-0} = num; @@ -116,9 +106,12 @@ DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>; } -// Floating-point vector subregisters (for VSX) +// 64-bit Floating-point subregisters of Altivec registers +// Note: the register names are v0-v31 or vs32-vs63 depending on the use. +// Custom C++ code is used to produce the correct name and encoding. foreach Index = 0-31 in { - def VF#Index : VF; + def VF#Index : VF, + DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>; } // QPX Floating-point registers @@ -138,9 +131,11 @@ def VSL#Index : VSRL("F"#Index), "vs"#Index>, DwarfRegAlias("F"#Index)>; } -foreach Index = 0-31 in { - def VSH#Index : VSRH("V"#Index), "vs" # !add(Index, 32)>, - DwarfRegAlias("V"#Index)>; + +// Dummy VSX registers, this defines string: "vs32"-"vs63", and is only used for +// asm printing. +foreach Index = 32-63 in { + def VSX#Index : PPCReg<"vs"#Index>; } // The reprsentation of r0 when treated as the constant 0. 
@@ -288,7 +283,7 @@ (sequence "F%u", 31, 14))>; def F4RC : RegisterClass<"PPC", [f32], 32, (add F8RC)>; -def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v2i64,v1i128,v4f32], 128, +def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v2i64,v2f64,v1i128,v4f32], 128, (add V2, V3, V4, V5, V0, V1, V6, V7, V8, V9, V10, V11, V12, V13, V14, V15, V16, V17, V18, V19, V31, V30, V29, V28, V27, V26, V25, V24, V23, V22, V21, V20)>; @@ -298,14 +293,8 @@ def VSLRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128, (add (sequence "VSL%u", 0, 13), (sequence "VSL%u", 31, 14))>; -def VSHRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128, - (add VSH2, VSH3, VSH4, VSH5, VSH0, VSH1, VSH6, VSH7, - VSH8, VSH9, VSH10, VSH11, VSH12, VSH13, VSH14, - VSH15, VSH16, VSH17, VSH18, VSH19, VSH31, VSH30, - VSH29, VSH28, VSH27, VSH26, VSH25, VSH24, VSH23, - VSH22, VSH21, VSH20)>; def VSRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128, - (add VSLRC, VSHRC)>; + (add VSLRC, VRRC)>; // Register classes for the 64-bit "scalar" VSX subregisters. def VFRC : RegisterClass<"PPC", [f64], 64, Index: lib/Target/PowerPC/PPCVSXCopy.cpp =================================================================== --- lib/Target/PowerPC/PPCVSXCopy.cpp +++ lib/Target/PowerPC/PPCVSXCopy.cpp @@ -101,11 +101,8 @@ // This is a copy *to* a VSX register from a non-VSX register. Changed = true; - const TargetRegisterClass *SrcRC = - IsVRReg(SrcMO.getReg(), MRI) ? &PPC::VSHRCRegClass : - &PPC::VSLRCRegClass; + const TargetRegisterClass *SrcRC = &PPC::VSLRCRegClass; assert((IsF8Reg(SrcMO.getReg(), MRI) || - IsVRReg(SrcMO.getReg(), MRI) || IsVSSReg(SrcMO.getReg(), MRI) || IsVSFReg(SrcMO.getReg(), MRI)) && "Unknown source for a VSX copy"); @@ -116,8 +113,7 @@ .addImm(1) // add 1, not 0, because there is no implicit clearing // of the high bits. .addOperand(SrcMO) - .addImm(IsVRReg(SrcMO.getReg(), MRI) ? 
PPC::sub_128 - : PPC::sub_64); + .addImm(PPC::sub_64); // The source of the original copy is now the new virtual register. SrcMO.setReg(NewVReg); @@ -126,13 +122,10 @@ // This is a copy *from* a VSX register to a non-VSX register. Changed = true; - const TargetRegisterClass *DstRC = - IsVRReg(DstMO.getReg(), MRI) ? &PPC::VSHRCRegClass : - &PPC::VSLRCRegClass; + const TargetRegisterClass *DstRC = &PPC::VSLRCRegClass; assert((IsF8Reg(DstMO.getReg(), MRI) || IsVSFReg(DstMO.getReg(), MRI) || - IsVSSReg(DstMO.getReg(), MRI) || - IsVRReg(DstMO.getReg(), MRI)) && + IsVSSReg(DstMO.getReg(), MRI)) && "Unknown destination for a VSX copy"); // Copy the VSX value into a new VSX register of the correct subclass. @@ -143,8 +136,7 @@ // Transform the original copy into a subregister extraction copy. SrcMO.setReg(NewVReg); - SrcMO.setSubReg(IsVRReg(DstMO.getReg(), MRI) ? PPC::sub_128 : - PPC::sub_64); + SrcMO.setSubReg(PPC::sub_64); } } Index: test/CodeGen/PowerPC/inline-asm-scalar-to-vector-error.ll =================================================================== --- test/CodeGen/PowerPC/inline-asm-scalar-to-vector-error.ll +++ test/CodeGen/PowerPC/inline-asm-scalar-to-vector-error.ll @@ -7,8 +7,5 @@ ; CHECK: scalar-to-vector conversion failed, possible invalid constraint for vector type tail call void asm sideeffect "nop", "{vsl1}"(i32 %x) nounwind - ; CHECK: scalar-to-vector conversion failed, possible invalid constraint for vector type - tail call void asm sideeffect "nop", "{vsh1}"(i32 %x) nounwind - ret void } Index: test/CodeGen/PowerPC/load-v4i8-improved.ll =================================================================== --- test/CodeGen/PowerPC/load-v4i8-improved.ll +++ test/CodeGen/PowerPC/load-v4i8-improved.ll @@ -1,8 +1,7 @@ ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck \ ; RUN: -implicit-check-not vmrg -implicit-check-not=vperm %s ; RUN: llc -verify-machineinstrs -mcpu=pwr8 
-mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck \ -; RUN: -implicit-check-not vmrg -implicit-check-not=vperm %s \ -; RUN: --check-prefix=CHECK-BE +; RUN: -implicit-check-not vmrg -implicit-check-not=vperm %s define <16 x i8> @test(i32* %s, i32* %t) { entry: @@ -11,13 +10,6 @@ %2 = shufflevector <4 x i8> %1, <4 x i8> undef, <16 x i32> ret <16 x i8> %2 ; CHECK-LABEL: test -; CHECK: lwz [[GPR:[0-9]+]], 0(3) -; CHECK: mtvsrd [[VSR:[0-9]+]], [[GPR]] -; CHECK: xxswapd [[SWP:[0-9]+]], [[VSR]] -; CHECK: xxspltw 34, [[SWP]], 3 -; CHECK-BE-LABEL: test -; CHECK-BE: lwz [[GPR:[0-9]+]], 0(3) -; CHECK-BE: sldi [[SHL:[0-9]+]], [[GPR]], 32 -; CHECK-BE: mtvsrd [[VSR:[0-9]+]], [[SHL]] -; CHECK-BE: xxspltw 34, [[VSR]], 0 +; CHECK: lxsiwax 34, 0, 3 +; CHECK: xxspltw 34, 34, 1 } Index: test/CodeGen/PowerPC/machine-combiner.ll =================================================================== --- test/CodeGen/PowerPC/machine-combiner.ll +++ test/CodeGen/PowerPC/machine-combiner.ll @@ -98,7 +98,6 @@ ; CHECK-PWR: xvaddsp [[REG0:[0-9]+]], 34, 35 ; CHECK-PWR: xvaddsp [[REG1:[0-9]+]], 36, 37 ; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]] -; CHECK-PWR: # kill ; CHECK-NEXT: blr %t0 = fadd <4 x float> %x0, %x1 @@ -116,7 +115,6 @@ ; CHECK-PWR: xvaddsp [[REG0:[0-9]+]], 34, 35 ; CHECK-PWR: xvaddsp [[REG1:[0-9]+]], 36, 37 ; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]] -; CHECK-PWR: # kill ; CHECK-NEXT: blr %t0 = fadd <4 x float> %x0, %x1 @@ -134,7 +132,6 @@ ; CHECK-PWR: xvaddsp [[REG0:[0-9]+]], 34, 35 ; CHECK-PWR: xvaddsp [[REG1:[0-9]+]], 36, 37 ; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]] -; CHECK-PWR: # kill ; CHECK-NEXT: blr %t0 = fadd <4 x float> %x0, %x1 @@ -152,7 +149,6 @@ ; CHECK-PWR: xvaddsp [[REG0:[0-9]+]], 34, 35 ; CHECK-PWR: xvaddsp [[REG1:[0-9]+]], 36, 37 ; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]] -; CHECK-PWR: # kill ; CHECK-NEXT: blr %t0 = fadd <4 x float> %x0, %x1 Index: test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll 
=================================================================== --- test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll +++ test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll @@ -63,7 +63,7 @@ ret <2 x i64> %splat.splat ; CHECK: mtvsrd {{[0-9]+}}, 3 ; CHECK-LE: mtvsrd [[REG1:[0-9]+]], 3 -; CHECK-LE: xxspltd [[REG1]], [[REG1]], 0 +; CHECK-LE: xxspltd 34, [[REG1]], 0 } ; Function Attrs: nounwind @@ -75,9 +75,10 @@ %splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0 %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer ret <4 x float> %splat.splat -; CHECK: xscvdpspn {{[0-9]+}}, 1 +; CHECK: xscvdpspn [[REG1:[0-9]+]], 1 +; CHECK: xxspltw 34, [[REG1]] ; CHECK-LE: xscvdpspn [[REG1:[0-9]+]], 1 -; CHECK-LE: xxsldwi {{[0-9]+}}, [[REG1]], [[REG1]], 1 +; CHECK-LE: xxspltw 34, [[REG1]] } ; The optimization to remove stack operations from PPCDAGToDAGISel::Select Index: test/CodeGen/PowerPC/power9-moves-and-splats.ll =================================================================== --- test/CodeGen/PowerPC/power9-moves-and-splats.ll +++ test/CodeGen/PowerPC/power9-moves-and-splats.ll @@ -7,10 +7,18 @@ define <2 x i64> @test1(i64 %a, i64 %b) { entry: +; The FIXME below is due to the lowering for BUILD_VECTOR needing a re-vamp +; which will happen in a subsequent patch. 
; CHECK-LABEL: test1 -; CHECK: mtvsrdd 34, 4, 3 +; FIXME: mtvsrdd 34, 4, 3 +; CHECK: mtvsrd {{[0-9]+}}, 3 +; CHECK: mtvsrd {{[0-9]+}}, 4 +; CHECK: xxmrgld ; CHECK-BE-LABEL: test1 -; CHECK-BE: mtvsrdd 34, 3, 4 +; FIXME-BE: mtvsrdd 34, 3, 4 +; CHECK-BE: mtvsrd {{[0-9]+}}, 4 +; CHECK-BE: mtvsrd {{[0-9]+}}, 3 +; CHECK-BE: xxmrghd %vecins = insertelement <2 x i64> undef, i64 %a, i32 0 %vecins1 = insertelement <2 x i64> %vecins, i64 %b, i32 1 ret <2 x i64> %vecins1 Index: test/CodeGen/PowerPC/ppc64-i128-abi.ll =================================================================== --- test/CodeGen/PowerPC/ppc64-i128-abi.ll +++ test/CodeGen/PowerPC/ppc64-i128-abi.ll @@ -55,9 +55,12 @@ ; CHECK-LE: blr ; CHECK-P9-LABEL: @v1i128_increment_by_one -; CHECK-P9-DAG: li [[R1:r[0-9]+]], 1 -; CHECK-P9-DAG: li [[R2:r[0-9]+]], 0 -; CHECK-P9: mtvsrdd [[V1:v[0-9]+]], [[R2]], [[R1]] +; The below FIXME is due to the lowering for BUILD_VECTOR that will be fixed +; in a subsequent patch. +; FIXME: li [[R1:r[0-9]+]], 1 +; FIXME: li [[R2:r[0-9]+]], 0 +; FIXME: mtvsrdd [[V1:v[0-9]+]], [[R2]], [[R1]] +; CHECK-P9: lxvx [[V1:v[0-9]+]] ; CHECK-P9: vadduqm v2, v2, [[V1]] ; CHECK-P9: blr Index: test/CodeGen/PowerPC/select-i1-vs-i1.ll =================================================================== --- test/CodeGen/PowerPC/select-i1-vs-i1.ll +++ test/CodeGen/PowerPC/select-i1-vs-i1.ll @@ -714,18 +714,12 @@ %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2 ret <4 x float> %cond -; FIXME: This test (and the other v4f32 tests) should use the same bclr -; technique as the v2f64 tests below. 
- ; CHECK-LABEL: @testv4floatslt ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34 -; CHECK-DAG: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: xxlor [[REG2]], 35, 35 -; CHECK: .LBB[[BB]]: -; CHECK: xxlor 34, [[REG2]], [[REG2]] +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: vor 2, 3, 3 ; CHECK: blr } @@ -740,12 +734,9 @@ ; CHECK-LABEL: @testv4floatult ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34 -; CHECK-DAG: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: xxlor [[REG2]], 35, 35 -; CHECK: .LBB[[BB]]: -; CHECK: xxlor 34, [[REG2]], [[REG2]] +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: vor 2, 3, 3 ; CHECK: blr } @@ -760,12 +751,9 @@ ; CHECK-LABEL: @testv4floatsle ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34 -; CHECK-DAG: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: xxlor [[REG2]], 35, 35 -; CHECK: .LBB[[BB]]: -; CHECK: xxlor 34, [[REG2]], [[REG2]] +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: vor 2, 3, 3 ; CHECK: blr } @@ -780,12 +768,9 @@ ; CHECK-LABEL: @testv4floatule ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34 -; CHECK-DAG: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: xxlor [[REG2]], 35, 35 -; CHECK: .LBB[[BB]]: -; CHECK: xxlor 34, [[REG2]], [[REG2]] +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: vor 2, 3, 3 ; CHECK: blr } @@ -800,12 +785,11 @@ ; CHECK-LABEL: 
@testv4floateq ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK-DAG: xxlor [[REG2:[0-9]+]], 35, 35 -; CHECK-DAG: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: xxlor [[REG2]], 34, 34 -; CHECK: .LBB[[BB]]: -; CHECK: xxlor 34, [[REG2]], [[REG2]] +; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB1:[0-9_]+]] +; CHECK: vor 3, 2, 2 +; CHECK: .LBB[[BB1]] +; CHECK: vor 2, 3, 3 ; CHECK: blr } @@ -820,12 +804,9 @@ ; CHECK-LABEL: @testv4floatsge ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34 -; CHECK-DAG: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: xxlor [[REG2]], 35, 35 -; CHECK: .LBB[[BB]]: -; CHECK: xxlor 34, [[REG2]], [[REG2]] +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: vor 2, 3, 3 ; CHECK: blr } @@ -840,12 +821,9 @@ ; CHECK-LABEL: @testv4floatuge ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34 -; CHECK-DAG: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: xxlor [[REG2]], 35, 35 -; CHECK: .LBB[[BB]]: -; CHECK: xxlor 34, [[REG2]], [[REG2]] +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: vor 2, 3, 3 ; CHECK: blr } @@ -860,12 +838,9 @@ ; CHECK-LABEL: @testv4floatsgt ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34 -; CHECK-DAG: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: xxlor [[REG2]], 35, 35 -; CHECK: .LBB[[BB]]: -; CHECK: xxlor 34, [[REG2]], [[REG2]] +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: vor 2, 3, 3 ; CHECK: blr } @@ 
-880,12 +855,9 @@ ; CHECK-LABEL: @testv4floatugt ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34 -; CHECK-DAG: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: xxlor [[REG2]], 35, 35 -; CHECK: .LBB[[BB]]: -; CHECK: xxlor 34, [[REG2]], [[REG2]] +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: vor 2, 3, 3 ; CHECK: blr } @@ -900,12 +872,9 @@ ; CHECK-LABEL: @testv4floatne ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34 -; CHECK-DAG: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: xxlor [[REG2]], 35, 35 -; CHECK: .LBB[[BB]]: -; CHECK: xxlor 34, [[REG2]], [[REG2]] +; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: vor 2, 3, 3 ; CHECK: blr } @@ -1023,7 +992,7 @@ ; CHECK: bc 12, [[REG1]], .LBB[[BB55:[0-9_]+]] ; CHECK: vor 3, 2, 2 ; CHECK: .LBB[[BB55]] -; CHECK: xxlor 34, 35, 35 +; CHECK: vor 2, 3, 3 ; CHECK: blr } Index: test/CodeGen/PowerPC/sjlj.ll =================================================================== --- test/CodeGen/PowerPC/sjlj.ll +++ test/CodeGen/PowerPC/sjlj.ll @@ -66,7 +66,7 @@ ; CHECK-NOT: mfspr ; CHECK-DAG: stfd -; CHECK-DAG: stvx +; CHECK-DAG: stxvd2x ; CHECK-DAG: addis [[REG:[0-9]+]], 2, env_sigill@toc@ha ; CHECK-DAG: std 31, env_sigill@toc@l([[REG]]) @@ -82,7 +82,7 @@ ; CHECK: .LBB1_4: ; CHECK: lfd -; CHECK: lvx +; CHECK: lxvd2x ; CHECK: ld ; CHECK: blr @@ -93,11 +93,11 @@ ; CHECK: li 3, 0 ; CHECK-NOAV: @main -; CHECK-NOAV-NOT: stvx +; CHECK-NOAV-NOT: stxvd2x ; CHECK-NOAV: bcl ; CHECK-NOAV: mflr ; CHECK-NOAV: bl foo -; CHECK-NOAV-NOT: lvx +; CHECK-NOAV-NOT: lxvd2x ; CHECK-NOAV: blr } Index: test/CodeGen/PowerPC/vsx-args.ll =================================================================== --- 
test/CodeGen/PowerPC/vsx-args.ll +++ test/CodeGen/PowerPC/vsx-args.ll @@ -1,5 +1,6 @@ ; RUN: llc -verify-machineinstrs < %s -mcpu=pwr7 -mattr=+vsx | FileCheck %s -; RUN: llc -verify-machineinstrs < %s -mcpu=pwr7 -mattr=+vsx -fast-isel -O0 | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mcpu=pwr7 -mattr=+vsx -fast-isel -O0 | \ +; RUN: FileCheck -check-prefix=CHECK-FISL %s target datalayout = "E-m:e-i64:64-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -13,13 +14,23 @@ ; CHECK-LABEL: @main ; CHECK-DAG: vor [[V:[0-9]+]], 2, 2 -; CHECK-DAG: xxlor 34, 35, 35 -; CHECK-DAG: xxlor 35, 36, 36 +; CHECK-DAG: vor 2, 3, 3 +; CHECK-DAG: vor 3, 4, 4 ; CHECK-DAG: vor 4, [[V]], [[V]] -; CHECK-DAG: bl sv -; CHECK-DAG: lxvd2x [[VC:[0-9]+]], +; CHECK: bl sv +; CHECK: lxvd2x [[VC:[0-9]+]], ; CHECK: xvadddp 34, 34, [[VC]] ; CHECK: blr + +; CHECK-FISL-LABEL: @main +; CHECK-FISL: stxvd2x 34 +; CHECK-FISL: vor 2, 3, 3 +; CHECK-FISL: vor 3, 4, 4 +; CHECK-FISL: lxvd2x 36 +; CHECK-FISL: bl sv +; CHECK-FISL: lxvd2x [[VC:[0-9]+]], +; CHECK-FISL: xvadddp 34, 34, [[VC]] +; CHECK-FISL: blr } attributes #0 = { noinline nounwind readnone } Index: test/CodeGen/PowerPC/vsx-infl-copy1.ll =================================================================== --- test/CodeGen/PowerPC/vsx-infl-copy1.ll +++ test/CodeGen/PowerPC/vsx-infl-copy1.ll @@ -11,7 +11,15 @@ br label %vector.body ; CHECK-LABEL: @_Z8example9Pj -; CHECK: xxlor +; CHECK: vor +; CHECK: vor +; CHECK: vor +; CHECK: vor +; CHECK: vor +; CHECK: vor +; CHECK: vor +; CHECK: vor +; CHECK: vor vector.body: ; preds = %vector.body, %entry %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] Index: test/CodeGen/PowerPC/vsx-p8.ll =================================================================== --- test/CodeGen/PowerPC/vsx-p8.ll +++ test/CodeGen/PowerPC/vsx-p8.ll @@ -34,8 +34,7 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test32u -; CHECK-FISL: lxvw4x 0, 0, 3 -; CHECK-FISL: xxlor 34, 0, 0 +; CHECK-FISL: lxvw4x 34, 0, 3 ; 
CHECK-FISL: blr } @@ -48,8 +47,7 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test33u -; CHECK-FISL: vor 3, 2, 2 -; CHECK-FISL: stxvw4x 35, 0, 3 +; CHECK-FISL: stxvw4x 34, 0, 3 ; CHECK-FISL: blr } Index: test/CodeGen/PowerPC/vsx-partword-int-loads-and-stores.ll =================================================================== --- test/CodeGen/PowerPC/vsx-partword-int-loads-and-stores.ll +++ test/CodeGen/PowerPC/vsx-partword-int-loads-and-stores.ll @@ -0,0 +1,1132 @@ +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s \ +; RUN: --check-prefix=CHECK-BE +; Function Attrs: norecurse nounwind readonly +define <16 x i8> @vecucuc(i8* nocapture readonly %ptr) { +entry: + %0 = load i8, i8* %ptr, align 1 + %splat.splatinsert = insertelement <16 x i8> undef, i8 %0, i32 0 + %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer + ret <16 x i8> %splat.splat +; CHECK-LABEL: vecucuc +; CHECK: lxsibzx 34, 0, 3 +; CHECK-NEXT: vspltb 2, 2, 7 +; CHECK-BE-LABEL: vecucuc +; CHECK-BE: lxsibzx 34, 0, 3 +; CHECK-BE-NEXT: vspltb 2, 2, 7 +} + +; Function Attrs: norecurse nounwind readonly +define <8 x i16> @vecusuc(i8* nocapture readonly %ptr) { +entry: + %0 = load i8, i8* %ptr, align 1 + %conv = zext i8 %0 to i16 + %splat.splatinsert = insertelement <8 x i16> undef, i16 %conv, i32 0 + %splat.splat = shufflevector <8 x i16> %splat.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer + ret <8 x i16> %splat.splat +; CHECK-LABEL: vecusuc +; CHECK: lxsibzx 34, 0, 3 +; CHECK-NEXT: vsplth 2, 2, 3 +; CHECK-BE-LABEL: vecusuc +; CHECK-BE: lxsibzx 34, 0, 3 +; CHECK-BE-NEXT: vsplth 2, 2, 3 +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @vecuiuc(i8* nocapture readonly %ptr) { +entry: + %0 = load i8, i8* %ptr, align 1 + %conv = zext i8 %0 to i32 + %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0 + 
%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer + ret <4 x i32> %splat.splat +; CHECK-LABEL: vecuiuc +; CHECK: lxsibzx 34, 0, 3 +; CHECK-NEXT: xxspltw 34, 34, 1 +; CHECK-BE-LABEL: vecuiuc +; CHECK-BE: lxsibzx 34, 0, 3 +; CHECK-BE-NEXT: xxspltw 34, 34, 1 +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @veculuc(i8* nocapture readonly %ptr) { +entry: + %0 = load i8, i8* %ptr, align 1 + %conv = zext i8 %0 to i64 + %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0 + %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer + ret <2 x i64> %splat.splat +; CHECK-LABEL: veculuc +; CHECK: lxsibzx 34, 0, 3 +; CHECK-NEXT: xxspltd 34, 34, 0 +; CHECK-BE-LABEL: veculuc +; CHECK-BE: lxsibzx 34, 0, 3 +; CHECK-BE-NEXT: xxspltd 34, 34, 0 +} + +; Function Attrs: norecurse nounwind readonly +define <16 x i8> @vecscuc(i8* nocapture readonly %ptr) { +entry: + %0 = load i8, i8* %ptr, align 1 + %splat.splatinsert = insertelement <16 x i8> undef, i8 %0, i32 0 + %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer + ret <16 x i8> %splat.splat +; CHECK-LABEL: vecscuc +; CHECK: lxsibzx 34, 0, 3 +; CHECK-NEXT: vspltb 2, 2, 7 +; CHECK-BE-LABEL: vecscuc +; CHECK-BE: lxsibzx 34, 0, 3 +; CHECK-BE-NEXT: vspltb 2, 2, 7 +} + +; Function Attrs: norecurse nounwind readonly +define <8 x i16> @vecssuc(i8* nocapture readonly %ptr) { +entry: + %0 = load i8, i8* %ptr, align 1 + %conv = zext i8 %0 to i16 + %splat.splatinsert = insertelement <8 x i16> undef, i16 %conv, i32 0 + %splat.splat = shufflevector <8 x i16> %splat.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer + ret <8 x i16> %splat.splat +; CHECK-LABEL: vecssuc +; CHECK: lxsibzx 34, 0, 3 +; CHECK-NEXT: vsplth 2, 2, 3 +; CHECK-BE-LABEL: vecssuc +; CHECK-BE: lxsibzx 34, 0, 3 +; CHECK-BE-NEXT: vsplth 2, 2, 3 +} + +; Function Attrs: norecurse nounwind readonly 
+define <4 x i32> @vecsiuc(i8* nocapture readonly %ptr) { +entry: + %0 = load i8, i8* %ptr, align 1 + %conv = zext i8 %0 to i32 + %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0 + %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer + ret <4 x i32> %splat.splat +; CHECK-LABEL: vecsiuc +; CHECK: lxsibzx 34, 0, 3 +; CHECK-NEXT: xxspltw 34, 34, 1 +; CHECK-BE-LABEL: vecsiuc +; CHECK-BE: lxsibzx 34, 0, 3 +; CHECK-BE-NEXT: xxspltw 34, 34, 1 +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @vecsluc(i8* nocapture readonly %ptr) { +entry: + %0 = load i8, i8* %ptr, align 1 + %conv = zext i8 %0 to i64 + %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0 + %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer + ret <2 x i64> %splat.splat +; CHECK-LABEL: vecsluc +; CHECK: lxsibzx 34, 0, 3 +; CHECK-NEXT: xxspltd 34, 34, 0 +; CHECK-BE-LABEL: vecsluc +; CHECK-BE: lxsibzx 34, 0, 3 +; CHECK-BE-NEXT: xxspltd 34, 34, 0 +} + +; Function Attrs: norecurse nounwind readonly +define <4 x float> @vecfuc(i8* nocapture readonly %ptr) { +entry: + %0 = load i8, i8* %ptr, align 1 + %conv = uitofp i8 %0 to float + %splat.splatinsert = insertelement <4 x float> undef, float %conv, i32 0 + %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer + ret <4 x float> %splat.splat +; CHECK-LABEL: vecfuc +; CHECK: lxsibzx [[LD:[0-9]+]], 0, 3 +; CHECK-NEXT: xscvuxdsp [[CONVD:[0-9]+]], [[LD]] +; CHECK-NEXT: xscvdpspn [[CONVS:[0-9]+]], [[CONVD]] +; CHECK-NEXT: xxspltw 34, [[CONVS]], 0 +; CHECK-BE-LABEL: vecfuc +; CHECK-BE: lxsibzx [[LD:[0-9]+]], 0, 3 +; CHECK-BE-NEXT: xscvuxdsp [[CONVD:[0-9]+]], [[LD]] +; CHECK-BE-NEXT: xscvdpspn [[CONVS:[0-9]+]], [[CONVD]] +; CHECK-BE-NEXT: xxspltw 34, [[CONVS]], 0 +} + +; Function Attrs: norecurse nounwind readonly +define <2 x double> @vecduc(i8* nocapture readonly 
%ptr) { +entry: + %0 = load i8, i8* %ptr, align 1 + %conv = uitofp i8 %0 to double + %splat.splatinsert = insertelement <2 x double> undef, double %conv, i32 0 + %splat.splat = shufflevector <2 x double> %splat.splatinsert, <2 x double> undef, <2 x i32> zeroinitializer + ret <2 x double> %splat.splat +; CHECK-LABEL: vecduc +; CHECK: lxsibzx [[LD:[0-9]+]], 0, 3 +; CHECK-NEXT: xscvuxddp [[CONVD:[0-9]+]], [[LD]] +; CHECK-NEXT: xxspltd 34, [[CONVD]], 0 +; CHECK-BE-LABEL: vecduc +; CHECK-BE: lxsibzx [[LD:[0-9]+]], 0, 3 +; CHECK-BE-NEXT: xscvuxddp [[CONVD:[0-9]+]], [[LD]] +; CHECK-BE-NEXT: xxspltd 34, [[CONVD]], 0 +} + +; Function Attrs: norecurse nounwind readonly +define <16 x i8> @vecucsc(i8* nocapture readonly %ptr) { +entry: + %0 = load i8, i8* %ptr, align 1 + %splat.splatinsert = insertelement <16 x i8> undef, i8 %0, i32 0 + %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer + ret <16 x i8> %splat.splat +; CHECK-LABEL: vecucsc +; CHECK: lxsibzx 34, 0, 3 +; CHECK-NEXT: vspltb 2, 2, 7 +; CHECK-BE-LABEL: vecucsc +; CHECK-BE: lxsibzx 34, 0, 3 +; CHECK-BE-NEXT: vspltb 2, 2, 7 +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @vecuisc(i8* nocapture readonly %ptr) { +entry: + %0 = load i8, i8* %ptr, align 1 + %conv = sext i8 %0 to i32 + %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0 + %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer + ret <4 x i32> %splat.splat +; CHECK-LABEL: vecuisc +; CHECK: lxsibzx 34, 0, 3 +; CHECK-NEXT: vextsb2w 2, 2 +; CHECK-NEXT: xxspltw 34, 34, 1 +; CHECK-BE-LABEL: vecuisc +; CHECK-BE: lxsibzx 34, 0, 3 +; CHECK-BE-NEXT: vextsb2w 2, 2 +; CHECK-BE-NEXT: xxspltw 34, 34, 1 +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @veculsc(i8* nocapture readonly %ptr) { +entry: + %0 = load i8, i8* %ptr, align 1 + %conv = sext i8 %0 to i64 + %splat.splatinsert = insertelement <2 x i64> undef, i64 
%conv, i32 0 + %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer + ret <2 x i64> %splat.splat +; CHECK-LABEL: veculsc +; CHECK: lxsibzx 34, 0, 3 +; CHECK-NEXT: vextsb2d 2, 2 +; CHECK-NEXT: xxspltd 34, 34, 0 +; CHECK-BE-LABEL: veculsc +; CHECK-BE: lxsibzx 34, 0, 3 +; CHECK-BE-NEXT: vextsb2d 2, 2 +; CHECK-BE-NEXT: xxspltd 34, 34, 0 +} + +; Function Attrs: norecurse nounwind readonly +define <16 x i8> @vecscsc(i8* nocapture readonly %ptr) { +entry: + %0 = load i8, i8* %ptr, align 1 + %splat.splatinsert = insertelement <16 x i8> undef, i8 %0, i32 0 + %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer + ret <16 x i8> %splat.splat +; CHECK-LABEL: vecscsc +; CHECK: lxsibzx 34, 0, 3 +; CHECK-NEXT: vspltb 2, 2, 7 +; CHECK-BE-LABEL: vecscsc +; CHECK-BE: lxsibzx 34, 0, 3 +; CHECK-BE-NEXT: vspltb 2, 2, 7 +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @vecsisc(i8* nocapture readonly %ptr) { +entry: + %0 = load i8, i8* %ptr, align 1 + %conv = sext i8 %0 to i32 + %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0 + %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer + ret <4 x i32> %splat.splat +; CHECK-LABEL: vecsisc +; CHECK: lxsibzx 34, 0, 3 +; CHECK-NEXT: vextsb2w 2, 2 +; CHECK-NEXT: xxspltw 34, 34, 1 +; CHECK-BE-LABEL: vecsisc +; CHECK-BE: lxsibzx 34, 0, 3 +; CHECK-BE-NEXT: vextsb2w 2, 2 +; CHECK-BE-NEXT: xxspltw 34, 34, 1 +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @vecslsc(i8* nocapture readonly %ptr) { +entry: + %0 = load i8, i8* %ptr, align 1 + %conv = sext i8 %0 to i64 + %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0 + %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer + ret <2 x i64> %splat.splat +; CHECK-LABEL: vecslsc +; CHECK: lxsibzx 34, 0, 3 +; CHECK-NEXT: vextsb2d 2, 2 +; 
CHECK-NEXT: xxspltd 34, 34, 0 +; CHECK-BE-LABEL: vecslsc +; CHECK-BE: lxsibzx 34, 0, 3 +; CHECK-BE-NEXT: vextsb2d 2, 2 +; CHECK-BE-NEXT: xxspltd 34, 34, 0 +} + +; Function Attrs: norecurse nounwind readonly +define <4 x float> @vecfsc(i8* nocapture readonly %ptr) { +entry: + %0 = load i8, i8* %ptr, align 1 + %conv = sitofp i8 %0 to float + %splat.splatinsert = insertelement <4 x float> undef, float %conv, i32 0 + %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer + ret <4 x float> %splat.splat +; CHECK-LABEL: vecfsc +; CHECK: lxsibzx +; CHECK-NEXT: vextsb2d +; CHECK-NEXT: xscvsxdsp [[CONVD:[0-9]+]], +; CHECK-NEXT: xscvdpspn [[CONVS:[0-9]+]], [[CONVD]] +; CHECK-NEXT: xxspltw 34, [[CONVS]], 0 +; CHECK-BE-LABEL: vecfsc +; CHECK-BE: lxsibzx [[LD:[0-9]+]], 0, 3 +; CHECK-BE-NEXT: vextsb2d +; CHECK-BE-NEXT: xscvsxdsp [[CONVD:[0-9]+]], +; CHECK-BE-NEXT: xscvdpspn [[CONVS:[0-9]+]], [[CONVD]] +; CHECK-BE-NEXT: xxspltw 34, [[CONVS]], 0 +} + +; Function Attrs: norecurse nounwind readonly +define <2 x double> @vecdsc(i8* nocapture readonly %ptr) { +entry: + %0 = load i8, i8* %ptr, align 1 + %conv = sitofp i8 %0 to double + %splat.splatinsert = insertelement <2 x double> undef, double %conv, i32 0 + %splat.splat = shufflevector <2 x double> %splat.splatinsert, <2 x double> undef, <2 x i32> zeroinitializer + ret <2 x double> %splat.splat +; CHECK-LABEL: vecdsc +; CHECK: lxsibzx +; CHECK-NEXT: vextsb2d +; CHECK-NEXT: xscvsxddp [[CONVD:[0-9]+]], +; CHECK-NEXT: xxspltd 34, [[CONVD]], 0 +; CHECK-BE-LABEL: vecdsc +; CHECK-BE: lxsibzx +; CHECK-BE-NEXT: vextsb2d +; CHECK-BE-NEXT: xscvsxddp [[CONVD:[0-9]+]], +; CHECK-BE-NEXT: xxspltd 34, [[CONVD]], 0 +} + +; Function Attrs: norecurse nounwind readonly +define <16 x i8> @vecucus(i16* nocapture readonly %ptr) { +entry: + %0 = load i16, i16* %ptr, align 2 + %conv = trunc i16 %0 to i8 + %splat.splatinsert = insertelement <16 x i8> undef, i8 %conv, i32 0 + %splat.splat = shufflevector <16 
x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer + ret <16 x i8> %splat.splat +; CHECK-LABEL: vecucus +; CHECK: lxsibzx 34, 0, 3 +; CHECK-NEXT: vspltb 2, 2, 7 +; CHECK-BE-LABEL: vecucus +; CHECK-BE: li [[OFFSET:[0-9]+]], 1 +; CHECK-BE-NEXT: lxsibzx 34, 3, [[OFFSET]] +; CHECK-BE-NEXT: vspltb 2, 2, 7 +} + +; Function Attrs: norecurse nounwind readonly +define <8 x i16> @vecusus(i16* nocapture readonly %ptr) { +entry: + %0 = load i16, i16* %ptr, align 2 + %splat.splatinsert = insertelement <8 x i16> undef, i16 %0, i32 0 + %splat.splat = shufflevector <8 x i16> %splat.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer + ret <8 x i16> %splat.splat +; CHECK-LABEL: vecusus +; CHECK: lxsihzx 34, 0, 3 +; CHECK-NEXT: vsplth 2, 2, 3 +; CHECK-BE-LABEL: vecusus +; CHECK-BE: lxsihzx 34, 0, 3 +; CHECK-BE-NEXT: vsplth 2, 2, 3 +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @vecuius(i16* nocapture readonly %ptr) { +entry: + %0 = load i16, i16* %ptr, align 2 + %conv = zext i16 %0 to i32 + %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0 + %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer + ret <4 x i32> %splat.splat +; CHECK-LABEL: vecuius +; CHECK: lxsihzx 34, 0, 3 +; CHECK-NEXT: xxspltw 34, 34, 1 +; CHECK-BE-LABEL: vecuius +; CHECK-BE: lxsihzx 34, 0, 3 +; CHECK-BE-NEXT: xxspltw 34, 34, 1 +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @veculus(i16* nocapture readonly %ptr) { +entry: + %0 = load i16, i16* %ptr, align 2 + %conv = zext i16 %0 to i64 + %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0 + %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer + ret <2 x i64> %splat.splat +; CHECK-LABEL: veculus +; CHECK: lxsihzx 34, 0, 3 +; CHECK-NEXT: xxspltd 34, 34, 0 +; CHECK-BE-LABEL: veculus +; CHECK-BE: lxsihzx 34, 0, 3 +; CHECK-BE-NEXT: xxspltd 34, 34, 0 +} + +; Function 
Attrs: norecurse nounwind readonly +define <16 x i8> @vecscus(i16* nocapture readonly %ptr) { +entry: + %0 = load i16, i16* %ptr, align 2 + %conv = trunc i16 %0 to i8 + %splat.splatinsert = insertelement <16 x i8> undef, i8 %conv, i32 0 + %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer + ret <16 x i8> %splat.splat +; CHECK-LABEL: vecscus +; CHECK: lxsibzx 34, 0, 3 +; CHECK-NEXT: vspltb 2, 2, 7 +; CHECK-BE-LABEL: vecscus +; CHECK-BE: li [[OFFSET:[0-9]+]], 1 +; CHECK-BE-NEXT: lxsibzx 34, 3, [[OFFSET]] +; CHECK-BE-NEXT: vspltb 2, 2, 7 +} + +; Function Attrs: norecurse nounwind readonly +define <8 x i16> @vecssus(i16* nocapture readonly %ptr) { +entry: + %0 = load i16, i16* %ptr, align 2 + %splat.splatinsert = insertelement <8 x i16> undef, i16 %0, i32 0 + %splat.splat = shufflevector <8 x i16> %splat.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer + ret <8 x i16> %splat.splat +; CHECK-LABEL: vecssus +; CHECK: lxsihzx 34, 0, 3 +; CHECK-NEXT: vsplth 2, 2, 3 +; CHECK-BE-LABEL: vecssus +; CHECK-BE: lxsihzx 34, 0, 3 +; CHECK-BE-NEXT: vsplth 2, 2, 3 +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @vecsius(i16* nocapture readonly %ptr) { +entry: + %0 = load i16, i16* %ptr, align 2 + %conv = zext i16 %0 to i32 + %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0 + %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer + ret <4 x i32> %splat.splat +; CHECK-LABEL: vecsius +; CHECK: lxsihzx 34, 0, 3 +; CHECK-NEXT: xxspltw 34, 34, 1 +; CHECK-BE-LABEL: vecsius +; CHECK-BE: lxsihzx 34, 0, 3 +; CHECK-BE-NEXT: xxspltw 34, 34, 1 +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @vecslus(i16* nocapture readonly %ptr) { +entry: + %0 = load i16, i16* %ptr, align 2 + %conv = zext i16 %0 to i64 + %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0 + %splat.splat = shufflevector <2 x i64> 
%splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer + ret <2 x i64> %splat.splat +; CHECK-LABEL: vecslus +; CHECK: lxsihzx 34, 0, 3 +; CHECK-NEXT: xxspltd 34, 34, 0 +; CHECK-BE-LABEL: vecslus +; CHECK-BE: lxsihzx 34, 0, 3 +; CHECK-BE-NEXT: xxspltd 34, 34, 0 +} + +; Function Attrs: norecurse nounwind readonly +define <4 x float> @vecfus(i16* nocapture readonly %ptr) { +entry: + %0 = load i16, i16* %ptr, align 2 + %conv = uitofp i16 %0 to float + %splat.splatinsert = insertelement <4 x float> undef, float %conv, i32 0 + %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer + ret <4 x float> %splat.splat +; CHECK-LABEL: vecfus +; CHECK: lxsihzx [[LD:[0-9]+]], 0, 3 +; CHECK-NEXT: xscvuxdsp [[CONVD:[0-9]+]], [[LD]] +; CHECK-NEXT: xscvdpspn [[CONVS:[0-9]+]], [[CONVD]] +; CHECK-NEXT: xxspltw 34, [[CONVS]], 0 +; CHECK-BE-LABEL: vecfus +; CHECK-BE: lxsihzx [[LD:[0-9]+]], 0, 3 +; CHECK-BE-NEXT: xscvuxdsp [[CONVD:[0-9]+]], [[LD]] +; CHECK-BE-NEXT: xscvdpspn [[CONVS:[0-9]+]], [[CONVD]] +; CHECK-BE-NEXT: xxspltw 34, [[CONVS]], 0 +} + +; Function Attrs: norecurse nounwind readonly +define <2 x double> @vecdus(i16* nocapture readonly %ptr) { +entry: + %0 = load i16, i16* %ptr, align 2 + %conv = uitofp i16 %0 to double + %splat.splatinsert = insertelement <2 x double> undef, double %conv, i32 0 + %splat.splat = shufflevector <2 x double> %splat.splatinsert, <2 x double> undef, <2 x i32> zeroinitializer + ret <2 x double> %splat.splat +; CHECK-LABEL: vecdus +; CHECK: lxsihzx [[LD:[0-9]+]], 0, 3 +; CHECK-NEXT: xscvuxddp [[CONVD:[0-9]+]], [[LD]] +; CHECK-NEXT: xxspltd 34, [[CONVD]], 0 +; CHECK-BE-LABEL: vecdus +; CHECK-BE: lxsihzx [[LD:[0-9]+]], 0, 3 +; CHECK-BE-NEXT: xscvuxddp [[CONVD:[0-9]+]], [[LD]] +; CHECK-BE-NEXT: xxspltd 34, [[CONVD]], 0 +} + +; Function Attrs: norecurse nounwind readonly +define <16 x i8> @vecucss(i16* nocapture readonly %ptr) { +entry: + %0 = load i16, i16* %ptr, align 2 + %conv = trunc i16 %0 to 
i8 + %splat.splatinsert = insertelement <16 x i8> undef, i8 %conv, i32 0 + %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer + ret <16 x i8> %splat.splat +; CHECK-LABEL: vecucss +; CHECK: lxsibzx 34, 0, 3 +; CHECK-NEXT: vspltb 2, 2, 7 +; CHECK-BE-LABEL: vecucss +; CHECK-BE: li [[OFFSET:[0-9]+]], 1 +; CHECK-BE-NEXT: lxsibzx 34, 3, [[OFFSET]] +; CHECK-BE-NEXT: vspltb 2, 2, 7 +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @vecuiss(i16* nocapture readonly %ptr) { +entry: + %0 = load i16, i16* %ptr, align 2 + %conv = sext i16 %0 to i32 + %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0 + %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer + ret <4 x i32> %splat.splat +; CHECK-LABEL: vecuiss +; CHECK: lxsihzx 34, 0, 3 +; CHECK-NEXT: vextsh2w 2, 2 +; CHECK-NEXT: xxspltw 34, 34, 1 +; CHECK-BE-LABEL: vecuiss +; CHECK-BE: lxsihzx 34, 0, 3 +; CHECK-BE-NEXT: vextsh2w 2, 2 +; CHECK-BE-NEXT: xxspltw 34, 34, 1 +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @veculss(i16* nocapture readonly %ptr) { +entry: + %0 = load i16, i16* %ptr, align 2 + %conv = sext i16 %0 to i64 + %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0 + %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer + ret <2 x i64> %splat.splat +; CHECK-LABEL: veculss +; CHECK: lxsihzx 34, 0, 3 +; CHECK-NEXT: vextsh2d 2, 2 +; CHECK-NEXT: xxspltd 34, 34, 0 +; CHECK-BE-LABEL: veculss +; CHECK-BE: lxsihzx 34, 0, 3 +; CHECK-BE-NEXT: vextsh2d 2, 2 +; CHECK-BE-NEXT: xxspltd 34, 34, 0 +} + +; Function Attrs: norecurse nounwind readonly +define <16 x i8> @vecscss(i16* nocapture readonly %ptr) { +entry: + %0 = load i16, i16* %ptr, align 2 + %conv = trunc i16 %0 to i8 + %splat.splatinsert = insertelement <16 x i8> undef, i8 %conv, i32 0 + %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 
x i8> undef, <16 x i32> zeroinitializer + ret <16 x i8> %splat.splat +; CHECK-LABEL: vecscss +; CHECK: lxsibzx 34, 0, 3 +; CHECK-NEXT: vspltb 2, 2, 7 +; CHECK-BE-LABEL: vecscss +; CHECK-BE: li [[OFFSET:[0-9]+]], 1 +; CHECK-BE-NEXT: lxsibzx 34, 3, [[OFFSET]] +; CHECK-BE-NEXT: vspltb 2, 2, 7 +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @vecsiss(i16* nocapture readonly %ptr) { +entry: + %0 = load i16, i16* %ptr, align 2 + %conv = sext i16 %0 to i32 + %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0 + %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer + ret <4 x i32> %splat.splat +; CHECK-LABEL: vecsiss +; CHECK: lxsihzx 34, 0, 3 +; CHECK-NEXT: vextsh2w 2, 2 +; CHECK-NEXT: xxspltw 34, 34, 1 +; CHECK-BE-LABEL: vecsiss +; CHECK-BE: lxsihzx 34, 0, 3 +; CHECK-BE-NEXT: vextsh2w 2, 2 +; CHECK-BE-NEXT: xxspltw 34, 34, 1 +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @vecslss(i16* nocapture readonly %ptr) { +entry: + %0 = load i16, i16* %ptr, align 2 + %conv = sext i16 %0 to i64 + %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0 + %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer + ret <2 x i64> %splat.splat +; CHECK-LABEL: vecslss +; CHECK: lxsihzx 34, 0, 3 +; CHECK-NEXT: vextsh2d 2, 2 +; CHECK-NEXT: xxspltd 34, 34, 0 +; CHECK-BE-LABEL: vecslss +; CHECK-BE: lxsihzx 34, 0, 3 +; CHECK-BE-NEXT: vextsh2d 2, 2 +; CHECK-BE-NEXT: xxspltd 34, 34, 0 +} + +; Function Attrs: norecurse nounwind readonly +define <4 x float> @vecfss(i16* nocapture readonly %ptr) { +entry: + %0 = load i16, i16* %ptr, align 2 + %conv = sitofp i16 %0 to float + %splat.splatinsert = insertelement <4 x float> undef, float %conv, i32 0 + %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer + ret <4 x float> %splat.splat +; CHECK-LABEL: vecfss +; CHECK: lxsihzx +; 
CHECK-NEXT: vextsh2d +; CHECK-NEXT: xscvsxdsp [[CONVD:[0-9]+]], +; CHECK-NEXT: xscvdpspn [[CONVS:[0-9]+]], [[CONVD]] +; CHECK-NEXT: xxspltw 34, [[CONVS]], 0 +; CHECK-BE-LABEL: vecfss +; CHECK-BE: lxsihzx [[LD:[0-9]+]], 0, 3 +; CHECK-BE-NEXT: vextsh2d +; CHECK-BE-NEXT: xscvsxdsp [[CONVD:[0-9]+]], +; CHECK-BE-NEXT: xscvdpspn [[CONVS:[0-9]+]], [[CONVD]] +; CHECK-BE-NEXT: xxspltw 34, [[CONVS]], 0 +} + +; Function Attrs: norecurse nounwind readonly +define <2 x double> @vecdss(i16* nocapture readonly %ptr) { +entry: + %0 = load i16, i16* %ptr, align 2 + %conv = sitofp i16 %0 to double + %splat.splatinsert = insertelement <2 x double> undef, double %conv, i32 0 + %splat.splat = shufflevector <2 x double> %splat.splatinsert, <2 x double> undef, <2 x i32> zeroinitializer + ret <2 x double> %splat.splat +; CHECK-LABEL: vecdss +; CHECK: lxsihzx +; CHECK-NEXT: vextsh2d +; CHECK-NEXT: xscvsxddp [[CONVD:[0-9]+]], +; CHECK-NEXT: xxspltd 34, [[CONVD]], 0 +; CHECK-BE-LABEL: vecdss +; CHECK-BE: lxsihzx +; CHECK-BE-NEXT: vextsh2d +; CHECK-BE-NEXT: xscvsxddp [[CONVD:[0-9]+]], +; CHECK-BE-NEXT: xxspltd 34, [[CONVD]], 0 +} + +; Function Attrs: norecurse nounwind +define void @storefsc(float %f, i8* nocapture %ptr) { +entry: + %conv = fptosi float %f to i8 + store i8 %conv, i8* %ptr, align 1 + ret void +; CHECK-LABEL: storefsc +; CHECK: xscvdpsxws 0, 1 +; CHECK: stxsibx 0, 0, 4 +; CHECK-BE-LABEL: storefsc +; CHECK-BE: xscvdpsxws 0, 1 +; CHECK-BE: stxsibx 0, 0, 4 +} + +; Function Attrs: norecurse nounwind +define void @storedsc(double %d, i8* nocapture %ptr) { +entry: + %conv = fptosi double %d to i8 + store i8 %conv, i8* %ptr, align 1 + ret void +; CHECK-LABEL: storedsc +; CHECK: xscvdpsxws 0, 1 +; CHECK: stxsibx 0, 0, 4 +; CHECK-BE-LABEL: storedsc +; CHECK-BE: xscvdpsxws 0, 1 +; CHECK-BE: stxsibx 0, 0, 4 +} + +; Function Attrs: norecurse nounwind +define void @storevcsc0(<16 x i8> %v, i8* nocapture %ptr) { +entry: + %vecext = extractelement <16 x i8> %v, i32 0 + store i8 %vecext, i8* 
%ptr, align 1 + ret void +; CHECK-LABEL: storevcsc0 +; CHECK: vsldoi 2, 2, 2, 8 +; CHECK-NEXT: stxsibx 34, 0, 5 +; CHECK-BE-LABEL: storevcsc0 +; CHECK-BE: vsldoi 2, 2, 2, 9 +; CHECK-BE-NEXT: stxsibx 34, 0, 5 +} + +; Function Attrs: norecurse nounwind +define void @storevcsc1(<16 x i8> %v, i8* nocapture %ptr) { +entry: + %vecext = extractelement <16 x i8> %v, i32 1 + store i8 %vecext, i8* %ptr, align 1 + ret void +; CHECK-LABEL: storevcsc1 +; CHECK: vsldoi 2, 2, 2, 7 +; CHECK-NEXT: stxsibx 34, 0, 5 +; CHECK-BE-LABEL: storevcsc1 +; CHECK-BE: vsldoi 2, 2, 2, 10 +; CHECK-BE-NEXT: stxsibx 34, 0, 5 +} + +; Function Attrs: norecurse nounwind +define void @storevcsc2(<16 x i8> %v, i8* nocapture %ptr) { +entry: + %vecext = extractelement <16 x i8> %v, i32 2 + store i8 %vecext, i8* %ptr, align 1 + ret void +; CHECK-LABEL: storevcsc2 +; CHECK: vsldoi 2, 2, 2, 6 +; CHECK-NEXT: stxsibx 34, 0, 5 +; CHECK-BE-LABEL: storevcsc2 +; CHECK-BE: vsldoi 2, 2, 2, 11 +; CHECK-BE-NEXT: stxsibx 34, 0, 5 +} + +; Function Attrs: norecurse nounwind +define void @storevcsc3(<16 x i8> %v, i8* nocapture %ptr) { +entry: + %vecext = extractelement <16 x i8> %v, i32 3 + store i8 %vecext, i8* %ptr, align 1 + ret void +; CHECK-LABEL: storevcsc3 +; CHECK: vsldoi 2, 2, 2, 5 +; CHECK-NEXT: stxsibx 34, 0, 5 +; CHECK-BE-LABEL: storevcsc3 +; CHECK-BE: vsldoi 2, 2, 2, 12 +; CHECK-BE-NEXT: stxsibx 34, 0, 5 +} + +; Function Attrs: norecurse nounwind +define void @storevcsc4(<16 x i8> %v, i8* nocapture %ptr) { +entry: + %vecext = extractelement <16 x i8> %v, i32 4 + store i8 %vecext, i8* %ptr, align 1 + ret void +; CHECK-LABEL: storevcsc4 +; CHECK: vsldoi 2, 2, 2, 4 +; CHECK-NEXT: stxsibx 34, 0, 5 +; CHECK-BE-LABEL: storevcsc4 +; CHECK-BE: vsldoi 2, 2, 2, 13 +; CHECK-BE-NEXT: stxsibx 34, 0, 5 +} + +; Function Attrs: norecurse nounwind +define void @storevcsc5(<16 x i8> %v, i8* nocapture %ptr) { +entry: + %vecext = extractelement <16 x i8> %v, i32 5 + store i8 %vecext, i8* %ptr, align 1 + ret void +; CHECK-LABEL: 
storevcsc5 +; CHECK: vsldoi 2, 2, 2, 3 +; CHECK-NEXT: stxsibx 34, 0, 5 +; CHECK-BE-LABEL: storevcsc5 +; CHECK-BE: vsldoi 2, 2, 2, 14 +; CHECK-BE-NEXT: stxsibx 34, 0, 5 +} + +; Function Attrs: norecurse nounwind +define void @storevcsc6(<16 x i8> %v, i8* nocapture %ptr) { +entry: + %vecext = extractelement <16 x i8> %v, i32 6 + store i8 %vecext, i8* %ptr, align 1 + ret void +; CHECK-LABEL: storevcsc6 +; CHECK: vsldoi 2, 2, 2, 2 +; CHECK-NEXT: stxsibx 34, 0, 5 +; CHECK-BE-LABEL: storevcsc6 +; CHECK-BE: vsldoi 2, 2, 2, 15 +; CHECK-BE-NEXT: stxsibx 34, 0, 5 +} + +; Function Attrs: norecurse nounwind +define void @storevcsc7(<16 x i8> %v, i8* nocapture %ptr) { +entry: + %vecext = extractelement <16 x i8> %v, i32 7 + store i8 %vecext, i8* %ptr, align 1 + ret void +; CHECK-LABEL: storevcsc7 +; CHECK: vsldoi 2, 2, 2, 1 +; CHECK-NEXT: stxsibx 34, 0, 5 +; CHECK-BE-LABEL: storevcsc7 +; CHECK-BE: stxsibx 34, 0, 5 +} + +; Function Attrs: norecurse nounwind +define void @storevcsc8(<16 x i8> %v, i8* nocapture %ptr) { +entry: + %vecext = extractelement <16 x i8> %v, i32 8 + store i8 %vecext, i8* %ptr, align 1 + ret void +; CHECK-LABEL: storevcsc8 +; CHECK: stxsibx 34, 0, 5 +; CHECK-BE-LABEL: storevcsc8 +; CHECK-BE: vsldoi 2, 2, 2, 1 +; CHECK-BE-NEXT: stxsibx 34, 0, 5 +} + +; Function Attrs: norecurse nounwind +define void @storevcsc9(<16 x i8> %v, i8* nocapture %ptr) { +entry: + %vecext = extractelement <16 x i8> %v, i32 9 + store i8 %vecext, i8* %ptr, align 1 + ret void +; CHECK-LABEL: storevcsc9 +; CHECK: vsldoi 2, 2, 2, 15 +; CHECK-NEXT: stxsibx 34, 0, 5 +; CHECK-BE-LABEL: storevcsc9 +; CHECK-BE: vsldoi 2, 2, 2, 2 +; CHECK-BE-NEXT: stxsibx 34, 0, 5 +} + +; Function Attrs: norecurse nounwind +define void @storevcsc10(<16 x i8> %v, i8* nocapture %ptr) { +entry: + %vecext = extractelement <16 x i8> %v, i32 10 + store i8 %vecext, i8* %ptr, align 1 + ret void +; CHECK-LABEL: storevcsc10 +; CHECK: vsldoi 2, 2, 2, 14 +; CHECK-NEXT: stxsibx 34, 0, 5 +; CHECK-BE-LABEL: storevcsc10 +; 
CHECK-BE: vsldoi 2, 2, 2, 3 +; CHECK-BE-NEXT: stxsibx 34, 0, 5 +} + +; Function Attrs: norecurse nounwind +define void @storevcsc11(<16 x i8> %v, i8* nocapture %ptr) { +entry: + %vecext = extractelement <16 x i8> %v, i32 11 + store i8 %vecext, i8* %ptr, align 1 + ret void +; CHECK-LABEL: storevcsc11 +; CHECK: vsldoi 2, 2, 2, 13 +; CHECK-NEXT: stxsibx 34, 0, 5 +; CHECK-BE-LABEL: storevcsc11 +; CHECK-BE: vsldoi 2, 2, 2, 4 +; CHECK-BE-NEXT: stxsibx 34, 0, 5 +} + +; Function Attrs: norecurse nounwind +define void @storevcsc12(<16 x i8> %v, i8* nocapture %ptr) { +entry: + %vecext = extractelement <16 x i8> %v, i32 12 + store i8 %vecext, i8* %ptr, align 1 + ret void +; CHECK-LABEL: storevcsc12 +; CHECK: vsldoi 2, 2, 2, 12 +; CHECK-NEXT: stxsibx 34, 0, 5 +; CHECK-BE-LABEL: storevcsc12 +; CHECK-BE: vsldoi 2, 2, 2, 5 +; CHECK-BE-NEXT: stxsibx 34, 0, 5 +} + +; Function Attrs: norecurse nounwind +define void @storevcsc13(<16 x i8> %v, i8* nocapture %ptr) { +entry: + %vecext = extractelement <16 x i8> %v, i32 13 + store i8 %vecext, i8* %ptr, align 1 + ret void +; CHECK-LABEL: storevcsc13 +; CHECK: vsldoi 2, 2, 2, 11 +; CHECK-NEXT: stxsibx 34, 0, 5 +; CHECK-BE-LABEL: storevcsc13 +; CHECK-BE: vsldoi 2, 2, 2, 6 +; CHECK-BE-NEXT: stxsibx 34, 0, 5 +} + +; Function Attrs: norecurse nounwind +define void @storevcsc14(<16 x i8> %v, i8* nocapture %ptr) { +entry: + %vecext = extractelement <16 x i8> %v, i32 14 + store i8 %vecext, i8* %ptr, align 1 + ret void +; CHECK-LABEL: storevcsc14 +; CHECK: vsldoi 2, 2, 2, 10 +; CHECK-NEXT: stxsibx 34, 0, 5 +; CHECK-BE-LABEL: storevcsc14 +; CHECK-BE: vsldoi 2, 2, 2, 7 +; CHECK-BE-NEXT: stxsibx 34, 0, 5 +} + +; Function Attrs: norecurse nounwind +define void @storevcsc15(<16 x i8> %v, i8* nocapture %ptr) { +entry: + %vecext = extractelement <16 x i8> %v, i32 15 + store i8 %vecext, i8* %ptr, align 1 + ret void +; CHECK-LABEL: storevcsc15 +; CHECK: vsldoi 2, 2, 2, 9 +; CHECK-NEXT: stxsibx 34, 0, 5 +; CHECK-BE-LABEL: storevcsc15 +; CHECK-BE: vsldoi 2, 
2, 2, 8 +; CHECK-BE-NEXT: stxsibx 34, 0, 5 +} + +; Function Attrs: norecurse nounwind +define void @storefss(float %f, i16* nocapture %ptr) { +entry: + %conv = fptosi float %f to i16 + store i16 %conv, i16* %ptr, align 2 + ret void +; CHECK-LABEL: storefss +; CHECK: xscvdpsxws 0, 1 +; CHECK: stxsihx 0, 0, 4 +; CHECK-BE-LABEL: storefss +; CHECK-BE: xscvdpsxws 0, 1 +; CHECK-BE: stxsihx 0, 0, 4 +} + +; Function Attrs: norecurse nounwind +define void @storedss(double %d, i16* nocapture %ptr) { +entry: + %conv = fptosi double %d to i16 + store i16 %conv, i16* %ptr, align 2 + ret void +; CHECK-LABEL: storedss +; CHECK: xscvdpsxws 0, 1 +; CHECK: stxsihx 0, 0, 4 +; CHECK-BE-LABEL: storedss +; CHECK-BE: xscvdpsxws 0, 1 +; CHECK-BE: stxsihx 0, 0, 4 +} + +; Function Attrs: norecurse nounwind +define void @storevsss0(<8 x i16> %v, i16* nocapture %ptr) { +entry: + %vecext = extractelement <8 x i16> %v, i32 0 + store i16 %vecext, i16* %ptr, align 2 + ret void +; CHECK-LABEL: storevsss0 +; CHECK: vsldoi 2, 2, 2, 8 +; CHECK-NEXT: stxsihx 34, 0, 5 +; CHECK-BE-LABEL: storevsss0 +; CHECK-BE: vsldoi 2, 2, 2, 10 +; CHECK-BE-NEXT: stxsihx 34, 0, 5 +} + +; Function Attrs: norecurse nounwind +define void @storevsss1(<8 x i16> %v, i16* nocapture %ptr) { +entry: + %vecext = extractelement <8 x i16> %v, i32 1 + store i16 %vecext, i16* %ptr, align 2 + ret void +; CHECK-LABEL: storevsss1 +; CHECK: vsldoi 2, 2, 2, 6 +; CHECK-NEXT: stxsihx 34, 0, 5 +; CHECK-BE-LABEL: storevsss1 +; CHECK-BE: vsldoi 2, 2, 2, 12 +; CHECK-BE-NEXT: stxsihx 34, 0, 5 +} + +; Function Attrs: norecurse nounwind +define void @storevsss2(<8 x i16> %v, i16* nocapture %ptr) { +entry: + %vecext = extractelement <8 x i16> %v, i32 2 + store i16 %vecext, i16* %ptr, align 2 + ret void +; CHECK-LABEL: storevsss2 +; CHECK: vsldoi 2, 2, 2, 4 +; CHECK-NEXT: stxsihx 34, 0, 5 +; CHECK-BE-LABEL: storevsss2 +; CHECK-BE: vsldoi 2, 2, 2, 14 +; CHECK-BE-NEXT: stxsihx 34, 0, 5 +} + +; Function Attrs: norecurse nounwind +define void 
@storevsss3(<8 x i16> %v, i16* nocapture %ptr) { +entry: + %vecext = extractelement <8 x i16> %v, i32 3 + store i16 %vecext, i16* %ptr, align 2 + ret void +; CHECK-LABEL: storevsss3 +; CHECK: vsldoi 2, 2, 2, 2 +; CHECK-NEXT: stxsihx 34, 0, 5 +; CHECK-BE-LABEL: storevsss3 +; CHECK-BE: stxsihx 34, 0, 5 +} + +; Function Attrs: norecurse nounwind +define void @storevsss4(<8 x i16> %v, i16* nocapture %ptr) { +entry: + %vecext = extractelement <8 x i16> %v, i32 4 + store i16 %vecext, i16* %ptr, align 2 + ret void +; CHECK-LABEL: storevsss4 +; CHECK: stxsihx 34, 0, 5 +; CHECK-BE-LABEL: storevsss4 +; CHECK-BE: vsldoi 2, 2, 2, 2 +; CHECK-BE-NEXT: stxsihx 34, 0, 5 +} + +; Function Attrs: norecurse nounwind +define void @storevsss5(<8 x i16> %v, i16* nocapture %ptr) { +entry: + %vecext = extractelement <8 x i16> %v, i32 5 + store i16 %vecext, i16* %ptr, align 2 + ret void +; CHECK-LABEL: storevsss5 +; CHECK: vsldoi 2, 2, 2, 14 +; CHECK-NEXT: stxsihx 34, 0, 5 +; CHECK-BE-LABEL: storevsss5 +; CHECK-BE: vsldoi 2, 2, 2, 4 +; CHECK-BE-NEXT: stxsihx 34, 0, 5 +} + +; Function Attrs: norecurse nounwind +define void @storevsss6(<8 x i16> %v, i16* nocapture %ptr) { +entry: + %vecext = extractelement <8 x i16> %v, i32 6 + store i16 %vecext, i16* %ptr, align 2 + ret void +; CHECK-LABEL: storevsss6 +; CHECK: vsldoi 2, 2, 2, 12 +; CHECK-NEXT: stxsihx 34, 0, 5 +; CHECK-BE-LABEL: storevsss6 +; CHECK-BE: vsldoi 2, 2, 2, 6 +; CHECK-BE-NEXT: stxsihx 34, 0, 5 +} + +; Function Attrs: norecurse nounwind +define void @storevsss7(<8 x i16> %v, i16* nocapture %ptr) { +entry: + %vecext = extractelement <8 x i16> %v, i32 7 + store i16 %vecext, i16* %ptr, align 2 + ret void +; CHECK-LABEL: storevsss7 +; CHECK: vsldoi 2, 2, 2, 10 +; CHECK-NEXT: stxsihx 34, 0, 5 +; CHECK-BE-LABEL: storevsss7 +; CHECK-BE: vsldoi 2, 2, 2, 8 +; CHECK-BE-NEXT: stxsihx 34, 0, 5 +} + +; Function Attrs: norecurse nounwind readonly +define float @convscf(i8* nocapture readonly %ptr) { +entry: + %0 = load i8, i8* %ptr, align 1 + 
%conv = sitofp i8 %0 to float + ret float %conv +; CHECK-LABEL: convscf +; CHECK: lxsibzx 34, 0, 3 +; CHECK-NEXT: vextsb2d 2, 2 +; CHECK-NEXT: xscvsxdsp 1, 34 +; CHECK-BE-LABEL: convscf +; CHECK-BE: lxsibzx 34, 0, 3 +; CHECK-BE-NEXT: vextsb2d 2, 2 +; CHECK-BE-NEXT: xscvsxdsp 1, 34 +} + +; Function Attrs: norecurse nounwind readonly +define float @convucf(i8* nocapture readonly %ptr) { +entry: + %0 = load i8, i8* %ptr, align 1 + %conv = uitofp i8 %0 to float + ret float %conv +; CHECK-LABEL: convucf +; CHECK: lxsibzx 0, 0, 3 +; CHECK-NEXT: xscvuxdsp 1, 0 +; CHECK-BE-LABEL: convucf +; CHECK-BE: lxsibzx 0, 0, 3 +; CHECK-BE-NEXT: xscvuxdsp 1, 0 +} + +; Function Attrs: norecurse nounwind readonly +define double @convscd(i8* nocapture readonly %ptr) { +entry: + %0 = load i8, i8* %ptr, align 1 + %conv = sitofp i8 %0 to double +; CHECK-LABEL: convscd +; CHECK: lxsibzx 34, 0, 3 +; CHECK-NEXT: vextsb2d 2, 2 +; CHECK-NEXT: xscvsxddp 1, 34 +; CHECK-BE-LABEL: convscd +; CHECK-BE: lxsibzx 34, 0, 3 +; CHECK-BE-NEXT: vextsb2d 2, 2 +; CHECK-BE-NEXT: xscvsxddp 1, 34 + ret double %conv +} + +; Function Attrs: norecurse nounwind readonly +define double @convucd(i8* nocapture readonly %ptr) { +entry: + %0 = load i8, i8* %ptr, align 1 + %conv = uitofp i8 %0 to double + ret double %conv +; CHECK-LABEL: convucd +; CHECK: lxsibzx 0, 0, 3 +; CHECK-NEXT: xscvuxddp 1, 0 +; CHECK-BE-LABEL: convucd +; CHECK-BE: lxsibzx 0, 0, 3 +; CHECK-BE-NEXT: xscvuxddp 1, 0 +} + +; Function Attrs: norecurse nounwind readonly +define float @convssf(i16* nocapture readonly %ptr) { +entry: + %0 = load i16, i16* %ptr, align 2 + %conv = sitofp i16 %0 to float + ret float %conv +; CHECK-LABEL: convssf +; CHECK: lxsihzx 34, 0, 3 +; CHECK-NEXT: vextsh2d 2, 2 +; CHECK-NEXT: xscvsxdsp 1, 34 +; CHECK-BE-LABEL: convssf +; CHECK-BE: lxsihzx 34, 0, 3 +; CHECK-BE-NEXT: vextsh2d 2, 2 +; CHECK-BE-NEXT: xscvsxdsp 1, 34 +} + +; Function Attrs: norecurse nounwind readonly +define float @convusf(i16* nocapture readonly %ptr) { 
+entry: + %0 = load i16, i16* %ptr, align 2 + %conv = uitofp i16 %0 to float + ret float %conv +; CHECK-LABEL: convusf +; CHECK: lxsihzx 0, 0, 3 +; CHECK-NEXT: xscvuxdsp 1, 0 +; CHECK-BE-LABEL: convusf +; CHECK-BE: lxsihzx 0, 0, 3 +; CHECK-BE-NEXT: xscvuxdsp 1, 0 +} + +; Function Attrs: norecurse nounwind readonly +define double @convssd(i16* nocapture readonly %ptr) { +entry: + %0 = load i16, i16* %ptr, align 2 + %conv = sitofp i16 %0 to double + ret double %conv +; CHECK-LABEL: convssd +; CHECK: lxsihzx 34, 0, 3 +; CHECK-NEXT: vextsh2d 2, 2 +; CHECK-NEXT: xscvsxddp 1, 34 +; CHECK-BE-LABEL: convssd +; CHECK-BE: lxsihzx 34, 0, 3 +; CHECK-BE-NEXT: vextsh2d 2, 2 +; CHECK-BE-NEXT: xscvsxddp 1, 34 +} + +; Function Attrs: norecurse nounwind readonly +define double @convusd(i16* nocapture readonly %ptr) { +entry: + %0 = load i16, i16* %ptr, align 2 + %conv = uitofp i16 %0 to double + ret double %conv +; CHECK-LABEL: convusd +; CHECK: lxsihzx 0, 0, 3 +; CHECK-NEXT: xscvuxddp 1, 0 +; CHECK-BE-LABEL: convusd +; CHECK-BE: lxsihzx 0, 0, 3 +; CHECK-BE-NEXT: xscvuxddp 1, 0 +} Index: test/CodeGen/PowerPC/vsx-spill-norwstore.ll =================================================================== --- test/CodeGen/PowerPC/vsx-spill-norwstore.ll +++ test/CodeGen/PowerPC/vsx-spill-norwstore.ll @@ -4,6 +4,7 @@ @.str1 = external unnamed_addr constant [5 x i8], align 1 @.str10 = external unnamed_addr constant [9 x i8], align 1 +@.v2f64 = external unnamed_addr constant <2 x double>, align 16 ; Function Attrs: nounwind define void @main() #0 { @@ -12,6 +13,7 @@ ; CHECK: stxvd2x entry: + %val = load <2 x double>, <2 x double>* @.v2f64, align 16 %0 = tail call <8 x i16> @llvm.ppc.altivec.vupkhsb(<16 x i8> ) #0 %1 = tail call <8 x i16> @llvm.ppc.altivec.vupklsb(<16 x i8> ) #0 br i1 false, label %if.then.i68.i, label %check.exit69.i @@ -23,7 +25,7 @@ br i1 undef, label %if.then.i63.i, label %check.exit64.i if.then.i63.i: ; preds = %check.exit69.i - tail call void (i8*, ...) 
@printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str10, i64 0, i64 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str1, i64 0, i64 0)) #0 + tail call void (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str10, i64 0, i64 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str1, i64 0, i64 0), <2 x double> %val) #0 br label %check.exit64.i check.exit64.i: ; preds = %if.then.i63.i, %check.exit69.i Index: test/CodeGen/PowerPC/vsx-vec-spill.ll =================================================================== --- test/CodeGen/PowerPC/vsx-vec-spill.ll +++ test/CodeGen/PowerPC/vsx-vec-spill.ll @@ -0,0 +1,34 @@ +; RUN: llc < %s -march=ppc64 -mattr=+vsx -verify-machineinstrs | \ +; RUN: FileCheck %s --check-prefix=VSX +; RUN: llc < %s -march=ppc64 -mattr=-vsx -verify-machineinstrs | \ +; RUN: FileCheck %s --check-prefix=NOVSX + +define <2 x double> @interleaving_VSX_VMX( + <2 x double> %a, <2 x double> %b, <2 x double> %c, + <2 x double> %d, <2 x double> %e, <2 x double> %f) { +entry: + tail call void asm sideeffect "# clobbers", + "~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() nounwind + tail call void @goo(<2 x double> %a) nounwind + %add = fadd <2 x double> %a, %b + %sub = fsub <2 x double> %a, %b + %mul = fmul <2 x double> %add, %sub + %add1 = fadd <2 x double> %c, %d + %sub2 = fsub <2 x double> %c, %d + %mul3 = fmul <2 x double> %add1, %sub2 + %add4 = fadd <2 x double> %mul, %mul3 + %add5 = fadd <2 x double> %e, %f + %sub6 = fsub <2 x double> %e, %f + %mul7 = fmul <2 x double> %add5, %sub6 + %add8 = fadd <2 x double> %add4, %mul7 + ret <2 x double> %add8 +; VSX-LABEL: interleaving_VSX_VMX +; VSX-NOT: stvx +; VSX-NOT: lvx + +; NOVSX-LABEL: interleaving_VSX_VMX +; NOVSX-NOT: stxvd2x +; NOVSX-NOT: lxvd2x +} + +declare void @goo(<2 x double>) Index: test/CodeGen/PowerPC/vsx.ll =================================================================== --- 
test/CodeGen/PowerPC/vsx.ll +++ test/CodeGen/PowerPC/vsx.ll @@ -70,10 +70,7 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test5 -; CHECK-FISL: vor -; CHECK-FISL: vor -; CHECK-FISL: xxlxor -; CHECK-FISL: vor 2 +; CHECK-FISL: xxlxor 34, 34, 35 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test5 @@ -91,10 +88,7 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test6 -; CHECK-FISL: vor 4, 2, 2 -; CHECK-FISL: vor 5, 3, 3 -; CHECK-FISL: xxlxor 36, 36, 37 -; CHECK-FISL: vor 2, 4, 4 +; CHECK-FISL: xxlxor 34, 34, 35 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test6 @@ -112,10 +106,7 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test7 -; CHECK-FISL: vor 4, 2, 2 -; CHECK-FISL: vor 5, 3, 3 -; CHECK-FISL: xxlxor 36, 36, 37 -; CHECK-FISL: vor 2, 4, 4 +; CHECK-FISL: xxlxor 34, 34, 35 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test7 @@ -133,10 +124,7 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test8 -; CHECK-FISL: vor -; CHECK-FISL: vor -; CHECK-FISL: xxlor -; CHECK-FISL: vor 2 +; CHECK-FISL: xxlor 34, 34, 35 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test8 @@ -154,10 +142,7 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test9 -; CHECK-FISL: vor 4, 2, 2 -; CHECK-FISL: vor 5, 3, 3 -; CHECK-FISL: xxlor 36, 36, 37 -; CHECK-FISL: vor 2, 4, 4 +; CHECK-FISL: xxlor 34, 34, 35 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test9 @@ -175,10 +160,7 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test10 -; CHECK-FISL: vor 4, 2, 2 -; CHECK-FISL: vor 5, 3, 3 -; CHECK-FISL: xxlor 36, 36, 37 -; CHECK-FISL: vor 2, 4, 4 +; CHECK-FISL: xxlor 34, 34, 35 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test10 @@ -196,10 +178,7 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test11 -; CHECK-FISL: vor -; CHECK-FISL: vor -; CHECK-FISL: xxland -; CHECK-FISL: vor 2 +; CHECK-FISL: xxland 34, 34, 35 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test11 @@ -217,10 +196,7 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test12 -; CHECK-FISL: vor 4, 2, 2 -; CHECK-FISL: vor 5, 3, 3 -; CHECK-FISL: xxland 36, 36, 37 -; CHECK-FISL: vor 2, 4, 4 +; CHECK-FISL: xxland 34, 34, 35 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test12 @@ 
-238,10 +214,7 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test13 -; CHECK-FISL: vor 4, 2, 2 -; CHECK-FISL: vor 5, 3, 3 -; CHECK-FISL: xxland 36, 36, 37 -; CHECK-FISL: vor 2, 4, 4 +; CHECK-FISL: xxland 34, 34, 35 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test13 @@ -260,11 +233,8 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test14 -; CHECK-FISL: vor 4, 3, 3 -; CHECK-FISL: vor 5, 2, 2 -; CHECK-FISL: xxlor 0, 37, 36 -; CHECK-FISL: xxlnor 36, 37, 36 -; CHECK-FISL: vor 2, 4, 4 +; CHECK-FISL: xxlor 0, 34, 35 +; CHECK-FISL: xxlnor 34, 34, 35 ; CHECK-FISL: lis 0, -1 ; CHECK-FISL: ori 0, 0, 65520 ; CHECK-FISL: stxvd2x 0, 1, 0 @@ -286,17 +256,13 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test15 -; CHECK-FISL: vor 4, 2, 2 -; CHECK-FISL: vor 5, 3, 3 -; CHECK-FISL: xxlor 36, 36, 37 -; CHECK-FISL: vor 0, 4, 4 -; CHECK-FISL: vor 4, 2, 2 -; CHECK-FISL: vor 5, 3, 3 -; CHECK-FISL: xxlnor 36, 36, 37 -; CHECK-FISL: vor 2, 4, 4 +; CHECK-FISL: xxlor 0, 34, 35 +; CHECK-FISL: xxlor 36, 0, 0 +; CHECK-FISL: xxlnor 0, 34, 35 +; CHECK-FISL: xxlor 34, 0, 0 ; CHECK-FISL: lis 0, -1 ; CHECK-FISL: ori 0, 0, 65520 -; CHECK-FISL: stvx 0, 1, 0 +; CHECK-FISL: stxvd2x 36, 1, 0 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test15 @@ -315,17 +281,13 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test16 -; CHECK-FISL: vor 4, 2, 2 -; CHECK-FISL: vor 5, 3, 3 -; CHECK-FISL: xxlor 36, 36, 37 -; CHECK-FISL: vor 0, 4, 4 -; CHECK-FISL: vor 4, 2, 2 -; CHECK-FISL: vor 5, 3, 3 -; CHECK-FISL: xxlnor 36, 36, 37 -; CHECK-FISL: vor 2, 4, 4 +; CHECK-FISL: xxlor 0, 34, 35 +; CHECK-FISL: xxlor 36, 0, 0 +; CHECK-FISL: xxlnor 0, 34, 35 +; CHECK-FISL: xxlor 34, 0, 0 ; CHECK-FISL: lis 0, -1 ; CHECK-FISL: ori 0, 0, 65520 -; CHECK-FISL: stvx 0, 1, 0 +; CHECK-FISL: stxvd2x 36, 1, 0 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test16 @@ -344,11 +306,8 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test17 -; CHECK-FISL: vor 4, 3, 3 -; CHECK-FISL: vor 5, 2, 2 -; CHECK-FISL: xxlnor 36, 36, 36 -; CHECK-FISL: xxland 36, 37, 36 -; CHECK-FISL: vor 2, 4, 4 +; CHECK-FISL: xxlnor 
35, 35, 35 +; CHECK-FISL: xxland 34, 34, 35 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test17 @@ -367,17 +326,13 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test18 -; CHECK-FISL: vor 4, 3, 3 -; CHECK-FISL: vor 5, 3, 3 -; CHECK-FISL: xxlnor 36, 36, 37 -; CHECK-FISL: vor 0, 4, 4 -; CHECK-FISL: vor 4, 2, 2 -; CHECK-FISL: vor 5, 3, 3 -; CHECK-FISL: xxlandc 36, 36, 37 -; CHECK-FISL: vor 2, 4, 4 +; CHECK-FISL: xxlnor 0, 35, 35 +; CHECK-FISL: xxlor 36, 0, 0 +; CHECK-FISL: xxlandc 0, 34, 35 +; CHECK-FISL: xxlor 34, 0, 0 ; CHECK-FISL: lis 0, -1 ; CHECK-FISL: ori 0, 0, 65520 -; CHECK-FISL: stvx 0, 1, 0 +; CHECK-FISL: stxvd2x 36, 1, 0 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test18 @@ -396,17 +351,13 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test19 -; CHECK-FISL: vor 4, 3, 3 -; CHECK-FISL: vor 5, 3, 3 -; CHECK-FISL: xxlnor 36, 36, 37 -; CHECK-FISL: vor 0, 4, 4 -; CHECK-FISL: vor 4, 2, 2 -; CHECK-FISL: vor 5, 3, 3 -; CHECK-FISL: xxlandc 36, 36, 37 -; CHECK-FISL: vor 2, 4, 4 +; CHECK-FISL: xxlnor 0, 35, 35 +; CHECK-FISL: xxlor 36, 0, 0 +; CHECK-FISL: xxlandc 0, 34, 35 +; CHECK-FISL: xxlor 34, 0, 0 ; CHECK-FISL: lis 0, -1 ; CHECK-FISL: ori 0, 0, 65520 -; CHECK-FISL: stvx 0, 1, 0 +; CHECK-FISL: stxvd2x 36, 1, 0 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test19 @@ -425,19 +376,9 @@ ; CHECK-REG: xxsel 34, 35, 34, {{[0-9]+}} ; CHECK-REG: blr -; FIXME: The fast-isel code is pretty miserable for this one. 
- ; CHECK-FISL-LABEL: @test20 -; CHECK-FISL: vor 0, 5, 5 -; CHECK-FISL: vor 1, 4, 4 -; CHECK-FISL: vor 6, 3, 3 -; CHECK-FISL: vor 7, 2, 2 -; CHECK-FISL: vor 2, 1, 1 -; CHECK-FISL: vor 3, 0, 0 -; CHECK-FISL: vcmpequw 2, 2, 3 -; CHECK-FISL: vor 0, 2, 2 -; CHECK-FISL: xxsel 32, 38, 39, 32 -; CHECK-FISL: vor 2, 0, 0 +; CHECK-FISL: vcmpequw {{[0-9]+}}, 4, 5 +; CHECK-FISL: xxsel 34, 35, 34, {{[0-9]+}} ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test20 @@ -458,13 +399,8 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test21 -; CHECK-FISL: vor 0, 5, 5 -; CHECK-FISL: vor 1, 4, 4 -; CHECK-FISL: vor 6, 3, 3 -; CHECK-FISL: vor 7, 2, 2 -; CHECK-FISL: xvcmpeqsp 32, 33, 32 -; CHECK-FISL: xxsel 32, 38, 39, 32 -; CHECK-FISL: vor 2, 0, 0 +; CHECK-FISL: xvcmpeqsp [[V1:[0-9]+]], 36, 37 +; CHECK-FISL: xxsel 34, 35, 34, [[V1]] ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test21 @@ -491,14 +427,14 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test22 -; CHECK-FISL-DAG: xvcmpeqsp {{[0-9]+}}, 33, 32 -; CHECK-FISL-DAG: xvcmpeqsp {{[0-9]+}}, 32, 32 -; CHECK-FISL-DAG: xvcmpeqsp {{[0-9]+}}, 33, 33 +; CHECK-FISL-DAG: xvcmpeqsp {{[0-9]+}}, 37, 37 +; CHECK-FISL-DAG: xvcmpeqsp {{[0-9]+}}, 36, 36 +; CHECK-FISL-DAG: xvcmpeqsp {{[0-9]+}}, 36, 37 ; CHECK-FISL-DAG: xxlnor ; CHECK-FISL-DAG: xxlnor ; CHECK-FISL-DAG: xxlor ; CHECK-FISL-DAG: xxlor -; CHECK-FISL: xxsel 0, 38, 39, {{[0-9]+}} +; CHECK-FISL: xxsel 34, 35, 34, {{[0-9]+}} ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test22 @@ -526,11 +462,7 @@ ; CHECK-FISL-LABEL: @test23 ; CHECK-FISL: vcmpequh 4, 4, 5 -; CHECK-FISL: vor 0, 3, 3 -; CHECK-FISL: vor 1, 2, 2 -; CHECK-FISL: vor 6, 4, 4 -; CHECK-FISL: xxsel 32, 32, 33, 38 -; CHECK-FISL: vor 2, 0, +; CHECK-FISL: xxsel 34, 35, 34, 36 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test23 @@ -552,11 +484,7 @@ ; CHECK-FISL-LABEL: @test24 ; CHECK-FISL: vcmpequb 4, 4, 5 -; CHECK-FISL: vor 0, 3, 3 -; CHECK-FISL: vor 1, 2, 2 -; CHECK-FISL: vor 6, 4, 4 -; CHECK-FISL: xxsel 32, 32, 33, 38 -; CHECK-FISL: vor 2, 0, 0 +; CHECK-FISL: xxsel 34, 35, 34, 36 ; 
CHECK-FISL: blr ; CHECK-LE-LABEL: @test24 @@ -682,8 +610,6 @@ ; CHECK-FISL-LABEL: @test30 ; CHECK-FISL: lxvd2x 0, 0, 3 ; CHECK-FISL: xxlor 34, 0, 0 -; CHECK-FISL: vor 3, 2, 2 -; CHECK-FISL: vor 2, 3, 3 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test30 @@ -715,8 +641,7 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test32 -; CHECK-FISL: lxvw4x 0, 0, 3 -; CHECK-FISL: xxlor 34, 0, 0 +; CHECK-FISL: lxvw4x 34, 0, 3 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test32 @@ -734,8 +659,7 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test33 -; CHECK-FISL: vor 3, 2, 2 -; CHECK-FISL: stxvw4x 35, 0, 3 +; CHECK-FISL: stxvw4x 34, 0, 3 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test33 @@ -770,8 +694,7 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test33u -; CHECK-FISL: vor 3, 2, 2 -; CHECK-FISL: stxvw4x 35, 0, 3 +; CHECK-FISL: stxvw4x 34, 0, 3 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test33u @@ -789,8 +712,7 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test34 -; CHECK-FISL: lxvw4x 0, 0, 3 -; CHECK-FISL: xxlor 34, 0, 0 +; CHECK-FISL: lxvw4x 34, 0, 3 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test34 @@ -808,8 +730,7 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test35 -; CHECK-FISL: vor 3, 2, 2 -; CHECK-FISL: stxvw4x 35, 0, 3 +; CHECK-FISL: stxvw4x 34, 0, 3 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test35 @@ -1086,10 +1007,7 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test65 -; CHECK-FISL: vor 4, 3, 3 -; CHECK-FISL: vor 5, 2, 2 -; CHECK-FISL: vcmpequw 4, 5, 4 -; CHECK-FISL: vor 2, 4, 4 +; CHECK-FISL: vcmpequw 2, 2, 3 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test65 @@ -1107,8 +1025,8 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test66 -; CHECK-FISL: vcmpequw {{[0-9]+}}, 5, 4 -; CHECK-FISL: xxlnor 34, {{[0-9]+}}, {{[0-9]+}} +; CHECK-FISL: vcmpequw 2, 2, 3 +; CHECK-FISL: xxlnor 34, 34, 34 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test66