Index: llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -231,6 +231,45 @@ return false; } + /// SelectDSForm - Returns true if address N can be represented by the + /// addressing mode of DSForm instructions (a base register, plus a signed + /// 16-bit displacement that is a multiple of 4. + bool SelectDSForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) { + return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG, + Align(4)) == PPC::AM_DSForm; + } + + /// SelectDQForm - Returns true if address N can be represented by the + /// addressing mode of DQForm instructions (a base register, plus a signed + /// 16-bit displacement that is a multiple of 16. + bool SelectDQForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) { + return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG, + Align(16)) == PPC::AM_DQForm; + } + + /// SelectDForm - Returns true if address N can be represented by + /// the addressing mode of DForm instructions (a base register, plus a + /// signed 16-bit immediate. + bool SelectDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) { + return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG, + None) == PPC::AM_DForm; + } + + /// SelectXForm - Returns true if address N can be represented by the + /// addressing mode of XForm instructions (an indexed [r+r] operation). + bool SelectXForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) { + return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG, + None) == PPC::AM_XForm; + } + + /// SelectForceXForm - Given the specified address, force it to be + /// represented as an indexed [r+r] operation (an XForm instruction). 
+ bool SelectForceXForm(SDNode *Parent, SDValue N, SDValue &Disp, + SDValue &Base) { + return PPCLowering->SelectForceXFormMode(N, Disp, Base, *CurDAG) == + PPC::AM_XForm; + } + /// SelectAddrIdx - Given the specified address, check to see if it can be /// represented as an indexed [r+r] operation. /// This is for xform instructions whose associated displacement form is D. Index: llvm/lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- llvm/lib/Target/PowerPC/PPCISelLowering.h +++ llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -667,10 +667,55 @@ /// amount, otherwise return -1. int isQVALIGNIShuffleMask(SDNode *N); + // Flags for computing the optimal addressing mode for loads and stores. + enum MemOpFlags { + MOF_None = 0, + + // Type extend type flags. + MOF_SExt = 1, + MOF_ZExt = 1 << 1, + MOF_NoExt = 1 << 2, + + // Address computation flags. + MOF_NotAdd = 1 << 5, + MOF_RPlusSImm16 = 1 << 6, + MOF_RPlusLo = 1 << 7, + MOF_RPlusSImm16Mult4 = 1 << 8, + MOF_RPlusSImm16Mult16 = 1 << 9, + MOF_RPlusSImm34 = 1 << 10, + MOF_RPlusR = 1 << 11, + MOF_PCRel = 1 << 12, + MOF_AddrIsSImm32 = 1 << 13, + + // The in-memory type. + MOF_SubWInt = 1 << 15, + MOF_WordInt = 1 << 16, + MOF_DWInt = 1 << 17, + MOF_ScalFlt = 1 << 18, + MOF_Vec = 1 << 19, + MOF_Vec256 = 1 << 20, + + // Subtarget features. + MOF_SubtargetNoP9 = 1 << 26, + MOF_SubtargetP9 = 1 << 27, + MOF_SubtargetP10 = 1 << 28, + MOF_SubtargetSPE = 1 << 29 + }; + + // The addressing modes for loads and stores. 
+  enum AddrMode {
+    AM_None,
+    AM_DForm,
+    AM_DSForm,
+    AM_DQForm,
+    AM_XForm,
+  };
 } // end namespace PPC
 
 class PPCTargetLowering : public TargetLowering {
   const PPCSubtarget &Subtarget;
+  std::map<PPC::AddrMode, SmallVector<unsigned, 16>> AddrModesMap;
+  void initializeAddrModeMap();
 
 public:
   explicit PPCTargetLowering(const PPCTargetMachine &TM,
@@ -1035,6 +1080,15 @@
     const MCExpr *getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                                unsigned JTI,
                                                MCContext &Ctx) const override;
+    PPC::AddrMode getAddrModeForFlags(unsigned Flags) const;
+    unsigned computeMOFlags(const SDNode *Parent, SDValue N,
+                            SelectionDAG &DAG) const;
+    PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N,
+                                        SDValue &Disp, SDValue &Base,
+                                        SelectionDAG &DAG,
+                                        MaybeAlign Align) const;
+    PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base,
+                                       SelectionDAG &DAG) const;
 
     /// Structure that collects some common arguments that get passed around
     /// between the functions for call lowering.
@@ -1330,6 +1384,8 @@
   bool isIntS16Immediate(SDNode *N, int16_t &Imm);
   bool isIntS16Immediate(SDValue Op, int16_t &Imm);
+  bool isIntS32Immediate(SDNode *N, int32_t &Imm);
+  bool isIntS32Immediate(SDValue Op, int32_t &Imm);
   bool isIntS34Immediate(SDNode *N, int64_t &Imm);
   bool isIntS34Immediate(SDValue Op, int64_t &Imm);
 
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -141,6 +141,7 @@
 PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
                                      const PPCSubtarget &STI)
     : TargetLowering(TM), Subtarget(STI) {
+  initializeAddrModeMap();
   // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
   // arguments are at least 4/8 bytes aligned.
bool isPPC64 = Subtarget.isPPC64(); @@ -1391,6 +1392,50 @@ PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive(); } +/// initializeAddrModeMap - Initialize the map that relates the different +/// instruction formats of load and store instructions to a set of flags. +/// This ensures the load/store instruction is correctly matched during +/// instruction selection. +void PPCTargetLowering::initializeAddrModeMap() { + AddrModesMap[PPC::AM_DForm] = { + PPC::MOF_ZExt | PPC::MOF_RPlusSImm16 | PPC::MOF_WordInt, + PPC::MOF_ZExt | PPC::MOF_RPlusSImm16 | PPC::MOF_SubWInt, + PPC::MOF_SExt | PPC::MOF_RPlusSImm16 | PPC::MOF_SubWInt, + PPC::MOF_ZExt | PPC::MOF_RPlusLo | PPC::MOF_WordInt, + PPC::MOF_ZExt | PPC::MOF_RPlusLo | PPC::MOF_SubWInt, + PPC::MOF_SExt | PPC::MOF_RPlusLo | PPC::MOF_SubWInt, + PPC::MOF_ZExt | PPC::MOF_NotAdd | PPC::MOF_WordInt, + PPC::MOF_ZExt | PPC::MOF_NotAdd | PPC::MOF_SubWInt, + PPC::MOF_SExt | PPC::MOF_NotAdd | PPC::MOF_SubWInt, + PPC::MOF_ZExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_WordInt, + PPC::MOF_ZExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubWInt, + PPC::MOF_SExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubWInt, + PPC::MOF_RPlusSImm16 | PPC::MOF_ScalFlt | PPC::MOF_SubtargetNoP9, + PPC::MOF_RPlusLo | PPC::MOF_ScalFlt | PPC::MOF_SubtargetNoP9, + PPC::MOF_NotAdd | PPC::MOF_ScalFlt | PPC::MOF_SubtargetNoP9, + PPC::MOF_AddrIsSImm32 | PPC::MOF_ScalFlt | PPC::MOF_SubtargetNoP9, + }; + AddrModesMap[PPC::AM_DSForm] = { + PPC::MOF_SExt | PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_WordInt, + PPC::MOF_SExt | PPC::MOF_NotAdd | PPC::MOF_WordInt, + PPC::MOF_SExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_WordInt, + PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_DWInt, + PPC::MOF_NotAdd | PPC::MOF_DWInt, + PPC::MOF_AddrIsSImm32 | PPC::MOF_DWInt, + PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_ScalFlt | PPC::MOF_SubtargetP9, + PPC::MOF_NotAdd | PPC::MOF_ScalFlt | PPC::MOF_SubtargetP9, + PPC::MOF_AddrIsSImm32 | PPC::MOF_ScalFlt | PPC::MOF_SubtargetP9, + }; + AddrModesMap[PPC::AM_DQForm] = 
{
+      PPC::MOF_RPlusSImm16Mult16 | PPC::MOF_Vec | PPC::MOF_SubtargetP9,
+      PPC::MOF_RPlusSImm16Mult16 | PPC::MOF_Vec256 | PPC::MOF_SubtargetP10,
+      PPC::MOF_NotAdd | PPC::MOF_Vec | PPC::MOF_SubtargetP9,
+      PPC::MOF_NotAdd | PPC::MOF_Vec256 | PPC::MOF_SubtargetP10,
+      PPC::MOF_AddrIsSImm32 | PPC::MOF_Vec | PPC::MOF_SubtargetP9,
+      PPC::MOF_AddrIsSImm32 | PPC::MOF_Vec256 | PPC::MOF_SubtargetP10,
+  };
+}
+
 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine
 /// the desired ByVal argument alignment.
 static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
@@ -2432,6 +2477,37 @@
   return isIntS16Immediate(Op.getNode(), Imm);
 }
 
+/// isIntS32Immediate - This method tests to see if the node is either a 32-bit
+/// or 64-bit immediate, and if the value can be accurately represented as a
+/// sign extension from a 32-bit value. If so, this returns true and the
+/// immediate.
+bool llvm::isIntS32Immediate(SDNode *N, int32_t &Imm) {
+  if (!isa<ConstantSDNode>(N))
+    return false;
+
+  Imm = (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
+  if (N->getValueType(0) == MVT::i32)
+    return true;
+  else
+    return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
+}
+bool llvm::isIntS32Immediate(SDValue Op, int32_t &Imm) {
+  return isIntS32Immediate(Op.getNode(), Imm);
+}
+
+/// provablyDisjointOr - used when computing address flags for selecting
+/// loads and stores. If we have an OR, check if the LHS and RHS are provably
+/// disjoint. This is for when we have an OR of disjoint bitfields, we can
+/// codegen it as an add (for better address arithmetic).
+bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
+  if (N.getOpcode() != ISD::OR)
+    return false;
+  KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
+  if (!LHSKnown.Zero.getBoolValue())
+    return false;
+  KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
+  return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
+}
 
 /// SelectAddressEVXRegReg - Given the specified address, check to see if it can
 /// be represented as an indexed [r+r] operation.
@@ -17071,3 +17147,278 @@
 
   return SDValue();
 }
+
+/// getAddrModeForFlags - Based on the set of address flags, select the most
+/// optimal instruction format to match by.
+PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
+  // This is not a node we should be handling here.
+  if (Flags == PPC::MOF_None)
+    return PPC::AM_None;
+  // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
+  for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
+    if ((Flags & FlagSet) == FlagSet)
+      return PPC::AM_DForm;
+  for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
+    if ((Flags & FlagSet) == FlagSet)
+      return PPC::AM_DSForm;
+  for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
+    if ((Flags & FlagSet) == FlagSet)
+      return PPC::AM_DQForm;
+  // If no other forms are selected, return an X-Form, since X-Form
+  // instructions can always be matched.
+  return PPC::AM_XForm;
+}
+
+/// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
+/// the address flags of the load/store instruction that is to be matched.
+/// The address flags are stored in a map, which is then searched through
+/// to determine the optimal load/store instruction format to match by.
+unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
+                                           SelectionDAG &DAG) const {
+  unsigned FlagSet = PPC::MOF_None;
+
+  // Compute subtarget flags.
+  const PPCSubtarget &Subtarget =
+      static_cast<const PPCSubtarget &>(DAG.getSubtarget());
+  if (!Subtarget.hasP9Vector())
+    FlagSet |= PPC::MOF_SubtargetNoP9;
+  else {
+    FlagSet |= PPC::MOF_SubtargetP9;
+    if (Subtarget.isISA3_1())
+      FlagSet |= PPC::MOF_SubtargetP10;
+  }
+  if (Subtarget.hasSPE())
+    FlagSet |= PPC::MOF_SubtargetSPE;
+
+  // Mark this as something we don't want to handle here if it is atomic
+  // or pre-increment instruction.
+  const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
+  if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
+    if (LSB->isIndexed())
+      return PPC::MOF_None;
+  if (isa<AtomicSDNode>(Parent))
+    return PPC::MOF_None;
+
+  // Compute in-memory type flags. This is based on if there are scalars,
+  // floats or vectors.
+  assert(MN && "Parent should be a MemSDNode!");
+  EVT MemVT = MN->getMemoryVT();
+  unsigned Size = MemVT.getSizeInBits();
+  if (MemVT.isScalarInteger()) {
+    assert(Size <= 64 && "Not expecting scalar integers larger than 8 bytes!");
+    if (Size < 32)
+      FlagSet |= PPC::MOF_SubWInt;
+    else if (Size == 32)
+      FlagSet |= PPC::MOF_WordInt;
+    else
+      FlagSet |= PPC::MOF_DWInt;
+  } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Vectors only.
+    if (Size == 128)
+      FlagSet |= PPC::MOF_Vec;
+    else if (Size == 256)
+      FlagSet |= PPC::MOF_Vec256;
+    else
+      llvm_unreachable("Not expecting illegal vectors!");
+  } else { // Floating point type: can be scalar, f128 or vector types.
+    if (Size == 32 || Size == 64)
+      FlagSet |= PPC::MOF_ScalFlt;
+    else if (MemVT == MVT::f128 || MemVT.isVector())
+      FlagSet |= PPC::MOF_Vec;
+    else
+      llvm_unreachable("Not expecting illegal scalar floats!");
+  }
+
+  auto SetAlignFlagsForImm = [&](uint64_t Imm) {
+    if ((Imm & 0x3) == 0)
+      FlagSet |= PPC::MOF_RPlusSImm16Mult4;
+    if ((Imm & 0xf) == 0)
+      FlagSet |= PPC::MOF_RPlusSImm16Mult16;
+  };
+
+  // Compute flags for address computation.
+  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
+    // All 32-bit constants can be computed as LIS + Disp.
+    int32_t Imm32 = 0;
+    int64_t Imm34 = 0;
+    if (isIntS32Immediate(CN, Imm32)) { // Flag to handle 32-bit constants.
+      FlagSet |= PPC::MOF_AddrIsSImm32;
+      SetAlignFlagsForImm(Imm32);
+    } else if (isIntS34Immediate(CN, Imm34)) // Flag to handle 34-bit constants.
+      FlagSet |= PPC::MOF_RPlusSImm34;
+    else // Let constant materialization handle large constants.
+      FlagSet |= PPC::MOF_NotAdd;
+  } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
+    // This address can be represented as an addition of:
+    // - Register + Imm16 (possibly a multiple of 4/16)
+    // - Register + Imm34
+    // - Register + PPCISD::Lo
+    // - Register + Register
+    // In any case, we won't have to match this as Base + Zero.
+    SDValue RHS = N.getOperand(1);
+    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(RHS)) {
+      int16_t Imm16 = 0;
+      int64_t Imm34 = 0;
+      if (isIntS16Immediate(CN, Imm16)) {
+        FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
+        SetAlignFlagsForImm(Imm16);
+      } else if (isIntS34Immediate(CN, Imm34))
+        FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
+      else
+        FlagSet |= PPC::MOF_RPlusR; // Register.
+    } else if (RHS.getOpcode() == PPCISD::Lo &&
+               !cast<ConstantSDNode>(RHS.getOperand(1))->getZExtValue())
+      FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
+    else
+      FlagSet |= PPC::MOF_RPlusR;
+  } else // The address computation is not a constant or an addition.
+    FlagSet |= PPC::MOF_NotAdd;
+
+  // Compute type extension flags.
+  if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
+    switch (LN->getExtensionType()) {
+    case ISD::SEXTLOAD:
+      FlagSet |= PPC::MOF_SExt;
+      break;
+    case ISD::EXTLOAD:
+    case ISD::ZEXTLOAD:
+      FlagSet |= PPC::MOF_ZExt;
+      break;
+    case ISD::NON_EXTLOAD:
+      FlagSet |= PPC::MOF_NoExt;
+      break;
+    }
+  } else
+    FlagSet |= PPC::MOF_NoExt;
+
+  // For integers, no extension is the same as zero extension.
+  if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
+    FlagSet |= PPC::MOF_ZExt;
+    FlagSet &= ~PPC::MOF_NoExt;
+  }
+
+  // Prior to P10, constants that fit in 34-bits should be marked with
+  // `PPC::MOF_NotAdd` to match by D-Form.
+  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
+      (FlagSet & PPC::MOF_RPlusSImm34) && !(FlagSet & PPC::MOF_AddrIsSImm32) &&
+      !(FlagSet & PPC::MOF_SubtargetP10))
+    FlagSet |= PPC::MOF_NotAdd;
+
+  return FlagSet;
+}
+
+/// SelectForceXFormMode - Given the specified address, force it to be
+/// represented as an indexed [r+r] operation (an XForm instruction).
+PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp,
+                                                      SDValue &Base,
+                                                      SelectionDAG &DAG) const {
+
+  PPC::AddrMode Mode = PPC::AM_XForm;
+  // If the address is the result of an add, we will utilize the fact that the
+  // address calculation includes an implicit add. However, we can reduce
+  // register pressure if we do not materialize a constant just for use as the
+  // index register.
+  // The add is only removed if it is not an add of a value and a 16-bit signed
+  // constant and both have a single use.
+  int16_t ForceXFormImm = 0;
+  if (N.getOpcode() == ISD::ADD &&
+      (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
+       !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
+    Disp = N.getOperand(0);
+    Base = N.getOperand(1);
+    return Mode;
+  }
+
+  // Otherwise, use R0 as the base register.
+  Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
+                         N.getValueType());
+  Base = N;
+
+  return Mode;
+}
+
+/// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
+/// compute the address flags of the node, get the optimal address mode based
+/// on the flags, and set the Base and Disp based on the address mode.
+PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
+                                                       SDValue N, SDValue &Disp,
+                                                       SDValue &Base,
+                                                       SelectionDAG &DAG,
+                                                       MaybeAlign Align) const {
+  SDLoc dl(Parent);
+
+  // Compute the address flags.
+  unsigned Flags = computeMOFlags(Parent, N, DAG);
+
+  // Get the optimal address mode based on the Flags.
+  PPC::AddrMode Mode = getAddrModeForFlags(Flags);
+
+  // Set Base and Disp accordingly depending on the address mode.
+  switch (Mode) {
+  case PPC::AM_DForm:
+  case PPC::AM_DSForm:
+  case PPC::AM_DQForm: {
+    // Can represent as an ADD.
+    if (Flags & PPC::MOF_RPlusSImm16) {
+      int16_t Imm = 0;
+      SDValue Op0 = N.getOperand(0);
+      SDValue Op1 = N.getOperand(1);
+      if (isIntS16Immediate(Op1, Imm) && (!Align || isAligned(*Align, Imm))) {
+        Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
+        if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0)) {
+          Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+          fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
+        } else
+          Base = Op0;
+        break;
+      }
+    }
+    // Match LOAD (ADD (X, Lo(G))).
+    else if (Flags & PPC::MOF_RPlusLo) {
+      Disp = N.getOperand(1).getOperand(0); // The global address.
+      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
+             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
+             Disp.getOpcode() == ISD::TargetConstantPool ||
+             Disp.getOpcode() == ISD::TargetJumpTable);
+      Base = N.getOperand(0);
+      break;
+    }
+    // Match 16-bit and 32-bit constant addresses.
+    else if (Flags & PPC::MOF_AddrIsSImm32) {
+      ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
+      EVT CNType = CN->getValueType(0);
+      int16_t Imm16;
+      int32_t Imm32;
+      if (isIntS16Immediate(N, Imm16) && (!Align || isAligned(*Align, Imm16))) {
+        Disp = DAG.getTargetConstant(Imm16, dl, CNType);
+        Base = DAG.getRegister(Subtarget.isPPC64() ?
PPC::ZERO8 : PPC::ZERO, + CNType); + break; + } else if (isIntS32Immediate(N, Imm32) && + (!Align || isAligned(*Align, Imm32))) { + Disp = DAG.getTargetConstant((short)Imm32, dl, MVT::i32); + Base = DAG.getTargetConstant((Imm32 - (signed short)Imm32) >> 16, dl, + MVT::i32); + unsigned LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8; + Base = SDValue(DAG.getMachineNode(LIS, dl, CNType, Base), 0); + break; + } + } + // Otherwise, the PPC:MOF_NotAdd flag is set. Load/Store is Non-foldable. + Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout())); + if (FrameIndexSDNode *FI = dyn_cast(N)) { + Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); + fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); + } else + Base = N; + break; + } + case PPC::AM_None: { + break; + } + default: { + Base = N.getOperand(1); + Disp = N.getOperand(0); + break; + } + } + return Mode; +} Index: llvm/lib/Target/PowerPC/PPCInstr64Bit.td =================================================================== --- llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -1057,21 +1057,21 @@ let Interpretation64Bit = 1, isCodeGenOnly = 1 in def LHA8: DForm_1<42, (outs g8rc:$rD), (ins memri:$src), "lha $rD, $src", IIC_LdStLHA, - [(set i64:$rD, (sextloadi16 iaddr:$src))]>, + [(set i64:$rD, (sextloadi16 dfaddr:$src))]>, PPC970_DGroup_Cracked; def LWA : DSForm_1<58, 2, (outs g8rc:$rD), (ins memrix:$src), "lwa $rD, $src", IIC_LdStLWA, [(set i64:$rD, - (aligned4sextloadi32 iaddrX4:$src))]>, isPPC64, + (sextloadi32 dsfaddr:$src))]>, isPPC64, PPC970_DGroup_Cracked; let Interpretation64Bit = 1, isCodeGenOnly = 1 in def LHAX8: XForm_1_memOp<31, 343, (outs g8rc:$rD), (ins memrr:$src), "lhax $rD, $src", IIC_LdStLHA, - [(set i64:$rD, (sextloadi16 xaddr:$src))]>, + [(set i64:$rD, (sextloadi16 xfaddr:$src))]>, PPC970_DGroup_Cracked; def LWAX : XForm_1_memOp<31, 341, (outs g8rc:$rD), (ins memrr:$src), "lwax $rD, $src", IIC_LdStLHA, - [(set i64:$rD, 
(sextloadi32 xaddrX4:$src))]>, isPPC64, + [(set i64:$rD, (sextloadi32 xfaddr:$src))]>, isPPC64, PPC970_DGroup_Cracked; // For fast-isel: let isCodeGenOnly = 1, mayLoad = 1, hasSideEffects = 0 in { @@ -1112,23 +1112,23 @@ let PPC970_Unit = 2 in { def LBZ8 : DForm_1<34, (outs g8rc:$rD), (ins memri:$src), "lbz $rD, $src", IIC_LdStLoad, - [(set i64:$rD, (zextloadi8 iaddr:$src))]>; + [(set i64:$rD, (zextloadi8 dfaddr:$src))]>; def LHZ8 : DForm_1<40, (outs g8rc:$rD), (ins memri:$src), "lhz $rD, $src", IIC_LdStLoad, - [(set i64:$rD, (zextloadi16 iaddr:$src))]>; + [(set i64:$rD, (zextloadi16 dfaddr:$src))]>; def LWZ8 : DForm_1<32, (outs g8rc:$rD), (ins memri:$src), "lwz $rD, $src", IIC_LdStLoad, - [(set i64:$rD, (zextloadi32 iaddr:$src))]>, isPPC64; + [(set i64:$rD, (zextloadi32 dfaddr:$src))]>, isPPC64; def LBZX8 : XForm_1_memOp<31, 87, (outs g8rc:$rD), (ins memrr:$src), "lbzx $rD, $src", IIC_LdStLoad, - [(set i64:$rD, (zextloadi8 xaddr:$src))]>; + [(set i64:$rD, (zextloadi8 xfaddr:$src))]>; def LHZX8 : XForm_1_memOp<31, 279, (outs g8rc:$rD), (ins memrr:$src), "lhzx $rD, $src", IIC_LdStLoad, - [(set i64:$rD, (zextloadi16 xaddr:$src))]>; + [(set i64:$rD, (zextloadi16 xfaddr:$src))]>; def LWZX8 : XForm_1_memOp<31, 23, (outs g8rc:$rD), (ins memrr:$src), "lwzx $rD, $src", IIC_LdStLoad, - [(set i64:$rD, (zextloadi32 xaddr:$src))]>; + [(set i64:$rD, (zextloadi32 xfaddr:$src))]>; // Update forms. @@ -1173,7 +1173,7 @@ let PPC970_Unit = 2 in { def LD : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src), "ld $rD, $src", IIC_LdStLD, - [(set i64:$rD, (aligned4load iaddrX4:$src))]>, isPPC64; + [(set i64:$rD, (aligned4load dsfaddr:$src))]>, isPPC64; // The following four definitions are selected for small code model only. // Otherwise, we need to create two instructions to form a 32-bit offset, // so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select(). 
@@ -1196,10 +1196,10 @@ def LDX : XForm_1_memOp<31, 21, (outs g8rc:$rD), (ins memrr:$src), "ldx $rD, $src", IIC_LdStLD, - [(set i64:$rD, (load xaddrX4:$src))]>, isPPC64; + [(set i64:$rD, (load xfaddr:$src))]>, isPPC64; def LDBRX : XForm_1_memOp<31, 532, (outs g8rc:$rD), (ins memrr:$src), "ldbrx $rD, $src", IIC_LdStLoad, - [(set i64:$rD, (PPClbrx xoaddr:$src, i64))]>, isPPC64; + [(set i64:$rD, (PPClbrx forcexfaddr:$src, i64))]>, isPPC64; let mayLoad = 1, hasSideEffects = 0, isCodeGenOnly = 1 in { def LHBRX8 : XForm_1_memOp<31, 790, (outs g8rc:$rD), (ins memrr:$src), @@ -1356,38 +1356,38 @@ // Truncating stores. def STB8 : DForm_1<38, (outs), (ins g8rc:$rS, memri:$src), "stb $rS, $src", IIC_LdStStore, - [(truncstorei8 i64:$rS, iaddr:$src)]>; + [(truncstorei8 i64:$rS, dfaddr:$src)]>; def STH8 : DForm_1<44, (outs), (ins g8rc:$rS, memri:$src), "sth $rS, $src", IIC_LdStStore, - [(truncstorei16 i64:$rS, iaddr:$src)]>; + [(truncstorei16 i64:$rS, dfaddr:$src)]>; def STW8 : DForm_1<36, (outs), (ins g8rc:$rS, memri:$src), "stw $rS, $src", IIC_LdStStore, - [(truncstorei32 i64:$rS, iaddr:$src)]>; + [(truncstorei32 i64:$rS, dfaddr:$src)]>; def STBX8 : XForm_8_memOp<31, 215, (outs), (ins g8rc:$rS, memrr:$dst), "stbx $rS, $dst", IIC_LdStStore, - [(truncstorei8 i64:$rS, xaddr:$dst)]>, + [(truncstorei8 i64:$rS, xfaddr:$dst)]>, PPC970_DGroup_Cracked; def STHX8 : XForm_8_memOp<31, 407, (outs), (ins g8rc:$rS, memrr:$dst), "sthx $rS, $dst", IIC_LdStStore, - [(truncstorei16 i64:$rS, xaddr:$dst)]>, + [(truncstorei16 i64:$rS, xfaddr:$dst)]>, PPC970_DGroup_Cracked; def STWX8 : XForm_8_memOp<31, 151, (outs), (ins g8rc:$rS, memrr:$dst), "stwx $rS, $dst", IIC_LdStStore, - [(truncstorei32 i64:$rS, xaddr:$dst)]>, + [(truncstorei32 i64:$rS, xfaddr:$dst)]>, PPC970_DGroup_Cracked; } // Interpretation64Bit // Normal 8-byte stores. 
def STD : DSForm_1<62, 0, (outs), (ins g8rc:$rS, memrix:$dst), "std $rS, $dst", IIC_LdStSTD, - [(aligned4store i64:$rS, iaddrX4:$dst)]>, isPPC64; + [(aligned4store i64:$rS, dsfaddr:$dst)]>, isPPC64; def STDX : XForm_8_memOp<31, 149, (outs), (ins g8rc:$rS, memrr:$dst), "stdx $rS, $dst", IIC_LdStSTD, - [(store i64:$rS, xaddrX4:$dst)]>, isPPC64, + [(store i64:$rS, xfaddr:$dst)]>, isPPC64, PPC970_DGroup_Cracked; def STDBRX: XForm_8_memOp<31, 660, (outs), (ins g8rc:$rS, memrr:$dst), "stdbrx $rS, $dst", IIC_LdStStore, - [(PPCstbrx i64:$rS, xoaddr:$dst, i64)]>, isPPC64, + [(PPCstbrx i64:$rS, forcexfaddr:$dst, i64)]>, isPPC64, PPC970_DGroup_Cracked; } @@ -1520,26 +1520,26 @@ (i64not $in)>; // Extending loads with i64 targets. -def : Pat<(zextloadi1 iaddr:$src), - (LBZ8 iaddr:$src)>; -def : Pat<(zextloadi1 xaddr:$src), - (LBZX8 xaddr:$src)>; -def : Pat<(extloadi1 iaddr:$src), - (LBZ8 iaddr:$src)>; -def : Pat<(extloadi1 xaddr:$src), - (LBZX8 xaddr:$src)>; -def : Pat<(extloadi8 iaddr:$src), - (LBZ8 iaddr:$src)>; -def : Pat<(extloadi8 xaddr:$src), - (LBZX8 xaddr:$src)>; -def : Pat<(extloadi16 iaddr:$src), - (LHZ8 iaddr:$src)>; -def : Pat<(extloadi16 xaddr:$src), - (LHZX8 xaddr:$src)>; -def : Pat<(extloadi32 iaddr:$src), - (LWZ8 iaddr:$src)>; -def : Pat<(extloadi32 xaddr:$src), - (LWZX8 xaddr:$src)>; +def : Pat<(zextloadi1 dfaddr:$src), + (LBZ8 dfaddr:$src)>; +def : Pat<(zextloadi1 xfaddr:$src), + (LBZX8 xfaddr:$src)>; +def : Pat<(extloadi1 dfaddr:$src), + (LBZ8 dfaddr:$src)>; +def : Pat<(extloadi1 xfaddr:$src), + (LBZX8 xfaddr:$src)>; +def : Pat<(extloadi8 dfaddr:$src), + (LBZ8 dfaddr:$src)>; +def : Pat<(extloadi8 xfaddr:$src), + (LBZX8 xfaddr:$src)>; +def : Pat<(extloadi16 dfaddr:$src), + (LHZ8 dfaddr:$src)>; +def : Pat<(extloadi16 xfaddr:$src), + (LHZX8 xfaddr:$src)>; +def : Pat<(extloadi32 dfaddr:$src), + (LWZ8 dfaddr:$src)>; +def : Pat<(extloadi32 xfaddr:$src), + (LWZX8 xfaddr:$src)>; // Standard shifts. 
These are represented separately from the real shifts above // so that we can distinguish between shifts that allow 6-bit and 7-bit shift @@ -1591,12 +1591,12 @@ // Patterns to match r+r indexed loads and stores for // addresses without at least 4-byte alignment. -def : Pat<(i64 (unaligned4sextloadi32 xoaddr:$src)), - (LWAX xoaddr:$src)>; -def : Pat<(i64 (unaligned4load xoaddr:$src)), - (LDX xoaddr:$src)>; -def : Pat<(unaligned4store i64:$rS, xoaddr:$dst), - (STDX $rS, xoaddr:$dst)>; +def : Pat<(i64 (unaligned4sextloadi32 forcexfaddr:$src)), + (LWAX forcexfaddr:$src)>; +def : Pat<(i64 (unaligned4load forcexfaddr:$src)), + (LDX forcexfaddr:$src)>; +def : Pat<(unaligned4store i64:$rS, forcexfaddr:$dst), + (STDX $rS, forcexfaddr:$dst)>; // 64-bits atomic loads and stores def : Pat<(atomic_load_64 iaddrX4:$src), (LD memrix:$src)>; Index: llvm/lib/Target/PowerPC/PPCInstrAltivec.td =================================================================== --- llvm/lib/Target/PowerPC/PPCInstrAltivec.td +++ llvm/lib/Target/PowerPC/PPCInstrAltivec.td @@ -414,46 +414,46 @@ let PPC970_Unit = 2, mayLoad = 1, mayStore = 0 in { // Loads. 
def LVEBX: XForm_1_memOp<31, 7, (outs vrrc:$vD), (ins memrr:$src), "lvebx $vD, $src", IIC_LdStLoad, - [(set v16i8:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>; + [(set v16i8:$vD, (int_ppc_altivec_lvebx forcexfaddr:$src))]>; def LVEHX: XForm_1_memOp<31, 39, (outs vrrc:$vD), (ins memrr:$src), "lvehx $vD, $src", IIC_LdStLoad, - [(set v8i16:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>; + [(set v8i16:$vD, (int_ppc_altivec_lvehx forcexfaddr:$src))]>; def LVEWX: XForm_1_memOp<31, 71, (outs vrrc:$vD), (ins memrr:$src), "lvewx $vD, $src", IIC_LdStLoad, - [(set v4i32:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>; + [(set v4i32:$vD, (int_ppc_altivec_lvewx forcexfaddr:$src))]>; def LVX : XForm_1_memOp<31, 103, (outs vrrc:$vD), (ins memrr:$src), "lvx $vD, $src", IIC_LdStLoad, - [(set v4i32:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>; + [(set v4i32:$vD, (int_ppc_altivec_lvx forcexfaddr:$src))]>; def LVXL : XForm_1_memOp<31, 359, (outs vrrc:$vD), (ins memrr:$src), "lvxl $vD, $src", IIC_LdStLoad, - [(set v4i32:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>; + [(set v4i32:$vD, (int_ppc_altivec_lvxl forcexfaddr:$src))]>; } def LVSL : XForm_1_memOp<31, 6, (outs vrrc:$vD), (ins memrr:$src), "lvsl $vD, $src", IIC_LdStLoad, - [(set v16i8:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>, + [(set v16i8:$vD, (int_ppc_altivec_lvsl forcexfaddr:$src))]>, PPC970_Unit_LSU; def LVSR : XForm_1_memOp<31, 38, (outs vrrc:$vD), (ins memrr:$src), "lvsr $vD, $src", IIC_LdStLoad, - [(set v16i8:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>, + [(set v16i8:$vD, (int_ppc_altivec_lvsr forcexfaddr:$src))]>, PPC970_Unit_LSU; let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in { // Stores. 
def STVEBX: XForm_8_memOp<31, 135, (outs), (ins vrrc:$rS, memrr:$dst), "stvebx $rS, $dst", IIC_LdStStore, - [(int_ppc_altivec_stvebx v16i8:$rS, xoaddr:$dst)]>; + [(int_ppc_altivec_stvebx v16i8:$rS, forcexfaddr:$dst)]>; def STVEHX: XForm_8_memOp<31, 167, (outs), (ins vrrc:$rS, memrr:$dst), "stvehx $rS, $dst", IIC_LdStStore, - [(int_ppc_altivec_stvehx v8i16:$rS, xoaddr:$dst)]>; + [(int_ppc_altivec_stvehx v8i16:$rS, forcexfaddr:$dst)]>; def STVEWX: XForm_8_memOp<31, 199, (outs), (ins vrrc:$rS, memrr:$dst), "stvewx $rS, $dst", IIC_LdStStore, - [(int_ppc_altivec_stvewx v4i32:$rS, xoaddr:$dst)]>; + [(int_ppc_altivec_stvewx v4i32:$rS, forcexfaddr:$dst)]>; def STVX : XForm_8_memOp<31, 231, (outs), (ins vrrc:$rS, memrr:$dst), "stvx $rS, $dst", IIC_LdStStore, - [(int_ppc_altivec_stvx v4i32:$rS, xoaddr:$dst)]>; + [(int_ppc_altivec_stvx v4i32:$rS, forcexfaddr:$dst)]>; def STVXL : XForm_8_memOp<31, 487, (outs), (ins vrrc:$rS, memrr:$dst), "stvxl $rS, $dst", IIC_LdStStore, - [(int_ppc_altivec_stvxl v4i32:$rS, xoaddr:$dst)]>; + [(int_ppc_altivec_stvxl v4i32:$rS, forcexfaddr:$dst)]>; } let PPC970_Unit = 5 in { // VALU Operations. @@ -890,11 +890,11 @@ def : Pat<(v4i32 (usubsat v4i32:$vA, v4i32:$vB)), (v4i32 (VSUBUWS $vA, $vB))>; // Loads. -def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>; +def : Pat<(v4i32 (load forcexfaddr:$src)), (LVX forcexfaddr:$src)>; // Stores. -def : Pat<(store v4i32:$rS, xoaddr:$dst), - (STVX $rS, xoaddr:$dst)>; +def : Pat<(store v4i32:$rS, forcexfaddr:$dst), + (STVX $rS, forcexfaddr:$dst)>; // Bit conversions. def : Pat<(v16i8 (bitconvert (v8i16 VRRC:$src))), (v16i8 VRRC:$src)>; Index: llvm/lib/Target/PowerPC/PPCInstrInfo.td =================================================================== --- llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -1091,6 +1091,13 @@ // PC Relative Address def pcreladdr : ComplexPattern; +// Load and Store Instruction Selection addressing modes. 
+def dfaddr : ComplexPattern; +def dsfaddr : ComplexPattern; +def dqfaddr : ComplexPattern; +def xfaddr : ComplexPattern; +def forcexfaddr : ComplexPattern; + //===----------------------------------------------------------------------===// // PowerPC Instruction Predicate Definitions. def In32BitMode : Predicate<"!Subtarget->isPPC64()">; @@ -2165,25 +2172,25 @@ let PPC970_Unit = 2 in { def LBZ : DForm_1<34, (outs gprc:$rD), (ins memri:$src), "lbz $rD, $src", IIC_LdStLoad, - [(set i32:$rD, (zextloadi8 iaddr:$src))]>; + [(set i32:$rD, (zextloadi8 dfaddr:$src))]>; def LHA : DForm_1<42, (outs gprc:$rD), (ins memri:$src), "lha $rD, $src", IIC_LdStLHA, - [(set i32:$rD, (sextloadi16 iaddr:$src))]>, + [(set i32:$rD, (sextloadi16 dfaddr:$src))]>, PPC970_DGroup_Cracked; def LHZ : DForm_1<40, (outs gprc:$rD), (ins memri:$src), "lhz $rD, $src", IIC_LdStLoad, - [(set i32:$rD, (zextloadi16 iaddr:$src))]>; + [(set i32:$rD, (zextloadi16 dfaddr:$src))]>; def LWZ : DForm_1<32, (outs gprc:$rD), (ins memri:$src), "lwz $rD, $src", IIC_LdStLoad, - [(set i32:$rD, (load iaddr:$src))]>; + [(set i32:$rD, (load dfaddr:$src))]>; let Predicates = [HasFPU] in { def LFS : DForm_1<48, (outs f4rc:$rD), (ins memri:$src), "lfs $rD, $src", IIC_LdStLFD, - [(set f32:$rD, (load iaddr:$src))]>; + [(set f32:$rD, (load dfaddr:$src))]>; def LFD : DForm_1<50, (outs f8rc:$rD), (ins memri:$src), "lfd $rD, $src", IIC_LdStLFD, - [(set f64:$rD, (load iaddr:$src))]>; + [(set f64:$rD, (load dfaddr:$src))]>; } @@ -2268,17 +2275,17 @@ let PPC970_Unit = 2, mayLoad = 1, mayStore = 0 in { def LBZX : XForm_1_memOp<31, 87, (outs gprc:$rD), (ins memrr:$src), "lbzx $rD, $src", IIC_LdStLoad, - [(set i32:$rD, (zextloadi8 xaddr:$src))]>; + [(set i32:$rD, (zextloadi8 xfaddr:$src))]>; def LHAX : XForm_1_memOp<31, 343, (outs gprc:$rD), (ins memrr:$src), "lhax $rD, $src", IIC_LdStLHA, - [(set i32:$rD, (sextloadi16 xaddr:$src))]>, + [(set i32:$rD, (sextloadi16 xfaddr:$src))]>, PPC970_DGroup_Cracked; def LHZX : XForm_1_memOp<31, 
279, (outs gprc:$rD), (ins memrr:$src), "lhzx $rD, $src", IIC_LdStLoad, - [(set i32:$rD, (zextloadi16 xaddr:$src))]>; + [(set i32:$rD, (zextloadi16 xfaddr:$src))]>; def LWZX : XForm_1_memOp<31, 23, (outs gprc:$rD), (ins memrr:$src), "lwzx $rD, $src", IIC_LdStLoad, - [(set i32:$rD, (load xaddr:$src))]>; + [(set i32:$rD, (load xfaddr:$src))]>; def LHBRX : XForm_1_memOp<31, 790, (outs gprc:$rD), (ins memrr:$src), "lhbrx $rD, $src", IIC_LdStLoad, [(set i32:$rD, (PPClbrx xoaddr:$src, i16))]>; @@ -2289,10 +2296,10 @@ let Predicates = [HasFPU] in { def LFSX : XForm_25_memOp<31, 535, (outs f4rc:$frD), (ins memrr:$src), "lfsx $frD, $src", IIC_LdStLFD, - [(set f32:$frD, (load xaddr:$src))]>; + [(set f32:$frD, (load xfaddr:$src))]>; def LFDX : XForm_25_memOp<31, 599, (outs f8rc:$frD), (ins memrr:$src), "lfdx $frD, $src", IIC_LdStLFD, - [(set f64:$frD, (load xaddr:$src))]>; + [(set f64:$frD, (load xfaddr:$src))]>; def LFIWAX : XForm_25_memOp<31, 855, (outs f8rc:$frD), (ins memrr:$src), "lfiwax $frD, $src", IIC_LdStLFD, @@ -2316,20 +2323,20 @@ let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in { def STB : DForm_1<38, (outs), (ins gprc:$rS, memri:$dst), "stb $rS, $dst", IIC_LdStStore, - [(truncstorei8 i32:$rS, iaddr:$dst)]>; + [(truncstorei8 i32:$rS, dfaddr:$dst)]>; def STH : DForm_1<44, (outs), (ins gprc:$rS, memri:$dst), "sth $rS, $dst", IIC_LdStStore, - [(truncstorei16 i32:$rS, iaddr:$dst)]>; + [(truncstorei16 i32:$rS, dfaddr:$dst)]>; def STW : DForm_1<36, (outs), (ins gprc:$rS, memri:$dst), "stw $rS, $dst", IIC_LdStStore, - [(store i32:$rS, iaddr:$dst)]>; + [(store i32:$rS, dfaddr:$dst)]>; let Predicates = [HasFPU] in { def STFS : DForm_1<52, (outs), (ins f4rc:$rS, memri:$dst), "stfs $rS, $dst", IIC_LdStSTFD, - [(store f32:$rS, iaddr:$dst)]>; + [(store f32:$rS, dfaddr:$dst)]>; def STFD : DForm_1<54, (outs), (ins f8rc:$rS, memri:$dst), "stfd $rS, $dst", IIC_LdStSTFD, - [(store f64:$rS, iaddr:$dst)]>; + [(store f64:$rS, dfaddr:$dst)]>; } } @@ -2372,15 +2379,15 @@ let 
PPC970_Unit = 2 in { def STBX : XForm_8_memOp<31, 215, (outs), (ins gprc:$rS, memrr:$dst), "stbx $rS, $dst", IIC_LdStStore, - [(truncstorei8 i32:$rS, xaddr:$dst)]>, + [(truncstorei8 i32:$rS, xfaddr:$dst)]>, PPC970_DGroup_Cracked; def STHX : XForm_8_memOp<31, 407, (outs), (ins gprc:$rS, memrr:$dst), "sthx $rS, $dst", IIC_LdStStore, - [(truncstorei16 i32:$rS, xaddr:$dst)]>, + [(truncstorei16 i32:$rS, xfaddr:$dst)]>, PPC970_DGroup_Cracked; def STWX : XForm_8_memOp<31, 151, (outs), (ins gprc:$rS, memrr:$dst), "stwx $rS, $dst", IIC_LdStStore, - [(store i32:$rS, xaddr:$dst)]>, + [(store i32:$rS, xfaddr:$dst)]>, PPC970_DGroup_Cracked; def STHBRX: XForm_8_memOp<31, 918, (outs), (ins gprc:$rS, memrr:$dst), @@ -2399,10 +2406,10 @@ def STFSX : XForm_28_memOp<31, 663, (outs), (ins f4rc:$frS, memrr:$dst), "stfsx $frS, $dst", IIC_LdStSTFD, - [(store f32:$frS, xaddr:$dst)]>; + [(store f32:$frS, xfaddr:$dst)]>; def STFDX : XForm_28_memOp<31, 727, (outs), (ins f8rc:$frS, memrr:$dst), "stfdx $frS, $dst", IIC_LdStSTFD, - [(store f64:$frS, xaddr:$dst)]>; + [(store f64:$frS, xfaddr:$dst)]>; } } @@ -3482,27 +3489,27 @@ def : Pat<(shl i32:$rS, i32:$rB), (SLW $rS, $rB)>; -def : Pat<(i32 (zextloadi1 iaddr:$src)), - (LBZ iaddr:$src)>; -def : Pat<(i32 (zextloadi1 xaddr:$src)), - (LBZX xaddr:$src)>; -def : Pat<(i32 (extloadi1 iaddr:$src)), - (LBZ iaddr:$src)>; -def : Pat<(i32 (extloadi1 xaddr:$src)), - (LBZX xaddr:$src)>; -def : Pat<(i32 (extloadi8 iaddr:$src)), - (LBZ iaddr:$src)>; -def : Pat<(i32 (extloadi8 xaddr:$src)), - (LBZX xaddr:$src)>; -def : Pat<(i32 (extloadi16 iaddr:$src)), - (LHZ iaddr:$src)>; -def : Pat<(i32 (extloadi16 xaddr:$src)), - (LHZX xaddr:$src)>; +def : Pat<(i32 (zextloadi1 dfaddr:$src)), + (LBZ dfaddr:$src)>; +def : Pat<(i32 (zextloadi1 xfaddr:$src)), + (LBZX xfaddr:$src)>; +def : Pat<(i32 (extloadi1 dfaddr:$src)), + (LBZ dfaddr:$src)>; +def : Pat<(i32 (extloadi1 xfaddr:$src)), + (LBZX xfaddr:$src)>; +def : Pat<(i32 (extloadi8 dfaddr:$src)), + (LBZ dfaddr:$src)>; +def 
: Pat<(i32 (extloadi8 xfaddr:$src)), + (LBZX xfaddr:$src)>; +def : Pat<(i32 (extloadi16 dfaddr:$src)), + (LHZ dfaddr:$src)>; +def : Pat<(i32 (extloadi16 xfaddr:$src)), + (LHZX xfaddr:$src)>; let Predicates = [HasFPU] in { -def : Pat<(f64 (extloadf32 iaddr:$src)), - (COPY_TO_REGCLASS (LFS iaddr:$src), F8RC)>; -def : Pat<(f64 (extloadf32 xaddr:$src)), - (COPY_TO_REGCLASS (LFSX xaddr:$src), F8RC)>; +def : Pat<(f64 (extloadf32 dfaddr:$src)), + (COPY_TO_REGCLASS (LFS dfaddr:$src), F8RC)>; +def : Pat<(f64 (extloadf32 xfaddr:$src)), + (COPY_TO_REGCLASS (LFSX xfaddr:$src), F8RC)>; def : Pat<(f64 (any_fpextend f32:$src)), (COPY_TO_REGCLASS $src, F8RC)>; Index: llvm/lib/Target/PowerPC/PPCInstrVSX.td =================================================================== --- llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -315,13 +315,13 @@ let CodeSize = 3 in def XFLOADf64 : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src), "#XFLOADf64", - [(set f64:$XT, (load xoaddr:$src))]>; + [(set f64:$XT, (load forcexfaddr:$src))]>; let Predicates = [HasVSX, HasOnlySwappingMemOps] in def LXVD2X : XX1Form_memOp<31, 844, (outs vsrc:$XT), (ins memrr:$src), "lxvd2x $XT, $src", IIC_LdStLFD, - [(set v2f64:$XT, (int_ppc_vsx_lxvd2x xoaddr:$src))]>; + [(set v2f64:$XT, (int_ppc_vsx_lxvd2x forcexfaddr:$src))]>; def LXVDSX : XX1Form_memOp<31, 332, (outs vsrc:$XT), (ins memrr:$src), @@ -346,7 +346,7 @@ let CodeSize = 3 in def XFSTOREf64 : PseudoXFormMemOp<(outs), (ins vsfrc:$XT, memrr:$dst), "#XFSTOREf64", - [(store f64:$XT, xoaddr:$dst)]>; + [(store f64:$XT, forcexfaddr:$dst)]>; let Predicates = [HasVSX, HasOnlySwappingMemOps] in { // The behaviour of this instruction is endianness-specific so we provide no @@ -1126,15 +1126,15 @@ let CodeSize = 3 in def XFLOADf32 : PseudoXFormMemOp<(outs vssrc:$XT), (ins memrr:$src), "#XFLOADf32", - [(set f32:$XT, (load xoaddr:$src))]>; + [(set f32:$XT, (load forcexfaddr:$src))]>; // Pseudo instruction LIWAX will be expanded to 
LXSIWAX or LFIWAX later def LIWAX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src), "#LIWAX", - [(set f64:$XT, (PPClfiwax xoaddr:$src))]>; + [(set f64:$XT, (PPClfiwax forcexfaddr:$src))]>; // Pseudo instruction LIWZX will be expanded to LXSIWZX or LFIWZX later def LIWZX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src), "#LIWZX", - [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>; + [(set f64:$XT, (PPClfiwzx forcexfaddr:$src))]>; } // mayLoad // VSX scalar stores introduced in ISA 2.07 @@ -1149,11 +1149,11 @@ let CodeSize = 3 in def XFSTOREf32 : PseudoXFormMemOp<(outs), (ins vssrc:$XT, memrr:$dst), "#XFSTOREf32", - [(store f32:$XT, xoaddr:$dst)]>; + [(store f32:$XT, forcexfaddr:$dst)]>; // Pseudo instruction STIWX will be expanded to STXSIWX or STFIWX later def STIWX : PseudoXFormMemOp<(outs), (ins vsfrc:$XT, memrr:$dst), "#STIWX", - [(PPCstfiwx f64:$XT, xoaddr:$dst)]>; + [(PPCstfiwx f64:$XT, forcexfaddr:$dst)]>; } // mayStore // VSX Elementary Scalar FP arithmetic (SP) @@ -1680,9 +1680,9 @@ // Load as Integer Byte/Halfword & Zero Indexed def LXSIBZX : X_XT6_RA5_RB5<31, 781, "lxsibzx", vsfrc, - [(set f64:$XT, (PPClxsizx xoaddr:$src, 1))]>; + [(set f64:$XT, (PPClxsizx forcexfaddr:$src, 1))]>; def LXSIHZX : X_XT6_RA5_RB5<31, 813, "lxsihzx", vsfrc, - [(set f64:$XT, (PPClxsizx xoaddr:$src, 2))]>; + [(set f64:$XT, (PPClxsizx forcexfaddr:$src, 2))]>; // Load Vector Halfword*8/Byte*16 Indexed def LXVH8X : X_XT6_RA5_RB5<31, 812, "lxvh8x" , vsrc, []>; @@ -1690,7 +1690,7 @@ // Load Vector Indexed def LXVX : X_XT6_RA5_RB5<31, 268, "lxvx" , vsrc, - [(set v2f64:$XT, (load xaddrX16:$src))]>; + [(set v2f64:$XT, (load xfaddr:$src))]>; // Load Vector (Left-justified) with Length def LXVL : XX1Form_memOp<31, 269, (outs vsrc:$XT), (ins memr:$src, g8rc:$rB), "lxvl $XT, $src, $rB", IIC_LdStLoad, @@ -1718,9 +1718,9 @@ // Store as Integer Byte/Halfword Indexed def STXSIBX : X_XS6_RA5_RB5<31, 909, "stxsibx" , vsfrc, - [(PPCstxsix f64:$XT, xoaddr:$dst, 1)]>; + [(PPCstxsix f64:$XT, 
forcexfaddr:$dst, 1)]>; def STXSIHX : X_XS6_RA5_RB5<31, 941, "stxsihx" , vsfrc, - [(PPCstxsix f64:$XT, xoaddr:$dst, 2)]>; + [(PPCstxsix f64:$XT, forcexfaddr:$dst, 2)]>; let isCodeGenOnly = 1 in { def STXSIBXv : X_XS6_RA5_RB5<31, 909, "stxsibx" , vsrc, []>; def STXSIHXv : X_XS6_RA5_RB5<31, 941, "stxsihx" , vsrc, []>; @@ -1732,7 +1732,7 @@ // Store Vector Indexed def STXVX : X_XS6_RA5_RB5<31, 396, "stxvx" , vsrc, - [(store v2f64:$XT, xaddrX16:$dst)]>; + [(store v2f64:$XT, xfaddr:$dst)]>; // Store Vector (Left-justified) with Length def STXVL : XX1Form_memOp<31, 397, (outs), @@ -1749,16 +1749,16 @@ def DFLOADf32 : PPCPostRAExpPseudo<(outs vssrc:$XT), (ins memrix:$src), "#DFLOADf32", - [(set f32:$XT, (load iaddrX4:$src))]>; + [(set f32:$XT, (load dsfaddr:$src))]>; def DFLOADf64 : PPCPostRAExpPseudo<(outs vsfrc:$XT), (ins memrix:$src), "#DFLOADf64", - [(set f64:$XT, (load iaddrX4:$src))]>; + [(set f64:$XT, (load dsfaddr:$src))]>; def DFSTOREf32 : PPCPostRAExpPseudo<(outs), (ins vssrc:$XT, memrix:$dst), "#DFSTOREf32", - [(store f32:$XT, iaddrX4:$dst)]>; + [(store f32:$XT, dsfaddr:$dst)]>; def DFSTOREf64 : PPCPostRAExpPseudo<(outs), (ins vsfrc:$XT, memrix:$dst), "#DFSTOREf64", - [(store f64:$XT, iaddrX4:$dst)]>; + [(store f64:$XT, dsfaddr:$dst)]>; let mayStore = 1 in { def SPILLTOVSR_STX : PseudoXFormMemOp<(outs), @@ -1821,19 +1821,19 @@ } def ScalarLoads { - dag Li8 = (i32 (extloadi8 xoaddr:$src)); - dag ZELi8 = (i32 (zextloadi8 xoaddr:$src)); - dag ZELi8i64 = (i64 (zextloadi8 xoaddr:$src)); - dag SELi8 = (i32 (sext_inreg (extloadi8 xoaddr:$src), i8)); - dag SELi8i64 = (i64 (sext_inreg (extloadi8 xoaddr:$src), i8)); - - dag Li16 = (i32 (extloadi16 xoaddr:$src)); - dag ZELi16 = (i32 (zextloadi16 xoaddr:$src)); - dag ZELi16i64 = (i64 (zextloadi16 xoaddr:$src)); - dag SELi16 = (i32 (sextloadi16 xoaddr:$src)); - dag SELi16i64 = (i64 (sextloadi16 xoaddr:$src)); - - dag Li32 = (i32 (load xoaddr:$src)); + dag Li8 = (i32 (extloadi8 forcexfaddr:$src)); + dag ZELi8 = (i32 
(zextloadi8 forcexfaddr:$src)); + dag ZELi8i64 = (i64 (zextloadi8 forcexfaddr:$src)); + dag SELi8 = (i32 (sext_inreg (extloadi8 forcexfaddr:$src), i8)); + dag SELi8i64 = (i64 (sext_inreg (extloadi8 forcexfaddr:$src), i8)); + + dag Li16 = (i32 (extloadi16 forcexfaddr:$src)); + dag ZELi16 = (i32 (zextloadi16 forcexfaddr:$src)); + dag ZELi16i64 = (i64 (zextloadi16 forcexfaddr:$src)); + dag SELi16 = (i32 (sextloadi16 forcexfaddr:$src)); + dag SELi16i64 = (i64 (sextloadi16 forcexfaddr:$src)); + + dag Li32 = (i32 (load forcexfaddr:$src)); } def DWToSPExtractConv { @@ -2271,22 +2271,22 @@ } def FltToIntLoad { - dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (extloadf32 xoaddr:$A))))); + dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (extloadf32 forcexfaddr:$A))))); } def FltToUIntLoad { - dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (extloadf32 xoaddr:$A))))); + dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (extloadf32 forcexfaddr:$A))))); } def FltToLongLoad { - dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 xoaddr:$A))))); + dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 forcexfaddr:$A))))); } def FltToLongLoadP9 { - dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 iaddrX4:$A))))); + dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 dsfaddr:$A))))); } def FltToULongLoad { - dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 xoaddr:$A))))); + dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 forcexfaddr:$A))))); } def FltToULongLoadP9 { - dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 iaddrX4:$A))))); + dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 dsfaddr:$A))))); } def FltToLong { dag A = (i64 (PPCmfvsr (f64 (PPCfctidz (fpextend f32:$A))))); @@ -2313,38 +2313,38 @@ dag A = (i64 (PPCmfvsr (f64 (PPCfctiduz f64:$A)))); } def DblToIntLoad { - dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load xoaddr:$A))))); + dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load forcexfaddr:$A))))); } def DblToIntLoadP9 { - dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load iaddrX4:$A))))); + dag A = (i32 
(PPCmfvsr (PPCfctiwz (f64 (load dsfaddr:$A))))); } def DblToUIntLoad { - dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load xoaddr:$A))))); + dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load forcexfaddr:$A))))); } def DblToUIntLoadP9 { - dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load iaddrX4:$A))))); + dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load dsfaddr:$A))))); } def DblToLongLoad { - dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (load xoaddr:$A))))); + dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (load forcexfaddr:$A))))); } def DblToULongLoad { - dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (load xoaddr:$A))))); + dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (load forcexfaddr:$A))))); } // FP load dags (for f32 -> v4f32) def LoadFP { - dag A = (f32 (load xoaddr:$A)); - dag B = (f32 (load xoaddr:$B)); - dag C = (f32 (load xoaddr:$C)); - dag D = (f32 (load xoaddr:$D)); + dag A = (f32 (load forcexfaddr:$A)); + dag B = (f32 (load forcexfaddr:$B)); + dag C = (f32 (load forcexfaddr:$C)); + dag D = (f32 (load forcexfaddr:$D)); } // FP merge dags (for f32 -> v4f32) def MrgFP { - dag LD32A = (COPY_TO_REGCLASS (LIWZX xoaddr:$A), VSRC); - dag LD32B = (COPY_TO_REGCLASS (LIWZX xoaddr:$B), VSRC); - dag LD32C = (COPY_TO_REGCLASS (LIWZX xoaddr:$C), VSRC); - dag LD32D = (COPY_TO_REGCLASS (LIWZX xoaddr:$D), VSRC); + dag LD32A = (COPY_TO_REGCLASS (LIWZX forcexfaddr:$A), VSRC); + dag LD32B = (COPY_TO_REGCLASS (LIWZX forcexfaddr:$B), VSRC); + dag LD32C = (COPY_TO_REGCLASS (LIWZX forcexfaddr:$C), VSRC); + dag LD32D = (COPY_TO_REGCLASS (LIWZX forcexfaddr:$D), VSRC); dag AC = (XVCVDPSP (XXPERMDI (COPY_TO_REGCLASS $A, VSRC), (COPY_TO_REGCLASS $C, VSRC), 0)); dag BD = (XVCVDPSP (XXPERMDI (COPY_TO_REGCLASS $B, VSRC), @@ -2698,12 +2698,12 @@ def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))), (f64 (XSMAXDP $A, $B))>; -def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, xoaddr:$dst), - (STXVD2X $rS, xoaddr:$dst)>; -def : Pat<(int_ppc_vsx_stxvw4x_be v4i32:$rS, xoaddr:$dst), - (STXVW4X $rS, 
xoaddr:$dst)>; -def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be xoaddr:$src)), (LXVW4X xoaddr:$src)>; -def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>; +def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, forcexfaddr:$dst), + (STXVD2X $rS, forcexfaddr:$dst)>; +def : Pat<(int_ppc_vsx_stxvw4x_be v4i32:$rS, forcexfaddr:$dst), + (STXVW4X $rS, forcexfaddr:$dst)>; +def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be forcexfaddr:$src)), (LXVW4X forcexfaddr:$src)>; +def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be forcexfaddr:$src)), (LXVD2X forcexfaddr:$src)>; // Rounding for single precision. def : Pat<(f32 (any_fround f32:$S)), @@ -2749,18 +2749,18 @@ (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC), 0))>; defm : ScalToVecWPermute< v4i32, FltToIntLoad.A, - (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1), - (COPY_TO_REGCLASS (XSCVDPSXWSs (XFLOADf32 xoaddr:$A)), VSRC)>; + (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWSs (XFLOADf32 forcexfaddr:$A)), VSRC), 1), + (COPY_TO_REGCLASS (XSCVDPSXWSs (XFLOADf32 forcexfaddr:$A)), VSRC)>; defm : ScalToVecWPermute< v4i32, FltToUIntLoad.A, - (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1), - (COPY_TO_REGCLASS (XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), VSRC)>; + (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWSs (XFLOADf32 forcexfaddr:$A)), VSRC), 1), + (COPY_TO_REGCLASS (XSCVDPUXWSs (XFLOADf32 forcexfaddr:$A)), VSRC)>; def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)), (v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>; -def : Pat<(v2f64 (PPCldsplat xoaddr:$A)), - (v2f64 (LXVDSX xoaddr:$A))>; -def : Pat<(v2i64 (PPCldsplat xoaddr:$A)), - (v2i64 (LXVDSX xoaddr:$A))>; +def : Pat<(v2f64 (PPCldsplat forcexfaddr:$A)), + (v2f64 (LXVDSX forcexfaddr:$A))>; +def : Pat<(v2i64 (PPCldsplat forcexfaddr:$A)), + (v2i64 (LXVDSX forcexfaddr:$A))>; // Build vectors of floating point converted to i64. 
def : Pat<(v2i64 (build_vector FltToLong.A, FltToLong.A)), @@ -2771,10 +2771,10 @@ (COPY_TO_REGCLASS (XSCVDPUXDSs $A), VSFRC), 0))>; defm : ScalToVecWPermute< v2i64, DblToLongLoad.A, - (XVCVDPSXDS (LXVDSX xoaddr:$A)), (XVCVDPSXDS (LXVDSX xoaddr:$A))>; + (XVCVDPSXDS (LXVDSX forcexfaddr:$A)), (XVCVDPSXDS (LXVDSX forcexfaddr:$A))>; defm : ScalToVecWPermute< v2i64, DblToULongLoad.A, - (XVCVDPUXDS (LXVDSX xoaddr:$A)), (XVCVDPUXDS (LXVDSX xoaddr:$A))>; + (XVCVDPUXDS (LXVDSX forcexfaddr:$A)), (XVCVDPUXDS (LXVDSX forcexfaddr:$A))>; } // HasVSX // Any big endian VSX subtarget. @@ -2882,14 +2882,14 @@ def : Pat<(f64 (extractelt v2f64:$S, 1)), (f64 (EXTRACT_SUBREG $S, sub_64))>; -def : Pat<(v2f64 (PPCld_vec_be xoaddr:$src)), (LXVD2X xoaddr:$src)>; -def : Pat<(PPCst_vec_be v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; -def : Pat<(v4f32 (PPCld_vec_be xoaddr:$src)), (LXVW4X xoaddr:$src)>; -def : Pat<(PPCst_vec_be v4f32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>; -def : Pat<(v2i64 (PPCld_vec_be xoaddr:$src)), (LXVD2X xoaddr:$src)>; -def : Pat<(PPCst_vec_be v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; -def : Pat<(v4i32 (PPCld_vec_be xoaddr:$src)), (LXVW4X xoaddr:$src)>; -def : Pat<(PPCst_vec_be v4i32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>; +def : Pat<(v2f64 (PPCld_vec_be forcexfaddr:$src)), (LXVD2X forcexfaddr:$src)>; +def : Pat<(PPCst_vec_be v2f64:$rS, forcexfaddr:$dst), (STXVD2X $rS, forcexfaddr:$dst)>; +def : Pat<(v4f32 (PPCld_vec_be forcexfaddr:$src)), (LXVW4X forcexfaddr:$src)>; +def : Pat<(PPCst_vec_be v4f32:$rS, forcexfaddr:$dst), (STXVW4X $rS, forcexfaddr:$dst)>; +def : Pat<(v2i64 (PPCld_vec_be forcexfaddr:$src)), (LXVD2X forcexfaddr:$src)>; +def : Pat<(PPCst_vec_be v2i64:$rS, forcexfaddr:$dst), (STXVD2X $rS, forcexfaddr:$dst)>; +def : Pat<(v4i32 (PPCld_vec_be forcexfaddr:$src)), (LXVW4X forcexfaddr:$src)>; +def : Pat<(PPCst_vec_be v4i32:$rS, forcexfaddr:$dst), (STXVW4X $rS, forcexfaddr:$dst)>; def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 
(vector_extract v2i64:$S, 0))))), (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))), @@ -2978,66 +2978,66 @@ // Any pre-Power9 VSX subtarget. let Predicates = [HasVSX, NoP9Vector] in { def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 8), - (STXSDX (XSCVDPSXDS f64:$src), xoaddr:$dst)>; + (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), forcexfaddr:$dst, 8), + (STXSDX (XSCVDPSXDS f64:$src), forcexfaddr:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 8), - (STXSDX (XSCVDPUXDS f64:$src), xoaddr:$dst)>; + (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), forcexfaddr:$dst, 8), + (STXSDX (XSCVDPUXDS f64:$src), forcexfaddr:$dst)>; // Load-and-splat with fp-to-int conversion (using X-Form VSX/FP loads). defm : ScalToVecWPermute< v4i32, DblToIntLoad.A, - (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWS (XFLOADf64 xoaddr:$A)), VSRC), 1), - (COPY_TO_REGCLASS (XSCVDPSXWS (XFLOADf64 xoaddr:$A)), VSRC)>; + (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWS (XFLOADf64 forcexfaddr:$A)), VSRC), 1), + (COPY_TO_REGCLASS (XSCVDPSXWS (XFLOADf64 forcexfaddr:$A)), VSRC)>; defm : ScalToVecWPermute< v4i32, DblToUIntLoad.A, - (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWS (XFLOADf64 xoaddr:$A)), VSRC), 1), - (COPY_TO_REGCLASS (XSCVDPUXWS (XFLOADf64 xoaddr:$A)), VSRC)>; + (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWS (XFLOADf64 forcexfaddr:$A)), VSRC), 1), + (COPY_TO_REGCLASS (XSCVDPUXWS (XFLOADf64 forcexfaddr:$A)), VSRC)>; defm : ScalToVecWPermute< v2i64, FltToLongLoad.A, - (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$A), VSFRC)), 0), - (SUBREG_TO_REG (i64 1), (XSCVDPSXDS (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$A), + (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS (XFLOADf32 forcexfaddr:$A), VSFRC)), 0), + (SUBREG_TO_REG (i64 1), (XSCVDPSXDS (COPY_TO_REGCLASS (XFLOADf32 forcexfaddr:$A), VSFRC)), sub_64)>; defm : ScalToVecWPermute< v2i64, 
FltToULongLoad.A, - (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$A), VSFRC)), 0), - (SUBREG_TO_REG (i64 1), (XSCVDPUXDS (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$A), + (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS (XFLOADf32 forcexfaddr:$A), VSFRC)), 0), + (SUBREG_TO_REG (i64 1), (XSCVDPUXDS (COPY_TO_REGCLASS (XFLOADf32 forcexfaddr:$A), VSFRC)), sub_64)>; } // HasVSX, NoP9Vector // Any VSX subtarget that only has loads and stores that load in big endian // order regardless of endianness. This is really pre-Power9 subtargets. let Predicates = [HasVSX, HasOnlySwappingMemOps] in { - def : Pat<(v2f64 (PPClxvd2x xoaddr:$src)), (LXVD2X xoaddr:$src)>; + def : Pat<(v2f64 (PPClxvd2x forcexfaddr:$src)), (LXVD2X forcexfaddr:$src)>; // Stores. - def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst), - (STXVD2X $rS, xoaddr:$dst)>; - def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; + def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, forcexfaddr:$dst), + (STXVD2X $rS, forcexfaddr:$dst)>; + def : Pat<(PPCstxvd2x v2f64:$rS, forcexfaddr:$dst), (STXVD2X $rS, forcexfaddr:$dst)>; } // HasVSX, HasOnlySwappingMemOps // Big endian VSX subtarget that only has loads and stores that always // load in big endian order. Really big endian pre-Power9 subtargets. 
let Predicates = [HasVSX, HasOnlySwappingMemOps, IsBigEndian] in { - def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>; - def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>; - def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>; - def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVW4X xoaddr:$src)>; - def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; - def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; - def : Pat<(store v4i32:$XT, xoaddr:$dst), (STXVW4X $XT, xoaddr:$dst)>; - def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst), - (STXVW4X $rS, xoaddr:$dst)>; + def : Pat<(v2f64 (load forcexfaddr:$src)), (LXVD2X forcexfaddr:$src)>; + def : Pat<(v2i64 (load forcexfaddr:$src)), (LXVD2X forcexfaddr:$src)>; + def : Pat<(v4i32 (load forcexfaddr:$src)), (LXVW4X forcexfaddr:$src)>; + def : Pat<(v4i32 (int_ppc_vsx_lxvw4x forcexfaddr:$src)), (LXVW4X forcexfaddr:$src)>; + def : Pat<(store v2f64:$rS, forcexfaddr:$dst), (STXVD2X $rS, forcexfaddr:$dst)>; + def : Pat<(store v2i64:$rS, forcexfaddr:$dst), (STXVD2X $rS, forcexfaddr:$dst)>; + def : Pat<(store v4i32:$XT, forcexfaddr:$dst), (STXVW4X $XT, forcexfaddr:$dst)>; + def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, forcexfaddr:$dst), + (STXVW4X $rS, forcexfaddr:$dst)>; } // HasVSX, HasOnlySwappingMemOps, IsBigEndian // Any Power8 VSX subtarget. 
let Predicates = [HasVSX, HasP8Vector] in { def : Pat<(int_ppc_vsx_xxleqv v4i32:$A, v4i32:$B), (XXLEQV $A, $B)>; -def : Pat<(f64 (extloadf32 xoaddr:$src)), - (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$src), VSFRC)>; -def : Pat<(f32 (fpround (f64 (extloadf32 xoaddr:$src)))), - (f32 (XFLOADf32 xoaddr:$src))>; +def : Pat<(f64 (extloadf32 forcexfaddr:$src)), + (COPY_TO_REGCLASS (XFLOADf32 forcexfaddr:$src), VSFRC)>; +def : Pat<(f32 (fpround (f64 (extloadf32 forcexfaddr:$src)))), + (f32 (XFLOADf32 forcexfaddr:$src))>; def : Pat<(f64 (any_fpextend f32:$src)), (COPY_TO_REGCLASS $src, VSFRC)>; @@ -3080,11 +3080,11 @@ // Instructions for converting float to i32 feeding a store. def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 4), - (STIWX (XSCVDPSXWS f64:$src), xoaddr:$dst)>; + (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), forcexfaddr:$dst, 4), + (STIWX (XSCVDPSXWS f64:$src), forcexfaddr:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 4), - (STIWX (XSCVDPUXWS f64:$src), xoaddr:$dst)>; + (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), forcexfaddr:$dst, 4), + (STIWX (XSCVDPUXWS f64:$src), forcexfaddr:$dst)>; def : Pat<(v2i64 (smax v2i64:$src1, v2i64:$src2)), (v2i64 (VMAXSD (COPY_TO_REGCLASS $src1, VRRC), @@ -3154,16 +3154,16 @@ // LIWAX - This instruction is used for sign extending i32 -> i64. // LIWZX - This instruction will be emitted for i32, f32, and when // zero-extending i32 to i64 (zext i32 -> i64). 
-def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))), - (v2i64 (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSRC))>; -def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))), - (v2i64 (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC))>; -def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))), +def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 forcexfaddr:$src)))), + (v2i64 (COPY_TO_REGCLASS (LIWAX forcexfaddr:$src), VSRC))>; +def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 forcexfaddr:$src)))), + (v2i64 (COPY_TO_REGCLASS (LIWZX forcexfaddr:$src), VSRC))>; +def : Pat<(v4i32 (scalar_to_vector (i32 (load forcexfaddr:$src)))), (v4i32 (XXSLDWIs - (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>; -def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))), + (COPY_TO_REGCLASS (LIWZX forcexfaddr:$src), VSRC), 1))>; +def : Pat<(v4f32 (scalar_to_vector (f32 (load forcexfaddr:$src)))), (v4f32 (XXSLDWIs - (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>; + (COPY_TO_REGCLASS (LIWZX forcexfaddr:$src), VSRC), 1))>; def : Pat; -def : Pat<(store (i32 (extractelt v4i32:$A, 1)), xoaddr:$src), - (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; -def : Pat<(store (f32 (extractelt v4f32:$A, 1)), xoaddr:$src), - (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; +def : Pat<(store (i32 (extractelt v4i32:$A, 1)), forcexfaddr:$src), + (STIWX (EXTRACT_SUBREG $A, sub_64), forcexfaddr:$src)>; +def : Pat<(store (f32 (extractelt v4f32:$A, 1)), forcexfaddr:$src), + (STIWX (EXTRACT_SUBREG $A, sub_64), forcexfaddr:$src)>; // Elements in a register on a BE system are in order <0, 1, 2, 3>. // The store instructions store the second word from the left. // So to align element zero, we need to modulo-left-shift by 3 words. // Similar logic applies for elements 2 and 3. 
foreach Idx = [ [0,3], [2,1], [3,2] ] in { - def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src), + def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), forcexfaddr:$src), (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))), - sub_64), xoaddr:$src)>; - def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src), + sub_64), forcexfaddr:$src)>; + def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), forcexfaddr:$src), (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))), - sub_64), xoaddr:$src)>; + sub_64), forcexfaddr:$src)>; } } // HasVSX, HasP8Vector, IsBigEndian, IsPPC64 @@ -3242,24 +3242,24 @@ // LIWZX - This instruction will be emitted for i32, f32, and when // zero-extending i32 to i64 (zext i32 -> i64). defm : ScalToVecWPermute< - v2i64, (i64 (sextloadi32 xoaddr:$src)), - (XXPERMDIs (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSFRC), 2), - (SUBREG_TO_REG (i64 1), (LIWAX xoaddr:$src), sub_64)>; + v2i64, (i64 (sextloadi32 forcexfaddr:$src)), + (XXPERMDIs (COPY_TO_REGCLASS (LIWAX forcexfaddr:$src), VSFRC), 2), + (SUBREG_TO_REG (i64 1), (LIWAX forcexfaddr:$src), sub_64)>; defm : ScalToVecWPermute< - v2i64, (i64 (zextloadi32 xoaddr:$src)), - (XXPERMDIs (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2), - (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$src), sub_64)>; + v2i64, (i64 (zextloadi32 forcexfaddr:$src)), + (XXPERMDIs (COPY_TO_REGCLASS (LIWZX forcexfaddr:$src), VSFRC), 2), + (SUBREG_TO_REG (i64 1), (LIWZX forcexfaddr:$src), sub_64)>; defm : ScalToVecWPermute< - v4i32, (i32 (load xoaddr:$src)), - (XXPERMDIs (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2), - (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$src), sub_64)>; + v4i32, (i32 (load forcexfaddr:$src)), + (XXPERMDIs (COPY_TO_REGCLASS (LIWZX forcexfaddr:$src), VSFRC), 2), + (SUBREG_TO_REG (i64 1), (LIWZX forcexfaddr:$src), sub_64)>; defm : ScalToVecWPermute< - v4f32, (f32 (load xoaddr:$src)), - (XXPERMDIs (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2), - (SUBREG_TO_REG 
(i64 1), (LIWZX xoaddr:$src), sub_64)>; + v4f32, (f32 (load forcexfaddr:$src)), + (XXPERMDIs (COPY_TO_REGCLASS (LIWZX forcexfaddr:$src), VSFRC), 2), + (SUBREG_TO_REG (i64 1), (LIWZX forcexfaddr:$src), sub_64)>; def : Pat; -def : Pat<(store (i32 (extractelt v4i32:$A, 2)), xoaddr:$src), - (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; -def : Pat<(store (f32 (extractelt v4f32:$A, 2)), xoaddr:$src), - (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; +def : Pat<(store (i32 (extractelt v4i32:$A, 2)), forcexfaddr:$src), + (STIWX (EXTRACT_SUBREG $A, sub_64), forcexfaddr:$src)>; +def : Pat<(store (f32 (extractelt v4f32:$A, 2)), forcexfaddr:$src), + (STIWX (EXTRACT_SUBREG $A, sub_64), forcexfaddr:$src)>; // Elements in a register on a LE system are in order <3, 2, 1, 0>. // The store instructions store the second word from the left. // So to align element 3, we need to modulo-left-shift by 3 words. // Similar logic applies for elements 0 and 1. foreach Idx = [ [0,2], [1,1], [3,3] ] in { - def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src), + def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), forcexfaddr:$src), (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))), - sub_64), xoaddr:$src)>; - def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src), + sub_64), forcexfaddr:$src)>; + def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), forcexfaddr:$src), (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))), - sub_64), xoaddr:$src)>; + sub_64), forcexfaddr:$src)>; } } // HasVSX, HasP8Vector, IsLittleEndian // Big endian pre-Power9 VSX subtarget. 
let Predicates = [HasVSX, HasP8Vector, NoP9Vector, IsBigEndian, IsPPC64] in { -def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src), - (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; -def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src), - (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; -def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src), +def : Pat<(store (i64 (extractelt v2i64:$A, 0)), forcexfaddr:$src), + (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), forcexfaddr:$src)>; +def : Pat<(store (f64 (extractelt v2f64:$A, 0)), forcexfaddr:$src), + (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), forcexfaddr:$src)>; +def : Pat<(store (i64 (extractelt v2i64:$A, 1)), forcexfaddr:$src), (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64), - xoaddr:$src)>; -def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src), + forcexfaddr:$src)>; +def : Pat<(store (f64 (extractelt v2f64:$A, 1)), forcexfaddr:$src), (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64), - xoaddr:$src)>; + forcexfaddr:$src)>; } // HasVSX, HasP8Vector, NoP9Vector, IsBigEndian, IsPPC64 // Little endian pre-Power9 VSX subtarget. 
let Predicates = [HasVSX, HasP8Vector, NoP9Vector, IsLittleEndian] in { -def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src), +def : Pat<(store (i64 (extractelt v2i64:$A, 0)), forcexfaddr:$src), (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64), - xoaddr:$src)>; -def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src), + forcexfaddr:$src)>; +def : Pat<(store (f64 (extractelt v2f64:$A, 0)), forcexfaddr:$src), (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64), - xoaddr:$src)>; -def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src), - (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; -def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src), - (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; + forcexfaddr:$src)>; +def : Pat<(store (i64 (extractelt v2i64:$A, 1)), forcexfaddr:$src), + (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), forcexfaddr:$src)>; +def : Pat<(store (f64 (extractelt v2f64:$A, 1)), forcexfaddr:$src), + (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), forcexfaddr:$src)>; } // HasVSX, HasP8Vector, NoP9Vector, IsLittleEndian // Any VSX target with direct moves. @@ -3613,12 +3613,12 @@ // Convert (Un)Signed Word -> QP. def : Pat<(f128 (any_sint_to_fp i32:$src)), (f128 (XSCVSDQP (MTVSRWA $src)))>; -def : Pat<(f128 (any_sint_to_fp (i32 (load xoaddr:$src)))), - (f128 (XSCVSDQP (LIWAX xoaddr:$src)))>; +def : Pat<(f128 (any_sint_to_fp (i32 (load forcexfaddr:$src)))), + (f128 (XSCVSDQP (LIWAX forcexfaddr:$src)))>; def : Pat<(f128 (any_uint_to_fp i32:$src)), (f128 (XSCVUDQP (MTVSRWZ $src)))>; -def : Pat<(f128 (any_uint_to_fp (i32 (load xoaddr:$src)))), - (f128 (XSCVUDQP (LIWZX xoaddr:$src)))>; +def : Pat<(f128 (any_uint_to_fp (i32 (load forcexfaddr:$src)))), + (f128 (XSCVUDQP (LIWZX forcexfaddr:$src)))>; // Pattern for matching Vector HP -> Vector SP intrinsic. 
Defined as a // separate pattern so that it can convert the input register class from @@ -3659,95 +3659,95 @@ (v1i128 (COPY_TO_REGCLASS (XXBRQ (COPY_TO_REGCLASS $A, VSRC)), VRRC))>; // D-Form Load/Store -def : Pat<(v4i32 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>; -def : Pat<(v4f32 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>; -def : Pat<(v2i64 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>; -def : Pat<(v2f64 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>; -def : Pat<(f128 (quadwOffsetLoad iaddrX16:$src)), +def : Pat<(v4i32 (quadwOffsetLoad dqfaddr:$src)), (LXV memrix16:$src)>; +def : Pat<(v4f32 (quadwOffsetLoad dqfaddr:$src)), (LXV memrix16:$src)>; +def : Pat<(v2i64 (quadwOffsetLoad dqfaddr:$src)), (LXV memrix16:$src)>; +def : Pat<(v2f64 (quadwOffsetLoad dqfaddr:$src)), (LXV memrix16:$src)>; +def : Pat<(f128 (quadwOffsetLoad dqfaddr:$src)), (COPY_TO_REGCLASS (LXV memrix16:$src), VRRC)>; -def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iaddrX16:$src)), (LXV memrix16:$src)>; -def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iaddrX16:$src)), (LXV memrix16:$src)>; +def : Pat<(v4i32 (int_ppc_vsx_lxvw4x dqfaddr:$src)), (LXV memrix16:$src)>; +def : Pat<(v2f64 (int_ppc_vsx_lxvd2x dqfaddr:$src)), (LXV memrix16:$src)>; -def : Pat<(quadwOffsetStore v4f32:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>; -def : Pat<(quadwOffsetStore v4i32:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>; -def : Pat<(quadwOffsetStore v2f64:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>; -def : Pat<(quadwOffsetStore f128:$rS, iaddrX16:$dst), +def : Pat<(quadwOffsetStore v4f32:$rS, dqfaddr:$dst), (STXV $rS, memrix16:$dst)>; +def : Pat<(quadwOffsetStore v4i32:$rS, dqfaddr:$dst), (STXV $rS, memrix16:$dst)>; +def : Pat<(quadwOffsetStore v2f64:$rS, dqfaddr:$dst), (STXV $rS, memrix16:$dst)>; +def : Pat<(quadwOffsetStore f128:$rS, dqfaddr:$dst), (STXV (COPY_TO_REGCLASS $rS, VSRC), memrix16:$dst)>; -def : Pat<(quadwOffsetStore v2i64:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>; -def : 
Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iaddrX16:$dst), +def : Pat<(quadwOffsetStore v2i64:$rS, dqfaddr:$dst), (STXV $rS, memrix16:$dst)>; +def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, dqfaddr:$dst), (STXV $rS, memrix16:$dst)>; -def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iaddrX16:$dst), +def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, dqfaddr:$dst), (STXV $rS, memrix16:$dst)>; -def : Pat<(v2f64 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>; -def : Pat<(v2i64 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>; -def : Pat<(v4f32 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>; -def : Pat<(v4i32 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>; -def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVX xoaddr:$src)>; -def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xoaddr:$src)), (LXVX xoaddr:$src)>; -def : Pat<(f128 (nonQuadwOffsetLoad xoaddr:$src)), - (COPY_TO_REGCLASS (LXVX xoaddr:$src), VRRC)>; -def : Pat<(nonQuadwOffsetStore f128:$rS, xoaddr:$dst), - (STXVX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>; -def : Pat<(nonQuadwOffsetStore v2f64:$rS, xoaddr:$dst), - (STXVX $rS, xoaddr:$dst)>; -def : Pat<(nonQuadwOffsetStore v2i64:$rS, xoaddr:$dst), - (STXVX $rS, xoaddr:$dst)>; -def : Pat<(nonQuadwOffsetStore v4f32:$rS, xoaddr:$dst), - (STXVX $rS, xoaddr:$dst)>; -def : Pat<(nonQuadwOffsetStore v4i32:$rS, xoaddr:$dst), - (STXVX $rS, xoaddr:$dst)>; -def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst), - (STXVX $rS, xoaddr:$dst)>; -def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst), - (STXVX $rS, xoaddr:$dst)>; +def : Pat<(v2f64 (nonQuadwOffsetLoad forcexfaddr:$src)), (LXVX forcexfaddr:$src)>; +def : Pat<(v2i64 (nonQuadwOffsetLoad forcexfaddr:$src)), (LXVX forcexfaddr:$src)>; +def : Pat<(v4f32 (nonQuadwOffsetLoad forcexfaddr:$src)), (LXVX forcexfaddr:$src)>; +def : Pat<(v4i32 (nonQuadwOffsetLoad forcexfaddr:$src)), (LXVX forcexfaddr:$src)>; +def : Pat<(v4i32 (int_ppc_vsx_lxvw4x forcexfaddr:$src)), (LXVX forcexfaddr:$src)>; +def : Pat<(v2f64 (int_ppc_vsx_lxvd2x 
forcexfaddr:$src)), (LXVX forcexfaddr:$src)>; +def : Pat<(f128 (nonQuadwOffsetLoad forcexfaddr:$src)), + (COPY_TO_REGCLASS (LXVX forcexfaddr:$src), VRRC)>; +def : Pat<(nonQuadwOffsetStore f128:$rS, forcexfaddr:$dst), + (STXVX (COPY_TO_REGCLASS $rS, VSRC), forcexfaddr:$dst)>; +def : Pat<(nonQuadwOffsetStore v2f64:$rS, forcexfaddr:$dst), + (STXVX $rS, forcexfaddr:$dst)>; +def : Pat<(nonQuadwOffsetStore v2i64:$rS, forcexfaddr:$dst), + (STXVX $rS, forcexfaddr:$dst)>; +def : Pat<(nonQuadwOffsetStore v4f32:$rS, forcexfaddr:$dst), + (STXVX $rS, forcexfaddr:$dst)>; +def : Pat<(nonQuadwOffsetStore v4i32:$rS, forcexfaddr:$dst), + (STXVX $rS, forcexfaddr:$dst)>; +def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, forcexfaddr:$dst), + (STXVX $rS, forcexfaddr:$dst)>; +def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, forcexfaddr:$dst), + (STXVX $rS, forcexfaddr:$dst)>; // Build vectors from i8 loads defm : ScalToVecWPermute; + (VSPLTBs 7, (LXSIBZX forcexfaddr:$src)), + (VSPLTBs 7, (LXSIBZX forcexfaddr:$src))>; defm : ScalToVecWPermute; + (VSPLTHs 3, (LXSIBZX forcexfaddr:$src)), + (VSPLTHs 3, (LXSIBZX forcexfaddr:$src))>; defm : ScalToVecWPermute; + (XXSPLTWs (LXSIBZX forcexfaddr:$src), 1), + (XXSPLTWs (LXSIBZX forcexfaddr:$src), 1)>; defm : ScalToVecWPermute; + (XXPERMDIs (LXSIBZX forcexfaddr:$src), 0), + (XXPERMDIs (LXSIBZX forcexfaddr:$src), 0)>; defm : ScalToVecWPermute; + (XXSPLTWs (VEXTSB2Ws (LXSIBZX forcexfaddr:$src)), 1), + (XXSPLTWs (VEXTSB2Ws (LXSIBZX forcexfaddr:$src)), 1)>; defm : ScalToVecWPermute; + (XXPERMDIs (VEXTSB2Ds (LXSIBZX forcexfaddr:$src)), 0), + (XXPERMDIs (VEXTSB2Ds (LXSIBZX forcexfaddr:$src)), 0)>; // Build vectors from i16 loads defm : ScalToVecWPermute; + (VSPLTHs 3, (LXSIHZX forcexfaddr:$src)), + (VSPLTHs 3, (LXSIHZX forcexfaddr:$src))>; defm : ScalToVecWPermute; + (XXSPLTWs (LXSIHZX forcexfaddr:$src), 1), + (XXSPLTWs (LXSIHZX forcexfaddr:$src), 1)>; defm : ScalToVecWPermute; + (XXPERMDIs (LXSIHZX forcexfaddr:$src), 0), + (XXPERMDIs (LXSIHZX forcexfaddr:$src), 0)>; 
defm : ScalToVecWPermute; + (XXSPLTWs (VEXTSH2Ws (LXSIHZX forcexfaddr:$src)), 1), + (XXSPLTWs (VEXTSH2Ws (LXSIHZX forcexfaddr:$src)), 1)>; defm : ScalToVecWPermute; + (XXPERMDIs (VEXTSH2Ds (LXSIHZX forcexfaddr:$src)), 0), + (XXPERMDIs (VEXTSH2Ds (LXSIHZX forcexfaddr:$src)), 0)>; // Load/convert and convert/store patterns for f16. -def : Pat<(f64 (extloadf16 xoaddr:$src)), - (f64 (XSCVHPDP (LXSIHZX xoaddr:$src)))>; -def : Pat<(truncstoref16 f64:$src, xoaddr:$dst), - (STXSIHX (XSCVDPHP $src), xoaddr:$dst)>; -def : Pat<(f32 (extloadf16 xoaddr:$src)), - (f32 (COPY_TO_REGCLASS (XSCVHPDP (LXSIHZX xoaddr:$src)), VSSRC))>; -def : Pat<(truncstoref16 f32:$src, xoaddr:$dst), - (STXSIHX (XSCVDPHP (COPY_TO_REGCLASS $src, VSFRC)), xoaddr:$dst)>; +def : Pat<(f64 (extloadf16 forcexfaddr:$src)), + (f64 (XSCVHPDP (LXSIHZX forcexfaddr:$src)))>; +def : Pat<(truncstoref16 f64:$src, forcexfaddr:$dst), + (STXSIHX (XSCVDPHP $src), forcexfaddr:$dst)>; +def : Pat<(f32 (extloadf16 forcexfaddr:$src)), + (f32 (COPY_TO_REGCLASS (XSCVHPDP (LXSIHZX forcexfaddr:$src)), VSSRC))>; +def : Pat<(truncstoref16 f32:$src, forcexfaddr:$dst), + (STXSIHX (XSCVDPHP (COPY_TO_REGCLASS $src, VSFRC)), forcexfaddr:$dst)>; def : Pat<(f64 (f16_to_fp i32:$A)), (f64 (XSCVHPDP (MTVSRWZ $A)))>; def : Pat<(f32 (f16_to_fp i32:$A)), @@ -3762,33 +3762,33 @@ def : Pat<(f64 (PPCVexts f64:$A, 2)), (f64 (COPY_TO_REGCLASS (VEXTSH2Ds $A), VSFRC))>; -def : Pat<(f64 (extloadf32 iaddrX4:$src)), - (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$src), VSFRC)>; -def : Pat<(f32 (fpround (f64 (extloadf32 iaddrX4:$src)))), - (f32 (DFLOADf32 iaddrX4:$src))>; +def : Pat<(f64 (extloadf32 dsfaddr:$src)), + (COPY_TO_REGCLASS (DFLOADf32 dsfaddr:$src), VSFRC)>; +def : Pat<(f32 (fpround (f64 (extloadf32 dsfaddr:$src)))), + (f32 (DFLOADf32 dsfaddr:$src))>; -def : Pat<(v4f32 (PPCldvsxlh xaddr:$src)), - (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC)>; -def : Pat<(v4f32 (PPCldvsxlh iaddrX4:$src)), - (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC)>; +def : 
Pat<(v4f32 (PPCldvsxlh xfaddr:$src)), + (COPY_TO_REGCLASS (XFLOADf64 xfaddr:$src), VSRC)>; +def : Pat<(v4f32 (PPCldvsxlh dsfaddr:$src)), + (COPY_TO_REGCLASS (DFLOADf64 dsfaddr:$src), VSRC)>; // Convert (Un)Signed DWord in memory -> QP -def : Pat<(f128 (sint_to_fp (i64 (load xaddrX4:$src)))), - (f128 (XSCVSDQP (LXSDX xaddrX4:$src)))>; -def : Pat<(f128 (sint_to_fp (i64 (load iaddrX4:$src)))), - (f128 (XSCVSDQP (LXSD iaddrX4:$src)))>; -def : Pat<(f128 (uint_to_fp (i64 (load xaddrX4:$src)))), - (f128 (XSCVUDQP (LXSDX xaddrX4:$src)))>; -def : Pat<(f128 (uint_to_fp (i64 (load iaddrX4:$src)))), - (f128 (XSCVUDQP (LXSD iaddrX4:$src)))>; +def : Pat<(f128 (sint_to_fp (i64 (load xfaddr:$src)))), + (f128 (XSCVSDQP (LXSDX xfaddr:$src)))>; +def : Pat<(f128 (sint_to_fp (i64 (load dsfaddr:$src)))), + (f128 (XSCVSDQP (LXSD dsfaddr:$src)))>; +def : Pat<(f128 (uint_to_fp (i64 (load xfaddr:$src)))), + (f128 (XSCVUDQP (LXSDX xfaddr:$src)))>; +def : Pat<(f128 (uint_to_fp (i64 (load dsfaddr:$src)))), + (f128 (XSCVUDQP (LXSD dsfaddr:$src)))>; // Convert Unsigned HWord in memory -> QP def : Pat<(f128 (uint_to_fp ScalarLoads.ZELi16)), - (f128 (XSCVUDQP (LXSIHZX xaddr:$src)))>; + (f128 (XSCVUDQP (LXSIHZX xfaddr:$src)))>; // Convert Unsigned Byte in memory -> QP def : Pat<(f128 (uint_to_fp ScalarLoads.ZELi8)), - (f128 (XSCVUDQP (LXSIBZX xoaddr:$src)))>; + (f128 (XSCVUDQP (LXSIBZX forcexfaddr:$src)))>; // Truncate & Convert QP -> (Un)Signed (D)Word. def : Pat<(i64 (any_fp_to_sint f128:$src)), (i64 (MFVRD (XSCVQPSDZ $src)))>; @@ -3801,65 +3801,65 @@ // Instructions for store(fptosi). // The 8-byte version is repeated here due to availability of D-Form STXSD. 
def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xaddrX4:$dst, 8), + (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xfaddr:$dst, 8), (STXSDX (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC), - xaddrX4:$dst)>; + xfaddr:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), iaddrX4:$dst, 8), + (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), dsfaddr:$dst, 8), (STXSD (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC), - iaddrX4:$dst)>; + dsfaddr:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 4), - (STXSIWX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>; + (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), forcexfaddr:$dst, 4), + (STXSIWX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), forcexfaddr:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 2), - (STXSIHX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>; + (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), forcexfaddr:$dst, 2), + (STXSIHX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), forcexfaddr:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 1), - (STXSIBX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>; + (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), forcexfaddr:$dst, 1), + (STXSIBX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), forcexfaddr:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xaddrX4:$dst, 8), - (STXSDX (XSCVDPSXDS f64:$src), xaddrX4:$dst)>; + (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xfaddr:$dst, 8), + (STXSDX (XSCVDPSXDS f64:$src), xfaddr:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), iaddrX4:$dst, 8), - (STXSD (XSCVDPSXDS f64:$src), iaddrX4:$dst)>; + (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), dsfaddr:$dst, 8), + (STXSD (XSCVDPSXDS f64:$src), dsfaddr:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 
(PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 2), - (STXSIHX (XSCVDPSXWS f64:$src), xoaddr:$dst)>; + (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), forcexfaddr:$dst, 2), + (STXSIHX (XSCVDPSXWS f64:$src), forcexfaddr:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 1), - (STXSIBX (XSCVDPSXWS f64:$src), xoaddr:$dst)>; + (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), forcexfaddr:$dst, 1), + (STXSIBX (XSCVDPSXWS f64:$src), forcexfaddr:$dst)>; // Instructions for store(fptoui). def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xaddrX4:$dst, 8), + (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xfaddr:$dst, 8), (STXSDX (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC), - xaddrX4:$dst)>; + xfaddr:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), iaddrX4:$dst, 8), + (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), dsfaddr:$dst, 8), (STXSD (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC), - iaddrX4:$dst)>; + dsfaddr:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 4), - (STXSIWX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>; + (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), forcexfaddr:$dst, 4), + (STXSIWX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), forcexfaddr:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 2), - (STXSIHX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>; + (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), forcexfaddr:$dst, 2), + (STXSIHX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), forcexfaddr:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 1), - (STXSIBX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>; + (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), forcexfaddr:$dst, 1), + (STXSIBX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), forcexfaddr:$dst)>; def : 
Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xaddrX4:$dst, 8), - (STXSDX (XSCVDPUXDS f64:$src), xaddrX4:$dst)>; + (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xfaddr:$dst, 8), + (STXSDX (XSCVDPUXDS f64:$src), xfaddr:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), iaddrX4:$dst, 8), - (STXSD (XSCVDPUXDS f64:$src), iaddrX4:$dst)>; + (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), dsfaddr:$dst, 8), + (STXSD (XSCVDPUXDS f64:$src), dsfaddr:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 2), - (STXSIHX (XSCVDPUXWS f64:$src), xoaddr:$dst)>; + (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), forcexfaddr:$dst, 2), + (STXSIHX (XSCVDPUXWS f64:$src), forcexfaddr:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 1), - (STXSIBX (XSCVDPUXWS f64:$src), xoaddr:$dst)>; + (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), forcexfaddr:$dst, 1), + (STXSIBX (XSCVDPUXWS f64:$src), forcexfaddr:$dst)>; // Round & Convert QP -> DP/SP def : Pat<(f64 (any_fpround f128:$src)), (f64 (XSCVQPDP $src))>; @@ -3892,35 +3892,35 @@ immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A)), (v16i8 (COPY_TO_REGCLASS (XXSPLTIB imm:$A), VSRC))>; defm : ScalToVecWPermute; + (XVCVSPSXWS (LXVWSX forcexfaddr:$A)), + (XVCVSPSXWS (LXVWSX forcexfaddr:$A))>; defm : ScalToVecWPermute; + (XVCVSPUXWS (LXVWSX forcexfaddr:$A)), + (XVCVSPUXWS (LXVWSX forcexfaddr:$A))>; defm : ScalToVecWPermute< v4i32, DblToIntLoadP9.A, - (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1), - (SUBREG_TO_REG (i64 1), (XSCVDPSXWS (DFLOADf64 iaddrX4:$A)), sub_64)>; + (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWS (DFLOADf64 dsfaddr:$A)), VSRC), 1), + (SUBREG_TO_REG (i64 1), (XSCVDPSXWS (DFLOADf64 dsfaddr:$A)), sub_64)>; defm : ScalToVecWPermute< v4i32, DblToUIntLoadP9.A, - (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1), - (SUBREG_TO_REG (i64 1), 
(XSCVDPUXWS (DFLOADf64 iaddrX4:$A)), sub_64)>; + (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWS (DFLOADf64 dsfaddr:$A)), VSRC), 1), + (SUBREG_TO_REG (i64 1), (XSCVDPUXWS (DFLOADf64 dsfaddr:$A)), sub_64)>; defm : ScalToVecWPermute< v2i64, FltToLongLoadP9.A, - (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$A), VSFRC)), 0), + (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS (DFLOADf32 dsfaddr:$A), VSFRC)), 0), (SUBREG_TO_REG (i64 1), - (XSCVDPSXDS (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$A), VSFRC)), sub_64)>; + (XSCVDPSXDS (COPY_TO_REGCLASS (DFLOADf32 dsfaddr:$A), VSFRC)), sub_64)>; defm : ScalToVecWPermute< v2i64, FltToULongLoadP9.A, - (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$A), VSFRC)), 0), + (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS (DFLOADf32 dsfaddr:$A), VSFRC)), 0), (SUBREG_TO_REG (i64 1), - (XSCVDPUXDS (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$A), VSFRC)), sub_64)>; -def : Pat<(v4f32 (PPCldsplat xoaddr:$A)), - (v4f32 (LXVWSX xoaddr:$A))>; -def : Pat<(v4i32 (PPCldsplat xoaddr:$A)), - (v4i32 (LXVWSX xoaddr:$A))>; + (XSCVDPUXDS (COPY_TO_REGCLASS (DFLOADf32 dsfaddr:$A), VSFRC)), sub_64)>; +def : Pat<(v4f32 (PPCldsplat forcexfaddr:$A)), + (v4f32 (LXVWSX forcexfaddr:$A))>; +def : Pat<(v4i32 (PPCldsplat forcexfaddr:$A)), + (v4i32 (LXVWSX forcexfaddr:$A))>; } // HasVSX, HasP9Vector // Big endian 64Bit Power9 subtarget. 
@@ -3959,86 +3959,86 @@ (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>; // Scalar stores of i8 -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 9)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 11)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 13)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 15)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 1)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 3)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 
(vector_extract v16i8:$S, 12)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 5)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 7)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), forcexfaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 9)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), forcexfaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), forcexfaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 11)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), forcexfaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), forcexfaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 13)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), forcexfaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), forcexfaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 15)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), forcexfaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), forcexfaddr:$dst), + (STXSIBXv 
(COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 1)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), forcexfaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), forcexfaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 3)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), forcexfaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), forcexfaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 5)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), forcexfaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), forcexfaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 7)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), forcexfaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), forcexfaddr:$dst)>; // Scalar stores of i16 -def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst), - (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst), - (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst), - (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst), - (STXSIHXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst), - (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), 
VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst), - (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst), - (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst), - (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>; - -def : Pat<(v2i64 (scalar_to_vector (i64 (load iaddrX4:$src)))), - (v2i64 (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC))>; -def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddrX4:$src)))), - (v2i64 (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC))>; - -def : Pat<(v2f64 (scalar_to_vector (f64 (load iaddrX4:$src)))), - (v2f64 (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC))>; -def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddrX4:$src)))), - (v2f64 (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC))>; -def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddrX4:$src), +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), forcexfaddr:$dst), + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), forcexfaddr:$dst), + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), forcexfaddr:$dst), + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), forcexfaddr:$dst), + (STXSIHXv (COPY_TO_REGCLASS $S, VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), forcexfaddr:$dst), + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), forcexfaddr:$dst), + (STXSIHXv (COPY_TO_REGCLASS (v16i8 
(VSLDOI $S, $S, 4)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), forcexfaddr:$dst), + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), forcexfaddr:$dst), + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), forcexfaddr:$dst)>; + +def : Pat<(v2i64 (scalar_to_vector (i64 (load dsfaddr:$src)))), + (v2i64 (COPY_TO_REGCLASS (DFLOADf64 dsfaddr:$src), VSRC))>; +def : Pat<(v2i64 (scalar_to_vector (i64 (load xfaddr:$src)))), + (v2i64 (COPY_TO_REGCLASS (XFLOADf64 xfaddr:$src), VSRC))>; + +def : Pat<(v2f64 (scalar_to_vector (f64 (load dsfaddr:$src)))), + (v2f64 (COPY_TO_REGCLASS (DFLOADf64 dsfaddr:$src), VSRC))>; +def : Pat<(v2f64 (scalar_to_vector (f64 (load xfaddr:$src)))), + (v2f64 (COPY_TO_REGCLASS (XFLOADf64 xfaddr:$src), VSRC))>; +def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xfaddr:$src), (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), - sub_64), xaddrX4:$src)>; -def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddrX4:$src), + sub_64), xfaddr:$src)>; +def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xfaddr:$src), (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), - sub_64), xaddrX4:$src)>; -def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddrX4:$src), - (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>; -def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddrX4:$src), - (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>; -def : Pat<(store (i64 (extractelt v2i64:$A, 1)), iaddrX4:$src), + sub_64), xfaddr:$src)>; +def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xfaddr:$src), + (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xfaddr:$src)>; +def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xfaddr:$src), + (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xfaddr:$src)>; +def : Pat<(store (i64 (extractelt v2i64:$A, 1)), dsfaddr:$src), (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), - sub_64), iaddrX4:$src)>; -def : 
Pat<(store (f64 (extractelt v2f64:$A, 1)), iaddrX4:$src), + sub_64), dsfaddr:$src)>; +def : Pat<(store (f64 (extractelt v2f64:$A, 1)), dsfaddr:$src), (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), - sub_64), iaddrX4:$src)>; -def : Pat<(store (i64 (extractelt v2i64:$A, 0)), iaddrX4:$src), - (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>; -def : Pat<(store (f64 (extractelt v2f64:$A, 0)), iaddrX4:$src), - (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>; + sub_64), dsfaddr:$src)>; +def : Pat<(store (i64 (extractelt v2i64:$A, 0)), dsfaddr:$src), + (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), dsfaddr:$src)>; +def : Pat<(store (f64 (extractelt v2f64:$A, 0)), dsfaddr:$src), + (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), dsfaddr:$src)>; // (Un)Signed DWord vector extract -> QP def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 0)))), @@ -4134,105 +4134,105 @@ def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)), (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>; -def : Pat<(v8i16 (PPCld_vec_be xoaddr:$src)), - (COPY_TO_REGCLASS (LXVH8X xoaddr:$src), VRRC)>; -def : Pat<(PPCst_vec_be v8i16:$rS, xoaddr:$dst), - (STXVH8X (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>; +def : Pat<(v8i16 (PPCld_vec_be forcexfaddr:$src)), + (COPY_TO_REGCLASS (LXVH8X forcexfaddr:$src), VRRC)>; +def : Pat<(PPCst_vec_be v8i16:$rS, forcexfaddr:$dst), + (STXVH8X (COPY_TO_REGCLASS $rS, VSRC), forcexfaddr:$dst)>; -def : Pat<(v16i8 (PPCld_vec_be xoaddr:$src)), - (COPY_TO_REGCLASS (LXVB16X xoaddr:$src), VRRC)>; -def : Pat<(PPCst_vec_be v16i8:$rS, xoaddr:$dst), - (STXVB16X (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>; +def : Pat<(v16i8 (PPCld_vec_be forcexfaddr:$src)), + (COPY_TO_REGCLASS (LXVB16X forcexfaddr:$src), VRRC)>; +def : Pat<(PPCst_vec_be v16i8:$rS, forcexfaddr:$dst), + (STXVB16X (COPY_TO_REGCLASS $rS, VSRC), forcexfaddr:$dst)>; // Scalar stores of i8 -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, 
$S, 8)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 7)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 5)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 3)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 1)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 15)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 13)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, 
$S, 11)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 9)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), forcexfaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), forcexfaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 7)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), forcexfaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), forcexfaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 5)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), forcexfaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), forcexfaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 3)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), forcexfaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), forcexfaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 1)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), forcexfaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), forcexfaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 15)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei8 
(i32 (vector_extract v16i8:$S, 10)), forcexfaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), forcexfaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 13)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), forcexfaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), forcexfaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 11)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), forcexfaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), forcexfaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 9)), VSRC), forcexfaddr:$dst)>; // Scalar stores of i16 -def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst), - (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst), - (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst), - (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst), - (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst), - (STXSIHXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst), - (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst), - 
(STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst), - (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), forcexfaddr:$dst), + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), forcexfaddr:$dst), + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), forcexfaddr:$dst), + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), forcexfaddr:$dst), + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), forcexfaddr:$dst), + (STXSIHXv (COPY_TO_REGCLASS $S, VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), forcexfaddr:$dst), + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), forcexfaddr:$dst), + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), forcexfaddr:$dst)>; +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), forcexfaddr:$dst), + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), forcexfaddr:$dst)>; defm : ScalToVecWPermute< - v2i64, (i64 (load iaddrX4:$src)), - (XXPERMDIs (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSFRC), 2), - (SUBREG_TO_REG (i64 1), (DFLOADf64 iaddrX4:$src), sub_64)>; + v2i64, (i64 (load dsfaddr:$src)), + (XXPERMDIs (COPY_TO_REGCLASS (DFLOADf64 dsfaddr:$src), VSFRC), 2), + (SUBREG_TO_REG (i64 1), (DFLOADf64 dsfaddr:$src), sub_64)>; defm : ScalToVecWPermute< - v2i64, (i64 (load xaddrX4:$src)), - (XXPERMDIs (COPY_TO_REGCLASS 
(XFLOADf64 xaddrX4:$src), VSFRC), 2), - (SUBREG_TO_REG (i64 1), (XFLOADf64 xaddrX4:$src), sub_64)>; + v2i64, (i64 (load xfaddr:$src)), + (XXPERMDIs (COPY_TO_REGCLASS (XFLOADf64 xfaddr:$src), VSFRC), 2), + (SUBREG_TO_REG (i64 1), (XFLOADf64 xfaddr:$src), sub_64)>; defm : ScalToVecWPermute< - v2f64, (f64 (load iaddrX4:$src)), - (XXPERMDIs (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSFRC), 2), - (SUBREG_TO_REG (i64 1), (DFLOADf64 iaddrX4:$src), sub_64)>; + v2f64, (f64 (load dsfaddr:$src)), + (XXPERMDIs (COPY_TO_REGCLASS (DFLOADf64 dsfaddr:$src), VSFRC), 2), + (SUBREG_TO_REG (i64 1), (DFLOADf64 dsfaddr:$src), sub_64)>; defm : ScalToVecWPermute< - v2f64, (f64 (load xaddrX4:$src)), - (XXPERMDIs (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSFRC), 2), - (SUBREG_TO_REG (i64 1), (XFLOADf64 xaddrX4:$src), sub_64)>; + v2f64, (f64 (load xfaddr:$src)), + (XXPERMDIs (COPY_TO_REGCLASS (XFLOADf64 xfaddr:$src), VSFRC), 2), + (SUBREG_TO_REG (i64 1), (XFLOADf64 xfaddr:$src), sub_64)>; -def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddrX4:$src), +def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xfaddr:$src), (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), - sub_64), xaddrX4:$src)>; -def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddrX4:$src), + sub_64), xfaddr:$src)>; +def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xfaddr:$src), (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), - sub_64), xaddrX4:$src)>; -def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddrX4:$src), - (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>; -def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddrX4:$src), - (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>; -def : Pat<(store (i64 (extractelt v2i64:$A, 0)), iaddrX4:$src), + sub_64), xfaddr:$src)>; +def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xfaddr:$src), + (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xfaddr:$src)>; +def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xfaddr:$src), + (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), 
xfaddr:$src)>; +def : Pat<(store (i64 (extractelt v2i64:$A, 0)), dsfaddr:$src), (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), - sub_64), iaddrX4:$src)>; -def : Pat<(store (f64 (extractelt v2f64:$A, 0)), iaddrX4:$src), + sub_64), dsfaddr:$src)>; +def : Pat<(store (f64 (extractelt v2f64:$A, 0)), dsfaddr:$src), (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64), - iaddrX4:$src)>; -def : Pat<(store (i64 (extractelt v2i64:$A, 1)), iaddrX4:$src), - (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>; -def : Pat<(store (f64 (extractelt v2f64:$A, 1)), iaddrX4:$src), - (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>; + dsfaddr:$src)>; +def : Pat<(store (i64 (extractelt v2i64:$A, 1)), dsfaddr:$src), + (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), dsfaddr:$src)>; +def : Pat<(store (f64 (extractelt v2f64:$A, 1)), dsfaddr:$src), + (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), dsfaddr:$src)>; // (Un)Signed DWord vector extract -> QP def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 0)))),