Index: llvm/trunk/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
===================================================================
--- llvm/trunk/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ llvm/trunk/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -271,7 +271,8 @@
   unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 12;
 
   const MCOperand &MO = MI.getOperand(OpNo);
-  assert(MO.isImm());
+  assert(MO.isImm() && !(MO.getImm() % 16) &&
+         "Expecting an immediate that is a multiple of 16");
 
   return ((getMachineOpValue(MI, MO, Fixups, STI) >> 4) & 0xFFF) | RegBits;
 }
Index: llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
===================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -178,7 +178,7 @@
     /// a base register plus a signed 16-bit displacement [r+imm].
     bool SelectAddrImm(SDValue N, SDValue &Disp,
                        SDValue &Base) {
-      return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, false);
+      return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 0);
     }
 
     /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
@@ -211,7 +211,11 @@
    /// a base register plus a signed 16-bit displacement that is a multiple of 4.
    /// Suitable for use by STD and friends.
    bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
-      return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, true);
+      return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 4);
+    }
+
+    bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
+      return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 16);
    }
 
    // Select an address into a single register.
@@ -305,6 +309,7 @@
     bool AllUsersSelectZero(SDNode *N);
     void SwapAllSelectUsers(SDNode *N);
 
+    bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
     void transferMemOperands(SDNode *N, SDNode *Result);
   };
 
@@ -2999,6 +3004,25 @@
   return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
 }
 
+/// Does this node represent a load/store node whose address can be represented
+/// with a register plus an immediate that's a multiple of \p Val?
+bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {
+  LoadSDNode *LDN = dyn_cast<LoadSDNode>(N);
+  StoreSDNode *STN = dyn_cast<StoreSDNode>(N);
+  SDValue AddrOp;
+  if (LDN)
+    AddrOp = LDN->getOperand(1);
+  else if (STN)
+    AddrOp = STN->getOperand(2);
+
+  short Imm = 0;
+  if (AddrOp.getOpcode() == ISD::ADD)
+    return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val);
+
+  // If the address comes from the outside, the offset will be zero.
+  return AddrOp.getOpcode() == ISD::CopyFromReg;
+}
+
 void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
   // Transfer memoperands.
   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
Index: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
@@ -616,7 +616,7 @@
-    /// is not better represented as reg+reg. If Aligned is true, only accept
-    /// displacements suitable for STD and friends, i.e. multiples of 4.
+    /// is not better represented as reg+reg. If \p Alignment is non-zero, only
+    /// accept displacements that are multiples of that value.
     bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,
-                             SelectionDAG &DAG, bool Aligned) const;
+                             SelectionDAG &DAG, unsigned Alignment) const;
 
     /// SelectAddressRegRegOnly - Given the specified addressed, force it to be
     /// represented as an indexed [r+r] operation.
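The emitter assert, the new SelectAddrImmX16 selector, and isOffsetMultipleOf all enforce one encoding rule: a DQ-Form displacement is stored as a 12-bit field holding the byte offset shifted right by 4, so only signed 16-bit offsets that are multiples of 16 are representable. A minimal standalone sketch of that rule; the helper names below are invented for illustration and are not LLVM APIs:

  #include <cassert>
  #include <cstdint>

  // A DQ-Form displacement is encodable iff it fits in a signed 16-bit
  // immediate and its low 4 bits are zero (i.e. it is a multiple of 16).
  static bool isLegalDQFormDisp(int64_t Imm) {
    return Imm >= INT16_MIN && Imm <= INT16_MAX && (Imm % 16) == 0;
  }

  // Mirrors the emitter's "(... >> 4) & 0xFFF": the hardware field holds
  // the displacement with its four implied zero bits stripped off.
  static uint32_t encodeDQDispField(int64_t Imm) {
    assert(isLegalDQFormDisp(Imm) && "Expecting a multiple of 16");
    return (static_cast<uint32_t>(Imm) >> 4) & 0xFFF;
  }

  int main() {
    assert(encodeDQDispField(16) == 1); // 16(r3) encodes as field value 1
    assert(!isLegalDQFormDisp(8));      // 8(r3) must fall back to X-Form
    return 0;
  }

This is why the strengthened assert can reject any immediate with a nonzero remainder: such an offset simply has no DQ-Form encoding.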
Index: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -2130,12 +2130,12 @@
 
 /// Returns true if the address N can be represented by a base register plus
 /// a signed 16-bit displacement [r+imm], and if it is not better
-/// represented as reg+reg. If Aligned is true, only accept displacements
-/// suitable for STD and friends, i.e. multiples of 4.
+/// represented as reg+reg. If \p Alignment is non-zero, only accept
+/// displacements that are multiples of that value.
 bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
                                             SDValue &Base,
                                             SelectionDAG &DAG,
-                                            bool Aligned) const {
+                                            unsigned Alignment) const {
   // FIXME dl should come from parent load or store, not from address
   SDLoc dl(N);
   // If this can be more profitably realized as r+r, fail.
@@ -2145,7 +2145,7 @@
   if (N.getOpcode() == ISD::ADD) {
     int16_t imm = 0;
     if (isIntS16Immediate(N.getOperand(1), imm) &&
-        (!Aligned || (imm & 3) == 0)) {
+        (!Alignment || (imm % Alignment) == 0)) {
       Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
       if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
         Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
@@ -2169,7 +2169,7 @@
   } else if (N.getOpcode() == ISD::OR) {
     int16_t imm = 0;
     if (isIntS16Immediate(N.getOperand(1), imm) &&
-        (!Aligned || (imm & 3) == 0)) {
+        (!Alignment || (imm % Alignment) == 0)) {
       // If this is an or of disjoint bitfields, we can codegen this as an add
       // (for better address arithmetic) if the LHS and RHS of the OR are
       // provably disjoint.
@@ -2196,7 +2196,7 @@
     // If this address fits entirely in a 16-bit sext immediate field, codegen
     // this as "d, 0"
     int16_t Imm;
-    if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
+    if (isIntS16Immediate(CN, Imm) && (!Alignment || (Imm % Alignment) == 0)) {
       Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
       Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                              CN->getValueType(0));
@@ -2206,7 +2206,7 @@
     // Handle 32-bit sext immediates with LIS + addr mode.
     if ((CN->getValueType(0) == MVT::i32 ||
          (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
-        (!Aligned || (CN->getZExtValue() & 3) == 0)) {
+        (!Alignment || (CN->getZExtValue() % Alignment) == 0)) {
       int Addr = (int)CN->getZExtValue();
 
       // Otherwise, break this down into an LIS + disp.
@@ -2321,14 +2321,14 @@
 
   // LDU/STU can only handle immediates that are a multiple of 4.
   if (VT != MVT::i64) {
-    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false))
+    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 0))
       return false;
   } else {
     // LDU/STU need an address with at least 4-byte alignment.
    if (Alignment < 4)
      return false;
 
-    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true))
+    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 4))
       return false;
   }
 
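A note on the bool-to-unsigned generalization above: Alignment == 0 reproduces the old Aligned == false behaviour, and Alignment == 4 reproduces the old DS-Form bitmask test, because (imm & 3) == 0 and (imm % 4) == 0 agree for every 16-bit immediate, negative values included. A quick standalone check; the helper name dispOK is invented for illustration:

  #include <cassert>
  #include <cstdint>

  // The new predicate: Alignment == 0 means "no constraint".
  static bool dispOK(int16_t Imm, unsigned Alignment) {
    return !Alignment || (Imm % static_cast<int>(Alignment)) == 0;
  }

  int main() {
    for (int I = INT16_MIN; I <= INT16_MAX; ++I) {
      int16_t Imm = static_cast<int16_t>(I);
      assert(dispOK(Imm, 0));                     // old "Aligned == false"
      assert(dispOK(Imm, 4) == ((Imm & 3) == 0)); // old "Aligned == true"
    }
    assert(!dispOK(24, 16) && dispOK(32, 16));    // the new DQ-Form case
    return 0;
  }

Note the cast before the modulo: mixing a signed immediate with an unsigned divisor would otherwise convert the immediate to unsigned and mishandle negative displacements.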
Index: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td
===================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td
@@ -405,6 +405,25 @@
   return cast<LoadSDNode>(N)->getAlignment() < 4;
 }]>;
 
+// This is a somewhat weaker condition than actually checking for 16-byte
+// alignment. It simply checks that the displacement can be represented as
+// an immediate that is a multiple of 16 (i.e. that it meets the
+// requirements for DQ-Form instructions).
+def quadwOffsetLoad : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return isOffsetMultipleOf(N, 16);
+}]>;
+def quadwOffsetStore : PatFrag<(ops node:$val, node:$ptr),
+                               (store node:$val, node:$ptr), [{
+  return isOffsetMultipleOf(N, 16);
+}]>;
+def nonQuadwOffsetLoad : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return !isOffsetMultipleOf(N, 16);
+}]>;
+def nonQuadwOffsetStore : PatFrag<(ops node:$val, node:$ptr),
+                                  (store node:$val, node:$ptr), [{
+  return !isOffsetMultipleOf(N, 16);
+}]>;
+
 //===----------------------------------------------------------------------===//
 // PowerPC Flag Definitions.
 
@@ -815,7 +834,8 @@
 def iaddr  : ComplexPattern<iPTR, 2, "SelectAddrImm",    [], []>;
 def xaddr  : ComplexPattern<iPTR, 2, "SelectAddrIdx",    [], []>;
 def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>;
-def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmX4",  [], []>; // "std"
+def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmX4",   [], []>; // "std"
+def iqaddr : ComplexPattern<iPTR, 2, "SelectAddrImmX16",  [], []>; // "stxv"
 
 // The address in a single register. This is used with the SjLj
 // pseudo-instructions.
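As the comment in the hunk above says, isOffsetMultipleOf(N, 16) is weaker than a real alignment check: it inspects only the displacement, never the base register's runtime value, so it establishes DQ-Form encodability rather than 16-byte alignment of the access itself. That is sufficient because lxv/stxv tolerate unaligned effective addresses. A small illustration of the distinction, in plain arithmetic rather than LLVM code:

  #include <cassert>
  #include <cstdint>

  static bool dqFormEncodable(int64_t Disp) { return Disp % 16 == 0; }
  static bool is16ByteAligned(uint64_t EA) { return EA % 16 == 0; }

  int main() {
    uint64_t Base = 0x1008; // runtime base that is only 8-byte aligned
    int64_t Disp = 16;      // displacement the PatFrag accepts
    assert(dqFormEncodable(Disp));         // DQ-Form lxv/stxv is selectable
    assert(!is16ByteAligned(Base + Disp)); // yet the access is unaligned;
                                           // the hardware still handles it
    return 0;
  }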
Index: llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td
===================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2606,37 +2606,41 @@
   } // IsLittleEndian, HasP9Vector
 
   // D-Form Load/Store
-  def : Pat<(v4i32 (load iaddr:$src)), (LXV memrix16:$src)>;
-  def : Pat<(v4f32 (load iaddr:$src)), (LXV memrix16:$src)>;
-  def : Pat<(v2i64 (load iaddr:$src)), (LXV memrix16:$src)>;
-  def : Pat<(v2f64 (load iaddr:$src)), (LXV memrix16:$src)>;
-  def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iaddr:$src)), (LXV memrix16:$src)>;
-  def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iaddr:$src)), (LXV memrix16:$src)>;
-
-  def : Pat<(store v4f32:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>;
-  def : Pat<(store v4i32:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>;
-  def : Pat<(store v2f64:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>;
-  def : Pat<(store v2i64:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>;
-  def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iaddr:$dst),
+  def : Pat<(v4i32 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>;
+  def : Pat<(v4f32 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>;
+  def : Pat<(v2i64 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>;
+  def : Pat<(v2f64 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>;
+  def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iqaddr:$src)), (LXV memrix16:$src)>;
+  def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iqaddr:$src)), (LXV memrix16:$src)>;
+
+  def : Pat<(quadwOffsetStore v4f32:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
+  def : Pat<(quadwOffsetStore v4i32:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
+  def : Pat<(quadwOffsetStore v2f64:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
+  def : Pat<(quadwOffsetStore v2i64:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
+  def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iqaddr:$dst),
             (STXV $rS, memrix16:$dst)>;
-  def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iaddr:$dst),
+  def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iqaddr:$dst),
             (STXV $rS, memrix16:$dst)>;
 
-  def : Pat<(v2f64 (load xaddr:$src)), (LXVX xaddr:$src)>;
-  def : Pat<(v2i64 (load xaddr:$src)), (LXVX xaddr:$src)>;
-  def : Pat<(v4f32 (load xaddr:$src)), (LXVX xaddr:$src)>;
-  def : Pat<(v4i32 (load xaddr:$src)), (LXVX xaddr:$src)>;
-  def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xaddr:$src)), (LXVX xaddr:$src)>;
-  def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xaddr:$src)), (LXVX xaddr:$src)>;
-  def : Pat<(store v2f64:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>;
-  def : Pat<(store v2i64:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>;
-  def : Pat<(store v4f32:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>;
-  def : Pat<(store v4i32:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>;
-  def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xaddr:$dst),
-            (STXVX $rS, xaddr:$dst)>;
-  def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xaddr:$dst),
-            (STXVX $rS, xaddr:$dst)>;
+  def : Pat<(v2f64 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
+  def : Pat<(v2i64 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
+  def : Pat<(v4f32 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
+  def : Pat<(v4i32 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
+  def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVX xoaddr:$src)>;
+  def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xoaddr:$src)), (LXVX xoaddr:$src)>;
+  def : Pat<(nonQuadwOffsetStore v2f64:$rS, xoaddr:$dst),
+            (STXVX $rS, xoaddr:$dst)>;
+  def : Pat<(nonQuadwOffsetStore v2i64:$rS, xoaddr:$dst),
+            (STXVX $rS, xoaddr:$dst)>;
+  def : Pat<(nonQuadwOffsetStore v4f32:$rS, xoaddr:$dst),
+            (STXVX $rS, xoaddr:$dst)>;
+  def : Pat<(nonQuadwOffsetStore v4i32:$rS, xoaddr:$dst),
+            (STXVX $rS, xoaddr:$dst)>;
+  def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
+            (STXVX $rS, xoaddr:$dst)>;
+  def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
+            (STXVX $rS, xoaddr:$dst)>;
   def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))),
             (v4i32 (LXVWSX xoaddr:$src))>;
   def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
@@ -2788,21 +2792,21 @@
   let isPseudo = 1 in {
     def DFLOADf32  : Pseudo<(outs vssrc:$XT), (ins memrix:$src),
                             "#DFLOADf32",
-                            [(set f32:$XT, (load iaddr:$src))]>;
+                            [(set f32:$XT, (load ixaddr:$src))]>;
     def DFLOADf64  : Pseudo<(outs vsfrc:$XT), (ins memrix:$src),
                             "#DFLOADf64",
-                            [(set f64:$XT, (load iaddr:$src))]>;
+                            [(set f64:$XT, (load ixaddr:$src))]>;
     def DFSTOREf32 : Pseudo<(outs), (ins vssrc:$XT, memrix:$dst),
                             "#DFSTOREf32",
-                            [(store f32:$XT, iaddr:$dst)]>;
+                            [(store f32:$XT, ixaddr:$dst)]>;
     def DFSTOREf64 : Pseudo<(outs), (ins vsfrc:$XT, memrix:$dst),
                             "#DFSTOREf64",
-                            [(store f64:$XT, iaddr:$dst)]>;
+                            [(store f64:$XT, ixaddr:$dst)]>;
   }
-  def : Pat<(f64 (extloadf32 iaddr:$src)),
-            (COPY_TO_REGCLASS (DFLOADf32 iaddr:$src), VSFRC)>;
-  def : Pat<(f32 (fpround (extloadf32 iaddr:$src))),
-            (f32 (DFLOADf32 iaddr:$src))>;
+  def : Pat<(f64 (extloadf32 ixaddr:$src)),
+            (COPY_TO_REGCLASS (DFLOADf32 ixaddr:$src), VSFRC)>;
+  def : Pat<(f32 (fpround (extloadf32 ixaddr:$src))),
+            (f32 (DFLOADf32 ixaddr:$src))>;
 } // end HasP9Vector, AddedComplexity
 
 // Integer extend helper dags 32 -> 64
@@ -2881,13 +2885,13 @@
   dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 xoaddr:$A)))));
 }
 def FltToLongLoadP9 {
-  dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 iaddr:$A)))));
+  dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 ixaddr:$A)))));
 }
 def FltToULongLoad {
   dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 xoaddr:$A)))));
 }
 def FltToULongLoadP9 {
-  dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 iaddr:$A)))));
+  dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 ixaddr:$A)))));
 }
 def FltToLong {
   dag A = (i64 (PPCmfvsr (PPCfctidz (fpextend f32:$A))));
@@ -2911,13 +2915,13 @@
   dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load xoaddr:$A)))));
 }
 def DblToIntLoadP9 {
-  dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load iaddr:$A)))));
+  dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load ixaddr:$A)))));
 }
 def DblToUIntLoad {
   dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load xoaddr:$A)))));
 }
 def DblToUIntLoadP9 {
-  dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load iaddr:$A)))));
+  dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load ixaddr:$A)))));
 }
 def DblToLongLoad {
   dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (load xoaddr:$A)))));
@@ -3088,17 +3092,17 @@
             (v4i32 (XVCVSPUXWS (LXVWSX xoaddr:$A)))>;
   def : Pat<(v4i32 (scalar_to_vector DblToIntLoadP9.A)),
             (v4i32 (XXSPLTW (COPY_TO_REGCLASS
-                              (XSCVDPSXWS (DFLOADf64 iaddr:$A)), VSRC), 1))>;
+                              (XSCVDPSXWS (DFLOADf64 ixaddr:$A)), VSRC), 1))>;
   def : Pat<(v4i32 (scalar_to_vector DblToUIntLoadP9.A)),
             (v4i32 (XXSPLTW (COPY_TO_REGCLASS
-                              (XSCVDPUXWS (DFLOADf64 iaddr:$A)), VSRC), 1))>;
+                              (XSCVDPUXWS (DFLOADf64 ixaddr:$A)), VSRC), 1))>;
   def : Pat<(v2i64 (scalar_to_vector FltToLongLoadP9.A)),
             (v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS
-                                            (DFLOADf32 iaddr:$A),
+                                            (DFLOADf32 ixaddr:$A),
                                             VSFRC)), 0))>;
   def : Pat<(v2i64 (scalar_to_vector FltToULongLoadP9.A)),
             (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS
-                                            (DFLOADf32 iaddr:$A),
+                                            (DFLOADf32 ixaddr:$A),
                                             VSFRC)), 0))>;
 }
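The net effect of the pattern split above is a two-way policy: reg+imm addresses whose immediate is a multiple of 16 keep the DQ-Form LXV/STXV, and every other address is routed through the X-Form LXVX/STXVX, materializing the offset with an addi when needed, as the PR33671.ll test later in the patch shows. A rough stand-in for what the generated matcher decides; the enum and function below are illustrative, not LLVM code:

  #include <cstdint>

  enum class VecMemOp { LXV, LXVX }; // DQ-Form vs. X-Form

  // Stand-in for quadwOffsetLoad/iqaddr vs. nonQuadwOffsetLoad/xoaddr.
  static VecMemOp selectVectorLoad(bool BasePlusImm, int64_t Imm) {
    if (BasePlusImm && Imm >= INT16_MIN && Imm <= INT16_MAX && Imm % 16 == 0)
      return VecMemOp::LXV;  // e.g. lxv 34, 16(3)
    return VecMemOp::LXVX;   // e.g. addi 3, 3, 8 then lxvx 34, 0, 3
  }

  int main() {
    return (selectVectorLoad(true, 16) == VecMemOp::LXV &&
            selectVectorLoad(true, 8) == VecMemOp::LXVX) ? 0 : 1;
  }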
Index: llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -754,19 +754,31 @@
   return false;
 }
 
-// Figure out if the offset in the instruction must be a multiple of 4.
-// This is true for instructions like "STD".
-static bool usesIXAddr(const MachineInstr &MI) {
+// If the offset must be a multiple of some value, return what that value is.
+static unsigned offsetMinAlign(const MachineInstr &MI) {
   unsigned OpC = MI.getOpcode();
 
   switch (OpC) {
   default:
-    return false;
+    return 1;
   case PPC::LWA:
   case PPC::LWA_32:
   case PPC::LD:
+  case PPC::LDU:
   case PPC::STD:
-    return true;
+  case PPC::STDU:
+  case PPC::DFLOADf32:
+  case PPC::DFLOADf64:
+  case PPC::DFSTOREf32:
+  case PPC::DFSTOREf64:
+  case PPC::LXSD:
+  case PPC::LXSSP:
+  case PPC::STXSD:
+  case PPC::STXSSP:
+    return 4;
+  case PPC::LXV:
+  case PPC::STXV:
+    return 16;
   }
 }
 
@@ -852,9 +864,6 @@
   MI.getOperand(FIOperandNum).ChangeToRegister(
     FrameIndex < 0 ? getBaseRegister(MF) : getFrameRegister(MF), false);
 
-  // Figure out if the offset in the instruction is shifted right two bits.
-  bool isIXAddr = usesIXAddr(MI);
-
   // If the instruction is not present in ImmToIdxMap, then it has no immediate
   // form (and must be r+r).
   bool noImmForm = !MI.isInlineAsm() && OpC != TargetOpcode::STACKMAP &&
@@ -883,7 +892,8 @@
   // happen in invalid code.
   assert(OpC != PPC::DBG_VALUE &&
          "This should be handled in a target-independent way");
-  if (!noImmForm && ((isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0)) ||
+  if (!noImmForm && ((isInt<16>(Offset) &&
+                      ((Offset % offsetMinAlign(MI)) == 0)) ||
                      OpC == TargetOpcode::STACKMAP ||
                      OpC == TargetOpcode::PATCHPOINT)) {
     MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
@@ -1076,5 +1086,5 @@
   return MI->getOpcode() == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm
          MI->getOpcode() == TargetOpcode::STACKMAP ||
          MI->getOpcode() == TargetOpcode::PATCHPOINT ||
-         (isInt<16>(Offset) && (!usesIXAddr(*MI) || (Offset & 3) == 0));
+         (isInt<16>(Offset) && (Offset % offsetMinAlign(*MI)) == 0);
 }
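Both uses of offsetMinAlign above implement the same test: after frame-index elimination computes the final offset, the instruction may keep its immediate form only if the offset fits in a signed 16-bit field and is a multiple of the opcode's alignment class (1 for plain D-Forms, 4 for DS-Forms such as ld/std, 16 for the DQ-Forms lxv/stxv); otherwise the offset must be materialized in a register. A condensed sketch with a hypothetical helper name:

  #include <cstdint>

  // Mirrors "isInt<16>(Offset) && (Offset % offsetMinAlign(MI)) == 0".
  static bool keepsImmediateForm(int64_t Offset, unsigned MinAlign) {
    bool FitsS16 = Offset >= INT16_MIN && Offset <= INT16_MAX;
    return FitsS16 && (Offset % static_cast<int64_t>(MinAlign)) == 0;
  }

  int main() {
    // A 20-byte frame offset is fine for ld (align 4) but forces lxv
    // (align 16) into the register-indexed fallback.
    return (keepsImmediateForm(20, 4) && !keepsImmediateForm(20, 16)) ? 0 : 1;
  }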
Index: llvm/trunk/test/CodeGen/PowerPC/PR33671.ll
===================================================================
--- llvm/trunk/test/CodeGen/PowerPC/PR33671.ll
+++ llvm/trunk/test/CodeGen/PowerPC/PR33671.ll
@@ -0,0 +1,32 @@
+; Function Attrs: norecurse nounwind
+; RUN: llc -mtriple=powerpc64le-unknown-unknown -mcpu=pwr9 < %s | FileCheck %s
+define void @test1(i32* nocapture readonly %arr, i32* nocapture %arrTo) {
+entry:
+  %arrayidx = getelementptr inbounds i32, i32* %arrTo, i64 4
+  %0 = bitcast i32* %arrayidx to <4 x i32>*
+  %arrayidx1 = getelementptr inbounds i32, i32* %arr, i64 4
+  %1 = bitcast i32* %arrayidx1 to <4 x i32>*
+  %2 = load <4 x i32>, <4 x i32>* %1, align 16
+  store <4 x i32> %2, <4 x i32>* %0, align 16
+  ret void
+; CHECK-LABEL: test1
+; CHECK: lxv [[LD:[0-9]+]], 16(3)
+; CHECK: stxv [[LD]], 16(4)
+}
+
+; Function Attrs: norecurse nounwind
+define void @test2(i32* nocapture readonly %arr, i32* nocapture %arrTo) {
+entry:
+  %arrayidx = getelementptr inbounds i32, i32* %arrTo, i64 1
+  %0 = bitcast i32* %arrayidx to <4 x i32>*
+  %arrayidx1 = getelementptr inbounds i32, i32* %arr, i64 2
+  %1 = bitcast i32* %arrayidx1 to <4 x i32>*
+  %2 = load <4 x i32>, <4 x i32>* %1, align 16
+  store <4 x i32> %2, <4 x i32>* %0, align 16
+  ret void
+; CHECK-LABEL: test2
+; CHECK: addi 3, 3, 8
+; CHECK: lxvx [[LD:[0-9]+]], 0, 3
+; CHECK: addi 3, 4, 4
+; CHECK: stxvx [[LD]], 0, 3
+}
Index: llvm/trunk/test/CodeGen/PowerPC/build-vector-tests.ll
===================================================================
--- llvm/trunk/test/CodeGen/PowerPC/build-vector-tests.ll
+++ llvm/trunk/test/CodeGen/PowerPC/build-vector-tests.ll
@@ -1018,13 +1018,13 @@
 ; P8BE-LABEL: fromDiffMemVarDi
 ; P8LE-LABEL: fromDiffMemVarDi
 ; P9BE: sldi {{r[0-9]+}}, r4, 2
-; P9BE-DAG: lxv {{v[0-9]+}}
-; P9BE-DAG: lxv
+; P9BE-DAG: lxvx {{v[0-9]+}}
+; P9BE-DAG: lxvx
 ; P9BE: vperm
 ; P9BE: blr
 ; P9LE: sldi {{r[0-9]+}}, r4, 2
-; P9LE-DAG: lxv {{v[0-9]+}}
-; P9LE-DAG: lxv
+; P9LE-DAG: lxvx {{v[0-9]+}}
+; P9LE-DAG: lxvx
 ; P9LE: vperm
 ; P9LE: blr
 ; P8BE: sldi {{r[0-9]+}}, r4, 2
@@ -1584,16 +1584,16 @@
 ; P9LE-LABEL: fromDiffMemConsAConvdtoi
 ; P8BE-LABEL: fromDiffMemConsAConvdtoi
 ; P8LE-LABEL: fromDiffMemConsAConvdtoi
-; P9BE: lxv [[REG1:[vs0-9]+]], 0(r3)
-; P9BE: lxv [[REG2:[vs0-9]+]], 16(r3)
+; P9BE-DAG: lxv [[REG1:[vs0-9]+]], 0(r3)
+; P9BE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3)
 ; P9BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]]
 ; P9BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]]
 ; P9BE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]]
 ; P9BE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]]
 ; P9BE: vmrgew v2, [[REG6]], [[REG5]]
 ; P9BE: xvcvspsxws v2, v2
-; P9LE: lxv [[REG1:[vs0-9]+]], 0(r3)
-; P9LE: lxv [[REG2:[vs0-9]+]], 16(r3)
+; P9LE-DAG: lxv [[REG1:[vs0-9]+]], 0(r3)
+; P9LE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3)
 ; P9LE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG2]], [[REG1]]
 ; P9LE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG2]], [[REG1]]
 ; P9LE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]]
@@ -2177,12 +2177,14 @@
 ; P8BE-LABEL: fromDiffMemVarDui
 ; P8LE-LABEL: fromDiffMemVarDui
 ; P9BE-DAG: sldi {{r[0-9]+}}, r4, 2
-; P9BE-DAG: lxv {{v[0-9]+}}, -12(r3)
-; P9BE-DAG: lxv
+; P9BE-DAG: addi r3, r3, -12
+; P9BE-DAG: lxvx {{v[0-9]+}}, 0, r3
+; P9BE-DAG: lxvx
 ; P9BE: vperm
 ; P9BE: blr
 ; P9LE-DAG: sldi {{r[0-9]+}}, r4, 2
-; P9LE-DAG: lxv {{v[0-9]+}}, -12(r3)
+; P9LE-DAG: addi r3, r3, -12
+; P9LE-DAG: lxvx {{v[0-9]+}}, 0, r3
 ; P9LE-DAG: lxv
 ; P9LE: vperm
 ; P9LE: blr
@@ -2742,16 +2744,16 @@
 ; P9LE-LABEL: fromDiffMemConsAConvdtoui
 ; P8BE-LABEL: fromDiffMemConsAConvdtoui
 ; P8LE-LABEL: fromDiffMemConsAConvdtoui
-; P9BE: lxv [[REG1:[vs0-9]+]], 0(r3)
-; P9BE: lxv [[REG2:[vs0-9]+]], 16(r3)
+; P9BE-DAG: lxv [[REG1:[vs0-9]+]], 0(r3)
+; P9BE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3)
 ; P9BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]]
 ; P9BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]]
 ; P9BE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]]
 ; P9BE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]]
 ; P9BE: vmrgew v2, [[REG6]], [[REG5]]
 ; P9BE: xvcvspuxws v2, v2
-; P9LE: lxv [[REG1:[vs0-9]+]], 0(r3)
-; P9LE: lxv [[REG2:[vs0-9]+]], 16(r3)
+; P9LE-DAG: lxv [[REG1:[vs0-9]+]], 0(r3)
+; P9LE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3)
 ; P9LE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG2]], [[REG1]]
 ; P9LE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG2]], [[REG1]]
 ; P9LE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]]
@@ -3466,9 +3468,9 @@
 ; P9LE-LABEL: fromDiffConstsConvftoll
 ; P8BE-LABEL: fromDiffConstsConvftoll
 ; P8LE-LABEL: fromDiffConstsConvftoll
-; P9BE: lxv v2
+; P9BE: lxvx v2
 ; P9BE: blr
-; P9LE: lxv v2
+; P9LE: lxvx v2
 ; P9LE: blr
 ; P8BE: lxvd2x v2
 ; P8BE: blr
@@ -4370,9 +4372,9 @@
 ; P9LE-LABEL: fromDiffConstsConvftoull
 ; P8BE-LABEL: fromDiffConstsConvftoull
 ; P8LE-LABEL: fromDiffConstsConvftoull
-; P9BE: lxv v2
+; P9BE: lxvx v2
 ; P9BE: blr
-; P9LE: lxv v2
+; P9LE: lxvx v2
 ; P9LE: blr
 ; P8BE: lxvd2x v2
 ; P8BE: blr
Index: llvm/trunk/test/CodeGen/PowerPC/ppc64-i128-abi.ll
===================================================================
--- llvm/trunk/test/CodeGen/PowerPC/ppc64-i128-abi.ll
+++ llvm/trunk/test/CodeGen/PowerPC/ppc64-i128-abi.ll
@@ -63,7 +63,7 @@
 ; FIXME: li [[R1:r[0-9]+]], 1
 ; FIXME: li [[R2:r[0-9]+]], 0
 ; FIXME: mtvsrdd [[V1:v[0-9]+]], [[R2]], [[R1]]
-; CHECK-P9: lxv [[V1:v[0-9]+]]
+; CHECK-P9: lxvx [[V1:v[0-9]+]]
 ; CHECK-P9: vadduqm v2, v2, [[V1]]
 ; CHECK-P9: blr
 
@@ -237,8 +237,8 @@
 ; CHECK-LE: blr
 
 ; CHECK-P9-LABEL: @call_v1i128_increment_by_val
-; CHECK-P9-DAG: lxv v2
-; CHECK-P9-DAG: lxv v3
+; CHECK-P9-DAG: lxvx v2
+; CHECK-P9-DAG: lxvx v3
 ; CHECK-P9: bl v1i128_increment_by_val
 ; CHECK-P9: blr
 
Index: llvm/trunk/test/CodeGen/PowerPC/swaps-le-6.ll
===================================================================
--- llvm/trunk/test/CodeGen/PowerPC/swaps-le-6.ll
+++ llvm/trunk/test/CodeGen/PowerPC/swaps-le-6.ll
@@ -33,11 +33,11 @@
 ; CHECK: stxvd2x [[REG5]]
 
 ; CHECK-P9-LABEL: @bar0
-; CHECK-P9-DAG: lxv [[REG1:[0-9]+]]
+; CHECK-P9-DAG: lxvx [[REG1:[0-9]+]]
 ; CHECK-P9-DAG: lfd [[REG2:[0-9]+]], 0(3)
 ; CHECK-P9: xxspltd [[REG4:[0-9]+]], [[REG2]], 0
 ; CHECK-P9: xxpermdi [[REG5:[0-9]+]], [[REG1]], [[REG4]], 1
-; CHECK-P9: stxv [[REG5]]
+; CHECK-P9: stxvx [[REG5]]
 
 define void @bar1() {
 entry:
@@ -56,9 +56,9 @@
 ; CHECK: stxvd2x [[REG5]]
 
 ; CHECK-P9-LABEL: @bar1
-; CHECK-P9-DAG: lxv [[REG1:[0-9]+]]
+; CHECK-P9-DAG: lxvx [[REG1:[0-9]+]]
 ; CHECK-P9-DAG: lfd [[REG2:[0-9]+]], 0(3)
 ; CHECK-P9: xxspltd [[REG4:[0-9]+]], [[REG2]], 0
 ; CHECK-P9: xxmrgld [[REG5:[0-9]+]], [[REG4]], [[REG1]]
-; CHECK-P9: stxv [[REG5]]
+; CHECK-P9: stxvx [[REG5]]
Index: llvm/trunk/test/CodeGen/PowerPC/vsx-p9.ll
===================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vsx-p9.ll
+++ llvm/trunk/test/CodeGen/PowerPC/vsx-p9.ll
@@ -36,8 +36,8 @@
   %1 = load <16 x i8>, <16 x i8>* @ucb, align 16
   %add.i = add <16 x i8> %1, %0
   tail call void (...) @sink(<16 x i8> %add.i)
-; CHECK: lxv 34, 0(3)
-; CHECK: lxv 35, 0(4)
+; CHECK: lxvx 34, 0, 3
+; CHECK: lxvx 35, 0, 4
 ; CHECK: vaddubm 2, 3, 2
 ; CHECK: stxv 34,
 ; CHECK: bl sink
@@ -45,8 +45,8 @@
   %3 = load <16 x i8>, <16 x i8>* @scb, align 16
   %add.i22 = add <16 x i8> %3, %2
   tail call void (...) @sink(<16 x i8> %add.i22)
-; CHECK: lxv 34, 0(3)
-; CHECK: lxv 35, 0(4)
+; CHECK: lxvx 34, 0, 3
+; CHECK: lxvx 35, 0, 4
 ; CHECK: vaddubm 2, 3, 2
 ; CHECK: stxv 34,
 ; CHECK: bl sink
@@ -54,8 +54,8 @@
   %5 = load <8 x i16>, <8 x i16>* @usb, align 16
   %add.i21 = add <8 x i16> %5, %4
   tail call void (...) @sink(<8 x i16> %add.i21)
-; CHECK: lxv 34, 0(3)
-; CHECK: lxv 35, 0(4)
+; CHECK: lxvx 34, 0, 3
+; CHECK: lxvx 35, 0, 4
 ; CHECK: vadduhm 2, 3, 2
 ; CHECK: stxv 34,
 ; CHECK: bl sink
@@ -63,8 +63,8 @@
   %7 = load <8 x i16>, <8 x i16>* @ssb, align 16
   %add.i20 = add <8 x i16> %7, %6
   tail call void (...) @sink(<8 x i16> %add.i20)
-; CHECK: lxv 34, 0(3)
-; CHECK: lxv 35, 0(4)
+; CHECK: lxvx 34, 0, 3
+; CHECK: lxvx 35, 0, 4
 ; CHECK: vadduhm 2, 3, 2
 ; CHECK: stxv 34,
 ; CHECK: bl sink
@@ -72,8 +72,8 @@
   %9 = load <4 x i32>, <4 x i32>* @uib, align 16
   %add.i19 = add <4 x i32> %9, %8
   tail call void (...) @sink(<4 x i32> %add.i19)
-; CHECK: lxv 34, 0(3)
-; CHECK: lxv 35, 0(4)
+; CHECK: lxvx 34, 0, 3
+; CHECK: lxvx 35, 0, 4
 ; CHECK: vadduwm 2, 3, 2
 ; CHECK: stxv 34,
 ; CHECK: bl sink
@@ -81,8 +81,8 @@
   %11 = load <4 x i32>, <4 x i32>* @sib, align 16
   %add.i18 = add <4 x i32> %11, %10
   tail call void (...) @sink(<4 x i32> %add.i18)
-; CHECK: lxv 34, 0(3)
-; CHECK: lxv 35, 0(4)
+; CHECK: lxvx 34, 0, 3
+; CHECK: lxvx 35, 0, 4
 ; CHECK: vadduwm 2, 3, 2
 ; CHECK: stxv 34,
 ; CHECK: bl sink
@@ -90,8 +90,8 @@
   %13 = load <2 x i64>, <2 x i64>* @ullb, align 16
   %add.i17 = add <2 x i64> %13, %12
   tail call void (...) @sink(<2 x i64> %add.i17)
-; CHECK: lxv 34, 0(3)
-; CHECK: lxv 35, 0(4)
+; CHECK: lxvx 34, 0, 3
+; CHECK: lxvx 35, 0, 4
 ; CHECK: vaddudm 2, 3, 2
 ; CHECK: stxv 34,
 ; CHECK: bl sink
@@ -99,8 +99,8 @@
   %15 = load <2 x i64>, <2 x i64>* @sllb, align 16
   %add.i16 = add <2 x i64> %15, %14
   tail call void (...) @sink(<2 x i64> %add.i16)
-; CHECK: lxv 34, 0(3)
-; CHECK: lxv 35, 0(4)
+; CHECK: lxvx 34, 0, 3
+; CHECK: lxvx 35, 0, 4
 ; CHECK: vaddudm 2, 3, 2
 ; CHECK: stxv 34,
 ; CHECK: bl sink
@@ -108,8 +108,8 @@
   %17 = load <1 x i128>, <1 x i128>* @uxb, align 16
   %add.i15 = add <1 x i128> %17, %16
   tail call void (...) @sink(<1 x i128> %add.i15)
-; CHECK: lxv 34, 0(3)
-; CHECK: lxv 35, 0(4)
+; CHECK: lxvx 34, 0, 3
+; CHECK: lxvx 35, 0, 4
 ; CHECK: vadduqm 2, 3, 2
 ; CHECK: stxv 34,
 ; CHECK: bl sink
@@ -117,8 +117,8 @@
   %19 = load <1 x i128>, <1 x i128>* @sxb, align 16
   %add.i14 = add <1 x i128> %19, %18
   tail call void (...) @sink(<1 x i128> %add.i14)
-; CHECK: lxv 34, 0(3)
-; CHECK: lxv 35, 0(4)
+; CHECK: lxvx 34, 0, 3
+; CHECK: lxvx 35, 0, 4
 ; CHECK: vadduqm 2, 3, 2
 ; CHECK: stxv 34,
 ; CHECK: bl sink
@@ -126,8 +126,8 @@
   %21 = load <4 x float>, <4 x float>* @vfb, align 16
   %add.i13 = fadd <4 x float> %20, %21
   tail call void (...) @sink(<4 x float> %add.i13)
-; CHECK: lxv 0, 0(3)
-; CHECK: lxv 1, 0(4)
+; CHECK: lxvx 0, 0, 3
+; CHECK: lxvx 1, 0, 4
 ; CHECK: xvaddsp 34, 0, 1
 ; CHECK: stxv 34,
 ; CHECK: bl sink
@@ -135,8 +135,8 @@
   %23 = load <2 x double>, <2 x double>* @vdb, align 16
   %add.i12 = fadd <2 x double> %22, %23
   tail call void (...) @sink(<2 x double> %add.i12)
-; CHECK: lxv 0, 0(3)
-; CHECK: lxv 1, 0(4)
+; CHECK: lxvx 0, 0, 3
+; CHECK: lxvx 1, 0, 4
 ; CHECK: xvadddp 0, 0, 1
 ; CHECK: stxv 0,
 ; CHECK: bl sink