Index: lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp =================================================================== --- lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -271,7 +271,8 @@ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 12; const MCOperand &MO = MI.getOperand(OpNo); - assert(MO.isImm()); + assert(MO.isImm() && !(MO.getImm() % 16) && + "Expecting an immediate that is a multiple of 16"); return ((getMachineOpValue(MI, MO, Fixups, STI) >> 4) & 0xFFF) | RegBits; } Index: lib/Target/PowerPC/PPCISelDAGToDAG.cpp =================================================================== --- lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -178,7 +178,7 @@ /// a base register plus a signed 16-bit displacement [r+imm]. bool SelectAddrImm(SDValue N, SDValue &Disp, SDValue &Base) { - return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, false); + return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 0); } /// SelectAddrImmOffs - Return true if the operand is valid for a preinc @@ -211,7 +211,11 @@ /// a base register plus a signed 16-bit displacement that is a multiple of 4. /// Suitable for use by STD and friends. bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) { - return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, true); + return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 4); + } + + bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) { + return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 16); } // Select an address into a single register. Index: lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- lib/Target/PowerPC/PPCISelLowering.h +++ lib/Target/PowerPC/PPCISelLowering.h @@ -612,7 +612,7 @@ /// is not better represented as reg+reg. If Aligned is true, only accept /// displacements suitable for STD and friends, i.e. multiples of 4. bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, - SelectionDAG &DAG, bool Aligned) const; + SelectionDAG &DAG, unsigned Alignment = 0) const; /// SelectAddressRegRegOnly - Given the specified addressed, force it to be /// represented as an indexed [r+r] operation. Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -2125,12 +2125,12 @@ /// Returns true if the address N can be represented by a base register plus /// a signed 16-bit displacement [r+imm], and if it is not better -/// represented as reg+reg. If Aligned is true, only accept displacements -/// suitable for STD and friends, i.e. multiples of 4. +/// represented as reg+reg. If \p Alignment is non-zero, only accept +/// displacements that are multiples of that value. bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, - bool Aligned) const { + unsigned Alignment) const { // FIXME dl should come from parent load or store, not from address SDLoc dl(N); // If this can be more profitably realized as r+r, fail. @@ -2140,7 +2140,7 @@ if (N.getOpcode() == ISD::ADD) { short imm = 0; if (isIntS16Immediate(N.getOperand(1), imm) && - (!Aligned || (imm & 3) == 0)) { + (!Alignment || (imm % Alignment) == 0)) { Disp = DAG.getTargetConstant(imm, dl, N.getValueType()); if (FrameIndexSDNode *FI = dyn_cast(N.getOperand(0))) { Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); @@ -2164,7 +2164,7 @@ } else if (N.getOpcode() == ISD::OR) { short imm = 0; if (isIntS16Immediate(N.getOperand(1), imm) && - (!Aligned || (imm & 3) == 0)) { + (!Alignment || (imm % Alignment) == 0)) { // If this is an or of disjoint bitfields, we can codegen this as an add // (for better address arithmetic) if the LHS and RHS of the OR are // provably disjoint. @@ -2191,7 +2191,7 @@ // If this address fits entirely in a 16-bit sext immediate field, codegen // this as "d, 0" short Imm; - if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) { + if (isIntS16Immediate(CN, Imm) && (!Alignment || (Imm % Alignment) == 0)) { Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0)); Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, CN->getValueType(0)); @@ -2201,7 +2201,7 @@ // Handle 32-bit sext immediates with LIS + addr mode. if ((CN->getValueType(0) == MVT::i32 || (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) && - (!Aligned || (CN->getZExtValue() & 3) == 0)) { + (!Alignment || (CN->getZExtValue() % Alignment) == 0)) { int Addr = (int)CN->getZExtValue(); // Otherwise, break this down into an LIS + disp. Index: lib/Target/PowerPC/PPCInstrAltivec.td =================================================================== --- lib/Target/PowerPC/PPCInstrAltivec.td +++ lib/Target/PowerPC/PPCInstrAltivec.td @@ -856,10 +856,10 @@ def : InstAlias<"vnot $vD, $vA", (VNOR vrrc:$vD, vrrc:$vA, vrrc:$vA)>; // Loads. -def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>; +def : Pat<(v4i32 (aligned16load xoaddr:$src)), (LVX xoaddr:$src)>; // Stores. -def : Pat<(store v4i32:$rS, xoaddr:$dst), +def : Pat<(aligned16store v4i32:$rS, xoaddr:$dst), (STVX $rS, xoaddr:$dst)>; // Bit conversions. Index: lib/Target/PowerPC/PPCInstrInfo.td =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.td +++ lib/Target/PowerPC/PPCInstrInfo.td @@ -401,6 +401,22 @@ return cast(N)->getAlignment() < 4; }]>; +// Similar to the above but for 16-byte alignment. +def aligned16load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast(N)->getAlignment() >= 16; +}]>; +def aligned16store : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast(N)->getAlignment() >= 16; +}]>; +def unaligned16load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast(N)->getAlignment() < 16; +}]>; +def unaligned16store : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast(N)->getAlignment() < 16; +}]>; + //===----------------------------------------------------------------------===// // PowerPC Flag Definitions. @@ -811,7 +827,8 @@ def iaddr : ComplexPattern; def xaddr : ComplexPattern; def xoaddr : ComplexPattern; -def ixaddr : ComplexPattern; // "std" +def ixaddr : ComplexPattern; // "std" +def iqaddr : ComplexPattern; // "stxv" // The address in a single register. This is used with the SjLj // pseudo-instructions. Index: lib/Target/PowerPC/PPCInstrVSX.td =================================================================== --- lib/Target/PowerPC/PPCInstrVSX.td +++ lib/Target/PowerPC/PPCInstrVSX.td @@ -2514,37 +2514,41 @@ } // IsLittleEndian, HasP9Vector // D-Form Load/Store - def : Pat<(v4i32 (load iaddr:$src)), (LXV memrix16:$src)>; - def : Pat<(v4f32 (load iaddr:$src)), (LXV memrix16:$src)>; - def : Pat<(v2i64 (load iaddr:$src)), (LXV memrix16:$src)>; - def : Pat<(v2f64 (load iaddr:$src)), (LXV memrix16:$src)>; - def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iaddr:$src)), (LXV memrix16:$src)>; - def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iaddr:$src)), (LXV memrix16:$src)>; - - def : Pat<(store v4f32:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>; - def : Pat<(store v4i32:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>; - def : Pat<(store v2f64:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>; - def : Pat<(store v2i64:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>; - def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iaddr:$dst), + def : Pat<(v4i32 (aligned16load iqaddr:$src)), (LXV memrix16:$src)>; + def : Pat<(v4f32 (aligned16load iqaddr:$src)), (LXV memrix16:$src)>; + def : Pat<(v2i64 (aligned16load iqaddr:$src)), (LXV memrix16:$src)>; + def : Pat<(v2f64 (aligned16load iqaddr:$src)), (LXV memrix16:$src)>; + def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iqaddr:$src)), (LXV memrix16:$src)>; + def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iqaddr:$src)), (LXV memrix16:$src)>; + + def : Pat<(aligned16store v4f32:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>; + def : Pat<(aligned16store v4i32:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>; + def : Pat<(aligned16store v2f64:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>; + def : Pat<(aligned16store v2i64:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>; + def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>; - def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iaddr:$dst), + def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>; - def : Pat<(v2f64 (load xaddr:$src)), (LXVX xaddr:$src)>; - def : Pat<(v2i64 (load xaddr:$src)), (LXVX xaddr:$src)>; - def : Pat<(v4f32 (load xaddr:$src)), (LXVX xaddr:$src)>; - def : Pat<(v4i32 (load xaddr:$src)), (LXVX xaddr:$src)>; - def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xaddr:$src)), (LXVX xaddr:$src)>; - def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xaddr:$src)), (LXVX xaddr:$src)>; - def : Pat<(store v2f64:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>; - def : Pat<(store v2i64:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>; - def : Pat<(store v4f32:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>; - def : Pat<(store v4i32:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>; - def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xaddr:$dst), - (STXVX $rS, xaddr:$dst)>; - def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xaddr:$dst), - (STXVX $rS, xaddr:$dst)>; + def : Pat<(v2f64 (unaligned16load xoaddr:$src)), (LXVX xoaddr:$src)>; + def : Pat<(v2i64 (unaligned16load xoaddr:$src)), (LXVX xoaddr:$src)>; + def : Pat<(v4f32 (unaligned16load xoaddr:$src)), (LXVX xoaddr:$src)>; + def : Pat<(v4i32 (unaligned16load xoaddr:$src)), (LXVX xoaddr:$src)>; + def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVX xoaddr:$src)>; + def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xoaddr:$src)), (LXVX xoaddr:$src)>; + def : Pat<(unaligned16store v2f64:$rS, xoaddr:$dst), + (STXVX $rS, xoaddr:$dst)>; + def : Pat<(unaligned16store v2i64:$rS, xoaddr:$dst), + (STXVX $rS, xoaddr:$dst)>; + def : Pat<(unaligned16store v4f32:$rS, xoaddr:$dst), + (STXVX $rS, xoaddr:$dst)>; + def : Pat<(unaligned16store v4i32:$rS, xoaddr:$dst), + (STXVX $rS, xoaddr:$dst)>; + def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst), + (STXVX $rS, xoaddr:$dst)>; + def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst), + (STXVX $rS, xoaddr:$dst)>; def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))), (v4i32 (LXVWSX xoaddr:$src))>; def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))), @@ -2696,21 +2700,21 @@ let isPseudo = 1 in { def DFLOADf32 : Pseudo<(outs vssrc:$XT), (ins memrix:$src), "#DFLOADf32", - [(set f32:$XT, (load iaddr:$src))]>; + [(set f32:$XT, (load ixaddr:$src))]>; def DFLOADf64 : Pseudo<(outs vsfrc:$XT), (ins memrix:$src), "#DFLOADf64", - [(set f64:$XT, (load iaddr:$src))]>; + [(set f64:$XT, (load ixaddr:$src))]>; def DFSTOREf32 : Pseudo<(outs), (ins vssrc:$XT, memrix:$dst), "#DFSTOREf32", - [(store f32:$XT, iaddr:$dst)]>; + [(store f32:$XT, ixaddr:$dst)]>; def DFSTOREf64 : Pseudo<(outs), (ins vsfrc:$XT, memrix:$dst), "#DFSTOREf64", - [(store f64:$XT, iaddr:$dst)]>; + [(store f64:$XT, ixaddr:$dst)]>; } - def : Pat<(f64 (extloadf32 iaddr:$src)), - (COPY_TO_REGCLASS (DFLOADf32 iaddr:$src), VSFRC)>; - def : Pat<(f32 (fpround (extloadf32 iaddr:$src))), - (f32 (DFLOADf32 iaddr:$src))>; + def : Pat<(f64 (extloadf32 ixaddr:$src)), + (COPY_TO_REGCLASS (DFLOADf32 ixaddr:$src), VSFRC)>; + def : Pat<(f32 (fpround (extloadf32 ixaddr:$src))), + (f32 (DFLOADf32 ixaddr:$src))>; } // end HasP9Vector, AddedComplexity // Integer extend helper dags 32 -> 64 @@ -2771,13 +2775,13 @@ dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 xoaddr:$A))))); } def FltToLongLoadP9 { - dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 iaddr:$A))))); + dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 ixaddr:$A))))); } def FltToULongLoad { dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 xoaddr:$A))))); } def FltToULongLoadP9 { - dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 iaddr:$A))))); + dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 ixaddr:$A))))); } def FltToLong { dag A = (i64 (PPCmfvsr (PPCfctidz (fpextend f32:$A)))); @@ -2801,13 +2805,13 @@ dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load xoaddr:$A))))); } def DblToIntLoadP9 { - dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load iaddr:$A))))); + dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load ixaddr:$A))))); } def DblToUIntLoad { dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load xoaddr:$A))))); } def DblToUIntLoadP9 { - dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load iaddr:$A))))); + dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load ixaddr:$A))))); } def DblToLongLoad { dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (load xoaddr:$A))))); @@ -2978,17 +2982,17 @@ (v4i32 (XVCVSPUXWS (LXVWSX xoaddr:$A)))>; def : Pat<(v4i32 (scalar_to_vector DblToIntLoadP9.A)), (v4i32 (XXSPLTW (COPY_TO_REGCLASS - (XSCVDPSXWS (DFLOADf64 iaddr:$A)), VSRC), 1))>; + (XSCVDPSXWS (DFLOADf64 ixaddr:$A)), VSRC), 1))>; def : Pat<(v4i32 (scalar_to_vector DblToUIntLoadP9.A)), (v4i32 (XXSPLTW (COPY_TO_REGCLASS - (XSCVDPUXWS (DFLOADf64 iaddr:$A)), VSRC), 1))>; + (XSCVDPUXWS (DFLOADf64 ixaddr:$A)), VSRC), 1))>; def : Pat<(v2i64 (scalar_to_vector FltToLongLoadP9.A)), (v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS - (DFLOADf32 iaddr:$A), + (DFLOADf32 ixaddr:$A), VSFRC)), 0))>; def : Pat<(v2i64 (scalar_to_vector FltToULongLoadP9.A)), (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS - (DFLOADf32 iaddr:$A), + (DFLOADf32 ixaddr:$A), VSFRC)), 0))>; } Index: lib/Target/PowerPC/PPCRegisterInfo.cpp =================================================================== --- lib/Target/PowerPC/PPCRegisterInfo.cpp +++ lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -754,19 +754,31 @@ return false; } -// Figure out if the offset in the instruction must be a multiple of 4. -// This is true for instructions like "STD". -static bool usesIXAddr(const MachineInstr &MI) { +// If the offset must be a multiple of some value, return what that value is. +static unsigned offsetMinAlign(const MachineInstr &MI) { unsigned OpC = MI.getOpcode(); switch (OpC) { default: - return false; + return 1; case PPC::LWA: case PPC::LWA_32: case PPC::LD: + case PPC::LDU: case PPC::STD: - return true; + case PPC::STDU: + case PPC::DFLOADf32: + case PPC::DFLOADf64: + case PPC::DFSTOREf32: + case PPC::DFSTOREf64: + case PPC::LXSD: + case PPC::LXSSP: + case PPC::STXSD: + case PPC::STXSSP: + return 4; + case PPC::LXV: + case PPC::STXV: + return 16; } } @@ -852,9 +864,6 @@ MI.getOperand(FIOperandNum).ChangeToRegister( FrameIndex < 0 ? getBaseRegister(MF) : getFrameRegister(MF), false); - // Figure out if the offset in the instruction is shifted right two bits. - bool isIXAddr = usesIXAddr(MI); - // If the instruction is not present in ImmToIdxMap, then it has no immediate // form (and must be r+r). bool noImmForm = !MI.isInlineAsm() && OpC != TargetOpcode::STACKMAP && @@ -883,7 +892,8 @@ // happen in invalid code. assert(OpC != PPC::DBG_VALUE && "This should be handled in a target-independent way"); - if (!noImmForm && ((isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0)) || + if (!noImmForm && ((isInt<16>(Offset) && + ((Offset % offsetMinAlign(MI)) == 0)) || OpC == TargetOpcode::STACKMAP || OpC == TargetOpcode::PATCHPOINT)) { MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset); @@ -1076,5 +1086,5 @@ return MI->getOpcode() == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm MI->getOpcode() == TargetOpcode::STACKMAP || MI->getOpcode() == TargetOpcode::PATCHPOINT || - (isInt<16>(Offset) && (!usesIXAddr(*MI) || (Offset & 3) == 0)); + (isInt<16>(Offset) && (Offset % offsetMinAlign(*MI)) == 0); } Index: test/CodeGen/PowerPC/PR33671.ll =================================================================== --- test/CodeGen/PowerPC/PR33671.ll +++ test/CodeGen/PowerPC/PR33671.ll @@ -0,0 +1,32 @@ +; Function Attrs: norecurse nounwind +; RUN: llc -mtriple=powerpc64le-unknown-unknown -mcpu=pwr9 < %s | FileCheck %s +define void @test1(i32* nocapture readonly %arr, i32* nocapture %arrTo) { +entry: + %arrayidx = getelementptr inbounds i32, i32* %arrTo, i64 4 + %0 = bitcast i32* %arrayidx to <4 x i32>* + %arrayidx1 = getelementptr inbounds i32, i32* %arr, i64 4 + %1 = bitcast i32* %arrayidx1 to <4 x i32>* + %2 = load <4 x i32>, <4 x i32>* %1, align 16 + store <4 x i32> %2, <4 x i32>* %0, align 16 + ret void +; CHECK-LABEL: test1 +; CHECK: lxv [[LD:[0-9]+]], 16(3) +; CHECK: stxv [[LD]], 16(4) +} + +; Function Attrs: norecurse nounwind +define void @test2(i32* nocapture readonly %arr, i32* nocapture %arrTo) { +entry: + %arrayidx = getelementptr inbounds i32, i32* %arrTo, i64 1 + %0 = bitcast i32* %arrayidx to <4 x i32>* + %arrayidx1 = getelementptr inbounds i32, i32* %arr, i64 2 + %1 = bitcast i32* %arrayidx1 to <4 x i32>* + %2 = load <4 x i32>, <4 x i32>* %1, align 16 + store <4 x i32> %2, <4 x i32>* %0, align 16 + ret void +; CHECK-LABEL: test2 +; CHECK-DAG: addi 3, 3, 8 +; CHECK-DAG: addi 4, 4, 4 +; CHECK: lxv [[LD:[0-9]+]], 0(3) +; CHECK: stxv [[LD]], 0(4) +} Index: test/CodeGen/PowerPC/build-vector-tests.ll =================================================================== --- test/CodeGen/PowerPC/build-vector-tests.ll +++ test/CodeGen/PowerPC/build-vector-tests.ll @@ -1303,10 +1303,10 @@ ; P9LE-LABEL: fromDiffMemConsAConvftoi ; P8BE-LABEL: fromDiffMemConsAConvftoi ; P8LE-LABEL: fromDiffMemConsAConvftoi -; P9BE: lxv [[REG1:[vs0-9]+]], 0(r3) +; P9BE: lxvx [[REG1:[vs0-9]+]], 0, r3 ; P9BE: xvcvspsxws v2, [[REG1]] ; P9BE: blr -; P9LE: lxv [[REG1:[vs0-9]+]], 0(r3) +; P9LE: lxvx [[REG1:[vs0-9]+]], 0, r3 ; P9LE: xvcvspsxws v2, [[REG1]] ; P9LE: blr ; P8BE: lxvw4x [[REG1:[vs0-9]+]], 0, r3 @@ -1584,16 +1584,18 @@ ; P9LE-LABEL: fromDiffMemConsAConvdtoi ; P8BE-LABEL: fromDiffMemConsAConvdtoi ; P8LE-LABEL: fromDiffMemConsAConvdtoi -; P9BE: lxv [[REG1:[vs0-9]+]], 0(r3) -; P9BE: lxv [[REG2:[vs0-9]+]], 16(r3) +; P9BE: li [[IMM:r[0-9]+]], 16 +; P9BE: lxvx [[REG1:[vs0-9]+]], 0, r3 +; P9BE: lxvx [[REG2:[vs0-9]+]], r3, [[IMM]] ; P9BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]] ; P9BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]] ; P9BE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]] ; P9BE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]] ; P9BE: vmrgew v2, [[REG6]], [[REG5]] ; P9BE: xvcvspsxws v2, v2 -; P9LE: lxv [[REG1:[vs0-9]+]], 0(r3) -; P9LE: lxv [[REG2:[vs0-9]+]], 16(r3) +; P9LE: li [[IMM:r[0-9]+]], 16 +; P9LE: lxvx [[REG1:[vs0-9]+]], 0, r3 +; P9LE: lxvx [[REG2:[vs0-9]+]], r3, [[IMM]] ; P9LE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG2]], [[REG1]] ; P9LE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG2]], [[REG1]] ; P9LE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]] @@ -2177,12 +2179,14 @@ ; P8BE-LABEL: fromDiffMemVarDui ; P8LE-LABEL: fromDiffMemVarDui ; P9BE-DAG: sldi {{r[0-9]+}}, r4, 2 -; P9BE-DAG: lxv {{v[0-9]+}}, -12(r3) +; P9BE-DAG: li [[IMM:r[0-9]+]], -12 +; P9BE-DAG: lxvx {{v[0-9]+}}, r3, [[IMM]] ; P9BE-DAG: lxv ; P9BE: vperm ; P9BE: blr ; P9LE-DAG: sldi {{r[0-9]+}}, r4, 2 -; P9LE-DAG: lxv {{v[0-9]+}}, -12(r3) +; P9LE-DAG: li [[IMM:r[0-9]+]], -12 +; P9LE-DAG: lxvx {{v[0-9]+}}, r3, [[IMM]] ; P9LE-DAG: lxv ; P9LE: vperm ; P9LE: blr @@ -2461,10 +2465,10 @@ ; P9LE-LABEL: fromDiffMemConsAConvftoui ; P8BE-LABEL: fromDiffMemConsAConvftoui ; P8LE-LABEL: fromDiffMemConsAConvftoui -; P9BE: lxv [[REG1:[vs0-9]+]], 0(r3) +; P9BE: lxvx [[REG1:[vs0-9]+]], 0, r3 ; P9BE: xvcvspuxws v2, [[REG1]] ; P9BE: blr -; P9LE: lxv [[REG1:[vs0-9]+]], 0(r3) +; P9LE: lxvx [[REG1:[vs0-9]+]], 0, r3 ; P9LE: xvcvspuxws v2, [[REG1]] ; P9LE: blr ; P8BE: lxvw4x [[REG1:[vs0-9]+]], 0, r3 @@ -2742,16 +2746,18 @@ ; P9LE-LABEL: fromDiffMemConsAConvdtoui ; P8BE-LABEL: fromDiffMemConsAConvdtoui ; P8LE-LABEL: fromDiffMemConsAConvdtoui -; P9BE: lxv [[REG1:[vs0-9]+]], 0(r3) -; P9BE: lxv [[REG2:[vs0-9]+]], 16(r3) +; P9BE: li [[IMM:r[0-9]+]], 16 +; P9BE: lxvx [[REG1:[vs0-9]+]], 0, r3 +; P9BE: lxvx [[REG2:[vs0-9]+]], r3, [[IMM]] ; P9BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]] ; P9BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]] ; P9BE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]] ; P9BE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]] ; P9BE: vmrgew v2, [[REG6]], [[REG5]] ; P9BE: xvcvspuxws v2, v2 -; P9LE: lxv [[REG1:[vs0-9]+]], 0(r3) -; P9LE: lxv [[REG2:[vs0-9]+]], 16(r3) +; P9LE: li [[IMM:r[0-9]+]], 16 +; P9LE: lxvx [[REG1:[vs0-9]+]], 0, r3 +; P9LE: lxvx [[REG2:[vs0-9]+]], r3, [[IMM]] ; P9LE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG2]], [[REG1]] ; P9LE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG2]], [[REG1]] ; P9LE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]] @@ -3188,9 +3194,9 @@ ; P9LE-LABEL: fromDiffMemConsAll ; P8BE-LABEL: fromDiffMemConsAll ; P8LE-LABEL: fromDiffMemConsAll -; P9BE: lxv v2 +; P9BE: lxvx v2 ; P9BE: blr -; P9LE: lxv v2 +; P9LE: lxvx v2 ; P9LE: blr ; P8BE: lxvd2x v2 ; P8BE: blr @@ -3213,7 +3219,7 @@ ; P9LE-LABEL: fromDiffMemConsDll ; P8BE-LABEL: fromDiffMemConsDll ; P8LE-LABEL: fromDiffMemConsDll -; P9BE: lxv v2 +; P9BE: lxvx v2 ; P9BE: blr ; P9LE: lxv ; P9LE: xxswapd v2 @@ -4092,9 +4098,9 @@ ; P9LE-LABEL: fromDiffMemConsAull ; P8BE-LABEL: fromDiffMemConsAull ; P8LE-LABEL: fromDiffMemConsAull -; P9BE: lxv v2 +; P9BE: lxvx v2 ; P9BE: blr -; P9LE: lxv v2 +; P9LE: lxvx v2 ; P9LE: blr ; P8BE: lxvd2x v2 ; P8BE: blr @@ -4117,7 +4123,7 @@ ; P9LE-LABEL: fromDiffMemConsDull ; P8BE-LABEL: fromDiffMemConsDull ; P8LE-LABEL: fromDiffMemConsDull -; P9BE: lxv v2 +; P9BE: lxvx v2 ; P9BE: blr ; P9LE: lxv ; P9LE: xxswapd v2