Index: lib/Target/PowerPC/PPCISelDAGToDAG.cpp =================================================================== --- lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -82,6 +82,8 @@ "Number of compares not eliminated as they have non-extending uses."); STATISTIC(NumP9Setb, "Number of compares lowered to setb."); +STATISTIC(NumMemopsAdaptedToDForm, + "Number of memory op offsets adjusted to match D-Form requirements"); // FIXME: Remove this once the bug has been fixed! cl::opt ANDIGlueBug("expose-ppc-andi-glue-bug", @@ -238,7 +240,7 @@ /// bit signed displacement. /// Returns false if it can be represented by [r+imm], which are preferred. bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) { - return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 0); + return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 1); } /// SelectAddrIdx4 - Given the specified address, check to see if it can be @@ -264,16 +266,16 @@ /// SelectAddrIdxOnly - Given the specified address, force it to be /// represented as an indexed [r+r] operation. bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) { - return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG); + return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG, 1); } /// SelectAddrImm - Returns true if the address N can be represented by /// a base register plus a signed 16-bit displacement [r+imm]. - /// The last parameter \p 0 means D form has no requirment for 16 bit signed + /// The last parameter \p 1 means D form has no requirment for 16 bit signed /// displacement. 
  bool SelectAddrImm(SDValue N, SDValue &Disp, SDValue &Base) {
-    return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 0);
+    return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 1);
   }
 
   /// SelectAddrImmX4 - Returns true if the address N can be represented by
@@ -356,6 +358,13 @@
   bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
   void transferMemOperands(SDNode *N, SDNode *Result);
+  bool updateMemOpOffsetsForDForm(
+      SmallVectorImpl<std::pair<const SDValue *, unsigned>> &DFormTracker);
+  void addMemOpToDFormTracker(
+      SDNode *MemOp,
+      SmallVectorImpl<std::pair<const SDValue *, unsigned>> &DFormTracker);
+  unsigned getBaseAndRequiredAlignmentFor(SDNode *MemOp,
+                                          const SDValue *&BasePtr) const;
 };
 
 } // end anonymous namespace
@@ -5480,6 +5489,7 @@
 void PPCDAGToDAGISel::PreprocessISelDAG() {
   SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
+  SmallVector<std::pair<const SDValue *, unsigned>, 16> DFormTracker;
 
   bool MadeChange = false;
   while (Position != CurDAG->allnodes_begin()) {
@@ -5494,6 +5504,8 @@
       Res = combineToCMPB(N);
       break;
     }
+    if (ISD::isUNINDEXEDLoad(N) || ISD::isUNINDEXEDStore(N))
+      addMemOpToDFormTracker(N, DFormTracker);
 
     if (!Res)
       foldBoolExts(Res, N);
@@ -5510,6 +5522,7 @@
     }
   }
 
+  MadeChange |= updateMemOpOffsetsForDForm(DFormTracker);
   if (MadeChange)
     CurDAG->RemoveDeadNodes();
 }
@@ -5526,6 +5539,231 @@
   PeepholePPC64ZExt();
 }
 
+/// Look at memory operations in the DAG that have constant offsets from a
+/// common base register. If those constants are such that they cannot be
+/// encoded in a D-Form instruction (i.e. not multiples of 4 for DS-Form or
+/// multiples of 16 for DQ-Form), produce a new base register from which the
+/// offsets can be encoded.
+/// Example:
+/// 1: (store v4i32:$a, (add $x1, 8))
+/// 2: (store v4i32:$b, (add $x1, 24))
+/// Can become:
+/// 1: (store v4i32:$a, (add $x1, 8))
+/// 2: (store v4i32:$b, (add (add $x1, 8), 16))
+/// While the first pair would produce:
+/// li %1, 8
+/// stxvx %a, $x1, %1
+/// li %2, 24
+/// stxvx %b, $x1, %2
+/// The updated form would produce:
+/// addi %1, $x1, 8
+/// stxvx %a, 0, %1
+/// stxv %b, 16(%1)
+bool PPCDAGToDAGISel::updateMemOpOffsetsForDForm(
+    SmallVectorImpl<std::pair<const SDValue *, unsigned>> &DFormTracker) {
+  bool Changed = false;
+  // For each base register, find the entries in the vector that are uses
+  // of that register.
+  while (DFormTracker.size() > 1) {
+    SmallVector<std::pair<const SDValue *, unsigned>, 4> UsersOfCurrBase;
+    auto CurrElement = DFormTracker.pop_back_val();
+    const SDValue *CurrAddr = CurrElement.first;
+    UsersOfCurrBase.push_back(CurrElement);
+    SDValue CurrBase = CurrAddr->getOperand(0);
+    for (int i = DFormTracker.size() - 1; i >= 0; i--) {
+      auto NextElement = DFormTracker[i];
+      if (NextElement.first->getOperand(0) == CurrBase) {
+        UsersOfCurrBase.push_back(NextElement);
+        DFormTracker.erase(DFormTracker.begin() + i);
+      }
+    }
+    // Not profitable for a single memory operation.
+    if (UsersOfCurrBase.size() < 2)
+      continue;
+
+    // Sort the vector in non-increasing order by the alignment requirement.
+    // We try to accommodate the ones with the highest alignment first.
+    std::sort(UsersOfCurrBase.begin(), UsersOfCurrBase.end(),
+              [](const std::pair<const SDValue *, unsigned> &L,
+                 const std::pair<const SDValue *, unsigned> &R) {
+                return R.second < L.second;
+              });
+
+    LLVM_DEBUG(dbgs() << "Tracking address computation that may be used for "
+                         "DForm memory ops\n");
+    // We now have all the address nodes that use the same base register and the
+    // largest alignment required for the memory accesses that use that base
+    // register. We want to pick an anchor value on which we can base as many
+    // other address computations as possible.
+    // For example, if we have something like:
+    // (store v4i32:%a, (add $x1, 3))
+    // (store v4i32:%b, (add $x1, 7))
+    // (store v4i32:%c, (add $x1, 23))
+    // (store v4i32:%d, (add $x1, 39))
+    // We want to choose (add $x1, 7) as the anchor value since the following
+    // two can be based on it whereas selecting (add $x1, 3) would not allow
+    // us to base any of the others off of it.
+    // So we iterate over UsersOfCurrBase and compute a set of anchor values
+    // and indices for nodes we will base on that anchor.
+    // We could also look at the range to see if an optimal anchor would be
+    // some value in between (i.e. some value that we could compute with just
+    // an addis), but the analysis effort does not seem worth the minuscule
+    // gains it would provide.
+    SmallVector<SmallVector<const SDValue *, 4>, 4> AnchorSets;
+    for (auto &Elem : UsersOfCurrBase) {
+      // For each anchor set, check if the address can be computed based on
+      // the first element (anchor) in the set.
+      int CurrOffset =
+          cast<ConstantSDNode>(Elem.first->getOperand(1))->getZExtValue();
+      bool Inserted = false;
+      for (auto &AnchorSet : AnchorSets) {
+        int AnchorOffset =
+            cast<ConstantSDNode>(AnchorSet[0]->getOperand(1))->getZExtValue();
+        if (isInt<16>(CurrOffset - AnchorOffset) &&
+            (abs(CurrOffset - AnchorOffset) % Elem.second == 0)) {
+          AnchorSet.push_back(Elem.first);
+          Inserted = true;
+          LLVM_DEBUG(dbgs() << "Node: ");
+          LLVM_DEBUG(Elem.first->dump());
+          LLVM_DEBUG(dbgs() << "Added to set anchored by: ");
+          LLVM_DEBUG(AnchorSet[0]->dump());
+          break;
+        }
+      }
+      // If we were unable to add Addr to any of the anchor sets, create a new
+      // anchor set and add it.
+      if (!Inserted) {
+        LLVM_DEBUG(dbgs() << "Adding the following node as an anchor: ");
+        LLVM_DEBUG(Elem.first->dump());
+        SmallVector<const SDValue *, 4> Tmp;
+        Tmp.push_back(Elem.first);
+        AnchorSets.push_back(Tmp);
+      }
+    }
+
+    LLVM_DEBUG(dbgs() << "Attempting to scale pointers to favour DForm ops\n");
+    // FIXME: We could actually try merging sets to get the fewest possible add
+    // nodes, but there is currently no indication that this would be worth the
+    // effort based on workloads available.
+    for (auto &AnchorSet : AnchorSets) {
+      if (AnchorSet.size() == 1)
+        continue;
+      const SDValue &Anchor = *AnchorSet[0];
+      for (int i = 1, e = AnchorSet.size(); i < e; i++) {
+        const SDValue *Addr = AnchorSet[i];
+        SDLoc dl(Addr->getNode());
+        int64_t NewOffset =
+            cast<ConstantSDNode>(Addr->getOperand(1))->getSExtValue() -
+            cast<ConstantSDNode>(Anchor.getOperand(1))->getSExtValue();
+        // Rebase all the address computations off of AnchorSet[0].
+        SDValue NewBase =
+            CurDAG->getNode(ISD::ADD, dl, Anchor.getValueType(), Anchor,
+                            CurDAG->getIntPtrConstant(NewOffset, dl));
+        LLVM_DEBUG(dbgs() << "Replaced: ");
+        LLVM_DEBUG(Addr->dump());
+        LLVM_DEBUG(dbgs() << "With: ");
+        LLVM_DEBUG(NewBase.dump());
+        NumMemopsAdaptedToDForm++;
+        CurDAG->ReplaceAllUsesOfValueWith(*Addr, NewBase);
+        Changed = true;
+      }
+    }
+  }
+  return Changed;
+}
+
+// Get the base pointer and required alignment for a memory operation. If there
+// are no D-Form instructions for the type being accessed, the required
+// alignment is meaningless - signal this by returning -1U.
+unsigned
+PPCDAGToDAGISel::getBaseAndRequiredAlignmentFor(SDNode *MemOp,
+                                                const SDValue *&BasePtr) const {
+  assert((ISD::isUNINDEXEDLoad(MemOp) || ISD::isUNINDEXEDStore(MemOp)) &&
+         "Expecting an unindexed memory op");
+  unsigned Opc = MemOp->getOpcode();
+  EVT MemVT;
+  bool IsSExt = false;
+  if (Opc == ISD::LOAD) {
+    LoadSDNode *LD = cast<LoadSDNode>(MemOp);
+    BasePtr = &LD->getBasePtr();
+    MemVT = LD->getMemoryVT();
+    IsSExt = LD->getExtensionType() == ISD::SEXTLOAD;
+  } else {
+    StoreSDNode *ST = cast<StoreSDNode>(MemOp);
+    BasePtr = &ST->getBasePtr();
+    MemVT = ST->getMemoryVT();
+  }
+  if (MemVT == MVT::f64 || MemVT == MVT::f32 || MemVT == MVT::i64 ||
+      (MemVT == MVT::i32 && IsSExt))
+    return 4;
+  if (MemVT.isVector() && PPCSubTarget->hasP9Vector())
+    return 16;
+  // FIXME: We really only need to ignore vector memory operations on CPUs
+  // that don't have DForms if the same base pointer isn't also used for
+  // scalar operations.
+  if (MemVT.isVector())
+    return -1U;
+  return 1;
+}
+
+// Collect all the Base operands of memory operations which are produced
+// by adding a multi-use value (base register) and a constant that is known
+// not to match the requirements of a D-Form instruction for the type.
+void PPCDAGToDAGISel::addMemOpToDFormTracker(
+    SDNode *MemOp,
+    SmallVectorImpl<std::pair<const SDValue *, unsigned>> &DFormTracker) {
+
+  // We only collect load/store nodes fed by adds with constants.
+  assert((ISD::isUNINDEXEDLoad(MemOp) || ISD::isUNINDEXEDStore(MemOp)) &&
+         "Expecting an unindexed memory op");
+  const SDValue *AddrValue;
+  EVT MemVT;
+  unsigned RequiredAlignment = getBaseAndRequiredAlignmentFor(MemOp, AddrValue);
+  if (RequiredAlignment == -1U)
+    return;
+
+  // We only care about addresses that are computed by adding a base register
+  // and a constant offset.
+  if (AddrValue->getOpcode() != ISD::ADD)
+    return;
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(AddrValue->getOperand(1));
+  if (!C)
+    return;
+
+  // If there is only a single use of the base register, we can't do any better
+  // than actually doing the addition for that one address computation.
+  SDValue BaseReg = AddrValue->getOperand(0);
+  if (BaseReg.hasOneUse())
+    return;
+
+  int Offset = C->getZExtValue();
+  if (Offset % RequiredAlignment == 0 && isInt<16>(Offset))
+    return;
+
+  // If there are multiple uses of AddrValue, find the maximum required
+  // alignment among all of its uses.
+  if (!AddrValue->hasOneUse()) {
+    for (auto *UserOfAddr : AddrValue->getNode()->uses()) {
+      const SDValue *Dummy;
+      if (!ISD::isUNINDEXEDLoad(UserOfAddr) &&
+          !ISD::isUNINDEXEDStore(UserOfAddr))
+        continue;
+      unsigned UserReqAlign = getBaseAndRequiredAlignmentFor(UserOfAddr, Dummy);
+      RequiredAlignment =
+          UserReqAlign > RequiredAlignment ? UserReqAlign : RequiredAlignment;
+    }
+  }
+
+  // We need to save only a single pointer to any address node, so if we have
+  // multiple memory operations that have the same base pointer, we need to
+  // save just one.
+  for (auto Elem : DFormTracker) {
+    if (*Elem.first == *AddrValue)
+      return;
+  }
+  DFormTracker.push_back(std::make_pair(AddrValue, RequiredAlignment));
+}
+
 // Check if all users of this node will become isel where the second operand
 // is the constant zero. If this is so, and if we can negate the condition,
 // then we can flip the true and false operands. This will allow the zero to
Index: lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.h
+++ lib/Target/PowerPC/PPCISelLowering.h
@@ -694,7 +694,8 @@
 
     /// SelectAddressRegRegOnly - Given the specified addressed, force it to be
     /// represented as an indexed [r+r] operation.
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, - SelectionDAG &DAG) const; + SelectionDAG &DAG, + unsigned EncodingAlignment = 0) const; Sched::Preference getSchedulingPreference(SDNode *N) const override; Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -2273,8 +2273,20 @@ if (N.getOperand(1).getOpcode() == PPCISD::Lo) return false; // r+i - Base = N.getOperand(0); - Index = N.getOperand(1); + // If there are other uses of the address computation, we want to be able + // to reuse the node, so use ZERO for RA and the computed address as RB. + // However, we don't want to do this if we can use a pre-inc operation. + // Use the fact that EncodingAlignment defaults to zero for pre-inc + // computation but is set on calls at ISEL time. + if (!N->hasOneUse() && (DisablePPCPreinc || EncodingAlignment != 0)) { + Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, + N.getValueType()); + Index = N; + } + else { + Base = N.getOperand(0); + Index = N.getOperand(1); + } return true; } else if (N.getOpcode() == ISD::OR) { if (isIntS16Immediate(N.getOperand(1), imm) && @@ -2444,27 +2456,34 @@ /// SelectAddressRegRegOnly - Given the specified addressed, force it to be /// represented as an indexed [r+r] operation. -bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, - SDValue &Index, - SelectionDAG &DAG) const { +bool PPCTargetLowering::SelectAddressRegRegOnly( + SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, + unsigned EncodingAlignment) const { // Check to see if we can easily represent this as an [r+r] address. This // will fail if it thinks that the address is more profitably represented as // reg+imm, e.g. where imm = 0. 
- if (SelectAddressRegReg(N, Base, Index, DAG)) + if (SelectAddressRegReg(N, Base, Index, DAG, EncodingAlignment)) return true; // If the address is the result of an add, we will utilize the fact that the // address calculation includes an implicit add. However, we can reduce // register pressure if we do not materialize a constant just for use as the // index register. We only get rid of the add if it is not an add of a - // value and a 16-bit signed constant and both have a single use. - int16_t imm = 0; - if (N.getOpcode() == ISD::ADD && - (!isIntS16Immediate(N.getOperand(1), imm) || - !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) { - Base = N.getOperand(0); - Index = N.getOperand(1); - return true; + // value and a 16-bit signed constant and both have a single use. However, + // if the result of the add itself has multiple uses, we want to just use + // the node unless that will take away a pre-inc opportunity. + if (N.getOpcode() == ISD::ADD) { + bool NHasOneUse = N.hasOneUse(); + bool Op0HasOneUse = N.getOperand(0).hasOneUse(); + bool Op1HasOneUse = N.getOperand(1).hasOneUse(); + int16_t imm = 0; + if ((NHasOneUse || EncodingAlignment == 0) && + (!isIntS16Immediate(N.getOperand(1), imm) || !Op0HasOneUse || + !Op1HasOneUse)) { + Base = N.getOperand(0); + Index = N.getOperand(1); + return true; + } } // Otherwise, do it the hard way, using R0 as the base register. @@ -2567,7 +2586,11 @@ // those situations here, and try with swapped Base/Offset instead. 
bool Swap = false; - if (isa(Base) || isa(Base)) + RegisterSDNode *RegNodeBase = dyn_cast(Base); + if (RegNodeBase && (RegNodeBase->getReg() == PPC::ZERO || + RegNodeBase->getReg() == PPC::ZERO8)) + return false; + if (isa(Base) || RegNodeBase) Swap = true; else if (!isLoad) { SDValue Val = cast(N)->getValue(); Index: lib/Target/PowerPC/PPCInstr64Bit.td =================================================================== --- lib/Target/PowerPC/PPCInstr64Bit.td +++ lib/Target/PowerPC/PPCInstr64Bit.td @@ -1442,6 +1442,8 @@ (ADDIS8 $in, tjumptable:$g)>; def : Pat<(add i64:$in, (PPChi tblockaddress:$g, 0)), (ADDIS8 $in, tblockaddress:$g)>; +def : Pat<(add i64:$in, imm64SExt32:$imm), + (ADDIS8 (ADDI8 $in, (LO16 imm:$imm)), (HI16 imm:$imm))>; // Patterns to match r+r indexed loads and stores for // addresses without at least 4-byte alignment. Index: lib/Target/PowerPC/PPCInstrInfo.td =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.td +++ lib/Target/PowerPC/PPCInstrInfo.td @@ -381,6 +381,13 @@ // sign extended field. Used by instructions like 'addi'. return (int64_t)Imm == (short)Imm; }]>; +def imm64SExt32 : Operand, ImmLeaf(Imm) && ((Imm & ~0x8000) == Imm); +}]>; def immZExt16 : PatLeaf<(imm), [{ // immZExt16 predicate - True if the immediate fits in a 16-bit zero extended // field. Used by instructions like 'ori'. 
Index: test/CodeGen/PowerPC/dform-adjust.ll =================================================================== --- test/CodeGen/PowerPC/dform-adjust.ll +++ test/CodeGen/PowerPC/dform-adjust.ll @@ -4,27 +4,22 @@ define dso_local i64 @test1(i8* nocapture readonly %p, i32 signext %count) local_unnamed_addr #0 { ; CHECK-LABEL: test1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: li 5, -13 -; CHECK-NEXT: lxvx 0, 3, 5 -; CHECK-NEXT: li 5, 19 -; CHECK-NEXT: lxvx 1, 3, 5 -; CHECK-NEXT: li 5, 3 -; CHECK-NEXT: li 6, 7 -; CHECK-NEXT: li 7, 11 -; CHECK-NEXT: li 8, 15 +; CHECK-NEXT: addi 3, 3, -13 +; CHECK-NEXT: lxvx 0, 0, 3 ; CHECK-NEXT: mfvsrld 9, 0 -; CHECK-NEXT: ldx 5, 3, 5 -; CHECK-NEXT: ldx 6, 3, 6 -; CHECK-NEXT: ldx 7, 3, 7 -; CHECK-NEXT: ldx 3, 3, 8 ; CHECK-NEXT: mffprd 8, 0 +; CHECK-NEXT: mulld 8, 9, 8 +; CHECK-NEXT: ld 5, 16(3) +; CHECK-NEXT: lxv 1, 32(3) ; CHECK-NEXT: mfvsrld 10, 1 ; CHECK-NEXT: mfvsrd 11, 1 -; CHECK-NEXT: mulld 8, 9, 8 ; CHECK-NEXT: mulld 5, 8, 5 ; CHECK-NEXT: mulld 5, 5, 10 ; CHECK-NEXT: mulld 5, 5, 11 +; CHECK-NEXT: ld 6, 20(3) ; CHECK-NEXT: mulld 5, 5, 6 +; CHECK-NEXT: ld 7, 24(3) +; CHECK-NEXT: ld 3, 28(3) ; CHECK-NEXT: mulld 5, 5, 7 ; CHECK-NEXT: maddld 3, 5, 3, 4 ; CHECK-NEXT: blr @@ -67,13 +62,12 @@ ; CHECK-LABEL: test2: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li 5, 0 -; CHECK-NEXT: ori 6, 5, 40009 -; CHECK-NEXT: ori 7, 5, 40001 ; CHECK-NEXT: ori 5, 5, 40005 -; CHECK-NEXT: ldx 6, 3, 6 -; CHECK-NEXT: ldx 7, 3, 7 -; CHECK-NEXT: ldx 3, 3, 5 -; CHECK-NEXT: mulld 5, 7, 6 +; CHECK-NEXT: add 3, 3, 5 +; CHECK-NEXT: ld 5, 4(3) +; CHECK-NEXT: ld 6, -4(3) +; CHECK-NEXT: ldx 3, 0, 3 +; CHECK-NEXT: mulld 5, 6, 5 ; CHECK-NEXT: maddld 3, 5, 3, 4 ; CHECK-NEXT: blr entry: @@ -96,14 +90,12 @@ define dso_local i64 @test3(i8* nocapture readonly %p, i32 signext %count) local_unnamed_addr { ; CHECK-LABEL: test3: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lis 5, 1 -; CHECK-NEXT: ori 6, 5, 14497 -; CHECK-NEXT: ori 7, 5, 14465 -; CHECK-NEXT: ori 5, 5, 14481 -; 
CHECK-NEXT: ldx 6, 3, 6 -; CHECK-NEXT: ldx 7, 3, 7 -; CHECK-NEXT: ldx 3, 3, 5 -; CHECK-NEXT: mulld 5, 7, 6 +; CHECK-NEXT: addi 3, 3, 14481 +; CHECK-NEXT: addis 3, 3, 1 +; CHECK-NEXT: ld 5, 16(3) +; CHECK-NEXT: ld 6, -16(3) +; CHECK-NEXT: ldx 3, 0, 3 +; CHECK-NEXT: mulld 5, 6, 5 ; CHECK-NEXT: maddld 3, 5, 3, 4 ; CHECK-NEXT: blr entry: @@ -123,3 +115,34 @@ ret i64 %add6 } +define dso_local <4 x i32> @test(i8* nocapture readonly %Ptr) local_unnamed_addr #0 { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li 4, 3 +; CHECK-NEXT: lxvx 34, 3, 4 +; CHECK-NEXT: addi 3, 3, 39 +; CHECK-NEXT: lxv 35, -32(3) +; CHECK-NEXT: lxv 36, -16(3) +; CHECK-NEXT: lxvx 37, 0, 3 +; CHECK-NEXT: vadduwm 2, 3, 2 +; CHECK-NEXT: vadduwm 2, 2, 4 +; CHECK-NEXT: vadduwm 2, 2, 5 +; CHECK-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %Ptr, i64 3 + %0 = bitcast i8* %add.ptr to <4 x i32>* + %1 = load <4 x i32>, <4 x i32>* %0, align 16 + %add.ptr1 = getelementptr inbounds i8, i8* %Ptr, i64 7 + %2 = bitcast i8* %add.ptr1 to <4 x i32>* + %3 = load <4 x i32>, <4 x i32>* %2, align 16 + %add.ptr2 = getelementptr inbounds i8, i8* %Ptr, i64 23 + %4 = bitcast i8* %add.ptr2 to <4 x i32>* + %5 = load <4 x i32>, <4 x i32>* %4, align 16 + %add.ptr3 = getelementptr inbounds i8, i8* %Ptr, i64 39 + %6 = bitcast i8* %add.ptr3 to <4 x i32>* + %7 = load <4 x i32>, <4 x i32>* %6, align 16 + %add = add <4 x i32> %3, %1 + %add4 = add <4 x i32> %add, %5 + %add5 = add <4 x i32> %add4, %7 + ret <4 x i32> %add5 +} Index: test/CodeGen/PowerPC/float-load-store-pair.ll =================================================================== --- test/CodeGen/PowerPC/float-load-store-pair.ll +++ test/CodeGen/PowerPC/float-load-store-pair.ll @@ -62,17 +62,16 @@ ; CHECK-NEXT: addis 3, 2, a14@toc@ha ; CHECK-NEXT: lfd 0, a14@toc@l(3) ; CHECK-NEXT: addis 3, 2, a15@toc@ha -; CHECK-NEXT: addis 4, 2, a17@toc@ha -; CHECK-NEXT: addi 4, 4, a17@toc@l ; CHECK-NEXT: lxsd 2, a15@toc@l(3) ; CHECK-NEXT: addis 3, 2, 
a16@toc@ha -; CHECK-NEXT: addi 3, 3, a16@toc@l +; CHECK-NEXT: addis 4, 2, a17@toc@ha +; CHECK-NEXT: addi 4, 4, a17@toc@l ; CHECK-NEXT: lxvx 36, 0, 4 +; CHECK-NEXT: addi 3, 3, a16@toc@l ; CHECK-NEXT: lxvx 35, 0, 3 -; CHECK-NEXT: li 3, 168 -; CHECK-NEXT: stxvx 36, 1, 3 -; CHECK-NEXT: li 3, 152 -; CHECK-NEXT: stxvx 35, 1, 3 +; CHECK-NEXT: addi 3, 1, 152 +; CHECK-NEXT: stxv 36, 16(3) +; CHECK-NEXT: stxvx 35, 0, 3 ; CHECK-NEXT: stxsd 2, 144(1) ; CHECK-NEXT: stfd 0, 136(1) ; CHECK-NEXT: bl _Z3fooddddddddddddddd Index: test/CodeGen/PowerPC/peephole-align.ll =================================================================== --- test/CodeGen/PowerPC/peephole-align.ll +++ test/CodeGen/PowerPC/peephole-align.ll @@ -240,10 +240,10 @@ ; CHECK-LABEL: test_misalign ; CHECK: addis [[REGSTRUCT_0:[0-9]+]], 2, misalign_v@toc@ha ; CHECK-DAG: addi [[REGSTRUCT:[0-9]+]], [[REGSTRUCT_0]], misalign_v@toc@l -; CHECK-DAG: li [[OFFSET_REG:[0-9]+]], 1 -; CHECK: ldx [[REG0_0:[0-9]+]], [[REGSTRUCT]], [[OFFSET_REG]] +; CHECK-DAG: addi [[OFFSET_REG:[0-9]+]], [[REGSTRUCT]], 1 +; CHECK: ldx [[REG0_0:[0-9]+]], 0, [[OFFSET_REG]] ; CHECK: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1 -; CHECK: stdx [[REG0_1]], [[REGSTRUCT]], [[OFFSET_REG]] +; CHECK: stdx [[REG0_1]], 0, [[OFFSET_REG]] define void @test_misalign() nounwind { entry: %0 = load i64, i64* getelementptr inbounds (%struct.misalign, %struct.misalign* @misalign_v, i32 0, i32 1), align 1 Index: test/CodeGen/PowerPC/pre-inc-disable.ll =================================================================== --- test/CodeGen/PowerPC/pre-inc-disable.ll +++ test/CodeGen/PowerPC/pre-inc-disable.ll @@ -32,6 +32,7 @@ ; CHECK-NEXT: .LBB0_1: # %for.cond1.preheader ; CHECK-NEXT: # ; CHECK-NEXT: lfd f0, 0(r3) +; CHECK-NEXT: add r3, r3, r4 ; CHECK-NEXT: xxpermdi v1, f0, f0, 2 ; CHECK-NEXT: vperm v6, v1, v3, v4 ; CHECK-NEXT: vperm v1, v3, v1, v2 @@ -45,11 +46,10 @@ ; CHECK-NEXT: xxspltw v6, v1, 2 ; CHECK-NEXT: vadduwm v1, v1, v6 ; CHECK-NEXT: vextuwrx r7, r5, v1 -; 
CHECK-NEXT: lfdx f0, r3, r4 +; CHECK-NEXT: lfdx f0, 0, r3 +; CHECK-NEXT: add r3, r3, r4 ; CHECK-NEXT: add r6, r7, r6 -; CHECK-NEXT: add r7, r3, r4 ; CHECK-NEXT: xxpermdi v1, f0, f0, 2 -; CHECK-NEXT: add r3, r7, r4 ; CHECK-NEXT: vperm v6, v3, v1, v2 ; CHECK-NEXT: vperm v1, v1, v3, v4 ; CHECK-NEXT: xvnegsp v6, v6 @@ -61,8 +61,8 @@ ; CHECK-NEXT: vadduwm v1, v1, v6 ; CHECK-NEXT: xxspltw v6, v1, 2 ; CHECK-NEXT: vadduwm v1, v1, v6 -; CHECK-NEXT: vextuwrx r8, r5, v1 -; CHECK-NEXT: add r6, r8, r6 +; CHECK-NEXT: vextuwrx r7, r5, v1 +; CHECK-NEXT: add r6, r7, r6 ; CHECK-NEXT: bdnz .LBB0_1 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup ; CHECK-NEXT: extsw r3, r6 @@ -91,6 +91,7 @@ ; P9BE-NEXT: .LBB0_1: # %for.cond1.preheader ; P9BE-NEXT: # ; P9BE-NEXT: lfd f0, 0(r3) +; P9BE-NEXT: add r3, r3, r4 ; P9BE-NEXT: xxlor v1, vs0, vs0 ; P9BE-NEXT: vperm v6, v3, v1, v4 ; P9BE-NEXT: vperm v1, v3, v1, v2 @@ -104,11 +105,10 @@ ; P9BE-NEXT: xxspltw v6, v1, 1 ; P9BE-NEXT: vadduwm v1, v1, v6 ; P9BE-NEXT: vextuwlx r7, r5, v1 -; P9BE-NEXT: lfdx f0, r3, r4 +; P9BE-NEXT: lfdx f0, 0, r3 +; P9BE-NEXT: add r3, r3, r4 ; P9BE-NEXT: add r6, r7, r6 -; P9BE-NEXT: add r7, r3, r4 ; P9BE-NEXT: xxlor v1, vs0, vs0 -; P9BE-NEXT: add r3, r7, r4 ; P9BE-NEXT: vperm v6, v3, v1, v2 ; P9BE-NEXT: vperm v1, v3, v1, v4 ; P9BE-NEXT: xvnegsp v6, v6 @@ -120,8 +120,8 @@ ; P9BE-NEXT: vadduwm v1, v1, v6 ; P9BE-NEXT: xxspltw v6, v1, 1 ; P9BE-NEXT: vadduwm v1, v1, v6 -; P9BE-NEXT: vextuwlx r8, r5, v1 -; P9BE-NEXT: add r6, r8, r6 +; P9BE-NEXT: vextuwlx r7, r5, v1 +; P9BE-NEXT: add r6, r7, r6 ; P9BE-NEXT: bdnz .LBB0_1 ; P9BE-NEXT: # %bb.2: # %for.cond.cleanup ; P9BE-NEXT: extsw r3, r6 @@ -285,14 +285,14 @@ define void @test32(i8* nocapture readonly %pix2, i32 signext %i_pix2) { ; CHECK-LABEL: test32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: add r5, r3, r4 -; CHECK-NEXT: lfiwzx f0, r3, r4 -; CHECK-NEXT: addis r3, r2, .LCPI2_0@toc@ha -; CHECK-NEXT: addi r3, r3, .LCPI2_0@toc@l -; CHECK-NEXT: lxvx v4, 0, r3 -; CHECK-NEXT: li r3, 4 +; 
CHECK-NEXT: add r3, r3, r4 +; CHECK-NEXT: addis r4, r2, .LCPI2_0@toc@ha +; CHECK-NEXT: lfiwzx f0, 0, r3 +; CHECK-NEXT: addi r4, r4, .LCPI2_0@toc@l +; CHECK-NEXT: lxvx v4, 0, r4 +; CHECK-NEXT: li r4, 4 ; CHECK-NEXT: xxpermdi v2, f0, f0, 2 -; CHECK-NEXT: lfiwzx f0, r5, r3 +; CHECK-NEXT: lfiwzx f0, r3, r4 ; CHECK-NEXT: xxlxor v3, v3, v3 ; CHECK-NEXT: vperm v2, v2, v3, v4 ; CHECK-NEXT: xxpermdi v5, f0, f0, 2 @@ -308,14 +308,14 @@ ; ; P9BE-LABEL: test32: ; P9BE: # %bb.0: # %entry -; P9BE-NEXT: add r5, r3, r4 -; P9BE-NEXT: lfiwzx f0, r3, r4 -; P9BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha -; P9BE-NEXT: addi r3, r3, .LCPI2_0@toc@l -; P9BE-NEXT: lxvx v4, 0, r3 -; P9BE-NEXT: li r3, 4 +; P9BE-NEXT: add r3, r3, r4 +; P9BE-NEXT: addis r4, r2, .LCPI2_0@toc@ha +; P9BE-NEXT: lfiwzx f0, 0, r3 +; P9BE-NEXT: addi r4, r4, .LCPI2_0@toc@l +; P9BE-NEXT: lxvx v4, 0, r4 +; P9BE-NEXT: li r4, 4 ; P9BE-NEXT: xxsldwi v2, f0, f0, 1 -; P9BE-NEXT: lfiwzx f0, r5, r3 +; P9BE-NEXT: lfiwzx f0, r3, r4 ; P9BE-NEXT: xxlxor v3, v3, v3 ; P9BE-NEXT: vperm v2, v3, v2, v4 ; P9BE-NEXT: xxsldwi v5, f0, f0, 1 @@ -356,22 +356,22 @@ ; CHECK-LABEL: test16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: sldi r4, r4, 1 +; CHECK-NEXT: add r3, r3, r4 +; CHECK-NEXT: li r4, 16 ; CHECK-NEXT: lxsihzx v2, r3, r4 ; CHECK-NEXT: vsplth v2, v2, 3 +; CHECK-NEXT: lxsihzx v4, 0, r3 +; CHECK-NEXT: vsplth v4, v4, 3 +; CHECK-NEXT: addis r3, r2, .LCPI3_0@toc@ha ; CHECK-NEXT: xxlxor v3, v3, v3 ; CHECK-NEXT: vmrglh v2, v3, v2 -; CHECK-NEXT: vsplth v4, v3, 7 -; CHECK-NEXT: add r6, r3, r4 -; CHECK-NEXT: li r3, 16 -; CHECK-NEXT: vmrglw v2, v2, v4 -; CHECK-NEXT: lxsihzx v4, r6, r3 -; CHECK-NEXT: addis r3, r2, .LCPI3_0@toc@ha +; CHECK-NEXT: vmrglh v4, v3, v4 +; CHECK-NEXT: vsplth v3, v3, 7 ; CHECK-NEXT: addi r3, r3, .LCPI3_0@toc@l -; CHECK-NEXT: vsplth v4, v4, 3 -; CHECK-NEXT: vmrglh v3, v3, v4 +; CHECK-NEXT: vmrglw v3, v4, v3 ; CHECK-NEXT: lxvx v4, 0, r3 ; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: vperm v2, v3, v2, v4 +; CHECK-NEXT: vperm v2, v2, v3, v4 ; 
CHECK-NEXT: xxspltw v3, v2, 2 ; CHECK-NEXT: vadduwm v2, v2, v3 ; CHECK-NEXT: vextuwrx r3, r3, v2 @@ -382,14 +382,14 @@ ; P9BE-LABEL: test16: ; P9BE: # %bb.0: # %entry ; P9BE-NEXT: sldi r4, r4, 1 -; P9BE-NEXT: add r6, r3, r4 -; P9BE-NEXT: li r7, 16 -; P9BE-NEXT: lxsihzx v2, r6, r7 +; P9BE-NEXT: add r3, r3, r4 +; P9BE-NEXT: li r4, 16 +; P9BE-NEXT: lxsihzx v2, r3, r4 ; P9BE-NEXT: vsplth v2, v2, 3 -; P9BE-NEXT: lxsihzx v4, r3, r4 -; P9BE-NEXT: li r6, 0 -; P9BE-NEXT: sldi r6, r6, 48 -; P9BE-NEXT: mtvsrd v3, r6 +; P9BE-NEXT: lxsihzx v4, 0, r3 +; P9BE-NEXT: li r4, 0 +; P9BE-NEXT: sldi r4, r4, 48 +; P9BE-NEXT: mtvsrd v3, r4 ; P9BE-NEXT: addis r3, r2, .LCPI3_0@toc@ha ; P9BE-NEXT: addi r3, r3, .LCPI3_0@toc@l ; P9BE-NEXT: vmrghh v2, v3, v2 @@ -441,14 +441,14 @@ define void @test8(i8* nocapture readonly %sums, i32 signext %delta, i32 signext %thresh) { ; CHECK-LABEL: test8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxsibzx v2, r3, r4 -; CHECK-NEXT: add r6, r3, r4 -; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: mtvsrd f0, r3 -; CHECK-NEXT: li r3, 8 +; CHECK-NEXT: add r3, r3, r4 +; CHECK-NEXT: li r4, 0 +; CHECK-NEXT: lxsibzx v2, 0, r3 +; CHECK-NEXT: mtvsrd f0, r4 +; CHECK-NEXT: li r4, 8 ; CHECK-NEXT: xxswapd v3, vs0 ; CHECK-NEXT: vspltb v2, v2, 7 -; CHECK-NEXT: lxsibzx v5, r6, r3 +; CHECK-NEXT: lxsibzx v5, r3, r4 ; CHECK-NEXT: vspltb v5, v5, 7 ; CHECK-NEXT: vmrglb v2, v3, v2 ; CHECK-NEXT: vspltb v4, v3, 15 @@ -471,14 +471,14 @@ ; ; P9BE-LABEL: test8: ; P9BE: # %bb.0: # %entry -; P9BE-NEXT: add r6, r3, r4 -; P9BE-NEXT: li r7, 8 -; P9BE-NEXT: lxsibzx v2, r6, r7 +; P9BE-NEXT: add r3, r3, r4 +; P9BE-NEXT: li r4, 8 +; P9BE-NEXT: lxsibzx v2, r3, r4 ; P9BE-NEXT: vspltb v2, v2, 7 -; P9BE-NEXT: lxsibzx v4, r3, r4 -; P9BE-NEXT: li r6, 0 -; P9BE-NEXT: sldi r6, r6, 56 -; P9BE-NEXT: mtvsrd v3, r6 +; P9BE-NEXT: lxsibzx v4, 0, r3 +; P9BE-NEXT: li r4, 0 +; P9BE-NEXT: sldi r4, r4, 56 +; P9BE-NEXT: mtvsrd v3, r4 ; P9BE-NEXT: vmrghb v2, v3, v2 ; P9BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha ; P9BE-NEXT: addi 
r3, r3, .LCPI4_0@toc@l Index: test/CodeGen/PowerPC/qpx-unal-cons-lds.ll =================================================================== --- test/CodeGen/PowerPC/qpx-unal-cons-lds.ll +++ test/CodeGen/PowerPC/qpx-unal-cons-lds.ll @@ -9,18 +9,19 @@ ; CHECK-LABEL: @foo ; Make sure that the offset constants we use are all even (only the last should be odd). -; CHECK-DAG: li {{[0-9]+}}, 1056 -; CHECK-DAG: li {{[0-9]+}}, 1088 -; CHECK-DAG: li {{[0-9]+}}, 1152 -; CHECK-DAG: li {{[0-9]+}}, 1216 -; CHECK-DAG: li {{[0-9]+}}, 1280 -; CHECK-DAG: li {{[0-9]+}}, 1344 -; CHECK-DAG: li {{[0-9]+}}, 1408 -; CHECK-DAG: li {{[0-9]+}}, 1472 -; CHECK-DAG: li {{[0-9]+}}, 1536 -; CHECK-DAG: li {{[0-9]+}}, 1600 +; CHECK-DAG: addi {{[0-9]+}}, {{[0-9]+}}, 1056 +; CHECK-DAG: addi {{[0-9]+}}, {{[0-9]+}}, 1088 +; CHECK-DAG: addi {{[0-9]+}}, {{[0-9]+}}, 1152 +; CHECK-DAG: addi {{[0-9]+}}, {{[0-9]+}}, 1216 +; CHECK-DAG: addi {{[0-9]+}}, {{[0-9]+}}, 1280 +; CHECK-DAG: addi {{[0-9]+}}, {{[0-9]+}}, 1344 +; CHECK-DAG: addi {{[0-9]+}}, {{[0-9]+}}, 1408 +; CHECK-DAG: addi {{[0-9]+}}, {{[0-9]+}}, 1472 +; CHECK-DAG: addi {{[0-9]+}}, {{[0-9]+}}, 1536 +; CHECK-DAG: addi {{[0-9]+}}, {{[0-9]+}}, 1600 +; CHECK-DAG: addi {{[0-9]+}}, {{[0-9]+}}, 1664 +; CHECK-DAG: addi {{[0-9]+}}, {{[0-9]+}}, 1024 ; CHECK-DAG: li {{[0-9]+}}, 1568 -; CHECK-DAG: li {{[0-9]+}}, 1664 ; CHECK-DAG: li {{[0-9]+}}, 1632 ; CHECK-DAG: li {{[0-9]+}}, 1728 ; CHECK-DAG: li {{[0-9]+}}, 1696 @@ -33,7 +34,6 @@ ; CHECK-DAG: li {{[0-9]+}}, 1984 ; CHECK-DAG: li {{[0-9]+}}, 1952 ; CHECK-DAG: li {{[0-9]+}}, 2016 -; CHECK-DAG: li {{[0-9]+}}, 1024 ; CHECK-DAG: li {{[0-9]+}}, 1120 ; CHECK-DAG: li {{[0-9]+}}, 1184 ; CHECK-DAG: li {{[0-9]+}}, 1248 Index: test/CodeGen/PowerPC/unal-altivec-wint.ll =================================================================== --- test/CodeGen/PowerPC/unal-altivec-wint.ll +++ test/CodeGen/PowerPC/unal-altivec-wint.ll @@ -16,10 +16,10 @@ ret <4 x i32> %a ; CHECK-LABEL: @test1 -; CHECK: li [[REG:[0-9]+]], 16 +; 
CHECK: addi [[REG:[0-9]+]], 3, 16 ; CHECK-NOT: li {{[0-9]+}}, 15 ; CHECK-DAG: lvx {{[0-9]+}}, 0, 3 -; CHECK-DAG: lvx {{[0-9]+}}, 3, [[REG]] +; CHECK-DAG: lvx {{[0-9]+}}, 0, [[REG]] ; CHECK: blr } @@ -36,10 +36,10 @@ ret <4 x i32> %v0 ; CHECK-LABEL: @test2 -; CHECK: li [[REG:[0-9]+]], 16 +; CHECK: addi [[REG:[0-9]+]], 3, 16 ; CHECK-NOT: li {{[0-9]+}}, 15 ; CHECK-DAG: lvx {{[0-9]+}}, 0, 3 -; CHECK-DAG: lvx {{[0-9]+}}, 3, [[REG]] +; CHECK-DAG: lvx {{[0-9]+}}, 0, [[REG]] ; CHECK: blr } Index: test/CodeGen/PowerPC/unal-altivec.ll =================================================================== --- test/CodeGen/PowerPC/unal-altivec.ll +++ test/CodeGen/PowerPC/unal-altivec.ll @@ -29,11 +29,11 @@ br i1 %10, label %for.end, label %vector.body ; CHECK: @foo -; CHECK-DAG: li [[C16:[0-9]+]], 16 ; CHECK-DAG: lvx [[CNST:[0-9]+]], ; CHECK: .LBB0_1: +; CHECK-DAG: addi [[C16:[0-9]+]], 4, 16 ; CHECK-DAG: lvx [[LD1:[0-9]+]], 0, [[C0:[0-9]+]] -; CHECK-DAG: lvx [[LD2:[0-9]+]], [[C0]], [[C16]] +; CHECK-DAG: lvx [[LD2:[0-9]+]], 0, [[C16]] ; CHECK-DAG: lvsl [[MASK1:[0-9]+]], 0, [[C0]] ; CHECK-DAG: vperm [[VR1:[0-9]+]], [[LD1]], [[LD2]], [[MASK1]] ; CHECK-DAG: vaddfp {{[0-9]+}}, [[VR1]], [[CNST]] Index: test/CodeGen/PowerPC/unal-vec-ldst.ll =================================================================== --- test/CodeGen/PowerPC/unal-vec-ldst.ll +++ test/CodeGen/PowerPC/unal-vec-ldst.ll @@ -23,10 +23,10 @@ ; CHECK-LABEL: @test_l_v32i8 ; CHECK-DAG: li [[REG1:[0-9]+]], 31 -; CHECK-DAG: li [[REG2:[0-9]+]], 16 +; CHECK-DAG: addi [[REG2:[0-9]+]], 3, 16 ; CHECK-DAG: lvsl [[REG3:[0-9]+]], 0, 3 ; CHECK-DAG: lvx [[REG4:[0-9]+]], 3, [[REG1]] -; CHECK-DAG: lvx [[REG5:[0-9]+]], 3, [[REG2]] +; CHECK-DAG: lvx [[REG5:[0-9]+]], 0, [[REG2]] ; CHECK-DAG: lvx [[REG6:[0-9]+]], 0, 3 ; CHECK-DAG: vperm 3, {{[0-9]+}}, {{[0-9]+}}, [[REG3]] ; CHECK-DAG: vperm 2, {{[0-9]+}}, {{[0-9]+}}, [[REG3]] @@ -54,10 +54,10 @@ ; CHECK-LABEL: @test_l_v16i16 ; CHECK-DAG: li [[REG1:[0-9]+]], 31 -; CHECK-DAG: li 
[[REG2:[0-9]+]], 16 +; CHECK-DAG: addi [[REG2:[0-9]+]], 3, 16 ; CHECK-DAG: lvsl [[REG3:[0-9]+]], 0, 3 ; CHECK-DAG: lvx [[REG4:[0-9]+]], 3, [[REG1]] -; CHECK-DAG: lvx [[REG5:[0-9]+]], 3, [[REG2]] +; CHECK-DAG: lvx [[REG5:[0-9]+]], 0, [[REG2]] ; CHECK-DAG: lvx [[REG6:[0-9]+]], 0, 3 ; CHECK-DAG: vperm 3, {{[0-9]+}}, {{[0-9]+}}, [[REG3]] ; CHECK-DAG: vperm 2, {{[0-9]+}}, {{[0-9]+}}, [[REG3]] @@ -85,10 +85,10 @@ ; CHECK-LABEL: @test_l_v8i32 ; CHECK-DAG: li [[REG1:[0-9]+]], 31 -; CHECK-DAG: li [[REG2:[0-9]+]], 16 +; CHECK-DAG: addi [[REG2:[0-9]+]], 3, 16 ; CHECK-DAG: lvsl [[REG3:[0-9]+]], 0, 3 ; CHECK-DAG: lvx [[REG4:[0-9]+]], 3, [[REG1]] -; CHECK-DAG: lvx [[REG5:[0-9]+]], 3, [[REG2]] +; CHECK-DAG: lvx [[REG5:[0-9]+]], 0, [[REG2]] ; CHECK-DAG: lvx [[REG6:[0-9]+]], 0, 3 ; CHECK-DAG: vperm 3, {{[0-9]+}}, {{[0-9]+}}, [[REG3]] ; CHECK-DAG: vperm 2, {{[0-9]+}}, {{[0-9]+}}, [[REG3]] @@ -138,10 +138,10 @@ ; CHECK-LABEL: @test_l_v8float ; CHECK-DAG: li [[REG1:[0-9]+]], 31 -; CHECK-DAG: li [[REG2:[0-9]+]], 16 +; CHECK-DAG: addi [[REG2:[0-9]+]], 3, 16 ; CHECK-DAG: lvsl [[REG3:[0-9]+]], 0, 3 ; CHECK-DAG: lvx [[REG4:[0-9]+]], 3, [[REG1]] -; CHECK-DAG: lvx [[REG5:[0-9]+]], 3, [[REG2]] +; CHECK-DAG: lvx [[REG5:[0-9]+]], 0, [[REG2]] ; CHECK-DAG: lvx [[REG6:[0-9]+]], 0, 3 ; CHECK-DAG: vperm 3, {{[0-9]+}}, {{[0-9]+}}, [[REG3]] ; CHECK-DAG: vperm 2, {{[0-9]+}}, {{[0-9]+}}, [[REG3]] @@ -323,9 +323,9 @@ ; CHECK-LABEL: @test_l_qv8float ; CHECK-DAG: li [[REG1:[0-9]+]], 31 -; CHECK-DAG: li [[REG2:[0-9]+]], 16 +; CHECK-DAG: addi [[REG2:[0-9]+]], 3, 16 ; CHECK-DAG: qvlfsx [[REG3:[0-9]+]], 3, [[REG1]] -; CHECK-DAG: qvlfsx [[REG4:[0-9]+]], 3, [[REG2]] +; CHECK-DAG: qvlfsx [[REG4:[0-9]+]], 0, [[REG2]] ; CHECK-DAG: qvlpclsx [[REG5:[0-5]+]], 0, 3 ; CHECK-DAG: qvlfsx [[REG6:[0-9]+]], 0, 3 ; CHECK-DAG: qvfperm 2, {{[0-9]+}}, {{[0-9]+}}, [[REG5]] @@ -354,9 +354,9 @@ ; CHECK-LABEL: @test_l_qv8double ; CHECK-DAG: li [[REG1:[0-9]+]], 63 -; CHECK-DAG: li [[REG2:[0-9]+]], 32 +; CHECK-DAG: addi 
[[REG2:[0-9]+]], 3, 32 ; CHECK-DAG: qvlfdx [[REG3:[0-9]+]], 3, [[REG1]] -; CHECK-DAG: qvlfdx [[REG4:[0-9]+]], 3, [[REG2]] +; CHECK-DAG: qvlfdx [[REG4:[0-9]+]], 0, [[REG2]] ; CHECK-DAG: qvlpcldx [[REG5:[0-5]+]], 0, 3 ; CHECK-DAG: qvlfdx [[REG6:[0-9]+]], 0, 3 ; CHECK-DAG: qvfperm 2, {{[0-9]+}}, {{[0-9]+}}, [[REG5]]