diff --git a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp --- a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp +++ b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp @@ -125,6 +125,11 @@ cl::init(true), cl::Hidden, cl::desc("prefer update form when ds form is also a update form")); +static cl::opt EnableUpdateFormForNonConstInc( + "ppc-formprep-update-nonconst-inc", cl::init(false), cl::Hidden, + cl::desc("prepare update form when the load/store increment is a loop " + "invariant non-const value.")); + static cl::opt EnableChainCommoning( "ppc-formprep-chain-commoning", cl::init(false), cl::Hidden, cl::desc("Enable chain commoning in PPC loop prepare pass.")); @@ -212,7 +217,7 @@ // load/store with update like ldu/stdu, or Prefetch intrinsic. // For DS form instructions, their displacements must be multiple of 4. // For DQ form instructions, their displacements must be multiple of 16. - enum InstrForm { UpdateForm = 1, DSForm = 4, DQForm = 16 }; + enum PrepForm { UpdateForm = 1, DSForm = 4, DQForm = 16, ChainCommoning }; class PPCLoopInstrFormPrep : public FunctionPass { public: @@ -255,7 +260,7 @@ /// Check if required PHI node is already exist in Loop \p L. bool alreadyPrepared(Loop *L, Instruction *MemI, const SCEV *BasePtrStartSCEV, - const SCEV *BasePtrIncSCEV, InstrForm Form); + const SCEV *BasePtrIncSCEV, PrepForm Form); /// Get the value which defines the increment SCEV \p BasePtrIncSCEV. Value *getNodeForInc(Loop *L, Instruction *MemI, @@ -293,8 +298,7 @@ /// Prepare all candidates in \p Buckets for displacement form, now for /// ds/dq. - bool dispFormPrep(Loop *L, SmallVector &Buckets, - InstrForm Form); + bool dispFormPrep(Loop *L, SmallVector &Buckets, PrepForm Form); /// Prepare for one chain \p BucketChain, find the best base element and /// update all other elements in \p BucketChain accordingly. @@ -302,8 +306,7 @@ /// If success, best base element must be stored as the first element of /// \p BucketChain. /// Return false if no base element found, otherwise return true. - bool prepareBaseForDispFormChain(Bucket &BucketChain, - InstrForm Form); + bool prepareBaseForDispFormChain(Bucket &BucketChain, PrepForm Form); /// Prepare for one chain \p BucketChain, find the best base element and /// update all other elements in \p BucketChain accordingly. @@ -316,12 +319,12 @@ /// preparation. bool rewriteLoadStores(Loop *L, Bucket &BucketChain, SmallSet &BBChanged, - InstrForm Form); + PrepForm Form); /// Rewrite for the base load/store of a chain. std::pair rewriteForBase(Loop *L, const SCEVAddRecExpr *BasePtrSCEV, - Instruction *BaseMemI, bool CanPreInc, InstrForm Form, + Instruction *BaseMemI, bool CanPreInc, PrepForm Form, SCEVExpander &SCEVE, SmallPtrSet &DeletedPtrs); /// Rewrite for the other load/stores of a chain according to the new \p @@ -572,9 +575,9 @@ assert(BasePtrSCEV->isAffine() && "Invalid SCEV type for the base ptr for a candidate chain!\n"); - std::pair Base = - rewriteForBase(L, BasePtrSCEV, Bucket.Elements[BaseElemIdx].Instr, - false /* CanPreInc */, UpdateForm, SCEVE, DeletedPtrs); + std::pair Base = rewriteForBase( + L, BasePtrSCEV, Bucket.Elements[BaseElemIdx].Instr, + false /* CanPreInc */, ChainCommoning, SCEVE, DeletedPtrs); if (!Base.first || !Base.second) return MadeChange; @@ -645,7 +648,7 @@ std::pair PPCLoopInstrFormPrep::rewriteForBase(Loop *L, const SCEVAddRecExpr *BasePtrSCEV, Instruction *BaseMemI, bool CanPreInc, - InstrForm Form, SCEVExpander &SCEVE, + PrepForm Form, SCEVExpander &SCEVE, SmallPtrSet &DeletedPtrs) { LLVM_DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n"); @@ -675,6 +678,13 @@ return std::make_pair(nullptr, nullptr); } + if (Form == UpdateForm && !IsConstantInc && !EnableUpdateFormForNonConstInc) { + LLVM_DEBUG( + dbgs() + << "Update form prepare for non-const increment is not enabled!\n"); + return std::make_pair(nullptr, nullptr); + } + const SCEV *BasePtrStartSCEV = nullptr; if (CanPreInc) { assert(SE->isLoopInvariant(BasePtrIncSCEV, L) && @@ -884,7 +894,7 @@ } bool PPCLoopInstrFormPrep::prepareBaseForDispFormChain(Bucket &BucketChain, - InstrForm Form) { + PrepForm Form) { // RemainderOffsetInfo details: // key: value of (Offset urem DispConstraint). For DSForm, it can // be [0, 4). @@ -1001,7 +1011,7 @@ bool PPCLoopInstrFormPrep::rewriteLoadStores( Loop *L, Bucket &BucketChain, SmallSet &BBChanged, - InstrForm Form) { + PrepForm Form) { bool MadeChange = false; const SCEVAddRecExpr *BasePtrSCEV = @@ -1098,8 +1108,9 @@ return MadeChange; } -bool PPCLoopInstrFormPrep::dispFormPrep(Loop *L, SmallVector &Buckets, - InstrForm Form) { +bool PPCLoopInstrFormPrep::dispFormPrep(Loop *L, + SmallVector &Buckets, + PrepForm Form) { bool MadeChange = false; if (Buckets.empty()) @@ -1202,7 +1213,7 @@ bool PPCLoopInstrFormPrep::alreadyPrepared(Loop *L, Instruction *MemI, const SCEV *BasePtrStartSCEV, const SCEV *BasePtrIncSCEV, - InstrForm Form) { + PrepForm Form) { BasicBlock *BB = MemI->getParent(); if (!BB) return false; @@ -1242,7 +1253,7 @@ if (PHIBasePtrIncSCEV == BasePtrIncSCEV) { // The existing PHI (CurrentPHINode) has the same start and increment // as the PHI that we wanted to create. - if (Form == UpdateForm && + if ((Form == UpdateForm || Form == ChainCommoning ) && PHIBasePtrSCEV->getStart() == BasePtrStartSCEV) { ++PHINodeAlreadyExistsUpdate; return true; diff --git a/llvm/test/CodeGen/PowerPC/common-chain.ll b/llvm/test/CodeGen/PowerPC/common-chain.ll --- a/llvm/test/CodeGen/PowerPC/common-chain.ll +++ b/llvm/test/CodeGen/PowerPC/common-chain.ll @@ -771,9 +771,9 @@ ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r31, -8(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r2, -152(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r9, -176(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r8, -168(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r7, -160(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r9, -160(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r8, -176(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r7, -168(r1) # 8-byte Folded Spill ; CHECK-NEXT: blt cr0, .LBB7_7 ; CHECK-NEXT: # %bb.1: # %for.body.preheader ; CHECK-NEXT: sldi r6, r6, 2 @@ -789,66 +789,71 @@ ; CHECK-NEXT: rldicl r7, r7, 62, 2 ; CHECK-NEXT: sldi r10, r12, 2 ; CHECK-NEXT: ld r2, -168(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r31, -160(r1) # 8-byte Folded Reload +; CHECK-NEXT: rldicl r7, r7, 2, 1 ; CHECK-NEXT: std r7, -184(r1) # 8-byte Folded Spill +; CHECK-NEXT: ld r7, -160(r1) # 8-byte Folded Reload +; CHECK-NEXT: add r8, r7, r10 +; CHECK-NEXT: mr r22, r7 ; CHECK-NEXT: mr r7, r4 -; CHECK-NEXT: ld r4, -176(r1) # 8-byte Folded Reload -; CHECK-NEXT: add r8, r4, r10 +; CHECK-NEXT: mr r4, r3 +; CHECK-NEXT: ld r3, -176(r1) # 8-byte Folded Reload ; CHECK-NEXT: sldi r8, r8, 3 ; CHECK-NEXT: add r9, r5, r8 -; CHECK-NEXT: add r8, r2, r10 -; CHECK-NEXT: add r10, r31, r10 +; CHECK-NEXT: add r8, r3, r10 +; CHECK-NEXT: add r10, r2, r10 ; CHECK-NEXT: sldi r10, r10, 3 ; CHECK-NEXT: sldi r8, r8, 3 ; CHECK-NEXT: add r30, r5, r10 ; CHECK-NEXT: add r29, r7, r10 -; CHECK-NEXT: add r28, r3, r10 +; CHECK-NEXT: add r28, r4, r10 ; CHECK-NEXT: sldi r10, r12, 1 ; CHECK-NEXT: add r8, r5, r8 ; CHECK-NEXT: add r11, r12, r10 -; CHECK-NEXT: add r0, r4, r11 +; CHECK-NEXT: add r0, r22, r11 ; CHECK-NEXT: sldi r0, r0, 3 ; CHECK-NEXT: add r27, r5, r0 -; CHECK-NEXT: add r0, r2, r11 -; CHECK-NEXT: add r11, r31, r11 +; CHECK-NEXT: add r0, r3, r11 +; CHECK-NEXT: add r11, r2, r11 ; CHECK-NEXT: sldi r11, r11, 3 ; CHECK-NEXT: sldi r0, r0, 3 ; CHECK-NEXT: add r25, r5, r11 ; CHECK-NEXT: add r24, r7, r11 -; CHECK-NEXT: add r23, r3, r11 -; CHECK-NEXT: add r11, r4, r10 +; CHECK-NEXT: add r23, r4, r11 +; CHECK-NEXT: add r11, r22, r10 ; CHECK-NEXT: add r26, r5, r0 +; CHECK-NEXT: mr r0, r22 ; CHECK-NEXT: sldi r11, r11, 3 ; CHECK-NEXT: add r22, r5, r11 -; CHECK-NEXT: add r11, r2, r10 -; CHECK-NEXT: add r10, r31, r10 +; CHECK-NEXT: add r11, r3, r10 +; CHECK-NEXT: add r10, r2, r10 ; CHECK-NEXT: sldi r10, r10, 3 ; CHECK-NEXT: sldi r11, r11, 3 ; CHECK-NEXT: add r20, r5, r10 ; CHECK-NEXT: add r19, r7, r10 -; CHECK-NEXT: add r18, r3, r10 -; CHECK-NEXT: add r10, r12, r4 +; CHECK-NEXT: add r18, r4, r10 +; CHECK-NEXT: add r10, r12, r0 ; CHECK-NEXT: add r21, r5, r11 ; CHECK-NEXT: sldi r11, r2, 3 ; CHECK-NEXT: sldi r10, r10, 3 ; CHECK-NEXT: add r17, r5, r10 -; CHECK-NEXT: add r10, r12, r2 +; CHECK-NEXT: add r10, r12, r3 ; CHECK-NEXT: sldi r10, r10, 3 ; CHECK-NEXT: add r16, r5, r10 -; CHECK-NEXT: add r10, r12, r31 -; CHECK-NEXT: sldi r31, r31, 3 -; CHECK-NEXT: sub r0, r11, r31 -; CHECK-NEXT: sldi r11, r4, 3 -; CHECK-NEXT: mr r4, r7 -; CHECK-NEXT: ld r7, -184(r1) # 8-byte Folded Reload +; CHECK-NEXT: add r10, r12, r2 ; CHECK-NEXT: sldi r10, r10, 3 ; CHECK-NEXT: add r15, r5, r10 -; CHECK-NEXT: add r14, r3, r10 -; CHECK-NEXT: sub r31, r11, r31 -; CHECK-NEXT: add r2, r4, r10 +; CHECK-NEXT: add r14, r7, r10 +; CHECK-NEXT: add r31, r4, r10 +; CHECK-NEXT: sldi r10, r3, 3 +; CHECK-NEXT: mr r3, r4 +; CHECK-NEXT: mr r4, r7 +; CHECK-NEXT: ld r7, -160(r1) # 8-byte Folded Reload +; CHECK-NEXT: sub r0, r10, r11 +; CHECK-NEXT: sldi r10, r7, 3 +; CHECK-NEXT: ld r7, -184(r1) # 8-byte Folded Reload +; CHECK-NEXT: sub r2, r10, r11 ; CHECK-NEXT: li r11, 0 ; CHECK-NEXT: mr r10, r12 -; CHECK-NEXT: rldicl r7, r7, 2, 1 ; CHECK-NEXT: addi r7, r7, -4 ; CHECK-NEXT: rldicl r7, r7, 62, 2 ; CHECK-NEXT: addi r7, r7, 1 @@ -857,8 +862,8 @@ ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB7_3: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: lfd f0, 0(r14) -; CHECK-NEXT: lfd f1, 0(r2) +; CHECK-NEXT: lfd f0, 0(r31) +; CHECK-NEXT: lfd f1, 0(r14) ; CHECK-NEXT: add r10, r10, r12 ; CHECK-NEXT: add r10, r10, r12 ; CHECK-NEXT: xsmuldp f0, f0, f1 @@ -868,16 +873,16 @@ ; CHECK-NEXT: xsadddp f0, f1, f0 ; CHECK-NEXT: stfd f0, 0(r15) ; CHECK-NEXT: add r15, r15, r7 -; CHECK-NEXT: lfdx f0, r14, r0 -; CHECK-NEXT: lfdx f1, r2, r0 +; CHECK-NEXT: lfdx f0, r31, r0 +; CHECK-NEXT: lfdx f1, r14, r0 ; CHECK-NEXT: xsmuldp f0, f0, f1 ; CHECK-NEXT: lfdx f1, r16, r11 ; CHECK-NEXT: xsadddp f0, f1, f0 ; CHECK-NEXT: stfdx f0, r16, r11 -; CHECK-NEXT: lfdx f0, r14, r31 -; CHECK-NEXT: lfdx f1, r2, r31 +; CHECK-NEXT: lfdx f0, r31, r2 +; CHECK-NEXT: lfdx f1, r14, r2 +; CHECK-NEXT: add r31, r31, r7 ; CHECK-NEXT: add r14, r14, r7 -; CHECK-NEXT: add r2, r2, r7 ; CHECK-NEXT: xsmuldp f0, f0, f1 ; CHECK-NEXT: lfdx f1, r17, r11 ; CHECK-NEXT: xsadddp f0, f1, f0 @@ -894,8 +899,8 @@ ; CHECK-NEXT: lfdx f1, r21, r11 ; CHECK-NEXT: xsadddp f0, f1, f0 ; CHECK-NEXT: stfdx f0, r21, r11 -; CHECK-NEXT: lfdx f0, r18, r31 -; CHECK-NEXT: lfdx f1, r19, r31 +; CHECK-NEXT: lfdx f0, r18, r2 +; CHECK-NEXT: lfdx f1, r19, r2 ; CHECK-NEXT: add r18, r18, r7 ; CHECK-NEXT: add r19, r19, r7 ; CHECK-NEXT: xsmuldp f0, f0, f1 @@ -914,8 +919,8 @@ ; CHECK-NEXT: lfdx f1, r26, r11 ; CHECK-NEXT: xsadddp f0, f1, f0 ; CHECK-NEXT: stfdx f0, r26, r11 -; CHECK-NEXT: lfdx f0, r23, r31 -; CHECK-NEXT: lfdx f1, r24, r31 +; CHECK-NEXT: lfdx f0, r23, r2 +; CHECK-NEXT: lfdx f1, r24, r2 ; CHECK-NEXT: add r23, r23, r7 ; CHECK-NEXT: add r24, r24, r7 ; CHECK-NEXT: xsmuldp f0, f0, f1 @@ -934,8 +939,8 @@ ; CHECK-NEXT: lfdx f1, r8, r11 ; CHECK-NEXT: xsadddp f0, f1, f0 ; CHECK-NEXT: stfdx f0, r8, r11 -; CHECK-NEXT: lfdx f0, r28, r31 -; CHECK-NEXT: lfdx f1, r29, r31 +; CHECK-NEXT: lfdx f0, r28, r2 +; CHECK-NEXT: lfdx f1, r29, r2 ; CHECK-NEXT: add r28, r28, r7 ; CHECK-NEXT: add r29, r29, r7 ; CHECK-NEXT: xsmuldp f0, f0, f1 @@ -948,46 +953,44 @@ ; CHECK-NEXT: cmpldi r6, 0 ; CHECK-NEXT: beq cr0, .LBB7_7 ; CHECK-NEXT: # %bb.5: # %for.body.epil.preheader -; CHECK-NEXT: ld r0, -168(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r7, -176(r1) # 8-byte Folded Reload ; CHECK-NEXT: sldi r8, r12, 3 -; CHECK-NEXT: add r0, r10, r0 +; CHECK-NEXT: ld r12, -176(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r7, -160(r1) # 8-byte Folded Reload +; CHECK-NEXT: add r12, r10, r12 ; CHECK-NEXT: add r7, r10, r7 -; CHECK-NEXT: sldi r0, r0, 3 +; CHECK-NEXT: sldi r0, r12, 3 ; CHECK-NEXT: sldi r11, r7, 3 -; CHECK-NEXT: add r30, r5, r0 -; CHECK-NEXT: add r29, r4, r0 -; CHECK-NEXT: add r28, r3, r0 -; CHECK-NEXT: ld r0, -160(r1) # 8-byte Folded Reload +; CHECK-NEXT: add r12, r5, r0 +; CHECK-NEXT: add r30, r4, r0 +; CHECK-NEXT: add r29, r3, r0 +; CHECK-NEXT: ld r0, -168(r1) # 8-byte Folded Reload ; CHECK-NEXT: add r7, r5, r11 ; CHECK-NEXT: add r9, r4, r11 ; CHECK-NEXT: add r11, r3, r11 ; CHECK-NEXT: add r10, r10, r0 -; CHECK-NEXT: sub r12, r10, r12 ; CHECK-NEXT: sldi r10, r10, 3 -; CHECK-NEXT: sldi r12, r12, 3 ; CHECK-NEXT: add r5, r5, r10 +; CHECK-NEXT: add r4, r4, r10 +; CHECK-NEXT: add r3, r3, r10 ; CHECK-NEXT: li r10, 0 -; CHECK-NEXT: add r3, r3, r12 -; CHECK-NEXT: add r4, r4, r12 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB7_6: # %for.body.epil ; CHECK-NEXT: # -; CHECK-NEXT: lfdux f0, r4, r8 -; CHECK-NEXT: lfdux f1, r3, r8 +; CHECK-NEXT: lfdx f0, r3, r10 +; CHECK-NEXT: lfdx f1, r4, r10 ; CHECK-NEXT: addi r6, r6, -1 ; CHECK-NEXT: cmpldi r6, 0 -; CHECK-NEXT: xsmuldp f0, f1, f0 +; CHECK-NEXT: xsmuldp f0, f0, f1 ; CHECK-NEXT: lfd f1, 0(r5) ; CHECK-NEXT: xsadddp f0, f1, f0 ; CHECK-NEXT: stfd f0, 0(r5) ; CHECK-NEXT: add r5, r5, r8 -; CHECK-NEXT: lfdx f0, r28, r10 -; CHECK-NEXT: lfdx f1, r29, r10 -; CHECK-NEXT: xsmuldp f0, f0, f1 +; CHECK-NEXT: lfdx f0, r29, r10 ; CHECK-NEXT: lfdx f1, r30, r10 +; CHECK-NEXT: xsmuldp f0, f0, f1 +; CHECK-NEXT: lfdx f1, r12, r10 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r30, r10 +; CHECK-NEXT: stfdx f0, r12, r10 ; CHECK-NEXT: lfdx f0, r11, r10 ; CHECK-NEXT: lfdx f1, r9, r10 ; CHECK-NEXT: xsmuldp f0, f0, f1 diff --git a/llvm/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll b/llvm/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll --- a/llvm/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll +++ b/llvm/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -disable-lsr -ppc-asm-full-reg-names -verify-machineinstrs \ -; RUN: -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck %s +; RUN: -ppc-formprep-update-nonconst-inc -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr9 < %s | FileCheck %s ; long long foo(char *p, int n, int count) { ; int j = 0; diff --git a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll --- a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll +++ b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll @@ -45,22 +45,22 @@ ; CHECK-NEXT: # %bb.4: ; CHECK-NEXT: add 23, 6, 12 ; CHECK-NEXT: add 22, 6, 30 -; CHECK-NEXT: add 26, 6, 28 -; CHECK-NEXT: add 25, 6, 8 -; CHECK-NEXT: sldi 24, 6, 3 -; CHECK-NEXT: sldi 26, 26, 3 +; CHECK-NEXT: add 25, 6, 28 +; CHECK-NEXT: add 24, 6, 8 +; CHECK-NEXT: sldi 26, 6, 3 ; CHECK-NEXT: sldi 25, 25, 3 +; CHECK-NEXT: sldi 24, 24, 3 ; CHECK-NEXT: sldi 23, 23, 3 ; CHECK-NEXT: sldi 22, 22, 3 -; CHECK-NEXT: add 24, 4, 24 -; CHECK-NEXT: add 26, 29, 26 +; CHECK-NEXT: add 26, 4, 26 ; CHECK-NEXT: add 25, 29, 25 +; CHECK-NEXT: add 24, 29, 24 ; CHECK-NEXT: add 23, 3, 23 ; CHECK-NEXT: add 22, 3, 22 ; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB0_5: # Parent Loop BB0_3 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 -; CHECK-NEXT: lfd 0, 0(24) +; CHECK-NEXT: lfd 0, 0(26) ; CHECK-NEXT: lfd 1, 0(23) ; CHECK-NEXT: add 6, 6, 10 ; CHECK-NEXT: cmpd 6, 27 @@ -81,6 +81,15 @@ ; CHECK-NEXT: lfd 1, 24(22) ; CHECK-NEXT: add 22, 22, 11 ; CHECK-NEXT: xsadddp 0, 0, 1 +; CHECK-NEXT: lfd 1, -16(24) +; CHECK-NEXT: xsadddp 0, 0, 1 +; CHECK-NEXT: lfd 1, -8(24) +; CHECK-NEXT: xsadddp 0, 0, 1 +; CHECK-NEXT: lfd 1, 0(24) +; CHECK-NEXT: xsadddp 0, 0, 1 +; CHECK-NEXT: lfd 1, 8(24) +; CHECK-NEXT: add 24, 24, 11 +; CHECK-NEXT: xsadddp 0, 0, 1 ; CHECK-NEXT: lfd 1, -16(25) ; CHECK-NEXT: xsadddp 0, 0, 1 ; CHECK-NEXT: lfd 1, -8(25) @@ -90,17 +99,8 @@ ; CHECK-NEXT: lfd 1, 8(25) ; CHECK-NEXT: add 25, 25, 11 ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, -16(26) -; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, -8(26) -; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 0(26) -; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 8(26) +; CHECK-NEXT: stfd 0, 0(26) ; CHECK-NEXT: add 26, 26, 11 -; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: stfd 0, 0(24) -; CHECK-NEXT: add 24, 24, 11 ; CHECK-NEXT: blt 0, .LBB0_5 ; CHECK-NEXT: b .LBB0_2 ; CHECK-NEXT: .LBB0_6: