Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -392,8 +392,12 @@ /// chain (aliasing node.) SDValue FindBetterChain(SDNode *N, SDValue Chain); - /// Do FindBetterChain for a store and any possibly adjacent stores on - /// consecutive chains. + /// Try to replace a store and any possibly adjacent stores on + /// consecutive chains with better chains. Return true only if St is + /// replaced. + /// + /// Notice that other chains may still be replaced even if the function + /// returns false. bool findBetterNeighborChains(StoreSDNode *St); /// Match "(X shl/srl V1) & V2" where V2 may not be present. @@ -12102,6 +12106,7 @@ // manipulation. Return the original node to not do anything else. return SDValue(ST, 0); } + Chain = ST->getChain(); } // Try transforming N to an indexed store. @@ -14950,7 +14955,7 @@ return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases); } -bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) { +bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { // This holds the base pointer, index, and the offset in bytes from the base // pointer. BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG); @@ -15010,7 +15015,7 @@ } } - bool MadeChange = false; + bool MadeChangeToSt = false; SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains; for (StoreSDNode *ChainedStore : ChainedStores) { @@ -15018,7 +15023,8 @@ SDValue BetterChain = FindBetterChain(ChainedStore, Chain); if (Chain != BetterChain) { - MadeChange = true; + if (ChainedStore == St) + MadeChangeToSt = true; BetterChains.push_back(std::make_pair(ChainedStore, BetterChain)); } } @@ -15028,7 +15034,7 @@ for (auto Replacement : BetterChains) replaceStoreChain(Replacement.first, Replacement.second); - return MadeChange; + return MadeChangeToSt; } /// This is the entry point for the file. 
Index: llvm/trunk/test/CodeGen/PowerPC/pr28130.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/pr28130.ll +++ llvm/trunk/test/CodeGen/PowerPC/pr28130.ll @@ -0,0 +1,70 @@ +; RUN: llc -O0 < %s | FileCheck %s +target triple = "powerpc64le-unknown-linux-gnu" + +%StructA = type { double, double, double, double, double, double, double, double } + +define void @Test(%StructA* %tmp) unnamed_addr #0 align 2 { +; CHECK-LABEL: Test: +; CHECK: lxvd2x +; CHECK-NEXT: xxswapd +; CHECK: lxvd2x +; CHECK-NEXT: xxswapd +; CHECK: lxvd2x +; CHECK-NEXT: xxswapd +; CHECK: lxvd2x +; CHECK-NEXT: xxswapd +; CHECK: xxswapd [[OUTPUT:[0-9]+]] +; CHECK-NEXT: stxvd2x [[OUTPUT]] +bb: + %tmp2 = getelementptr inbounds %StructA, %StructA* %tmp, i64 0, i32 0 + %tmp4 = bitcast %StructA* %tmp to <2 x double>* + %tmp5 = getelementptr inbounds %StructA, %StructA* %tmp, i64 0, i32 2 + %tmp9 = getelementptr inbounds %StructA, %StructA* %tmp, i64 0, i32 4 + %tmp11 = getelementptr inbounds %StructA, %StructA* %tmp, i64 0, i32 5 + %tmp13 = getelementptr inbounds %StructA, %StructA* %tmp, i64 0, i32 6 + %tmp15 = getelementptr inbounds %StructA, %StructA* %tmp, i64 0, i32 7 + %tmp18 = load double, double* %tmp2, align 16 + %tmp19 = load double, double* %tmp11, align 8 + %tmp20 = load double, double* %tmp9, align 16 + %tmp21 = fsub double 1.210000e+04, %tmp20 + %tmp22 = fmul double %tmp18, %tmp21 + %tmp23 = fadd double %tmp20, %tmp22 + %tmp24 = load double, double* %tmp13, align 16 + %tmp25 = fsub double 1.000000e+02, %tmp24 + %tmp26 = fmul double %tmp18, %tmp25 + %tmp27 = fadd double %tmp24, %tmp26 + %tmp28 = load double, double* %tmp15, align 8 + %tmp29 = insertelement <2 x double> undef, double %tmp19, i32 0 + %tmp30 = insertelement <2 x double> %tmp29, double %tmp28, i32 1 + %tmp31 = fsub <2 x double> , %tmp30 + %tmp32 = insertelement <2 x double> undef, double %tmp18, i32 0 + %tmp33 = insertelement <2 x double> %tmp32, double %tmp18, i32 1 + %tmp34 
= fmul <2 x double> %tmp33, %tmp31 + %tmp35 = fadd <2 x double> %tmp30, %tmp34 + %tmp36 = bitcast double* %tmp5 to <2 x double>* + %tmp37 = load <2 x double>, <2 x double>* %tmp36, align 16 + %tmp38 = fsub <2 x double> , %tmp37 + %tmp39 = fmul <2 x double> %tmp33, %tmp38 + %tmp40 = fadd <2 x double> %tmp37, %tmp39 + %tmp41 = fsub <2 x double> , %tmp40 + %tmp42 = fmul <2 x double> %tmp33, %tmp41 + %tmp43 = fadd <2 x double> %tmp40, %tmp42 + %tmp44 = fsub <2 x double> , %tmp35 + %tmp45 = fmul <2 x double> %tmp33, %tmp44 + %tmp46 = fadd <2 x double> %tmp35, %tmp45 + %tmp48 = fsub double 1.440000e+04, %tmp23 + %tmp49 = fmul double %tmp18, %tmp48 + %tmp50 = fadd double %tmp23, %tmp49 + store double %tmp50, double* %tmp9, align 16 + %tmp51 = fsub double 1.000000e+02, %tmp27 + %tmp52 = fmul double %tmp18, %tmp51 + %tmp53 = fadd double %tmp27, %tmp52 + store double %tmp53, double* %tmp13, align 16 + %tmp54 = extractelement <2 x double> %tmp46, i32 1 + store double %tmp54, double* %tmp15, align 8 + %tmp55 = bitcast double* %tmp5 to <2 x double>* + store <2 x double> %tmp43, <2 x double>* %tmp55, align 16 + ret void +} + +attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr8" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }