Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -393,8 +393,8 @@ SDValue FindBetterChain(SDNode *N, SDValue Chain); /// Do FindBetterChain for a store and any possibly adjacent stores on - /// consecutive chains. - bool findBetterNeighborChains(StoreSDNode *St); + /// consecutive chains. Return true if found a better chain for St. + bool findBetterNeighborChains(SDValue &Chain, StoreSDNode *St); /// Match "(X shl/srl V1) & V2" where V2 may not be present. bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask); @@ -12086,7 +12086,7 @@ // Walk up chain skipping non-aliasing memory nodes, on this store and any // adjacent stores. - if (findBetterNeighborChains(ST)) { + if (findBetterNeighborChains(Chain, ST)) { // replaceStoreChain uses CombineTo, which handled all of the worklist // manipulation. Return the original node to not do anything else. return SDValue(ST, 0); @@ -14940,7 +14940,7 @@ return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases); } -bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) { +bool DAGCombiner::findBetterNeighborChains(SDValue &Chain, StoreSDNode *St) { // This holds the base pointer, index, and the offset in bytes from the base // pointer. BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG); @@ -15000,7 +15000,7 @@ } } - bool MadeChange = false; + bool MadeChangeToSt = false; SmallVector, 8> BetterChains; for (StoreSDNode *ChainedStore : ChainedStores) { @@ -15008,7 +15008,8 @@ SDValue BetterChain = FindBetterChain(ChainedStore, Chain); if (Chain != BetterChain) { - MadeChange = true; + if (ChainedStore == St) + MadeChangeToSt = true; BetterChains.push_back(std::make_pair(ChainedStore, BetterChain)); } } @@ -15018,7 +15019,10 @@ for (auto Replacement : BetterChains) replaceStoreChain(Replacement.first, Replacement.second); - return MadeChange; + if (!MadeChangeToSt) + Chain = St->getChain(); + + return MadeChangeToSt; } /// This is the entry point for the file. Index: test/CodeGen/PowerPC/pr28130.ll =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/pr28130.ll @@ -0,0 +1,70 @@ +; RUN: llc -O0 < %s | FileCheck %s +target triple = "powerpc64le-unknown-linux-gnu" + +%StructA = type { double, double, double, double, double, double, double, double } + +define void @Test(%StructA* %tmp) unnamed_addr #0 align 2 { +; CHECK-LABEL: Test: +; CHECK: lxvd2x +; CHECK-NEXT: xxswapd +; CHECK: lxvd2x +; CHECK-NEXT: xxswapd +; CHECK: lxvd2x +; CHECK-NEXT: xxswapd +; CHECK: lxvd2x +; CHECK-NEXT: xxswapd +; CHECK: xxswapd [[OUTPUT:[0-9]+]] +; CHECK-NEXT: stxvd2x [[OUTPUT]] +bb: + %tmp2 = getelementptr inbounds %StructA, %StructA* %tmp, i64 0, i32 0 + %tmp4 = bitcast %StructA* %tmp to <2 x double>* + %tmp5 = getelementptr inbounds %StructA, %StructA* %tmp, i64 0, i32 2 + %tmp9 = getelementptr inbounds %StructA, %StructA* %tmp, i64 0, i32 4 + %tmp11 = getelementptr inbounds %StructA, %StructA* %tmp, i64 0, i32 5 + %tmp13 = getelementptr inbounds %StructA, %StructA* %tmp, i64 0, i32 6 + %tmp15 = getelementptr inbounds %StructA, %StructA* %tmp, i64 0, i32 7 + %tmp18 = load double, double* %tmp2, align 16 + %tmp19 = load double, double* %tmp11, align 8 + %tmp20 = load double, double* %tmp9, align 16 + %tmp21 = fsub double 1.210000e+04, %tmp20 + %tmp22 = fmul double %tmp18, %tmp21 + %tmp23 = fadd double %tmp20, %tmp22 + %tmp24 = load double, double* %tmp13, align 16 + %tmp25 = fsub double 1.000000e+02, %tmp24 + %tmp26 = fmul double %tmp18, %tmp25 + %tmp27 = fadd double %tmp24, %tmp26 + %tmp28 = load double, double* %tmp15, align 8 + %tmp29 = insertelement <2 x double> undef, double %tmp19, i32 0 + %tmp30 = insertelement <2 x double> %tmp29, double %tmp28, i32 1 + %tmp31 = fsub <2 x double> , %tmp30 + %tmp32 = insertelement <2 x double> undef, double %tmp18, i32 0 + %tmp33 = insertelement <2 x double> %tmp32, double %tmp18, i32 1 + %tmp34 = fmul <2 x double> %tmp33, %tmp31 + %tmp35 = fadd <2 x double> %tmp30, %tmp34 + %tmp36 = bitcast double* %tmp5 to <2 x double>* + %tmp37 = load <2 x double>, <2 x double>* %tmp36, align 16 + %tmp38 = fsub <2 x double> , %tmp37 + %tmp39 = fmul <2 x double> %tmp33, %tmp38 + %tmp40 = fadd <2 x double> %tmp37, %tmp39 + %tmp41 = fsub <2 x double> , %tmp40 + %tmp42 = fmul <2 x double> %tmp33, %tmp41 + %tmp43 = fadd <2 x double> %tmp40, %tmp42 + %tmp44 = fsub <2 x double> , %tmp35 + %tmp45 = fmul <2 x double> %tmp33, %tmp44 + %tmp46 = fadd <2 x double> %tmp35, %tmp45 + %tmp48 = fsub double 1.440000e+04, %tmp23 + %tmp49 = fmul double %tmp18, %tmp48 + %tmp50 = fadd double %tmp23, %tmp49 + store double %tmp50, double* %tmp9, align 16 + %tmp51 = fsub double 1.000000e+02, %tmp27 + %tmp52 = fmul double %tmp18, %tmp51 + %tmp53 = fadd double %tmp27, %tmp52 + store double %tmp53, double* %tmp13, align 16 + %tmp54 = extractelement <2 x double> %tmp46, i32 1 + store double %tmp54, double* %tmp15, align 8 + %tmp55 = bitcast double* %tmp5 to <2 x double>* + store <2 x double> %tmp43, <2 x double>* %tmp55, align 16 + ret void +} + +attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr8" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }