diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -14403,6 +14403,11 @@ return SDValue(); if (LSBase->getOpcode() == ISD::LOAD) { + // If the load has any user other than the shufflevector instruction, + // it is not profitable to replace the shufflevector with a reverse load. + if (!LSBase->hasOneUse()) + return SDValue(); + SDLoc dl(SVN); SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()}; return DAG.getMemIntrinsicNode( @@ -14411,6 +14416,12 @@ } if (LSBase->getOpcode() == ISD::STORE) { + // If there are other uses of the shuffle, the swap cannot be avoided. + // Forcing the use of an X-Form (since swapped stores only have + // X-Forms) without removing the swap is unprofitable. + if (!SVN->hasOneUse()) + return SDValue(); + SDLoc dl(LSBase); SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0), LSBase->getBasePtr()}; diff --git a/llvm/test/CodeGen/PowerPC/vsx-shuffle-le-multiple-uses.ll b/llvm/test/CodeGen/PowerPC/vsx-shuffle-le-multiple-uses.ll --- a/llvm/test/CodeGen/PowerPC/vsx-shuffle-le-multiple-uses.ll +++ b/llvm/test/CodeGen/PowerPC/vsx-shuffle-le-multiple-uses.ll @@ -7,8 +7,8 @@ ; CHECK-LABEL: loadHasMultipleUses: ; CHECK: # %bb.0: ; CHECK-NEXT: lxv 0, 0(3) +; CHECK-NEXT: xxswapd 34, 0 ; CHECK-NEXT: stxv 0, 0(4) -; CHECK-NEXT: lxvd2x 34, 0, 3 ; CHECK-NEXT: blr %v1 = load <2 x double>, <2 x double>* %p1 store <2 x double> %v1, <2 x double>* %p2, align 16 @@ -19,10 +19,8 @@ define <2 x double> @storeHasMultipleUses(<2 x double> %v, <2 x double>* %p) { ; CHECK-LABEL: storeHasMultipleUses: ; CHECK: # %bb.0: -; CHECK-NEXT: xxswapd 0, 34 -; CHECK-NEXT: addi 3, 5, 256 -; CHECK-NEXT: stxvd2x 34, 0, 3 -; CHECK-NEXT: xxlor 34, 0, 0 +; CHECK-NEXT: xxswapd 34, 34 +; CHECK-NEXT: stxv 34, 256(5) ; CHECK-NEXT: blr %v1 = shufflevector <2 x double> %v, <2 x double> %v, <2 x i32> < i32 1, i32 
0> %addr = getelementptr inbounds <2 x double>, <2 x double>* %p, i64 16