diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -15786,19 +15786,39 @@ } break; - case ISD::INTRINSIC_W_CHAIN: - // For little endian, VSX loads require generating lxvd2x/xxswapd. - // Not needed on ISA 3.0 based CPUs since we have a non-permuting load. - if (Subtarget.needsSwapsForVSXMemOps()) { - switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { - default: - break; - case Intrinsic::ppc_vsx_lxvw4x: - case Intrinsic::ppc_vsx_lxvd2x: - return expandVSXLoadForLE(N, DCI); + case ISD::INTRINSIC_W_CHAIN: { + unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + if (IID == Intrinsic::ppc_altivec_vsum4sbs || + IID == Intrinsic::ppc_altivec_vsum4shs || + IID == Intrinsic::ppc_altivec_vsum4ubs) { + if (BuildVectorSDNode *BVN = + dyn_cast<BuildVectorSDNode>(N->getOperand(3))) { + APInt APSplatBits, APSplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + bool BVNIsConstantSplat = BVN->isConstantSplat( + APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0, + !Subtarget.isLittleEndian()); + // If the constant splat vector is 0, SAT bit will not be changed. + // The intrinsic should not have any side effect for this case. + if (BVNIsConstantSplat && APSplatBits == 0) + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0)); + } + return SDValue(); } + // For little endian, VSX loads require generating lxvd2x/xxswapd. + // Not needed on ISA 3.0 based CPUs since we have a non-permuting load. + if (Subtarget.needsSwapsForVSXMemOps()) { + switch (IID) { + default: + break; + case Intrinsic::ppc_vsx_lxvw4x: + case Intrinsic::ppc_vsx_lxvd2x: + return expandVSXLoadForLE(N, DCI); + } + } + break; } - break; case ISD::INTRINSIC_VOID: // For little endian, VSX stores require generating xxswapd/stxvd2x. // Not needed on ISA 3.0 based CPUs since we have a non-permuting store. 
diff --git a/llvm/test/CodeGen/PowerPC/vector-sum-sat-bit-side-effect.ll b/llvm/test/CodeGen/PowerPC/vector-sum-sat-bit-side-effect.ll --- a/llvm/test/CodeGen/PowerPC/vector-sum-sat-bit-side-effect.ll +++ b/llvm/test/CodeGen/PowerPC/vector-sum-sat-bit-side-effect.ll @@ -9,8 +9,6 @@ define void @test1(<16 x i8> %0) { ; CHECK-LABEL: test1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxlxor v3, v3, v3 -; CHECK-NEXT: vsum4sbs v2, v2, v3 ; CHECK-NEXT: blr entry: %1 = tail call <4 x i32> @llvm.ppc.altivec.vsum4sbs(<16 x i8> %0, <4 x i32> zeroinitializer) @@ -20,8 +18,6 @@ define void @test2(<8 x i16> %0) { ; CHECK-LABEL: test2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxlxor v3, v3, v3 -; CHECK-NEXT: vsum4shs v2, v2, v3 ; CHECK-NEXT: blr entry: %1 = tail call <4 x i32> @llvm.ppc.altivec.vsum4shs(<8 x i16> %0, <4 x i32> zeroinitializer) @@ -31,8 +27,6 @@ define void @test3(<16 x i8> %0) { ; CHECK-LABEL: test3: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxlxor v3, v3, v3 -; CHECK-NEXT: vsum4ubs v2, v2, v3 ; CHECK-NEXT: blr entry: %1 = tail call <4 x i32> @llvm.ppc.altivec.vsum4ubs(<16 x i8> %0, <4 x i32> zeroinitializer)