diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -11090,14 +11090,35 @@
       }
     }
     if (VectorizedTree) {
+      // Reorder operands of bool logical op in the natural order to avoid
+      // possible problems with poison propagation. If not possible to reorder
+      // (both operands are originally RHS), emit an extra freeze instruction
+      // for the LHS operand.
+      auto &&FixBoolLogicalOps = [&Builder](Value *&LHS, Value *&RHS,
+                                            Instruction *RedOp1,
+                                            Instruction *RedOp2) {
+        if (!isa<SelectInst>(RedOp1) || !isBoolLogicOp(RedOp1))
+          return;
+        if (getRdxOperand(RedOp1, 0) == LHS ||
+            getRdxOperand(RedOp1, 1) != LHS || isGuaranteedNotToBePoison(LHS))
+          return;
+        if (!isa<SelectInst>(RedOp2) || !isBoolLogicOp(RedOp2))
+          return;
+        if (getRdxOperand(RedOp2, 0) == RHS ||
+            getRdxOperand(RedOp2, 1) != RHS || isGuaranteedNotToBePoison(RHS)) {
+          std::swap(LHS, RHS);
+          return;
+        }
+        LHS = Builder.CreateFreeze(LHS);
+      };
       // Finish the reduction.
       // Need to add extra arguments and not vectorized possible reduction
       // values.
       // Try to avoid dependencies between the scalar remainders after
       // reductions.
       auto &&FinalGen =
-          [this, &Builder,
-           &TrackedVals](ArrayRef<std::pair<Instruction *, Value *>> InstVals) {
+          [this, &Builder, &TrackedVals, &FixBoolLogicalOps](
+              ArrayRef<std::pair<Instruction *, Value *>> InstVals) {
             unsigned Sz = InstVals.size();
             SmallVector<std::pair<Instruction *, Value *>> ExtraReds(Sz / 2 +
                                                                      Sz % 2);
@@ -11114,6 +11135,11 @@
               auto It2 = TrackedVals.find(RdxVal2);
               if (It2 != TrackedVals.end())
                 StableRdxVal2 = It2->second;
+              // To prevent poison from leaking across what used to be
+              // sequential, safe, scalar boolean logic operations, reorder the
+              // operands or, if that is not possible, freeze the LHS operand.
+              FixBoolLogicalOps(StableRdxVal1, StableRdxVal2, InstVals[I].first,
+                                RedOp);
               Value *ExtraRed = createOp(Builder, RdxKind, StableRdxVal1,
                                          StableRdxVal2, "op.rdx", ReductionOps);
               ExtraReds[I / 2] = std::make_pair(InstVals[I].first, ExtraRed);
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
@@ -308,7 +308,7 @@
 ; CHECK-NEXT: [[TMP6:%.*]] = freeze <4 x i1> [[TMP2]]
 ; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP6]])
 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
-; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP8]], i1 [[S2]], i1 false
+; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[S2]], i1 [[TMP8]], i1 false
 ; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[TMP7]], i1 [[OP_RDX]], i1 false
 ; CHECK-NEXT: ret i1 [[OP_RDX1]]
 ;
@@ -399,7 +399,7 @@
 ; SSE-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP8]])
 ; SSE-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP6]], i32 0
 ; SSE-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP6]], i32 1
-; SSE-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP10]], i1 [[TMP11]], i1 false
+; SSE-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP11]], i1 [[TMP10]], i1 false
 ; SSE-NEXT: [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i1 [[C2]], i1 false
 ; SSE-NEXT: [[OP_RDX2:%.*]] = select i1 [[TMP9]], i1 [[OP_RDX1]], i1 false
 ; SSE-NEXT: ret i1 [[OP_RDX2]]
@@ -414,7 +414,7 @@
 ; AVX-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[X]],
 ; AVX-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
 ; AVX-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP5]])
-; AVX-NEXT: [[OP_RDX:%.*]] = select i1 [[C1]], i1 [[C0]], i1 false
+; AVX-NEXT: [[OP_RDX:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false
 ; AVX-NEXT: [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i1 [[C2]], i1 false
 ; AVX-NEXT: [[OP_RDX2:%.*]] = select i1 [[TMP6]], i1 [[OP_RDX1]], i1 false
 ; AVX-NEXT: ret i1 [[OP_RDX2]]