diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6690,8 +6690,21 @@ VectorizedTree = VectReductionData.createOp(Builder, "op.extra", I); } } - // Update users. + + // Update users. For a min/max reduction that ends with a compare and + // select, we also have to RAUW for the compare instruction feeding the + // reduction root. That's because the original compare may have extra uses + // besides the final select of the reduction. + if (ReductionData.isMinMax() && isa(VectorizedTree)) { + assert(isa(ReductionRoot) && + "Expected min/max reduction to have select root instruction"); + + Value *ScalarCond = cast(ReductionRoot)->getCondition(); + Value *VectorCond = cast(VectorizedTree)->getCondition(); + ScalarCond->replaceAllUsesWith(VectorCond); + } ReductionRoot->replaceAllUsesWith(VectorizedTree); + // Mark all scalar reduction ops for deletion, they are replaced by the // vector reductions. V.eraseInstructions(IgnoreList); diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll @@ -91,7 +91,7 @@ ; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 [[T4]] ; CHECK-NEXT: [[C012345:%.*]] = icmp sgt i32 [[TMP5]], [[T5]] ; CHECK-NEXT: [[T17:%.*]] = select i1 [[C012345]], i32 [[TMP5]], i32 [[T5]] -; CHECK-NEXT: [[THREE_OR_FOUR:%.*]] = select i1 undef, i32 3, i32 4 +; CHECK-NEXT: [[THREE_OR_FOUR:%.*]] = select i1 [[TMP4]], i32 3, i32 4 ; CHECK-NEXT: store i32 [[THREE_OR_FOUR]], i32* [[P:%.*]], align 8 ; CHECK-NEXT: ret i32 [[T17]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll b/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll @@ -71,7 +71,7 @@ ; CHECK-NEXT: [[NEG_1_1:%.*]] = sub nsw i32 0, [[SUB_1_1]] ; CHECK-NEXT: [[TMP46:%.*]] = select i1 [[TMP45]], i32 [[NEG_1_1]], i32 [[SUB_1_1]] ; CHECK-NEXT: [[CMP12_1_1:%.*]] = icmp slt i32 [[TMP46]], [[OP_EXTRA]] -; CHECK-NEXT: [[NARROW:%.*]] = or i1 [[CMP12_1_1]], undef +; CHECK-NEXT: [[NARROW:%.*]] = or i1 [[CMP12_1_1]], [[TMP44]] ; CHECK-NEXT: [[SPEC_SELECT8_1_1:%.*]] = select i1 [[CMP12_1_1]], i32 [[TMP46]], i32 [[OP_EXTRA]] ; CHECK-NEXT: [[SUB_2_1:%.*]] = sub i32 [[TMP30]], [[TMP3]] ; CHECK-NEXT: [[TMP47:%.*]] = icmp slt i32 [[SUB_2_1]], 0