Index: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6736,8 +6736,23 @@ VectorizedTree = VectReductionData.createOp(Builder, "op.extra", I); } } - // Update users. + + // Update users. For a min/max reduction that ends with a compare and + // select, we also have to RAUW for the compare instruction feeding the + // reduction root. That's because the compare may have extra uses besides + // the final select of the reduction. + Instruction *RdxCmp, *Cmp; + if (match(cast<Instruction>(ReductionRoot), + m_Select(m_Instruction(RdxCmp), m_Value(), m_Value())) && + (RdxCmp->getOpcode() == Instruction::ICmp || + RdxCmp->getOpcode() == Instruction::FCmp) && + match(VectorizedTree, + m_Select(m_Instruction(Cmp), m_Value(), m_Value())) && + Cmp->getOpcode() == RdxCmp->getOpcode()) { + RdxCmp->replaceAllUsesWith(Cmp); + } ReductionRoot->replaceAllUsesWith(VectorizedTree); + // Mark all scalar reduction ops for deletion, they are replaced by the // vector reductions. V.eraseInstructions(IgnoreList); Index: llvm/test/Transforms/SLPVectorizer/X86/reduction.ll =================================================================== --- llvm/test/Transforms/SLPVectorizer/X86/reduction.ll +++ llvm/test/Transforms/SLPVectorizer/X86/reduction.ll @@ -68,7 +68,7 @@ ret i32 %sum.0.lcssa } -; FIXME: PR43948 - https://bugs.llvm.org/show_bug.cgi?id=43948 +; PR43948 - https://bugs.llvm.org/show_bug.cgi?id=43948 ; The extra use of a non-vectorized element of a reduction must not be killed. 
define i32 @horiz_max_multiple_uses([32 x i32]* %x, i32* %p) { @@ -91,7 +91,7 @@ ; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 [[T4]] ; CHECK-NEXT: [[C012345:%.*]] = icmp sgt i32 [[TMP5]], [[T5]] ; CHECK-NEXT: [[T17:%.*]] = select i1 [[C012345]], i32 [[TMP5]], i32 [[T5]] -; CHECK-NEXT: [[THREE_OR_FOUR:%.*]] = select i1 undef, i32 3, i32 4 +; CHECK-NEXT: [[THREE_OR_FOUR:%.*]] = select i1 [[TMP4]], i32 3, i32 4 ; CHECK-NEXT: store i32 [[THREE_OR_FOUR]], i32* [[P:%.*]], align 8 ; CHECK-NEXT: ret i32 [[T17]] ; Index: llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll =================================================================== --- llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll +++ llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll @@ -71,7 +71,7 @@ ; CHECK-NEXT: [[NEG_1_1:%.*]] = sub nsw i32 0, [[SUB_1_1]] ; CHECK-NEXT: [[TMP46:%.*]] = select i1 [[TMP45]], i32 [[NEG_1_1]], i32 [[SUB_1_1]] ; CHECK-NEXT: [[CMP12_1_1:%.*]] = icmp slt i32 [[TMP46]], [[OP_EXTRA]] -; CHECK-NEXT: [[NARROW:%.*]] = or i1 [[CMP12_1_1]], undef +; CHECK-NEXT: [[NARROW:%.*]] = or i1 [[CMP12_1_1]], [[TMP44]] ; CHECK-NEXT: [[SPEC_SELECT8_1_1:%.*]] = select i1 [[CMP12_1_1]], i32 [[TMP46]], i32 [[OP_EXTRA]] ; CHECK-NEXT: [[SUB_2_1:%.*]] = sub i32 [[TMP30]], [[TMP3]] ; CHECK-NEXT: [[TMP47:%.*]] = icmp slt i32 [[SUB_2_1]], 0