Index: lib/Transforms/Vectorize/SLPVectorizer.cpp =================================================================== --- lib/Transforms/Vectorize/SLPVectorizer.cpp +++ lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -4255,14 +4255,6 @@ MapVector ExtraArgs; BinaryOperator *ReductionRoot = nullptr; - // After successfull horizontal reduction vectorization attempt for PHI node - // vectorizer tries to update root binary op by combining vectorized tree and - // the ReductionPHI node. But during vectorization this ReductionPHI can be - // vectorized itself and replaced by the undef value, while the instruction - // itself is marked for deletion. This 'marked for deletion' PHI node then can - // be used in new binary operation, causing "Use still stuck around after Def - // is destroyed" crash upon PHI node deletion. - WeakVH ReductionPHI; /// The opcode of the reduction. Instruction::BinaryOps ReductionOpcode = Instruction::BinaryOpsEnd; @@ -4323,7 +4315,6 @@ ReductionOpcode = B->getOpcode(); ReducedValueOpcode = 0; ReductionRoot = B; - ReductionPHI = Phi; // We currently only support adds. if ((ReductionOpcode != Instruction::Add && @@ -4411,9 +4402,9 @@ Stack.push_back(std::make_pair(I, 0)); continue; } - // NextV is an extra argument for TreeN (its parent operation). - markExtraArg(Stack.back(), NextV); } + // NextV is an extra argument for TreeN (its parent operation). + markExtraArg(Stack.back(), NextV); } return true; } @@ -4502,12 +4493,7 @@ } } // Update users. - if (ReductionPHI && !isa(ReductionPHI)) { - assert(ReductionRoot && "Need a reduction operation"); - ReductionRoot->setOperand(0, VectorizedTree); - ReductionRoot->setOperand(1, ReductionPHI); - } else - ReductionRoot->replaceAllUsesWith(VectorizedTree); + ReductionRoot->replaceAllUsesWith(VectorizedTree); } return VectorizedTree != nullptr; } Index: test/Transforms/SLPVectorizer/AArch64/gather-root.ll =================================================================== --- test/Transforms/SLPVectorizer/AArch64/gather-root.ll +++ test/Transforms/SLPVectorizer/AArch64/gather-root.ll @@ -9,7 +9,7 @@ @a = common global [80 x i8] zeroinitializer, align 16 ; DEFAULT-LABEL: @PR28330( -; DEFAULT: %tmp17 = phi i32 [ %tmp34, %for.body ], [ 0, %entry ] +; DEFAULT: %tmp17 = phi i32 [ %bin.extra, %for.body ], [ 0, %entry ] ; DEFAULT: %[[S0:.+]] = select <8 x i1> %1, <8 x i32> , <8 x i32> ; DEFAULT: %[[R0:.+]] = shufflevector <8 x i32> %[[S0]], <8 x i32> undef, <8 x i32> ; DEFAULT: %[[R1:.+]] = add <8 x i32> %[[S0]], %[[R0]] @@ -18,10 +18,10 @@ ; DEFAULT: %[[R4:.+]] = shufflevector <8 x i32> %[[R3]], <8 x i32> undef, <8 x i32> ; DEFAULT: %[[R5:.+]] = add <8 x i32> %[[R3]], %[[R4]] ; DEFAULT: %[[R6:.+]] = extractelement <8 x i32> %[[R5]], i32 0 -; DEFAULT: %tmp34 = add i32 %[[R6]], %tmp17 +; DEFAULT: %bin.extra = add i32 %[[R6]], %tmp17 ; ; GATHER-LABEL: @PR28330( -; GATHER: %tmp17 = phi i32 [ %tmp34, %for.body ], [ 0, %entry ] +; GATHER: %tmp17 = phi i32 [ %bin.extra, %for.body ], [ 0, %entry ] ; GATHER: %tmp19 = select i1 %tmp1, i32 -720, i32 -80 ; GATHER: %tmp21 = select i1 %tmp3, i32 -720, i32 -80 ; GATHER: %tmp23 = select i1 %tmp5, i32 -720, i32 -80 @@ -45,7 +45,7 @@ ; GATHER: %[[R4:.+]] = shufflevector <8 x i32> %[[R3]], <8 x i32> undef, <8 x i32> ; GATHER: %[[R5:.+]] = add <8 x i32> %[[R3]], %[[R4]] ; GATHER: %[[R6:.+]] = extractelement <8 x i32> %[[R5]], i32 0 -; GATHER: %tmp34 = add i32 %[[R6]], %tmp17 +; GATHER: %bin.extra = add i32 %[[R6]], %tmp17 ; ; MAX-COST-LABEL: @PR28330( ; MAX-COST-NOT: shufflevector @@ -98,7 +98,7 @@ ; DEFAULT-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer ; DEFAULT-NEXT: br label [[FOR_BODY:%.*]] ; DEFAULT: for.body: -; DEFAULT-NEXT: [[TMP17:%.*]] = phi i32 [ [[TMP34:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; DEFAULT-NEXT: [[TMP17:%.*]] = phi i32 [ [[BIN_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; DEFAULT-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> , <8 x i32> ; DEFAULT-NEXT: [[TMP20:%.*]] = add i32 -5, undef ; DEFAULT-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], undef @@ -114,8 +114,8 @@ ; DEFAULT-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> ; DEFAULT-NEXT: [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] ; DEFAULT-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0 -; DEFAULT-NEXT: [[BIN_EXTRA:%.*]] = add i32 [[TMP3]], -5 -; DEFAULT-NEXT: [[TMP34]] = add i32 [[BIN_EXTRA]], [[TMP17]] +; DEFAULT-NEXT: [[BIN_EXTRA]] = add i32 [[TMP3]], -5 +; DEFAULT-NEXT: [[TMP34:%.*]] = add i32 [[TMP32]], undef ; DEFAULT-NEXT: br label [[FOR_BODY]] ; ; GATHER-LABEL: @PR32038( @@ -138,7 +138,7 @@ ; GATHER-NEXT: [[TMP15:%.*]] = icmp eq i8 [[TMP14]], 0 ; GATHER-NEXT: br label [[FOR_BODY:%.*]] ; GATHER: for.body: -; GATHER-NEXT: [[TMP17:%.*]] = phi i32 [ [[TMP34:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; GATHER-NEXT: [[TMP17:%.*]] = phi i32 [ [[BIN_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; GATHER-NEXT: [[TMP19:%.*]] = select i1 [[TMP1]], i32 -720, i32 -80 ; GATHER-NEXT: [[TMP20:%.*]] = add i32 -5, [[TMP19]] ; GATHER-NEXT: [[TMP21:%.*]] = select i1 [[TMP3]], i32 -720, i32 -80 @@ -169,8 +169,8 @@ ; GATHER-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> ; GATHER-NEXT: [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] ; GATHER-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0 -; GATHER-NEXT: [[BIN_EXTRA:%.*]] = add i32 [[TMP8]], -5 -; GATHER-NEXT: [[TMP34]] = add i32 [[BIN_EXTRA]], [[TMP17]] +; GATHER-NEXT: [[BIN_EXTRA]] = add i32 [[TMP8]], -5 +; GATHER-NEXT: [[TMP34:%.*]] = add i32 [[TMP32]], [[TMP33]] ; GATHER-NEXT: br label [[FOR_BODY]] ; ; MAX-COST-LABEL: @PR32038( Index: test/Transforms/SLPVectorizer/X86/reduction_loads.ll =================================================================== --- test/Transforms/SLPVectorizer/X86/reduction_loads.ll +++ test/Transforms/SLPVectorizer/X86/reduction_loads.ll @@ -14,7 +14,7 @@ ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* %p, i64 7 ; CHECK-NEXT: br label %for.body ; CHECK: for.body: -; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, %entry ], [ %add.7, %for.body ] +; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, %entry ], [ %bin.extra, %for.body ] ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* %p to <8 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i32> , [[TMP1]] @@ -32,10 +32,10 @@ ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0 -; CHECK-NEXT: [[ADD_7:%.*]] = add i32 [[TMP4]], [[SUM]] -; CHECK-NEXT: br i1 true, label %for.end, label %for.body +; CHECK-NEXT: [[BIN_EXTRA:%.*]] = add i32 [[TMP4]], [[SUM]] +; CHECK: br i1 true, label %for.end, label %for.body ; CHECK: for.end: -; CHECK-NEXT: ret i32 [[ADD_7]] +; CHECK-NEXT: ret i32 [[BIN_EXTRA]] ; entry: %arrayidx.1 = getelementptr inbounds i32, i32* %p, i64 1 Index: test/Transforms/SLPVectorizer/X86/scheduling.ll =================================================================== --- test/Transforms/SLPVectorizer/X86/scheduling.ll +++ test/Transforms/SLPVectorizer/X86/scheduling.ll @@ -12,7 +12,7 @@ ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX2:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]] ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0 -; CHECK-NEXT: [[ADD52:%.*]] = add nsw i32 [[TMP15]], +; CHECK: [[ADD52:%.*]] = add i32 [[TMP15]], ; CHECK: ret i32 [[ADD52]] ; entry: