diff --git a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp --- a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp +++ b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp @@ -261,6 +261,10 @@ PHINode *RealPHI = nullptr; PHINode *ImagPHI = nullptr; + /// Set this flag to true if RealPHI and ImagPHI were reached during reduction + /// detection. + bool PHIsFound = false; + /// OldToNewPHI maps the original real PHINode to a new, double-sized PHINode. /// The new PHINode corresponds to a vector of deinterleaved complex numbers. /// This mapping is populated during @@ -1419,7 +1423,8 @@ FinalReduction = dyn_cast(U); } - if (NumUsers != 2 || !FinalReduction || FinalReduction->getParent() == B) + if (NumUsers != 2 || !FinalReduction || FinalReduction->getParent() == B || + isa(FinalReduction)) continue; ReductionInfo[ReductionOp] = {&PHI, FinalReduction}; @@ -1460,6 +1465,7 @@ RealPHI = ReductionInfo[Real].first; ImagPHI = ReductionInfo[Imag].first; + PHIsFound = false; auto Node = identifyNode(Real, Imag); if (!Node) { std::swap(Real, Imag); @@ -1467,9 +1473,10 @@ Node = identifyNode(Real, Imag); } - // If a node is identified, mark its operation instructions as used to - // prevent re-identification and attach the node to the real part - if (Node) { + // If a node is identified and reduction PHINode is used in the chain of + // operations, mark its operation instructions as used to prevent + // re-identification and attach the node to the real part + if (Node && PHIsFound) { LLVM_DEBUG(dbgs() << "Identified reduction starting from instructions: " << *Real << " / " << *Imag << "\n"); Processed[i] = true; @@ -1762,6 +1769,7 @@ if (Real != RealPHI || Imag != ImagPHI) return nullptr; + PHIsFound = true; NodePtr PlaceholderNode = prepareCompositeNode( ComplexDeinterleavingOperation::ReductionPHI, Real, Imag); return submitCompositeNode(PlaceholderNode); diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll --- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll +++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll @@ -236,4 +236,31 @@ %.fca.0.1.insert = insertvalue %"struct.std::complex" %.fca.0.0.insert, double %18, 0, 1 ret %"struct.std::complex" %.fca.0.1.insert } + +; The reduced bug from D153355. Shows that reduction was detected where it did not exist. +define void @bug(i1 %exitcond.not) { +; CHECK-LABEL: bug: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: .LBB3_1: // %for.body +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: tbz w0, #0, .LBB3_1 +; CHECK-NEXT: // %bb.2: // %for.end.loopexit +; CHECK-NEXT: ret +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %vec_r.0190 = phi <4 x float> [ zeroinitializer, %entry ], [ %lane62, %for.body ] + %vec_b.0188 = phi <4 x float> [ zeroinitializer, %entry ], [ %lane76, %for.body ] + %add.i175 = fadd <4 x float> %vec_b.0188, %vec_r.0190 + %lane62 = shufflevector <4 x float> , <4 x float> zeroinitializer, <4 x i32> zeroinitializer + %lane76 = shufflevector <4 x float> , <4 x float> zeroinitializer, <4 x i32> zeroinitializer + br i1 %exitcond.not, label %for.end.loopexit, label %for.body + +for.end.loopexit: ; preds = %for.body + %mul.i177 = fadd <4 x float> %lane62, %add.i175 + %mul.i179 = fadd <4 x float> %lane76, %add.i175 + ret void +} + declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>)