Index: lib/Transforms/Utils/LoopUtils.cpp =================================================================== --- lib/Transforms/Utils/LoopUtils.cpp +++ lib/Transforms/Utils/LoopUtils.cpp @@ -230,7 +230,9 @@ // - PHI: // - All uses of the PHI must be the reduction (safe). // - Otherwise, not safe. - // - By one instruction outside of the loop (safe). + // - By instructions outside of the loop (safe). + // * One value may have several outside users, but all outside + // uses must be of the same value. // - By further instructions outside of the loop (not safe). // - By an instruction that is not part of the reduction (not safe). // This is either: @@ -297,10 +299,15 @@ // Check if we found the exit user. BasicBlock *Parent = UI->getParent(); if (!TheLoop->contains(Parent)) { - // Exit if you find multiple outside users or if the header phi node is - // being used. In this case the user uses the value of the previous - // iteration, in which case we would loose "VF-1" iterations of the - // reduction operation if we vectorize. + // If we already know this instruction is used externally, move on to + // the next user. + if (ExitInstruction == Cur) + continue; + + // Exit if you find multiple values used outside or if the header phi + // node is being used. In this case the user uses the value of the + // previous iteration, in which case we would loose "VF-1" iterations of + // the reduction operation if we vectorize. if (ExitInstruction != nullptr || Cur == Phi) return false; Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4091,13 +4091,10 @@ // we already fixed them. assert(LCSSAPhi->getNumIncomingValues() < 3 && "Invalid LCSSA PHI"); - // We found our reduction value exit-PHI. Update it with the + // We found a reduction value exit-PHI. Update it with the // incoming bypass edge. - if (LCSSAPhi->getIncomingValue(0) == LoopExitInst) { - // Add an edge coming from the bypass. + if (LCSSAPhi->getIncomingValue(0) == LoopExitInst) LCSSAPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock); - break; - } } // end of the LCSSA phi scan. // Fix the scalar loop reduction variable with the incoming reduction sum Index: test/Transforms/LoopVectorize/reduction.ll =================================================================== --- test/Transforms/LoopVectorize/reduction.ll +++ test/Transforms/LoopVectorize/reduction.ll @@ -493,3 +493,49 @@ %inc.2 = add nsw i32 %inc511.1.inc4.1, 2 ret i32 %inc.2 } + +;CHECK-LABEL: @reduction_sum_multiuse( +;CHECK: phi <4 x i32> +;CHECK: load <4 x i32> +;CHECK: add <4 x i32> +;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> +;CHECK: add <4 x i32> +;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> +;CHECK: add <4 x i32> +;CHECK: extractelement <4 x i32> %{{.*}}, i32 0 +;CHECK: %sum.lcssa = phi i32 [ %[[SCALAR:.*]], %.lr.ph ], [ %[[VECTOR:.*]], %middle.block ] +;CHECK: %sum.copy = phi i32 [ %[[SCALAR]], %.lr.ph ], [ %[[VECTOR]], %middle.block ] +;CHECK: ret i32 +define i32 @reduction_sum_multiuse(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) { + %1 = icmp sgt i32 %n, 0 + br i1 %1, label %.lr.ph.preheader, label %end +.lr.ph.preheader: ; preds = %0 + br label %.lr.ph + +.lr.ph: ; preds = %0, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ] + %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %.lr.ph.preheader ] + %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %3 = load i32, i32* %2, align 4 + %4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv + %5 = load i32, i32* %4, align 4 + %6 = trunc i64 %indvars.iv to i32 + %7 = add i32 %sum.02, %6 + %8 = add i32 %7, %3 + %9 = add i32 %8, %5 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph, %0 + %sum.lcssa = phi i32 [ %9, %.lr.ph ] + %sum.copy = phi i32 [ %9, %.lr.ph ] + br label %end + +end: + %f1 = phi i32 [ 0, %0 ], [ %sum.lcssa, %._crit_edge ] + %f2 = phi i32 [ 0, %0 ], [ %sum.copy, %._crit_edge ] + %final = add i32 %f1, %f2 + ret i32 %final +}