Index: lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3939,6 +3939,41 @@
   return V->getType() < V2->getType();
 }
 
+/// Pick the candidate reduction value feeding the PHI node \p P.
+///
+/// The incoming value of \p P whose incoming block is \p ParentBB is
+/// preferred. If \p P has no such incoming block, the incoming value whose
+/// incoming block is the latch of the loop that \p LI reports for \p ParentBB
+/// is used instead.
+///
+/// \returns the selected incoming value, or nullptr when neither block feeds
+/// \p P, \p ParentBB is not in a loop, or getLoopLatch() yields no block.
+///
+/// NOTE(review): nothing here checks that the returned value is dominated by
+/// \p P; confirm the callers cope with a value defined outside the loop body.
+static Value *getReductionValue(PHINode *P, BasicBlock *ParentBB,
+                                LoopInfo *LI) {
+  // Look the block up by identity rather than probing operands 0 and 1, so
+  // the helper does not rely on the caller having checked the operand count.
+  auto IncomingValueFrom = [P](const BasicBlock *Pred) -> Value * {
+    int Idx = P->getBasicBlockIndex(Pred);
+    return Idx >= 0 ? P->getIncomingValue(Idx) : nullptr;
+  };
+
+  if (Value *Rdx = IncomingValueFrom(ParentBB))
+    return Rdx;
+
+  // Otherwise, check whether we have a loop latch to look at.
+  Loop *BBL = LI->getLoopFor(ParentBB);
+  if (!BBL)
+    return nullptr;
+  BasicBlock *BBLatch = BBL->getLoopLatch();
+  if (!BBLatch)
+    return nullptr;
+
+  return IncomingValueFrom(BBLatch);
+}
+
 bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
   bool Changed = false;
   SmallVector<Value *, 4> Incoming;
@@ -4006,11 +4041,9 @@
       // Check that the PHI is a reduction PHI.
       if (P->getNumIncomingValues() != 2)
         return Changed;
-      Value *Rdx =
-          (P->getIncomingBlock(0) == BB
-               ? (P->getIncomingValue(0))
-               : (P->getIncomingBlock(1) == BB ? P->getIncomingValue(1)
-                                               : nullptr));
+
+      Value *Rdx = getReductionValue(P, BB, LI);
+
       // Check if this is a Binary Operator.
       BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx);
       if (!BI)
Index: test/Transforms/SLPVectorizer/AArch64/horizontal.ll
===================================================================
--- test/Transforms/SLPVectorizer/AArch64/horizontal.ll
+++ test/Transforms/SLPVectorizer/AArch64/horizontal.ll
@@ -71,3 +71,75 @@
   %s.0.lcssa = phi i32 [ 0, %entry ], [ %add27, %for.end.loopexit ]
   ret i32 %s.0.lcssa
 }
+
+; The reduction PHI %s.027 receives %add27 from %if.end.30, the block that
+; branches back to %for.body, rather than from %for.body itself.
+; CHECK-LABEL: select_with_br
+; CHECK: load <4 x i32>
+; CHECK: load <4 x i32>
+; CHECK: select <4 x i1>
+define i32 @select_with_br(i32* noalias nocapture readonly %blk1, i32* noalias nocapture readonly %blk2, i32 %lx, i32 %h, i32 %lim) #0 {
+entry:
+  %cmp.23 = icmp sgt i32 %h, 0
+  br i1 %cmp.23, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  %idx.ext = sext i32 %lx to i64
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %if.end.30
+  %s.027 = phi i32 [ 0, %for.body.lr.ph ], [ %add27, %if.end.30 ]
+  %j.026 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %if.end.30 ]
+  %p2.025 = phi i32* [ %blk2, %for.body.lr.ph ], [ %add.ptr32, %if.end.30 ]
+  %p1.024 = phi i32* [ %blk1, %for.body.lr.ph ], [ %add.ptr, %if.end.30 ]
+  %0 = load i32, i32* %p1.024, align 4
+  %1 = load i32, i32* %p2.025, align 4
+  %sub = sub nsw i32 %0, %1
+  %cmp2 = icmp slt i32 %sub, 0
+  %sub3 = sub nsw i32 0, %sub
+  %sub3.sub = select i1 %cmp2, i32 %sub3, i32 %sub
+  %add = add nsw i32 %sub3.sub, %s.027
+  %arrayidx4 = getelementptr inbounds i32, i32* %p1.024, i64 1
+  %2 = load i32, i32* %arrayidx4, align 4
+  %arrayidx5 = getelementptr inbounds i32, i32* %p2.025, i64 1
+  %3 = load i32, i32* %arrayidx5, align 4
+  %sub6 = sub nsw i32 %2, %3
+  %cmp7 = icmp slt i32 %sub6, 0
+  %sub9 = sub nsw i32 0, %sub6
+  %v.1 = select i1 %cmp7, i32 %sub9, i32 %sub6
+  %add11 = add nsw i32 %add, %v.1
+  %arrayidx12 = getelementptr inbounds i32, i32* %p1.024, i64 2
+  %4 = load i32, i32* %arrayidx12, align 4
+  %arrayidx13 = getelementptr inbounds i32, i32* %p2.025, i64 2
+  %5 = load i32, i32* %arrayidx13, align 4
+  %sub14 = sub nsw i32 %4, %5
+  %cmp15 = icmp slt i32 %sub14, 0
+  %sub17 = sub nsw i32 0, %sub14
+  %sub17.sub14 = select i1 %cmp15, i32 %sub17, i32 %sub14
+  %add19 = add nsw i32 %add11, %sub17.sub14
+  %arrayidx20 = getelementptr inbounds i32, i32* %p1.024, i64 3
+  %6 = load i32, i32* %arrayidx20, align 4
+  %arrayidx21 = getelementptr inbounds i32, i32* %p2.025, i64 3
+  %7 = load i32, i32* %arrayidx21, align 4
+  %sub22 = sub nsw i32 %6, %7
+  %cmp23 = icmp slt i32 %sub22, 0
+  %sub25 = sub nsw i32 0, %sub22
+  %v.3 = select i1 %cmp23, i32 %sub25, i32 %sub22
+  %add27 = add nsw i32 %add19, %v.3
+  %cmp28 = icmp slt i32 %add27, %lim
+  br i1 %cmp28, label %if.end.30, label %for.end.loopexit
+
+if.end.30:                                        ; preds = %for.body
+  %add.ptr = getelementptr inbounds i32, i32* %p1.024, i64 %idx.ext
+  %add.ptr32 = getelementptr inbounds i32, i32* %p2.025, i64 %idx.ext
+  %inc = add nuw nsw i32 %j.026, 1
+  %cmp = icmp slt i32 %inc, %h
+  br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:                                 ; preds = %for.body, %if.end.30
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  %s.1 = phi i32 [ 0, %entry ], [ %add27, %for.end.loopexit ]
+  ret i32 %s.1
+}