diff --git a/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h b/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
--- a/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
@@ -93,6 +93,14 @@
   /// Try to vectorize a chain that starts at two arithmetic instrs.
   bool tryToVectorizePair(Value *A, Value *B, slpvectorizer::BoUpSLP &R);
 
+  /// Returns true when \p Phi has an incoming value defined by an
+  /// instruction with a scalable vector type among its operands.
+  bool phiNodeHasScalableOp(PHINode *Phi);
+
+  /// Returns true when \p BB contains an instruction with a scalable
+  /// vector type among its operands.
+  bool basicBlockHasScalableOp(BasicBlock *BB);
+
   /// Try to vectorize a list of operands.
   /// \returns true if a value was vectorized.
   bool tryToVectorizeList(ArrayRef<Value *> VL, slpvectorizer::BoUpSLP &R,
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6759,7 +6759,8 @@
   }
 
   // Vectorize trees that end at reductions.
-  Changed |= vectorizeChainsInBlock(BB, R);
+  if (!basicBlockHasScalableOp(BB))
+    Changed |= vectorizeChainsInBlock(BB, R);
 
   // Vectorize the index computations of getelementptr instructions. This
   // is primarily intended to catch gather-like idioms ending at
@@ -7041,6 +7042,23 @@
   return tryToVectorizeList(VL, R, /*AllowReorder=*/true);
 }
 
+bool SLPVectorizerPass::basicBlockHasScalableOp(BasicBlock *BB) {
+  for (Instruction &I : *BB)
+    for (Value *Op : I.operands())
+      if (isa<ScalableVectorType>(Op->getType()))
+        return true;
+  return false;
+}
+
+bool SLPVectorizerPass::phiNodeHasScalableOp(PHINode *Phi) {
+  for (Value *V : Phi->incoming_values())
+    if (auto *I = dyn_cast<Instruction>(V))
+      for (Value *Op : I->operands())
+        if (isa<ScalableVectorType>(Op->getType()))
+          return true;
+  return false;
+}
+
 bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
                                            bool AllowReorder) {
   if (VL.size() < 2)
@@ -8294,14 +8312,14 @@
   bool HaveVectorizedPhiNodes = true;
   while (HaveVectorizedPhiNodes) {
     HaveVectorizedPhiNodes = false;
-
+    bool HasScalableVector = false;
     // Collect the incoming values from the PHIs.
     Incoming.clear();
     for (Instruction &I : *BB) {
       PHINode *P = dyn_cast<PHINode>(&I);
       if (!P)
         break;
-
+      HasScalableVector |= phiNodeHasScalableOp(P);
       if (!VisitedInstrs.count(P) && !R.isDeleted(P))
         Incoming.push_back(P);
     }
@@ -8326,17 +8344,21 @@
       unsigned NumElts = (SameTypeIt - IncIt);
       LLVM_DEBUG(dbgs() << "SLP: Trying to vectorize starting at PHIs ("
                         << NumElts << ")\n");
-      // The order in which the phi nodes appear in the program does not matter.
-      // So allow tryToVectorizeList to reorder them if it is beneficial. This
-      // is done when there are exactly two elements since tryToVectorizeList
-      // asserts that there are only two values when AllowReorder is true.
-      if (NumElts > 1 && tryToVectorizeList(makeArrayRef(IncIt, NumElts), R,
-                                            /*AllowReorder=*/true)) {
-        // Success start over because instructions might have been changed.
-        HaveVectorizedPhiNodes = true;
-        Changed = true;
-        break;
-      }
+      // Skip PHIs whose incoming values are defined by instructions with
+      // scalable vector operands; SLP only handles fixed-width vectors.
+      // The order in which the phi nodes appear in the program does not
+      // matter. So allow tryToVectorizeList to reorder them if it is
+      // beneficial. This is done when there are exactly two elements since
+      // tryToVectorizeList asserts that there are only two values when
+      // AllowReorder is true.
+      if (!HasScalableVector && NumElts > 1 &&
+          tryToVectorizeList(makeArrayRef(IncIt, NumElts), R,
+                             /*AllowReorder=*/true)) {
+        // Success: start over because instructions might have been changed.
+        HaveVectorizedPhiNodes = true;
+        Changed = true;
+        break;
+      }
 
       // Start over at the next instruction of a different type (or the end).
       IncIt = SameTypeIt;
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll
@@ -44,5 +44,68 @@
   ret <vscale x 16 x i8> %retval
 }
 
+define void @scalable_phi1() {
+; CHECK-LABEL: @scalable_phi1(
+; CHECK-NEXT:  middle.block:
+; CHECK-NEXT:    [[EXTRACT1:%.*]] = extractelement <vscale x 8 x i16> undef, i32 undef
+; CHECK-NEXT:    [[EXTRACT2:%.*]] = extractelement <vscale x 8 x i16> undef, i32 undef
+; CHECK-NEXT:    br label [[FOR_BODY_I:%.*]]
+; CHECK:       for.body.i:
+; CHECK-NEXT:    [[RECUR1:%.*]] = phi i16 [ [[EXTRACT1]], [[MIDDLE_BLOCK:%.*]] ], [ undef, [[FOR_BODY_I]] ]
+; CHECK-NEXT:    [[RECUR2:%.*]] = phi i16 [ [[EXTRACT2]], [[MIDDLE_BLOCK]] ], [ undef, [[FOR_BODY_I]] ]
+; CHECK-NEXT:    br label [[FOR_BODY_I]]
+;
+middle.block:
+  %extract1 = extractelement <vscale x 8 x i16> undef, i32 undef
+  %extract2 = extractelement <vscale x 8 x i16> undef, i32 undef
+  br label %for.body.i
+
+for.body.i:                                       ; preds = %for.body.i, %middle.block
+  %recur1 = phi i16 [ %extract1, %middle.block ], [ undef, %for.body.i ]
+  %recur2 = phi i16 [ %extract2, %middle.block ], [ undef, %for.body.i ]
+  br label %for.body.i
+}
+
+define void @scalable_phi2() {
+; CHECK-LABEL: @scalable_phi2(
+; CHECK-NEXT:  middle.block:
+; CHECK-NEXT:    [[EXTRACT1:%.*]] = extractelement <vscale x 8 x i16> undef, i32 undef
+; CHECK-NEXT:    [[EXTRACT2:%.*]] = extractelement <vscale x 8 x i16> undef, i32 undef
+; CHECK-NEXT:    br label [[FOR_BODY_I:%.*]]
+; CHECK:       for.body.i:
+; CHECK-NEXT:    [[RECUR1:%.*]] = phi i16 [ undef, [[FOR_BODY_I]] ], [ [[EXTRACT1]], [[MIDDLE_BLOCK:%.*]] ]
+; CHECK-NEXT:    [[RECUR2:%.*]] = phi i16 [ undef, [[FOR_BODY_I]] ], [ [[EXTRACT2]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    br label [[FOR_BODY_I]]
+;
+middle.block:
+  %extract1 = extractelement <vscale x 8 x i16> undef, i32 undef
+  %extract2 = extractelement <vscale x 8 x i16> undef, i32 undef
+  br label %for.body.i
+
+for.body.i:                                       ; preds = %for.body.i, %middle.block
+  %recur1 = phi i16 [ undef, %for.body.i ], [ %extract1, %middle.block ]
+  %recur2 = phi i16 [ undef, %for.body.i ], [ %extract2, %middle.block ]
+  br label %for.body.i
+}
+
+define <vscale x 4 x i32> @build_vec_v4i32_reuse_0(<vscale x 4 x i32> %v0) {
+; CHECK-LABEL: @build_vec_v4i32_reuse_0(
+; CHECK-NEXT:    [[V0_0:%.*]] = extractelement <vscale x 4 x i32> [[V0:%.*]], i32 0
+; CHECK-NEXT:    [[V0_1:%.*]] = extractelement <vscale x 4 x i32> [[V0]], i32 1
+; CHECK-NEXT:    [[TMP0_0:%.*]] = add i32 [[V0_0]], [[V0_0]]
+; CHECK-NEXT:    [[TMP1_0:%.*]] = sub i32 [[V0_0]], [[V0_1]]
+; CHECK-NEXT:    [[TMP2_0:%.*]] = add i32 [[TMP0_0]], [[TMP1_0]]
+; CHECK-NEXT:    [[TMP3_0:%.*]] = insertelement <vscale x 4 x i32> undef, i32 [[TMP2_0]], i32 0
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3_0]]
+;
+  %v0.0 = extractelement <vscale x 4 x i32> %v0, i32 0
+  %v0.1 = extractelement <vscale x 4 x i32> %v0, i32 1
+  %tmp0.0 = add i32 %v0.0, %v0.0
+  %tmp1.0 = sub i32 %v0.0, %v0.1
+  %tmp2.0 = add i32 %tmp0.0, %tmp1.0
+  %tmp3.0 = insertelement <vscale x 4 x i32> undef, i32 %tmp2.0, i32 0
+  ret <vscale x 4 x i32> %tmp3.0
+}
+
 declare <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>*, i32 immarg, <vscale x 16 x i1>, <vscale x 16 x i8>)
 declare void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>*, i32 immarg, <vscale x 16 x i1>)