SLP vectorizer: gather extractelements whose source is a scalable vector.

SLPVectorizer.cpp, first hunk: a bundle whose opcode is ExtractElement and
whose vector operand has a scalable vector type gets a "not vectorized" tree
entry and the function returns early.
SLPVectorizer.cpp, second hunk: the shuffle kind is computed with
isGatherShuffledEntry(E, Mask, Entries) when the main op extracts from a
scalable vector, and with isShuffle(VL, Mask) otherwise.
scalable-vector.ll: adds @scalable_phi1, @scalable_phi2,
@build_vec_v4i32_reuse_0 and @shuffle; the CHECK lines mirror the input IR.

NOTE(review): this patch text was recovered from a copy in which line breaks
and every <...> span beginning with a letter had been stripped. Template
arguments, the <vscale x N x T> types (in particular the element count in
<vscale x 8 x i16>) and the indentation of context lines were restored from
context and should be confirmed against the upstream revision.

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2676,6 +2676,15 @@
     return;
   }
 
+  // Don't handle scalable vectors
+  if (S.getOpcode() == Instruction::ExtractElement &&
+      isa<ScalableVectorType>(
+          cast<ExtractElementInst>(S.OpValue)->getOperand(0)->getType())) {
+    LLVM_DEBUG(dbgs() << "SLP: Gathering due to scalable vector type.\n");
+    newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
+    return;
+  }
+
   // Don't handle vectors.
   if (S.OpValue->getType()->isVectorTy() &&
       !isa<InsertElementInst>(S.OpValue)) {
@@ -3742,8 +3751,13 @@
       // Check that gather of extractelements can be represented as just a
       // shuffle of a single/two vectors the scalars are extracted from.
       SmallVector<int> Mask;
-      Optional<TargetTransformInfo::ShuffleKind> ShuffleKind =
-          isShuffle(VL, Mask);
+      Optional<TargetTransformInfo::ShuffleKind> ShuffleKind;
+      if (isa<ScalableVectorType>((cast<ExtractElementInst>(E->getMainOp()))
+                                      ->getVectorOperandType()))
+        ShuffleKind = isGatherShuffledEntry(E, Mask, Entries);
+      else
+        ShuffleKind = isShuffle(VL, Mask);
+
       if (ShuffleKind.hasValue()) {
         // Found the bunch of extractelement instructions that must be gathered
         // into a vector and can be represented as a permutation elements in a
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll
@@ -44,5 +44,99 @@
   ret <vscale x 16 x i8> %retval
 }
 
+define void @scalable_phi1() {
+; CHECK-LABEL: @scalable_phi1(
+; CHECK-NEXT:  middle.block:
+; CHECK-NEXT:    [[EXTRACT1:%.*]] = extractelement <vscale x 8 x i16> undef, i32 undef
+; CHECK-NEXT:    [[EXTRACT2:%.*]] = extractelement <vscale x 8 x i16> undef, i32 undef
+; CHECK-NEXT:    br label [[FOR_BODY_I:%.*]]
+; CHECK:       for.body.i:
+; CHECK-NEXT:    [[RECUR1:%.*]] = phi i16 [ [[EXTRACT1]], [[MIDDLE_BLOCK:%.*]] ], [ undef, [[FOR_BODY_I]] ]
+; CHECK-NEXT:    [[RECUR2:%.*]] = phi i16 [ [[EXTRACT2]], [[MIDDLE_BLOCK]] ], [ undef, [[FOR_BODY_I]] ]
+; CHECK-NEXT:    br label [[FOR_BODY_I]]
+;
+middle.block:
+  %extract1 = extractelement <vscale x 8 x i16> undef, i32 undef
+  %extract2 = extractelement <vscale x 8 x i16> undef, i32 undef
+  br label %for.body.i
+
+for.body.i:                                       ; preds = %for.body.i, %middle.block
+  %recur1 = phi i16 [ %extract1, %middle.block ], [ undef, %for.body.i ]
+  %recur2 = phi i16 [ %extract2, %middle.block ], [ undef, %for.body.i ]
+  br label %for.body.i
+}
+
+define void @scalable_phi2() {
+; CHECK-LABEL: @scalable_phi2(
+; CHECK-NEXT:  middle.block:
+; CHECK-NEXT:    [[EXTRACT1:%.*]] = extractelement <vscale x 8 x i16> undef, i32 undef
+; CHECK-NEXT:    [[EXTRACT2:%.*]] = extractelement <vscale x 8 x i16> undef, i32 undef
+; CHECK-NEXT:    br label [[FOR_BODY_I:%.*]]
+; CHECK:       for.body.i:
+; CHECK-NEXT:    [[RECUR1:%.*]] = phi i16 [ undef, [[FOR_BODY_I]] ], [ [[EXTRACT1]], [[MIDDLE_BLOCK:%.*]] ]
+; CHECK-NEXT:    [[RECUR2:%.*]] = phi i16 [ undef, [[FOR_BODY_I]] ], [ [[EXTRACT2]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    br label [[FOR_BODY_I]]
+;
+middle.block:
+  %extract1 = extractelement <vscale x 8 x i16> undef, i32 undef
+  %extract2 = extractelement <vscale x 8 x i16> undef, i32 undef
+  br label %for.body.i
+
+for.body.i:                                       ; preds = %for.body.i, %middle.block
+  %recur1 = phi i16 [ undef, %for.body.i ], [ %extract1, %middle.block ]
+  %recur2 = phi i16 [ undef, %for.body.i ], [ %extract2, %middle.block ]
+  br label %for.body.i
+}
+
+define <vscale x 4 x i32> @build_vec_v4i32_reuse_0(<vscale x 4 x i32> %v0) {
+; CHECK-LABEL: @build_vec_v4i32_reuse_0(
+; CHECK-NEXT:    [[V0_0:%.*]] = extractelement <vscale x 4 x i32> [[V0:%.*]], i32 0
+; CHECK-NEXT:    [[V0_1:%.*]] = extractelement <vscale x 4 x i32> [[V0]], i32 1
+; CHECK-NEXT:    [[TMP0_0:%.*]] = add i32 [[V0_0]], [[V0_0]]
+; CHECK-NEXT:    [[TMP1_0:%.*]] = sub i32 [[V0_0]], [[V0_1]]
+; CHECK-NEXT:    [[TMP2_0:%.*]] = add i32 [[TMP0_0]], [[TMP1_0]]
+; CHECK-NEXT:    [[TMP3_0:%.*]] = insertelement <vscale x 4 x i32> undef, i32 [[TMP2_0]], i32 0
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3_0]]
+;
+  %v0.0 = extractelement <vscale x 4 x i32> %v0, i32 0
+  %v0.1 = extractelement <vscale x 4 x i32> %v0, i32 1
+  %tmp0.0 = add i32 %v0.0, %v0.0
+  %tmp1.0 = sub i32 %v0.0, %v0.1
+  %tmp2.0 = add i32 %tmp0.0, %tmp1.0
+  %tmp3.0 = insertelement <vscale x 4 x i32> undef, i32 %tmp2.0, i32 0
+  ret <vscale x 4 x i32> %tmp3.0
+}
+
+define <vscale x 4 x i8> @shuffle(<4 x i8> %x, <4 x i8> %y) {
+; CHECK-LABEL: @shuffle(
+; CHECK-NEXT:    [[X0:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 0
+; CHECK-NEXT:    [[X3:%.*]] = extractelement <4 x i8> [[X]], i32 3
+; CHECK-NEXT:    [[Y1:%.*]] = extractelement <4 x i8> [[Y:%.*]], i32 1
+; CHECK-NEXT:    [[Y2:%.*]] = extractelement <4 x i8> [[Y]], i32 2
+; CHECK-NEXT:    [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]
+; CHECK-NEXT:    [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]
+; CHECK-NEXT:    [[Y1Y1:%.*]] = mul i8 [[Y1]], [[Y1]]
+; CHECK-NEXT:    [[Y2Y2:%.*]] = mul i8 [[Y2]], [[Y2]]
+; CHECK-NEXT:    [[INS1:%.*]] = insertelement <vscale x 4 x i8> poison, i8 [[X0X0]], i32 0
+; CHECK-NEXT:    [[INS2:%.*]] = insertelement <vscale x 4 x i8> [[INS1]], i8 [[X3X3]], i32 1
+; CHECK-NEXT:    [[INS3:%.*]] = insertelement <vscale x 4 x i8> [[INS2]], i8 [[Y1Y1]], i32 2
+; CHECK-NEXT:    [[INS4:%.*]] = insertelement <vscale x 4 x i8> [[INS3]], i8 [[Y2Y2]], i32 3
+; CHECK-NEXT:    ret <vscale x 4 x i8> [[INS4]]
+;
+  %x0 = extractelement <4 x i8> %x, i32 0
+  %x3 = extractelement <4 x i8> %x, i32 3
+  %y1 = extractelement <4 x i8> %y, i32 1
+  %y2 = extractelement <4 x i8> %y, i32 2
+  %x0x0 = mul i8 %x0, %x0
+  %x3x3 = mul i8 %x3, %x3
+  %y1y1 = mul i8 %y1, %y1
+  %y2y2 = mul i8 %y2, %y2
+  %ins1 = insertelement <vscale x 4 x i8> poison, i8 %x0x0, i32 0
+  %ins2 = insertelement <vscale x 4 x i8> %ins1, i8 %x3x3, i32 1
+  %ins3 = insertelement <vscale x 4 x i8> %ins2, i8 %y1y1, i32 2
+  %ins4 = insertelement <vscale x 4 x i8> %ins3, i8 %y2y2, i32 3
+  ret <vscale x 4 x i8> %ins4
+}
+
 declare <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>*, i32 immarg, <vscale x 16 x i1>, <vscale x 16 x i8>)
 declare void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>*, i32 immarg, <vscale x 16 x i1>)