Index: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3191,6 +3191,20 @@
       return;
     }
 
+    if (S.getOpcode() == Instruction::SExt ||
+        S.getOpcode() == Instruction::ZExt ||
+        S.getOpcode() == Instruction::Trunc) {
+      auto *ExtInst = cast<Instruction>(S.OpValue);
+      Instruction *Op = dyn_cast<Instruction>(ExtInst->getOperand(0));
+      if (Op && Op->getOpcode() == Instruction::ExtractElement &&
+          isa<ScalableVectorType>(
+              cast<ExtractElementInst>(Op)->getVectorOperandType())) {
+        LLVM_DEBUG(dbgs() << "SLP: Gathering due to scalable vector type.\n");
+        newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
+        return;
+      }
+    }
+
     // Don't handle vectors.
     if (S.OpValue->getType()->isVectorTy() &&
         !isa<InsertElementInst>(S.OpValue)) {
Index: llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll
===================================================================
--- llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll
+++ llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll
@@ -138,5 +138,62 @@
   ret <vscale x 16 x i8> %ins4
 }
 
+define void @sext_scalable_extractelement() {
+; CHECK-LABEL: @sext_scalable_extractelement(
+; CHECK-NEXT: [[X0:%.*]] = extractelement <vscale x 4 x i32> undef, i32 undef
+; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[X0]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, i64* undef, i64 [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <vscale x 4 x i32> undef, i32 undef
+; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, i64* undef, i64 [[TMP4]]
+; CHECK-NEXT: ret void
+;
+  %x0 = extractelement <vscale x 4 x i32> undef, i32 undef
+  %1 = sext i32 %x0 to i64
+  %2 = getelementptr inbounds i64, i64* undef, i64 %1
+  %3 = extractelement <vscale x 4 x i32> undef, i32 undef
+  %4 = sext i32 %3 to i64
+  %5 = getelementptr inbounds i64, i64* undef, i64 %4
+  ret void
+}
+
+define void @zext_scalable_extractelement() {
+; CHECK-LABEL: @zext_scalable_extractelement(
+; CHECK-NEXT: [[X0:%.*]] = extractelement <vscale x 4 x i32> undef, i32 undef
+; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[X0]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, i64* undef, i64 [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <vscale x 4 x i32> undef, i32 undef
+; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, i64* undef, i64 [[TMP4]]
+; CHECK-NEXT: ret void
+;
+  %x0 = extractelement <vscale x 4 x i32> undef, i32 undef
+  %1 = zext i32 %x0 to i64
+  %2 = getelementptr inbounds i64, i64* undef, i64 %1
+  %3 = extractelement <vscale x 4 x i32> undef, i32 undef
+  %4 = zext i32 %3 to i64
+  %5 = getelementptr inbounds i64, i64* undef, i64 %4
+  ret void
+}
+
+define void @trunc_scalable_extractelement() {
+; CHECK-LABEL: @trunc_scalable_extractelement(
+; CHECK-NEXT: [[X0:%.*]] = extractelement <vscale x 2 x i64> undef, i32 undef
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[X0]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* undef, i32 [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <vscale x 2 x i64> undef, i32 undef
+; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* undef, i32 [[TMP4]]
+; CHECK-NEXT: ret void
+;
+  %x0 = extractelement <vscale x 2 x i64> undef, i32 undef
+  %1 = trunc i64 %x0 to i32
+  %2 = getelementptr inbounds i32, i32* undef, i32 %1
+  %3 = extractelement <vscale x 2 x i64> undef, i32 undef
+  %4 = trunc i64 %3 to i32
+  %5 = getelementptr inbounds i32, i32* undef, i32 %4
+  ret void
+}
+
 declare <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>*, i32 immarg, <vscale x 16 x i1>, <vscale x 16 x i8>)
 declare void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>*, i32 immarg, <vscale x 16 x i1>)