Index: llvm/include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfo.h +++ llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1324,6 +1324,9 @@ bool isLegalToVectorizeReduction(RecurrenceDescriptor RdxDesc, ElementCount VF) const; + /// \returns True if the given type is supported for scalable vectors + bool isElementTypeLegalForScalableVector(Type *Ty) const; + /// \returns The new vector factor value if the target doesn't support \p /// SizeInBytes loads or has a better vector factor. unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, @@ -1707,6 +1710,7 @@ unsigned AddrSpace) const = 0; virtual bool isLegalToVectorizeReduction(RecurrenceDescriptor RdxDesc, ElementCount VF) const = 0; + virtual bool isElementTypeLegalForScalableVector(Type *Ty) const = 0; virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const = 0; @@ -2258,6 +2262,9 @@ ElementCount VF) const override { return Impl.isLegalToVectorizeReduction(RdxDesc, VF); } + bool isElementTypeLegalForScalableVector(Type *Ty) const override { + return Impl.isElementTypeLegalForScalableVector(Ty); + } unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const override { Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -720,6 +720,8 @@ return true; } + bool isElementTypeLegalForScalableVector(Type *Ty) const { return true; } + unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const { Index: llvm/lib/Analysis/TargetTransformInfo.cpp =================================================================== --- 
llvm/lib/Analysis/TargetTransformInfo.cpp +++ llvm/lib/Analysis/TargetTransformInfo.cpp @@ -1003,6 +1003,10 @@ return TTIImpl->isLegalToVectorizeReduction(RdxDesc, VF); } +bool TargetTransformInfo::isElementTypeLegalForScalableVector(Type *Ty) const { + return TTIImpl->isElementTypeLegalForScalableVector(Ty); +} + unsigned TargetTransformInfo::getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h =================================================================== --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -206,7 +206,7 @@ bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info); - bool isLegalElementTypeForSVE(Type *Ty) const { + bool isElementTypeLegalForScalableVector(Type *Ty) const { if (Ty->isPointerTy()) return true; @@ -231,7 +231,7 @@ if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors()) return false; // Fall back to scalarization of masked operations. 
- return isLegalElementTypeForSVE(DataType->getScalarType()); + return isElementTypeLegalForScalableVector(DataType->getScalarType()); } bool isLegalMaskedLoad(Type *DataType, Align Alignment) { @@ -246,7 +246,7 @@ if (isa<FixedVectorType>(DataType) || !ST->hasSVE()) return false; - return isLegalElementTypeForSVE(DataType->getScalarType()); + return isElementTypeLegalForScalableVector(DataType->getScalarType()); } bool isLegalMaskedGather(Type *DataType, Align Alignment) const { Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1645,7 +1645,7 @@ return true; Type *Ty = RdxDesc.getRecurrenceType(); - if (Ty->isBFloatTy() || !isLegalElementTypeForSVE(Ty)) + if (Ty->isBFloatTy() || !isElementTypeLegalForScalableVector(Ty)) return false; switch (RdxDesc.getRecurrenceKind()) { Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1513,13 +1513,18 @@ /// Returns true if the target machine supports all of the reduction /// variables found for the given VF. - bool canVectorizeReductions(ElementCount VF) { + bool canVectorizeReductions(ElementCount VF) const { return (all_of(Legal->getReductionVars(), [&](auto &Reduction) -> bool { RecurrenceDescriptor RdxDesc = Reduction.second; return TTI.isLegalToVectorizeReduction(RdxDesc, VF); })); } + /// Returns true if we can widen all instructions in the loop using a maximum + /// scalable vectorization factor MaxVF. If the loop cannot be widened, the + /// function reports the reason via a vectorization remark and returns false. + bool canWidenLoopWithScalableVectors(ElementCount MaxVF) const; + /// Returns true if \p I is an instruction that will be scalarized with /// predication. 
Such instructions include conditional stores and /// instructions that may divide by zero. @@ -5661,6 +5666,34 @@ return false; } +bool LoopVectorizationCostModel::canWidenLoopWithScalableVectors( + ElementCount MaxVF) const { + // Disable scalable vectorization if the loop contains unsupported + // reductions. + if (!canVectorizeReductions(MaxVF)) { + reportVectorizationInfo("Scalable vectorization not supported for the " + "reduction operations found in this loop.", + "ScalableVFUnfeasible", ORE, TheLoop); + return false; + } + + // Disable scalable vectorization if the loop contains any instructions + // with element types not supported for scalable vectors. + for (BasicBlock *BB : TheLoop->blocks()) + for (Instruction &I : BB->instructionsWithoutDebug()) { + auto *Ty = I.getType(); + if (!Ty->isVoidTy() && !Ty->isIntegerTy(1) && + !TTI.isElementTypeLegalForScalableVector(Ty)) { + reportVectorizationInfo("Scalable vectorization is not supported " + "for all element types found in this loop.", + "ScalableVFUnfeasible", ORE, TheLoop); + return false; + } + } + + return true; +} + ElementCount LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) { if (!TTI.supportsScalableVectors() && !ForceTargetSupportsScalableVectors) { @@ -5686,13 +5719,9 @@ // be replaced by a more detailed mechanism that filters out specific VFs, // instead of invalidating vectorization for a whole set of VFs based on the // MaxVF. 
- if (!canVectorizeReductions(MaxScalableVF)) { - reportVectorizationInfo( - "Scalable vectorization not supported for the reduction " - "operations found in this loop.", - "ScalableVFUnfeasible", ORE, TheLoop); + + if (!canWidenLoopWithScalableVectors(MaxScalableVF)) return ElementCount::getScalable(0); - } if (Legal->isSafeForAnyVectorWidth()) return MaxScalableVF; Index: llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll +++ llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize -pass-remarks-missed=loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S -scalable-vectorization=on 2>%t | FileCheck %s -check-prefix=CHECK +; RUN: opt < %s -loop-vectorize -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize -pass-remarks-missed=loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve,+bf16 -S -scalable-vectorization=on 2>%t | FileCheck %s -check-prefix=CHECK ; RUN: cat %t | FileCheck %s -check-prefix=CHECK-REMARK ; Reduction can be vectorized Index: llvm/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll @@ -0,0 +1,82 @@ +; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mattr=+sve -force-vector-width=4 -pass-remarks-analysis=loop-vectorize -S 2>%t | FileCheck %s +; RUN: cat %t | FileCheck %s -check-prefix=CHECK-REMARKS +target triple = "aarch64-linux-gnu" + +; CHECK-REMARKS: Scalable vectorization is not supported for all element types found in this loop +define dso_local void @loop_sve_i128(i128* nocapture %ptr, i64 %N) { +; CHECK-LABEL: @loop_sve_i128 +; CHECK: vector.body +; CHECK: load i128, 
i128* {{.*}} +; CHECK-NEXT: load i128, i128* {{.*}} +; CHECK-NEXT: add nsw i128 {{.*}}, 42 +; CHECK-NEXT: add nsw i128 {{.*}}, 42 +; CHECK-NEXT: store i128 %4, i128* {{.*}} +; CHECK-NEXT: store i128 %5, i128* {{.*}} +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds i128, i128* %ptr, i64 %iv + %0 = load i128, i128* %arrayidx, align 16 + %add = add nsw i128 %0, 42 + store i128 %add, i128* %arrayidx, align 16 + %iv.next = add i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %N + br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 + +for.end: + ret void +} + +; CHECK-REMARKS: Scalable vectorization is not supported for all element types found in this loop +define dso_local void @loop_sve_f128(fp128* nocapture %ptr, i64 %N) { +; CHECK-LABEL: @loop_sve_f128 +; CHECK: vector.body +; CHECK: load fp128, fp128* +; CHECK-NEXT: load fp128, fp128* +; CHECK-NEXT: fsub fp128 {{.*}}, 0xL00000000000000008000000000000000 +; CHECK-NEXT: fsub fp128 {{.*}}, 0xL00000000000000008000000000000000 +; CHECK-NEXT: store fp128 %4, fp128* {{.*}} +; CHECK-NEXT: store fp128 %5, fp128* {{.*}} +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds fp128, fp128* %ptr, i64 %iv + %0 = load fp128, fp128* %arrayidx, align 16 + %add = fsub fp128 %0, 0xL00000000000000008000000000000000 + store fp128 %add, fp128* %arrayidx, align 16 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %N + br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 + +for.end: + ret void +} + +define dso_local void @loop_fixed_width_i128(i128* nocapture %ptr, i64 %N) { +; CHECK-LABEL: @loop_fixed_width_i128 +; CHECK:load <4 x i128>, <4 x i128>* +; CHECK: add nsw <4 x i128> {{.*}}, +; CHECK: store <4 x i128> {{.*}} <4 x i128>* +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, 
%for.body ] + %arrayidx = getelementptr inbounds i128, i128* %ptr, i64 %iv + %0 = load i128, i128* %arrayidx, align 16 + %add = add nsw i128 %0, 42 + store i128 %add, i128* %arrayidx, align 16 + %iv.next = add i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %N + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + ret void +} + +!0 = distinct !{!0, !1} +!1 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}