Patch summary (free text ahead of the first "diff --git" header).

Treat a part count of 0 from getNumberOfParts() as "this type has no valid
legalization cost" and make the loop vectorizer refuse such vector types.

  * TargetTransformInfoImpl.h: the default getNumberOfParts() now returns 1
    instead of 0, so the default implementation no longer reports 0.
  * BasicTTIImpl.h: getNumberOfParts() returns 0 when the type legalization
    cost is invalid instead of unconditionally dereferencing getValue().
  * LoopVectorize.cpp: when the part count of a vector type is 0 the
    instruction cost becomes InstructionCost::getInvalid(); otherwise
    TypeNotScalarized is computed from the part count as before.
  * New AArch64 SVE test with an i7 induction variable: the debug output must
    report an Invalid cost at VF vscale x 1, and the CHECK lines expect the
    loop to be vectorized with <vscale x 2 x ...> types.

NOTE(review): this patch was recovered from a whitespace-collapsed copy in
which angle-bracketed text had been stripped. The scalable vector types in
the CHECK lines were re-derived from the intrinsic suffix (nxv2i8), the i7
splat element and the vscale * 2 index step. The std::pair template
arguments and the leading indentation of context lines were reconstructed
and should be confirmed against the target tree before applying.

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -632,7 +632,8 @@
     return 1;
   }
 
-  unsigned getNumberOfParts(Type *Tp) const { return 0; }
+  // Assume that we have a register of the right size for the type.
+  unsigned getNumberOfParts(Type *Tp) const { return 1; }
 
   InstructionCost getAddressComputationCost(Type *Tp, ScalarEvolution *,
                                             const SCEV *) const {
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2026,7 +2026,7 @@
   unsigned getNumberOfParts(Type *Tp) {
     std::pair<InstructionCost, MVT> LT =
         getTLI()->getTypeLegalizationCost(DL, Tp);
-    return *LT.first.getValue();
+    return LT.first.isValid() ? *LT.first.getValue() : 0;
   }
 
   InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *,
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7428,9 +7428,14 @@
   Type *VectorTy;
   InstructionCost C = getInstructionCost(I, VF, VectorTy);
 
-  bool TypeNotScalarized =
-      VF.isVector() && VectorTy->isVectorTy() &&
-      TTI.getNumberOfParts(VectorTy) < VF.getKnownMinValue();
+  bool TypeNotScalarized = false;
+  if (VF.isVector() && VectorTy->isVectorTy()) {
+    unsigned NumParts = TTI.getNumberOfParts(VectorTy);
+    if (NumParts)
+      TypeNotScalarized = NumParts < VF.getKnownMinValue();
+    else
+      C = InstructionCost::getInvalid();
+  }
   return VectorizationCostTy(C, TypeNotScalarized);
 }
 
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll
@@ -0,0 +1,51 @@
+; REQUIRES: asserts
+; RUN: opt -scalable-vectorization=on -loop-vectorize -S < %s -debug 2>%t | FileCheck %s
+; RUN: cat %t | FileCheck %s --check-prefix=DEBUG
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; DEBUG: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %indvars.iv1294 = phi i7 [ %indvars.iv.next1295, %for.body ], [ 0, %entry ]
+; DEBUG: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %addi7 = add i7 %indvars.iv1294, 0
+; DEBUG: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %indvars.iv.next1295 = add i7 %indvars.iv1294, 1
+
+define void @induction_i7(i64* %dst) #0 {
+; CHECK-LABEL: @induction_i7(
+; CHECK:       vector.ph:
+; CHECK:         [[TMP4:%.*]] = call <vscale x 2 x i8> @llvm.experimental.stepvector.nxv2i8()
+; CHECK:         [[TMP5:%.*]] = trunc <vscale x 2 x i8> %4 to <vscale x 2 x i7>
+; CHECK-NEXT:    [[TMP6:%.*]] = add <vscale x 2 x i7> [[TMP5]], zeroinitializer
+; CHECK-NEXT:    [[TMP7:%.*]] = mul <vscale x 2 x i7> [[TMP6]], shufflevector (<vscale x 2 x i7> insertelement (<vscale x 2 x i7> poison, i7 1, i32 0), <vscale x 2 x i7> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[INDUCTION:%.*]] = add <vscale x 2 x i7> zeroinitializer, [[TMP7]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 2 x i7> [ [[INDUCTION]], %vector.ph ], [ [[VEC_IND_NEXT:%.*]], %vector.body ]
+; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP11:%.*]] = add <vscale x 2 x i7> [[VEC_IND]], zeroinitializer
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, i64* [[DST:%.*]], i64 [[TMP10]]
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, i64* [[TMP12]], i32 0
+; CHECK-NEXT:    [[TMP14:%.*]] = bitcast i64* [[TMP13]] to <vscale x 2 x i64>*
+; CHECK-NEXT:    store <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64>* [[TMP14]], align 8
+; CHECK-NEXT:    [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP16:%.*]] = mul i64 [[TMP15]], 2
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]]
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 2 x i7> [[VEC_IND]],
+;
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv1294 = phi i7 [ %indvars.iv.next1295, %for.body ], [ 0, %entry ]
+  %indvars.iv1286 = phi i64 [ %indvars.iv.next1287, %for.body ], [ 0, %entry ]
+  %addi7 = add i7 %indvars.iv1294, 0
+  %arrayidx = getelementptr inbounds i64, i64* %dst, i64 %indvars.iv1286
+  store i64 0, i64* %arrayidx, align 8
+  %indvars.iv.next1287 = add nuw nsw i64 %indvars.iv1286, 1
+  %indvars.iv.next1295 = add i7 %indvars.iv1294, 1
+  %exitcond = icmp eq i64 %indvars.iv.next1287, 64
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+attributes #0 = {"target-features"="+sve"}