Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3952,8 +3952,7 @@ Type *ScalarTruncatedTy = IntegerType::get(OriginalTy->getContext(), KV.second); auto *TruncatedTy = VectorType::get( - ScalarTruncatedTy, - cast<VectorType>(OriginalTy)->getElementCount()); + ScalarTruncatedTy, cast<VectorType>(OriginalTy)->getElementCount()); if (TruncatedTy == OriginalTy) continue; Index: llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll +++ llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll @@ -1,8 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; ModuleID = 'bla2.ll' -; RUN: opt < %s -loop-vectorize -scalable-vectorization=preferred -S | FileCheck %s -target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" -target triple = "aarch64-unknown-linux-gnu" +; RUN: opt < %s -loop-vectorize -scalable-vectorization=preferred -force-target-supports-scalable-vectors -S | FileCheck %s define void @trunc_minimal_bitwidth(i8* %bptr, i16* %hptr, i32 %sptr, i64 %dptr) #0 { ; CHECK-LABEL: @trunc_minimal_bitwidth( @@ -16,7 +13,7 @@ ; CHECK-NEXT: br label [[FOR_BODY_PRE]] ; CHECK: for.body.pre: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 16 +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[DPTR]], [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: @@ -27,47 +24,29 @@ ; CHECK-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -;
CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 16 +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[DPTR]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[DPTR]], [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement poison, i32 [[SPTR:%.*]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector [[BROADCAST_SPLATINSERT6]], poison, zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement poison, i32 [[SPTR]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector [[BROADCAST_SPLATINSERT8]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement poison, i32 [[SPTR:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector [[BROADCAST_SPLATINSERT4]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8 -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 0 -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 1 -; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = load i8, i8* [[BPTR]], align 1, !alias.scope !0 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i8 [[TMP10]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = load i8, i8* [[BPTR]], align 1, !alias.scope !0 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement poison, i8 [[TMP11]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector [[BROADCAST_SPLATINSERT4]], poison, zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = zext [[BROADCAST_SPLAT]] to -; CHECK-NEXT: [[TMP13:%.*]] = zext [[BROADCAST_SPLAT5]] to -; CHECK-NEXT: 
[[TMP14:%.*]] = trunc [[BROADCAST_SPLAT7]] to -; CHECK-NEXT: [[TMP15:%.*]] = trunc [[BROADCAST_SPLAT9]] to -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i16, i16* [[HPTR]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, i16* [[HPTR]], i64 [[TMP9]] -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i16, i16* [[TMP16]], i32 0 -; CHECK-NEXT: [[TMP19:%.*]] = bitcast i16* [[TMP18]] to * -; CHECK-NEXT: store [[TMP14]], * [[TMP19]], align 2, !alias.scope !3, !noalias !0 -; CHECK-NEXT: [[TMP20:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP21:%.*]] = mul i32 [[TMP20]], 8 -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i16, i16* [[TMP16]], i32 [[TMP21]] -; CHECK-NEXT: [[TMP23:%.*]] = bitcast i16* [[TMP22]] to * -; CHECK-NEXT: store [[TMP15]], * [[TMP23]], align 2, !alias.scope !3, !noalias !0 -; CHECK-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP25:%.*]] = mul i64 [[TMP24]], 16 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP25]] -; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[TMP5:%.*]] = load i8, i8* [[BPTR]], align 1, !alias.scope !0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i8 [[TMP5]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = zext [[BROADCAST_SPLAT]] to +; CHECK-NEXT: [[TMP7:%.*]] = trunc [[BROADCAST_SPLAT5]] to +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[HPTR]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16* [[TMP9]] to * +; CHECK-NEXT: store [[TMP7]], * [[TMP10]], align 2, !alias.scope !3, !noalias !0 +; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP12:%.*]] = mul i64 
[[TMP11]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[DPTR]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_BODY_PRE_LOOPEXIT:%.*]], label [[SCALAR_PH]] @@ -76,8 +55,8 @@ ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP27:%.*]] = load i8, i8* [[BPTR]], align 1 -; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP27]] to i32 +; CHECK-NEXT: [[TMP14:%.*]] = load i8, i8* [[BPTR]], align 1 +; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP14]] to i32 ; CHECK-NEXT: [[CONV21:%.*]] = trunc i32 [[SPTR]] to i16 ; CHECK-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i16, i16* [[HPTR]], i64 [[INDVARS_IV]] ; CHECK-NEXT: store i16 [[CONV21]], i16* [[ARRAYIDX23]], align 2 @@ -99,8 +78,6 @@ %arrayidx23 = getelementptr inbounds i16, i16* %hptr, i64 %indvars.iv store i16 %conv21, i16* %arrayidx23, align 2 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond.not = icmp eq i64 %indvars.iv.next, %dptr + %exitcond.not = icmp eq i64 %indvars.iv.next, %dptr br i1 %exitcond.not, label %for.body.pre, label %for.body } - -attributes #0 = { "target-features"="+neon,+sve" }