Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2571,7 +2571,7 @@
   // compare. The only way that we get a backedge taken count is that the
   // induction variable was signed and as such will not overflow. In such a case
   // truncation is legal.
-  if (BackedgeTakenCount->getType()->getPrimitiveSizeInBits() >
+  if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) >
       IdxTy->getPrimitiveSizeInBits())
     BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount, IdxTy);
   BackedgeTakenCount = SE->getNoopOrZeroExtend(BackedgeTakenCount, IdxTy);
Index: llvm/test/Transforms/LoopVectorize/pr45259.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/pr45259.ll
@@ -0,0 +1,156 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -loop-vectorize -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%ty = type { i8, i8 }
+
+define dso_local void @widget() local_unnamed_addr {
+; CHECK-LABEL: @widget(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[T:%.*]] = alloca [[TY:%.*]], align 1
+; CHECK-NEXT:    [[T5:%.*]] = bitcast %ty* [[T]] to i8*
+; CHECK-NEXT:    [[T1:%.*]] = ptrtoint %ty* [[T]] to i64
+; CHECK-NEXT:    [[T3:%.*]] = getelementptr inbounds [[TY]], %ty* [[T]], i64 0, i32 0
+; CHECK-NEXT:    [[T4:%.*]] = getelementptr inbounds i8, i8* [[T3]], i64 undef
+; CHECK-NEXT:    store i8 0, i8* [[T4]], align 1
+; CHECK-NEXT:    [[T8:%.*]] = getelementptr inbounds [[TY]], %ty* [[T]], i64 0, i32 1
+; CHECK-NEXT:    [[T9:%.*]] = load i8, i8* [[T8]], align 1
+; CHECK-NEXT:    [[T10:%.*]] = sext i8 [[T9]] to i32
+; CHECK-NEXT:    br label [[BB6:%.*]]
+; CHECK:       bb6:
+; CHECK-NEXT:    [[T1_0:%.*]] = phi i8* [ [[T3]], [[BB:%.*]] ], [ null, [[BB6]] ]
+; CHECK-NEXT:    [[T2_0:%.*]] = phi i32 [ 0, [[BB]] ], [ 1, [[BB6]] ]
+; CHECK-NEXT:    [[T11:%.*]] = icmp eq i32 [[T2_0]], [[T10]]
+; CHECK-NEXT:    br i1 [[T11]], label [[BB14_PREHEADER:%.*]], label [[BB6]]
+; CHECK:       bb14.preheader:
+; CHECK-NEXT:    [[T1_0_LCSSA:%.*]] = phi i8* [ [[T1_0]], [[BB6]] ]
+; CHECK-NEXT:    [[T1_0_LCSSA2:%.*]] = ptrtoint i8* [[T1_0_LCSSA]] to i64
+; CHECK-NEXT:    [[T3_I2:%.*]] = icmp slt i8 [[T9]], 2
+; CHECK-NEXT:    [[T203:%.*]] = sext i8 [[T9]] to i64
+; CHECK-NEXT:    [[T214:%.*]] = getelementptr inbounds i8, i8* [[T3]], i64 [[T203]]
+; CHECK-NEXT:    [[T225:%.*]] = icmp eq i8* [[T1_0_LCSSA]], [[T214]]
+; CHECK-NEXT:    br i1 [[T225]], label [[BB27:%.*]], label [[BB23_PREHEADER:%.*]]
+; CHECK:       bb23.preheader:
+; CHECK-NEXT:    [[TMP0:%.*]] = add i8 [[T9]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = sext i8 [[TMP0]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[T1]] to i32
+; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[TMP3]], 1
+; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[T1_0_LCSSA2]] to i32
+; CHECK-NEXT:    [[TMP6:%.*]] = sub i32 [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP6]], 32
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
+; CHECK:       vector.scevcheck:
+; CHECK-NEXT:    [[TMP7:%.*]] = add i8 [[T9]], -1
+; CHECK-NEXT:    [[TMP8:%.*]] = sext i8 [[TMP7]] to i64
+; CHECK-NEXT:    [[TMP9:%.*]] = sub i64 [[TMP8]], [[T1_0_LCSSA2]]
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, i8* [[T5]], i64 [[TMP9]]
+; CHECK-NEXT:    [[UGLYGEP6:%.*]] = ptrtoint i8* [[UGLYGEP]] to i64
+; CHECK-NEXT:    [[TMP10:%.*]] = trunc i64 [[UGLYGEP6]] to i8
+; CHECK-NEXT:    [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 1, i8 [[TMP10]])
+; CHECK-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0
+; CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
+; CHECK-NEXT:    [[TMP11:%.*]] = add i8 [[TMP7]], [[MUL_RESULT]]
+; CHECK-NEXT:    [[TMP12:%.*]] = sub i8 [[TMP7]], [[MUL_RESULT]]
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp sgt i8 [[TMP12]], [[TMP7]]
+; CHECK-NEXT:    [[TMP14:%.*]] = icmp slt i8 [[TMP11]], [[TMP7]]
+; CHECK-NEXT:    [[TMP15:%.*]] = select i1 true, i1 [[TMP13]], i1 [[TMP14]]
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp ugt i64 [[UGLYGEP6]], 255
+; CHECK-NEXT:    [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]]
+; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW]]
+; CHECK-NEXT:    [[TMP19:%.*]] = or i1 false, [[TMP18]]
+; CHECK-NEXT:    br i1 [[TMP19]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP6]], 32
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP6]], [[N_MOD_VF]]
+; CHECK-NEXT:    [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8
+; CHECK-NEXT:    [[IND_END:%.*]] = sub i8 [[T9]], [[CAST_CRD]]
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[T9]], i32 0
+; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
+; CHECK-NEXT:    [[INDUCTION:%.*]] = add <16 x i8> [[DOTSPLAT]],
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <16 x i8> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <16 x i8> [[VEC_IND]],
+; CHECK-NEXT:    [[TMP20:%.*]] = add <16 x i8> [[VEC_IND]],
+; CHECK-NEXT:    [[TMP21:%.*]] = add <16 x i8> [[STEP_ADD]],
+; CHECK-NEXT:    [[TMP22:%.*]] = icmp slt <16 x i8> [[TMP20]],
+; CHECK-NEXT:    [[TMP23:%.*]] = icmp slt <16 x i8> [[TMP21]],
+; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <16 x i8> [[TMP20]], i32 0
+; CHECK-NEXT:    [[TMP25:%.*]] = sext i8 [[TMP24]] to i64
+; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <16 x i8> [[TMP21]], i32 0
+; CHECK-NEXT:    [[TMP27:%.*]] = sext i8 [[TMP26]] to i64
+; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, i8* [[T3]], i64 [[TMP25]]
+; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i8, i8* [[T3]], i64 [[TMP27]]
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 32
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <16 x i8> [[STEP_ADD]],
+; CHECK-NEXT:    [[TMP30:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP6]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[BB14_BB27_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T9]], [[BB23_PREHEADER]] ], [ [[T9]], [[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT:    br label [[BB23:%.*]]
+; CHECK:       bb23:
+; CHECK-NEXT:    [[T186:%.*]] = phi i8 [ [[T26:%.*]], [[BB23]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT:    [[T26]] = add i8 [[T186]], -1
+; CHECK-NEXT:    [[T3_I:%.*]] = icmp slt i8 [[T26]], 2
+; CHECK-NEXT:    [[T20:%.*]] = sext i8 [[T26]] to i64
+; CHECK-NEXT:    [[T21:%.*]] = getelementptr inbounds i8, i8* [[T3]], i64 [[T20]]
+; CHECK-NEXT:    [[T22:%.*]] = icmp eq i8* [[T1_0_LCSSA]], [[T21]]
+; CHECK-NEXT:    br i1 [[T22]], label [[BB14_BB27_CRIT_EDGE]], label [[BB23]], !llvm.loop !2
+; CHECK:       bb14.bb27_crit_edge:
+; CHECK-NEXT:    [[T26_LCSSA:%.*]] = phi i8 [ [[T26]], [[BB23]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    store i8 [[T26_LCSSA]], i8* [[T8]], align 1
+; CHECK-NEXT:    br label [[BB27]]
+; CHECK:       bb27:
+; CHECK-NEXT:    ret void
+;
+bb:
+  %t = alloca %ty, align 1
+  %t3 = getelementptr inbounds %ty, %ty* %t, i64 0, i32 0
+  %t4 = getelementptr inbounds i8, i8* %t3, i64 undef
+  store i8 0, i8* %t4, align 1
+  %t8 = getelementptr inbounds %ty, %ty* %t, i64 0, i32 1
+  %t9 = load i8, i8* %t8, align 1
+  %t10 = sext i8 %t9 to i32
+  br label %bb6

bb6:                                              ; preds = %bb6, %bb
  %t1.0 = phi i8* [ %t3, %bb ], [ null, %bb6 ]
  %t2.0 = phi i32 [ 0, %bb ], [ 1, %bb6 ]
  %t11 = icmp eq i32 %t2.0, %t10
  br i1 %t11, label %bb14.preheader, label %bb6

bb14.preheader:                                   ; preds = %bb6
  %t1.0.lcssa = phi i8* [ %t1.0, %bb6 ]
  %t3.i2 = icmp slt i8 %t9, 2
  %t203 = sext i8 %t9 to i64
  %t214 = getelementptr inbounds i8, i8* %t3, i64 %t203
  %t225 = icmp eq i8* %t1.0.lcssa, %t214
  br i1 %t225, label %bb27, label %bb23.preheader

bb23.preheader:                                   ; preds = %bb14.preheader
  br label %bb23

bb23:                                             ; preds = %bb23.preheader, %bb23
  %t186 = phi i8 [ %t26, %bb23 ], [ %t9, %bb23.preheader ]
  %t26 = add i8 %t186, -1
  %t3.i = icmp slt i8 %t26, 2
  %t20 = sext i8 %t26 to i64
  %t21 = getelementptr inbounds i8, i8* %t3, i64 %t20
  %t22 = icmp eq i8* %t1.0.lcssa, %t21
  br i1 %t22, label %bb14.bb27_crit_edge, label %bb23

bb14.bb27_crit_edge:                              ; preds = %bb23
  %t26.lcssa = phi i8 [ %t26, %bb23 ]
  store i8 %t26.lcssa, i8* %t8, align 1
  br label %bb27

bb27:                                             ; preds = %bb14.bb27_crit_edge, %bb14.preheader
  ret void
}
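
Note on the hunk in LoopVectorize.cpp: Type::getPrimitiveSizeInBits() returns 0 for non-primitive types such as pointers, so when the backedge-taken-count SCEV ends up pointer-typed (as it plausibly does for the pointer-compare exit in pr45259.ll) the old "wider than IdxTy" check could never fire and the count was not truncated. ScalarEvolution::getTypeSizeInBits() goes through the DataLayout, which reports the real storage width for pointer types as well. Below is a minimal standalone sketch of that distinction, outside the patch; the simplified "e-p:64:64" layout string and the main() harness are illustrative only, not taken from the patch.

// Sketch only (not part of the patch): contrast the two size queries that the
// hunk above switches between, for a pointer type under a 64-bit-pointer layout.
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  // Simplified stand-in for the test's x86_64 datalayout: 64-bit pointers.
  DataLayout DL("e-p:64:64");
  Type *I8Ptr = Type::getInt8PtrTy(Ctx); // pointer type, like i8* in pr45259.ll

  // Pointers are not primitive types, so this reports 0; a width comparison
  // based on it silently treats a pointer-typed count as "narrow enough".
  uint64_t PrimBits = I8Ptr->getPrimitiveSizeInBits();

  // DataLayout-based width, which is what SE->getTypeSizeInBits() consults.
  uint64_t DLBits = DL.getTypeSizeInBits(I8Ptr);

  outs() << "getPrimitiveSizeInBits: " << PrimBits << "\n"; // prints 0
  outs() << "DataLayout size:        " << DLBits << "\n";   // prints 64
  return 0;
}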