# Patch summary (free text ahead of the first file header; not part of any hunk).
#
# 1. llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
#    Just before the block is renamed to "vector.scevcheck", the assertion
#    that tail folding by masking is off (together with its FIXME) is replaced
#    by an assertion that the enclosing function is not optimizing for size
#    (BB->getParent()->hasOptSize()).
#    NOTE(review): presumably this lets a loop that folds its tail by masking
#    still receive a SCEV runtime check when the function is not optsize;
#    confirm against the cost model's scalar-epilogue handling.
#
# 2. llvm/test/Transforms/LoopVectorize/X86/tail-folding-scevcheck.ll (new)
#    Regression test run with -loop-vectorize -mcpu=core-avx2. The loop carries
#    llvm.loop.vectorize.predicate.enable and llvm.loop.vectorize.enable
#    metadata, and the function attributes (#0) do not include optsize.
#    FileCheck expects a vector.scevcheck block branching to scalar.ph or
#    vector.ph, and a vector.body containing a masked v8i32 store.
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2695,10 +2695,8 @@
     if (C->isZero())
       return;
 
-  // FIXME: FoldTailByMasking no longer implies OptForSize, this should be
-  // checking CM_ScalarEpilogueNotAllowedOptSize
-  assert(!Cost->foldTailByMasking() &&
-         "Cannot SCEV check stride or overflow when folding tail");
+  assert(!BB->getParent()->hasOptSize() &&
+         "Cannot SCEV check stride or overflow when optimizing for size");
 
   // Create a new block containing the stride check.
   BB->setName("vector.scevcheck");
# New test file below. The stored value is the i32 induction %p.010 truncated
# to 8 bits and sign-extended (shl 24 / ashr 24), stepped by the sign-extended
# i8 argument %cstep.
# NOTE(review): that narrowing is presumably what makes the vectorizer need the
# SCEV overflow check - confirm.
Index: llvm/test/Transforms/LoopVectorize/X86/tail-folding-scevcheck.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/X86/tail-folding-scevcheck.ll
@@ -0,0 +1,53 @@
+; RUN: opt < %s -loop-vectorize -mcpu=core-avx2 -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a = external dso_local local_unnamed_addr global [1024 x i32], align 16
+
+define dso_local void @_Z5doit4ic(i32 %n, i8 signext %cstep) local_unnamed_addr #0 {
+; CHECK-LABEL: @_Z5doit4ic
+;
+; CHECK: vector.scevcheck:
+; CHECK: br i1 {{.*}}, label %scalar.ph, label %vector.ph
+;
+; CHECK: vector.ph:
+; CHECK: br label %vector.body
+;
+; CHECK: vector.body:
+; CHECK: call void @llvm.masked.store.v8i32.p0v8i32
+; CHECK: br i1 {{.*}}, label {{.*}}, label %vector.body
+entry:
+  %conv = sext i8 %cstep to i32
+  %cmp9 = icmp sgt i32 %n, 0
+  br i1 %cmp9, label %for.body.preheader, label %for.end
+
+for.body.preheader:
+  %wide.trip.count = zext i32 %n to i64
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %p.010 = phi i32 [ 0, %for.body.preheader ], [ %add, %for.body ]
+  %sext = shl i32 %p.010, 24
+  %conv1 = ashr exact i32 %sext, 24
+  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @a, i64 0, i64 %indvars.iv
+  store i32 %conv1, i32* %arrayidx, align 4
+  %add = add nsw i32 %conv1, %conv
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.end.loopexit, label %for.body, !llvm.loop !6
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+attributes #0 = { nofree norecurse nounwind uwtable }
+
+!6 = distinct !{!6, !7, !8}
+!7 = !{!"llvm.loop.vectorize.predicate.enable", i1 true}
+!8 = !{!"llvm.loop.vectorize.enable", i1 true}
+