Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7405,6 +7405,19 @@
   return true;
 }
 
+static bool hasRuntimeChecksAndOptSize(Function *F,
+                                       LoopVectorizationLegality &LVL) {
+  const LoopAccessInfo *LAI = LVL.getLAI();
+  if (!LAI)
+    return false;
+
+  bool Empty = LAI->getRuntimePointerChecking()->getChecks().empty();
+  if (!Empty && F->hasOptSize())
+    return true;
+
+  return false;
+}
+
 bool LoopVectorizePass::processLoop(Loop *L) {
   assert((EnableVPlanNativePath || L->empty()) &&
          "VPlan-native path is not enabled. Only process inner loops.");
@@ -7458,6 +7471,12 @@
     return false;
   }
 
+  if (hasRuntimeChecksAndOptSize(F, LVL)) {
+    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: runtime checks required, but "
+                         "optimizing for code size.\n");
+    return false;
+  }
+
   // Check the function attributes and profiles to find out if this function
   // should be optimized for size.
   ScalarEpilogueLowering SEL = getScalarEpilogueLowering(F, L, Hints, PSI, BFI);
Index: llvm/test/Transforms/LoopVectorize/runtime-check-optsize.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/runtime-check-optsize.ll
@@ -0,0 +1,67 @@
+; RUN: opt < %s -O1 -disable-basicaa -S -o - | FileCheck %s
+
+; This function has the OptSize attribute and requires runtime memory checks.
+; Check that we gracefully bail out, i.e. don't vectorize this loop (and
+; don't hit an assertion), because we don't want to grow code size.
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define dso_local void @f_w2(i64* noalias %x_p, i64* noalias %y_p, i64* noalias %z_p) #0 {
+; CHECK-LABEL: @f_w2(
+; CHECK-NOT: vector.body:
+entry:
+  %x_p.addr = alloca i64*, align 8
+  %y_p.addr = alloca i64*, align 8
+  %z_p.addr = alloca i64*, align 8
+  %i = alloca i32, align 4
+  store i64* %x_p, i64** %x_p.addr, align 8
+  store i64* %y_p, i64** %y_p.addr, align 8
+  store i64* %z_p, i64** %z_p.addr, align 8
+  %0 = bitcast i32* %i to i8*
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:
+  %1 = load i32, i32* %i, align 4
+  %cmp = icmp slt i32 %1, 128
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+  %2 = bitcast i32* %i to i8*
+  br label %for.end
+
+for.body:
+  %3 = load i64*, i64** %x_p.addr, align 8
+  %4 = load i32, i32* %i, align 4
+  %idxprom = sext i32 %4 to i64
+  %arrayidx = getelementptr inbounds i64, i64* %3, i64 %idxprom
+  %5 = load i64, i64* %arrayidx, align 8
+  %6 = load i64*, i64** %y_p.addr, align 8
+  %7 = load i32, i32* %i, align 4
+  %idxprom1 = sext i32 %7 to i64
+  %arrayidx2 = getelementptr inbounds i64, i64* %6, i64 %idxprom1
+  %8 = load i64, i64* %arrayidx2, align 8
+  %add = add nsw i64 %5, %8
+  %9 = load i64*, i64** %z_p.addr, align 8
+  %10 = load i32, i32* %i, align 4
+  %idxprom3 = sext i32 %10 to i64
+  %arrayidx4 = getelementptr inbounds i64, i64* %9, i64 %idxprom3
+  store i64 %add, i64* %arrayidx4, align 8
+  br label %for.inc
+
+for.inc:
+  %11 = load i32, i32* %i, align 4
+  %inc = add nsw i32 %11, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond, !llvm.loop !10
+
+for.end:
+  ret void
+}
+
+attributes #0 = { nounwind optsize uwtable "target-cpu"="core-avx2" "target-features"="+avx,+avx2" }
+
+!10 = distinct !{!10, !11, !12}
+!11 = !{!"llvm.loop.vectorize.width", i32 2}
+!12 = !{!"llvm.loop.vectorize.enable", i1 true}
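
For illustration only, not part of the patch: a hypothetical C++ analogue of the IR test above. The names are made up; compiling with -Os is what puts the 'optsize' attribute on the function. Since the RUN line disables BasicAA, the noalias hints on the arguments are not usable, so the vectorizer would have to emit runtime pointer-overlap checks before the vector body; with this change it bails out instead of growing the function with those checks.

// Hypothetical source analogue of @f_w2 (names and the -Os assumption are
// illustrative, not taken from the patch). If x, y and z may overlap, the
// loop vectorizer must guard the vector body with runtime checks that the
// accessed ranges are disjoint; under optsize it now declines to do so.
void f_w2(long *x, long *y, long *z) {
  for (int i = 0; i < 128; i++)
    z[i] = x[i] + y[i]; // needs proof that z[0..127] is disjoint from x/y
}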