Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7555,6 +7555,11 @@ // Use the cost model. LoopVectorizationCostModel CM(SEL, L, PSE, LI, &LVL, *TTI, TLI, DB, AC, ORE, F, &Hints, IAI); + + // Don't vectorize if we expect code growth and compile for size. + if (F->hasOptSize() && CM.runtimeChecksRequired()) + return false; + CM.collectValuesToIgnore(); // Use the planner for vectorization. Index: llvm/test/Transforms/LoopVectorize/runtime-check-optsize.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopVectorize/runtime-check-optsize.ll @@ -0,0 +1,67 @@ +; RUN: opt < %s -O1 -disable-basicaa -S -o - | FileCheck %s + +; This function has the OptSize attribute, and requires runtime memory checks. +; Check that we nicely bail, i.e. don't vectorize this loop (and +; don't run into an assert) because we don't want to grow code-size.
+ +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define dso_local void @f_w2(i64* noalias %x_p, i64* noalias %y_p, i64* noalias %z_p) #0 { +; CHECK-LABEL: @f_w2( +; CHECK-NOT: vector.body: +entry: + %x_p.addr = alloca i64*, align 8 + %y_p.addr = alloca i64*, align 8 + %z_p.addr = alloca i64*, align 8 + %i = alloca i32, align 4 + store i64* %x_p, i64** %x_p.addr, align 8 + store i64* %y_p, i64** %y_p.addr, align 8 + store i64* %z_p, i64** %z_p.addr, align 8 + %0 = bitcast i32* %i to i8* + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: + %1 = load i32, i32* %i, align 4 + %cmp = icmp slt i32 %1, 128 + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: + %2 = bitcast i32* %i to i8* + br label %for.end + +for.body: + %3 = load i64*, i64** %x_p.addr, align 8 + %4 = load i32, i32* %i, align 4 + %idxprom = sext i32 %4 to i64 + %arrayidx = getelementptr inbounds i64, i64* %3, i64 %idxprom + %5 = load i64, i64* %arrayidx, align 8 + %6 = load i64*, i64** %y_p.addr, align 8 + %7 = load i32, i32* %i, align 4 + %idxprom1 = sext i32 %7 to i64 + %arrayidx2 = getelementptr inbounds i64, i64* %6, i64 %idxprom1 + %8 = load i64, i64* %arrayidx2, align 8 + %add = add nsw i64 %5, %8 + %9 = load i64*, i64** %z_p.addr, align 8 + %10 = load i32, i32* %i, align 4 + %idxprom3 = sext i32 %10 to i64 + %arrayidx4 = getelementptr inbounds i64, i64* %9, i64 %idxprom3 + store i64 %add, i64* %arrayidx4, align 8 + br label %for.inc + +for.inc: + %11 = load i32, i32* %i, align 4 + %inc = add nsw i32 %11, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond, !llvm.loop !10 + +for.end: + ret void +} + +attributes #0 = { nounwind optsize uwtable "target-cpu"="core-avx2" "target-features"="+avx,+avx2" } + +!10 = distinct !{!10, !11, !12} +!11 = !{!"llvm.loop.vectorize.width", i32 2} +!12 = !{!"llvm.loop.vectorize.enable", i1 true}