Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2730,9 +2730,6 @@
   if (!MemRuntimeCheck)
     return;
 
-  assert(!BB->getParent()->hasOptSize() &&
-         "Cannot emit memory checks when optimizing for size");
-
   // Create a new block containing the memory check.
   BB->setName("vector.memcheck");
   auto *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph");
Index: llvm/test/Transforms/LoopVectorize/runtime-check-optsize.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/runtime-check-optsize.ll
@@ -0,0 +1,67 @@
+; RUN: opt < %s -O1 -disable-basicaa -S -o - | FileCheck %s
+
+; This function has the OptSize function attribute set and the vectorize loop
+; hints. As vectorization is forced with a pragma, we do allow this to be
+; vectorized.
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define dso_local void @f_w2(i64* noalias %x_p, i64* noalias %y_p, i64* noalias %z_p) #0 {
+; CHECK-LABEL: @f_w2(
+; CHECK: vector.body:
+entry:
+  %x_p.addr = alloca i64*, align 8
+  %y_p.addr = alloca i64*, align 8
+  %z_p.addr = alloca i64*, align 8
+  %i = alloca i32, align 4
+  store i64* %x_p, i64** %x_p.addr, align 8
+  store i64* %y_p, i64** %y_p.addr, align 8
+  store i64* %z_p, i64** %z_p.addr, align 8
+  %0 = bitcast i32* %i to i8*
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:
+  %1 = load i32, i32* %i, align 4
+  %cmp = icmp slt i32 %1, 128
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+  %2 = bitcast i32* %i to i8*
+  br label %for.end
+
+for.body:
+  %3 = load i64*, i64** %x_p.addr, align 8
+  %4 = load i32, i32* %i, align 4
+  %idxprom = sext i32 %4 to i64
+  %arrayidx = getelementptr inbounds i64, i64* %3, i64 %idxprom
+  %5 = load i64, i64* %arrayidx, align 8
+  %6 = load i64*, i64** %y_p.addr, align 8
+  %7 = load i32, i32* %i, align 4
+  %idxprom1 = sext i32 %7 to i64
+  %arrayidx2 = getelementptr inbounds i64, i64* %6, i64 %idxprom1
+  %8 = load i64, i64* %arrayidx2, align 8
+  %add = add nsw i64 %5, %8
+  %9 = load i64*, i64** %z_p.addr, align 8
+  %10 = load i32, i32* %i, align 4
+  %idxprom3 = sext i32 %10 to i64
+  %arrayidx4 = getelementptr inbounds i64, i64* %9, i64 %idxprom3
+  store i64 %add, i64* %arrayidx4, align 8
+  br label %for.inc
+
+for.inc:
+  %11 = load i32, i32* %i, align 4
+  %inc = add nsw i32 %11, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond, !llvm.loop !10
+
+for.end:
+  ret void
+}
+
+attributes #0 = { nounwind optsize uwtable "target-cpu"="core-avx2" "target-features"="+avx,+avx2" }
+
+!10 = distinct !{!10, !11, !12}
+!11 = !{!"llvm.loop.vectorize.width", i32 2}
+!12 = !{!"llvm.loop.vectorize.enable", i1 true}