Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2730,8 +2730,23 @@
   if (!MemRuntimeCheck)
     return;
 
-  assert(!BB->getParent()->hasOptSize() &&
-         "Cannot emit memory checks when optimizing for size");
+  // Runtime memory checks add code, which conflicts with optimizing for size.
+  // They are only expected here when vectorization was explicitly forced; in
+  // that case emit a remark telling the user how code size could be reduced.
+  if (BB->getParent()->hasOptSize()) {
+    bool Forced = Cost->Hints->getForce() == LoopVectorizeHints::FK_Enabled;
+    assert(Forced && "Cannot emit memory checks when optimizing for size, "
+                     "unless forced to vectorize.");
+    if (Forced)
+      ORE->emit([&]() {
+        return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationCodeSize",
+                                          L->getStartLoc(), L->getHeader())
+               << "Code-size may be reduced by removing forced "
+                  "vectorization, or by source-code modifications "
+                  "eliminating the need for runtime checks "
+                  "(e.g., adding 'restrict').";
+      });
+  }
 
   // Create a new block containing the memory check.
   BB->setName("vector.memcheck");
Index: llvm/test/Transforms/LoopVectorize/runtime-check.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/runtime-check.ll
+++ llvm/test/Transforms/LoopVectorize/runtime-check.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -disable-basicaa -S -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s -check-prefix=FORCED_OPTSIZE
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
@@ -157,6 +158,33 @@
   ret void
 }
 
+; Check that forcing vectorization of an optsize function reports the
+; code-size remark and still vectorizes instead of hitting the assertion.
+define dso_local void @forced_optsize(i64* noalias nocapture readonly %x_p, i64* noalias nocapture readonly %y_p, i64* noalias nocapture %z_p) local_unnamed_addr optsize #0 {
+; FORCED_OPTSIZE: remark: {{.*}}Code-size may be reduced by removing forced vectorization
+; FORCED_OPTSIZE-LABEL: @forced_optsize(
+; FORCED_OPTSIZE: vector.body:
+entry:
+  br label %for.body
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i64, i64* %x_p, i64 %indvars.iv
+  %0 = load i64, i64* %arrayidx, align 8
+  %arrayidx2 = getelementptr inbounds i64, i64* %y_p, i64 %indvars.iv
+  %1 = load i64, i64* %arrayidx2, align 8
+  %add = add nsw i64 %1, %0
+  %arrayidx4 = getelementptr inbounds i64, i64* %z_p, i64 %indvars.iv
+  store i64 %add, i64* %arrayidx4, align 8
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 128
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !12
+}
+
+
 ; CHECK: !9 = !DILocation(line: 101, column: 1, scope: !{{.*}})
 
 !llvm.module.flags = !{!0, !1}
@@ -177,3 +205,6 @@
                              splitDebugFilename: "abc.debug", emissionKind: 2)
 !10 = !DIFile(filename: "path/to/file", directory: "/path/to/dir")
 !11 = !{i32 2, !"Debug Info Version", i32 3}
+!12 = distinct !{!12, !13, !14}
+!13 = !{!"llvm.loop.vectorize.width", i32 2}
+!14 = !{!"llvm.loop.vectorize.enable", i1 true}