Index: include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- include/llvm/Analysis/TargetTransformInfoImpl.h +++ include/llvm/Analysis/TargetTransformInfoImpl.h @@ -147,6 +147,7 @@ case Intrinsic::invariant_end: case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: + case Intrinsic::noalias: case Intrinsic::objectsize: case Intrinsic::ptr_annotation: case Intrinsic::var_annotation: Index: lib/Analysis/VectorUtils.cpp =================================================================== --- lib/Analysis/VectorUtils.cpp +++ lib/Analysis/VectorUtils.cpp @@ -90,7 +90,8 @@ return Intrinsic::not_intrinsic; if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start || - ID == Intrinsic::lifetime_end || ID == Intrinsic::assume) + ID == Intrinsic::lifetime_end || ID == Intrinsic::assume || + ID == Intrinsic::noalias) return ID; return Intrinsic::not_intrinsic; } Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4506,6 +4506,13 @@ Module *M = BB->getParent()->getParent(); CallInst *CI = cast(it); + Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); + if (ID && (ID == Intrinsic::assume || ID == Intrinsic::lifetime_end || + ID == Intrinsic::lifetime_start || ID == Intrinsic::noalias)) { + scalarizeInstruction(&*it); + break; + } + StringRef FnName = CI->getCalledFunction()->getName(); Function *F = CI->getCalledFunction(); Type *RetTy = ToVectorTy(CI->getType(), VF); @@ -4513,12 +4520,6 @@ for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) Tys.push_back(ToVectorTy(CI->getArgOperand(i)->getType(), VF)); - Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); - if (ID && (ID == Intrinsic::assume || ID == Intrinsic::lifetime_end || - ID == Intrinsic::lifetime_start)) { - scalarizeInstruction(&*it); - break; - } // The flag shows whether we use Intrinsic or a usual Call for vectorized // version of the instruction. // Is it beneficial to perform intrinsic call compared to lib call? Index: test/Transforms/LoopVectorize/noalias.ll =================================================================== --- /dev/null +++ test/Transforms/LoopVectorize/noalias.ll @@ -0,0 +1,34 @@ +; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +; Make sure we can vectorize loops which contain lifetime markers. + +; CHECK-LABEL: @test( +; CHECK: @llvm.noalias.p0i32 +; CHECK: store <2 x i32> + +define void @test(i32 *%d) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %d2 = call i32* @llvm.noalias.p0i32(i32* %d, metadata !1) + %arrayidx = getelementptr inbounds i32, i32* %d2, i64 %indvars.iv + %v1 = load i32, i32* %arrayidx, align 8 + store i32 100, i32* %arrayidx, align 8 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp ne i32 %lftr.wideiv, 128 + br i1 %exitcond, label %for.body, label %for.end + +for.end: + ret void +} + +declare i32* @llvm.noalias.p0i32(i32*, metadata) nounwind argmemonly + +!0 = !{!0, !"some domain"} +!1 = !{!1, !0, !"some scope"} +