Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -811,7 +811,11 @@ case Intrinsic::is_constant: case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: + case Intrinsic::noalias_decl: case Intrinsic::noalias: + case Intrinsic::side_noalias: + case Intrinsic::noalias_arg_guard: + case Intrinsic::noalias_copy_guard: case Intrinsic::objectsize: case Intrinsic::ptr_annotation: case Intrinsic::var_annotation: Index: llvm/lib/Analysis/VectorUtils.cpp =================================================================== --- llvm/lib/Analysis/VectorUtils.cpp +++ llvm/lib/Analysis/VectorUtils.cpp @@ -117,7 +117,8 @@ if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start || ID == Intrinsic::lifetime_end || ID == Intrinsic::assume || - ID == Intrinsic::sideeffect || ID == Intrinsic::noalias) + ID == Intrinsic::sideeffect || ID == Intrinsic::noalias || + ID == Intrinsic::side_noalias) return ID; return Intrinsic::not_intrinsic; } Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3235,11 +3235,6 @@ if (VF == 1) return ScalarCallCost; - // Compute corresponding vector type for return value and arguments. - Type *RetTy = ToVectorTy(ScalarRetTy, VF); - for (Type *ScalarTy : ScalarTys) - Tys.push_back(ToVectorTy(ScalarTy, VF)); - // Compute costs of unpacking argument values for the scalar calls and // packing the return values to a vector. unsigned ScalarizationCost = getScalarizationOverhead(CI, VF); @@ -3252,6 +3247,11 @@ if (!TLI || !TLI->isFunctionVectorizable(FnName, VF) || CI->isNoBuiltin()) return Cost; + // Compute corresponding vector type for return value and arguments. 
+ Type *RetTy = ToVectorTy(ScalarRetTy, VF); + for (Type *ScalarTy : ScalarTys) + Tys.push_back(ToVectorTy(ScalarTy, VF)); + // If the corresponding vector cost is cheaper, return its cost. unsigned VectorCallCost = TTI.getCallInstrCost(nullptr, RetTy, Tys); if (VectorCallCost < Cost) { @@ -6892,7 +6892,7 @@ Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); if (ID && (ID == Intrinsic::assume || ID == Intrinsic::lifetime_end || ID == Intrinsic::lifetime_start || ID == Intrinsic::sideeffect || - ID == Intrinsic::noalias)) + ID == Intrinsic::noalias || ID == Intrinsic::side_noalias)) return false; } Index: llvm/test/Transforms/LoopVectorize/noalias.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/noalias.ll +++ llvm/test/Transforms/LoopVectorize/noalias.ll @@ -2,19 +2,45 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -; Make sure we can vectorize loops which contain lifetime markers. 
+; Make sure we can vectorize loops which contain noalias intrinsics -; CHECK-LABEL: @test( + +; A not-used llvm.noalias should not interfere +; CHECK-LABEL: @test_noalias_not_connected( ; CHECK: @llvm.noalias.p0i32 ; CHECK: store <2 x i32> +; CHECK: ret +define void @test_noalias_not_connected(i32 *%d) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %d2 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %d, i8* null, i32** null, i32 0, metadata !1) + %arrayidx = getelementptr inbounds i32, i32* %d, i64 %indvars.iv + %v1 = load i32, i32* %arrayidx, align 8 + store i32 100, i32* %arrayidx, align 8 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp ne i32 %lftr.wideiv, 128 + br i1 %exitcond, label %for.body, label %for.end -define void @test(i32 *%d) { +for.end: + ret void +} + +; A used llvm.noalias should block vectorization. +; CHECK-LABEL: @test_noalias_connected( +; CHECK: @llvm.noalias.p0i32 +; CHECK-NOT: store <2 x i32> +; CHECK: ret +define void @test_noalias_connected(i32 *%d) { entry: br label %for.body for.body: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %d2 = call i32* @llvm.noalias.p0i32(i32* %d, metadata !1) + %d2 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %d, i8* null, i32** null, i32 0, metadata !1) %arrayidx = getelementptr inbounds i32, i32* %d2, i64 %indvars.iv %v1 = load i32, i32* %arrayidx, align 8 store i32 100, i32* %arrayidx, align 8 @@ -27,8 +53,38 @@ ret void } -declare i32* @llvm.noalias.p0i32(i32*, metadata) nounwind argmemonly +; A used llvm.side.noalias should NOT block vectorization. 
+; CHECK-LABEL: @test_side_noalias( +; CHECK: @llvm.side.noalias.p0i32 +; NOTE: the vectorized store is expected to have no noalias_sidechannel operand, so the pattern below omits it +; CHECK: store <2 x i32> , <2 x i32>* {{%[0-9.a-zA-Z]*}}, align 8 +; CHECK: ret + +define void @test_side_noalias(i32 *%d) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %side.d = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %d, i8* null, i32** null, i32** null, i32 0, metadata !1) + %arrayidx = getelementptr inbounds i32, i32* %d, i64 %indvars.iv + %v1 = load i32, i32* %arrayidx, noalias_sidechannel i32* %side.d, align 8 + store i32 100, i32* %arrayidx, noalias_sidechannel i32* %side.d, align 8 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp ne i32 %lftr.wideiv, 128 + br i1 %exitcond, label %for.body, label %for.end + +for.end: + ret void +} + +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i8**, i32, metadata) argmemonly nounwind +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32*, i8*, i32**, i32, metadata ) nounwind +declare i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata ) nounwind + + +;declare i32* @llvm.noalias.p0i32(i32*, metadata) nounwind argmemonly !0 = !{!0, !"some domain"} !1 = !{!1, !0, !"some scope"} -