Vector-library mappings for sincos/sincosf, plus pointer-argument handling in
LoopAccessAnalysis.

* VecFuncs.def: map "sincos" to _ZGVbN2vvv_sincos / _ZGVdN4vvv_sincos and
  "sincosf" to _ZGVbN4vvv_sincosf / _ZGVdN8vvv_sincosf.
* LoopAccessAnalysis.cpp: a call that has a vector mapping is skipped outright
  only when IsAnnotatedParallel is set. Otherwise its arguments are scanned and
  the first pointer-typed argument sets HasComplexMemInst.
* Tests: sincos_f64 / sincos_f32 carry llvm.loop.parallel_accesses metadata and
  expect the vector call; dependent_sincos_f64 / dependent_sincos_f32 carry no
  such metadata, load element iv-1 and expect no vector call. Both dependent
  loops start at iv = 1 so that the iv-1 index is never negative.

diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def
--- a/llvm/include/llvm/Analysis/VecFuncs.def
+++ b/llvm/include/llvm/Analysis/VecFuncs.def
@@ -145,6 +145,12 @@
 TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVbN4v_cosf", FIXED(4))
 TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVdN8v_cosf", FIXED(8))
 
+TLI_DEFINE_VECFUNC("sincos", "_ZGVbN2vvv_sincos", FIXED(2))
+TLI_DEFINE_VECFUNC("sincos", "_ZGVdN4vvv_sincos", FIXED(4))
+
+TLI_DEFINE_VECFUNC("sincosf", "_ZGVbN4vvv_sincosf", FIXED(4))
+TLI_DEFINE_VECFUNC("sincosf", "_ZGVdN8vvv_sincosf", FIXED(8))
+
 TLI_DEFINE_VECFUNC("pow", "_ZGVbN2vv_pow", FIXED(2))
 TLI_DEFINE_VECFUNC("pow", "_ZGVdN4vv_pow", FIXED(4))
 
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2213,10 +2213,22 @@
           continue;
 
         // If the function has an explicit vectorized counterpart, we can safely
-        // assume that it can be vectorized.
+        // assume that it can be vectorized unless it has pointer arguments.
         if (!Call->isNoBuiltin() && Call->getCalledFunction() &&
-            !VFDatabase::getMappings(*Call).empty())
+            !VFDatabase::getMappings(*Call).empty()) {
+          // Don't even check if the user asked for vectorization.
+          if (IsAnnotatedParallel)
+            continue;
+          // Scan arguments for pointers, which currently prevent vectorization.
+          for (Value *Arg : Call->args()) {
+            if (Arg->getType()->isPointerTy()) {
+              HasComplexMemInst = true;
+              break;
+            }
+          }
+          // Any pointer argument was flagged above; nothing more to check here.
           continue;
+        }
       }
 
       // If this is a load, save it. If this instruction can read from memory
diff --git a/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll b/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll
--- a/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll
@@ -356,6 +356,66 @@
 !132 = !{!"llvm.loop.vectorize.width", i32 8}
 !133 = !{!"llvm.loop.vectorize.enable", i1 true}
 
+define void @sincos_f64(double* nocapture noalias %sinarray, double* nocapture noalias %cosarray) {
+; CHECK-LABEL: @sincos_f64(
+; CHECK-LABEL: vector.body
+; CHECK: call void @_ZGVbN2vvv_sincos(<2 x double> [[TMP4:%.*]], <2 x double*> [[TMP5:%.*]], <2 x double*> [[TMP6:%.*]])
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %t = trunc i64 %iv to i32
+  %conv = sitofp i32 %t to double
+  %sinptr = getelementptr inbounds double, double* %sinarray, i64 %iv
+  %cosptr = getelementptr inbounds double, double* %cosarray, i64 %iv
+  call void @sincos(double %conv, double* %sinptr, double* %cosptr), !llvm.access.group !145
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !141
+
+for.end:
+  ret void
+}
+
+!141 = distinct !{!141, !142, !143, !144}
+!142 = !{!"llvm.loop.vectorize.width", i32 2}
+!143 = !{!"llvm.loop.vectorize.enable", i1 true}
+!144 = !{!"llvm.loop.parallel_accesses", !145}
+!145 = distinct !{}
+
+define void @sincos_f32(float* nocapture noalias %sinarray, float* nocapture noalias %cosarray) {
+; CHECK-LABEL: @sincos_f32(
+; CHECK-LABEL: vector.body
+; CHECK: call void @_ZGVdN8vvv_sincosf(<8 x float> [[TMP4:%.*]], <8 x float*> [[TMP5:%.*]], <8 x float*> [[TMP6:%.*]])
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %t = trunc i64 %iv to i32
+  %conv = sitofp i32 %t to float
+  %sinptr = getelementptr inbounds float, float* %sinarray, i64 %iv
+  %cosptr = getelementptr inbounds float, float* %cosarray, i64 %iv
+  call void @sincosf(float %conv, float* %sinptr, float* %cosptr), !llvm.access.group !155
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !151
+
+for.end:
+  ret void
+}
+
+!151 = distinct !{!151, !152, !153, !154}
+!152 = !{!"llvm.loop.vectorize.width", i32 8}
+!153 = !{!"llvm.loop.vectorize.enable", i1 true}
+!154 = !{!"llvm.loop.parallel_accesses", !155}
+!155 = distinct !{}
+
+; CHECK-LABEL: ; Function Attrs:
+
 ; functions are in fact "readnone" but clang only emits the weaker "writeonly" as other math functions may write errno.
 attributes #0 = { nounwind writeonly }
 
@@ -372,3 +432,8 @@
 declare float @llvm.exp.f32(float) #0
 declare float @logf(float) #0
 declare float @llvm.pow.f32(float, float) #0
+
+attributes #1 = { nounwind argmemonly }
+
+declare void @sincos(double, double*, double*) #1
+declare void @sincosf(float, float*, float*) #1
diff --git a/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls.ll b/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls.ll
--- a/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls.ll
@@ -356,6 +356,120 @@
 !132 = !{!"llvm.loop.vectorize.width", i32 4}
 !133 = !{!"llvm.loop.vectorize.enable", i1 true}
 
+define void @sincos_f64(double* nocapture noalias %sinarray, double* nocapture noalias %cosarray) {
+; CHECK-LABEL: @sincos_f64(
+; CHECK-LABEL: vector.body
+; CHECK: call void @_ZGVdN4vvv_sincos(<4 x double> [[TMP4:%.*]], <4 x double*> [[TMP5:%.*]], <4 x double*> [[TMP6:%.*]])
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %t = trunc i64 %iv to i32
+  %conv = sitofp i32 %t to double
+  %sinptr = getelementptr inbounds double, double* %sinarray, i64 %iv
+  %cosptr = getelementptr inbounds double, double* %cosarray, i64 %iv
+  call void @sincos(double %conv, double* %sinptr, double* %cosptr), !llvm.access.group !145
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !141
+
+for.end:
+  ret void
+}
+
+!141 = distinct !{!141, !142, !143, !144}
+!142 = !{!"llvm.loop.vectorize.width", i32 4}
+!143 = !{!"llvm.loop.vectorize.enable", i1 true}
+!144 = !{!"llvm.loop.parallel_accesses", !145}
+!145 = distinct !{}
+
+define void @sincos_f32(float* nocapture noalias %sinarray, float* nocapture noalias %cosarray) {
+; CHECK-LABEL: @sincos_f32(
+; CHECK-LABEL: vector.body
+; CHECK: call void @_ZGVbN4vvv_sincosf(<4 x float> [[TMP4:%.*]], <4 x float*> [[TMP5:%.*]], <4 x float*> [[TMP6:%.*]])
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %t = trunc i64 %iv to i32
+  %conv = sitofp i32 %t to float
+  %sinptr = getelementptr inbounds float, float* %sinarray, i64 %iv
+  %cosptr = getelementptr inbounds float, float* %cosarray, i64 %iv
+  call void @sincosf(float %conv, float* %sinptr, float* %cosptr), !llvm.access.group !155
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !151
+
+for.end:
+  ret void
+}
+
+!151 = distinct !{!151, !152, !153, !154}
+!152 = !{!"llvm.loop.vectorize.width", i32 4}
+!153 = !{!"llvm.loop.vectorize.enable", i1 true}
+!154 = !{!"llvm.loop.parallel_accesses", !155}
+!155 = distinct !{}
+
+define void @dependent_sincos_f64(double* nocapture noalias %sinarray, double* nocapture noalias %cosarray) {
+; CHECK-LABEL: @dependent_sincos_f64(
+; CHECK-NOT: @_ZGVdN4vvv_sincos
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 1, %entry ], [ %iv.next, %for.body ]
+  %iv.prev = sub nuw nsw i64 %iv, 1
+  %phaseptr = getelementptr inbounds double, double* %cosarray, i64 %iv.prev
+  %sinptr = getelementptr inbounds double, double* %sinarray, i64 %iv
+  %cosptr = getelementptr inbounds double, double* %cosarray, i64 %iv
+  %phase = load double, double* %phaseptr
+  call void @sincos(double %phase, double* %sinptr, double* %cosptr)
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !161
+
+for.end:
+  ret void
+}
+
+!161 = distinct !{!161, !162, !163}
+!162 = !{!"llvm.loop.vectorize.width", i32 4}
+!163 = !{!"llvm.loop.vectorize.enable", i1 true}
+
+define void @dependent_sincos_f32(float* nocapture noalias %sinarray, float* nocapture noalias %cosarray) {
+; CHECK-LABEL: @dependent_sincos_f32(
+; CHECK-NOT: @_ZGVbN4vvv_sincosf
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 1, %entry ], [ %iv.next, %for.body ]
+  %iv.prev = sub nuw nsw i64 %iv, 1
+  %phaseptr = getelementptr inbounds float, float* %sinarray, i64 %iv.prev
+  %sinptr = getelementptr inbounds float, float* %sinarray, i64 %iv
+  %cosptr = getelementptr inbounds float, float* %cosarray, i64 %iv
+  %phase = load float, float* %phaseptr
+  call void @sincosf(float %phase, float* %sinptr, float* %cosptr)
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !171
+
+for.end:
+  ret void
+}
+
+!171 = distinct !{!171, !172, !173}
+!172 = !{!"llvm.loop.vectorize.width", i32 4}
+!173 = !{!"llvm.loop.vectorize.enable", i1 true}
+
+; CHECK-LABEL: ; Function Attrs:
+
 ; functions are in fact "readnone" but clang only emits the weaker "writeonly" as other math functions may write errno.
 attributes #0 = { nounwind writeonly }
 
@@ -372,3 +486,8 @@
 declare float @llvm.exp.f32(float) #0
 declare float @logf(float) #0
 declare float @llvm.pow.f32(float, float) #0
+
+attributes #1 = { nounwind argmemonly }
+
+declare void @sincos(double, double*, double*) #1
+declare void @sincosf(float, float*, float*) #1