diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def
--- a/llvm/include/llvm/Analysis/VecFuncs.def
+++ b/llvm/include/llvm/Analysis/VecFuncs.def
@@ -145,6 +145,12 @@
 TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVbN4v_cosf", FIXED(4))
 TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVdN8v_cosf", FIXED(8))
 
+TLI_DEFINE_VECFUNC("sincos", "_ZGVbN2vvv_sincos", FIXED(2))
+TLI_DEFINE_VECFUNC("sincos", "_ZGVdN4vvv_sincos", FIXED(4))
+
+TLI_DEFINE_VECFUNC("sincosf", "_ZGVbN4vvv_sincosf", FIXED(4))
+TLI_DEFINE_VECFUNC("sincosf", "_ZGVdN8vvv_sincosf", FIXED(8))
+
 TLI_DEFINE_VECFUNC("pow", "_ZGVbN2vv_pow", FIXED(2))
 TLI_DEFINE_VECFUNC("pow", "_ZGVdN4vv_pow", FIXED(4))
 
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2188,8 +2188,8 @@
     // Scan the BB and collect legal loads and stores. Also detect any
     // convergent instructions.
     for (Instruction &I : *BB) {
-      if (auto *Call = dyn_cast<CallBase>(&I)) {
-        if (Call->isConvergent())
+      if (auto *CB = dyn_cast<CallBase>(&I)) {
+        if (CB->isConvergent())
           HasConvergentOp = true;
       }
 
@@ -2204,23 +2204,36 @@
       if (HasComplexMemInst)
         continue;
 
-      // If this is a load, save it. If this instruction can read from memory
-      // but is not a load, then we quit. Notice that we don't handle function
-      // calls that read or write.
-      if (I.mayReadFromMemory()) {
+      if (auto *Call = dyn_cast<CallInst>(&I)) {
         // Many math library functions read the rounding mode. We will only
         // vectorize a loop if it contains known function calls that don't set
         // the flag. Therefore, it is safe to ignore this read from memory.
-        auto *Call = dyn_cast<CallInst>(&I);
-        if (Call && getVectorIntrinsicIDForCall(Call, TLI))
+        if (getVectorIntrinsicIDForCall(Call, TLI))
           continue;
 
         // If the function has an explicit vectorized counterpart, we can safely
-        // assume that it can be vectorized.
-        if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
-            !VFDatabase::getMappings(*Call).empty())
+        // assume that it can be vectorized unless it has pointer arguments.
+        if (!Call->isNoBuiltin() && Call->getCalledFunction() &&
+            !VFDatabase::getMappings(*Call).empty()) {
+          // Don't even check if the user asked for vectorization.
+          if (IsAnnotatedParallel)
+            continue;
+          // Scan arguments for pointers, which currently prevent vectorization.
+          for (Value *Arg : Call->args()) {
+            if (Arg->getType()->isPointerTy()) {
+              HasComplexMemInst = true;
+              break;
+            }
+          }
+          // Call handled; the flag is set iff a pointer argument was seen.
           continue;
+        }
+      }
 
+      // If this is a load, save it. If this instruction can read from memory
+      // but is not a load, then we quit. Notice that we don't handle function
+      // calls that read or write.
+      if (I.mayReadFromMemory()) {
         auto *Ld = dyn_cast<LoadInst>(&I);
         if (!Ld) {
           recordAnalysis("CantVectorizeInstruction", Ld)
diff --git a/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll b/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll
--- a/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll
@@ -356,7 +356,68 @@
 !132 = !{!"llvm.loop.vectorize.width", i32 8}
 !133 = !{!"llvm.loop.vectorize.enable", i1 true}
 
-attributes #0 = { nounwind readnone }
+define void @sincos_f64(double* nocapture noalias %sinarray, double* nocapture noalias %cosarray) {
+; CHECK-LABEL: @sincos_f64(
+; CHECK-LABEL: vector.body
+; CHECK: call void @_ZGVbN2vvv_sincos(<2 x double> [[TMP4:%.*]], <2 x double*> [[TMP5:%.*]], <2 x double*> [[TMP6:%.*]])
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %t = trunc i64 %iv to i32
+  %conv = sitofp i32 %t to double
+  %sinptr = getelementptr inbounds double, double* %sinarray, i64 %iv
+  %cosptr = getelementptr inbounds double, double* %cosarray, i64 %iv
+  call void @sincos(double %conv, double* %sinptr, double* %cosptr), !llvm.access.group !145
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !141
+
+for.end:
+  ret void
+}
+
+!141 = distinct !{!141, !142, !143, !144}
+!142 = !{!"llvm.loop.vectorize.width", i32 2}
+!143 = !{!"llvm.loop.vectorize.enable", i1 true}
+!144 = !{!"llvm.loop.parallel_accesses", !145}
+!145 = distinct !{}
+
+define void @sincos_f32(float* nocapture noalias %sinarray, float* nocapture noalias %cosarray) {
+; CHECK-LABEL: @sincos_f32(
+; CHECK-LABEL: vector.body
+; CHECK: call void @_ZGVdN8vvv_sincosf(<8 x float> [[TMP4:%.*]], <8 x float*> [[TMP5:%.*]], <8 x float*> [[TMP6:%.*]])
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %t = trunc i64 %iv to i32
+  %conv = sitofp i32 %t to float
+  %sinptr = getelementptr inbounds float, float* %sinarray, i64 %iv
+  %cosptr = getelementptr inbounds float, float* %cosarray, i64 %iv
+  call void @sincosf(float %conv, float* %sinptr, float* %cosptr), !llvm.access.group !155
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !151
+
+for.end:
+  ret void
+}
+
+!151 = distinct !{!151, !152, !153, !154}
+!152 = !{!"llvm.loop.vectorize.width", i32 8}
+!153 = !{!"llvm.loop.vectorize.enable", i1 true}
+!154 = !{!"llvm.loop.parallel_accesses", !155}
+!155 = distinct !{}
+
+; CHECK-LABEL: ; Function Attrs:
+
+; functions are in fact "readnone" but clang only emits the weaker "writeonly" as other math functions may write errno.
+attributes #0 = { nounwind writeonly }
 
 declare double @sin(double) #0
 declare float @sinf(float) #0
@@ -371,3 +432,8 @@
 declare float @llvm.exp.f32(float) #0
 declare float @logf(float) #0
 declare float @llvm.pow.f32(float, float) #0
+
+attributes #1 = { nounwind argmemonly }
+
+declare void @sincos(double, double*, double*) #1
+declare void @sincosf(float, float*, float*) #1
diff --git a/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls.ll b/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls.ll
--- a/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls.ll
@@ -356,7 +356,122 @@
 !132 = !{!"llvm.loop.vectorize.width", i32 4}
 !133 = !{!"llvm.loop.vectorize.enable", i1 true}
 
-attributes #0 = { nounwind readnone }
+define void @sincos_f64(double* nocapture noalias %sinarray, double* nocapture noalias %cosarray) {
+; CHECK-LABEL: @sincos_f64(
+; CHECK-LABEL: vector.body
+; CHECK: call void @_ZGVdN4vvv_sincos(<4 x double> [[TMP4:%.*]], <4 x double*> [[TMP5:%.*]], <4 x double*> [[TMP6:%.*]])
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %t = trunc i64 %iv to i32
+  %conv = sitofp i32 %t to double
+  %sinptr = getelementptr inbounds double, double* %sinarray, i64 %iv
+  %cosptr = getelementptr inbounds double, double* %cosarray, i64 %iv
+  call void @sincos(double %conv, double* %sinptr, double* %cosptr), !llvm.access.group !145
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !141
+
+for.end:
+  ret void
+}
+
+!141 = distinct !{!141, !142, !143, !144}
+!142 = !{!"llvm.loop.vectorize.width", i32 4}
+!143 = !{!"llvm.loop.vectorize.enable", i1 true}
+!144 = !{!"llvm.loop.parallel_accesses", !145}
+!145 = distinct !{}
+
+define void @sincos_f32(float* nocapture noalias %sinarray, float* nocapture noalias %cosarray) {
+; CHECK-LABEL: @sincos_f32(
+; CHECK-LABEL: vector.body
+; CHECK: call void @_ZGVbN4vvv_sincosf(<4 x float> [[TMP4:%.*]], <4 x float*> [[TMP5:%.*]], <4 x float*> [[TMP6:%.*]])
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %t = trunc i64 %iv to i32
+  %conv = sitofp i32 %t to float
+  %sinptr = getelementptr inbounds float, float* %sinarray, i64 %iv
+  %cosptr = getelementptr inbounds float, float* %cosarray, i64 %iv
+  call void @sincosf(float %conv, float* %sinptr, float* %cosptr), !llvm.access.group !155
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !151
+
+for.end:
+  ret void
+}
+
+!151 = distinct !{!151, !152, !153, !154}
+!152 = !{!"llvm.loop.vectorize.width", i32 4}
+!153 = !{!"llvm.loop.vectorize.enable", i1 true}
+!154 = !{!"llvm.loop.parallel_accesses", !155}
+!155 = distinct !{}
+
+define void @dependent_sincos_f64(double* nocapture noalias %sinarray, double* nocapture noalias %cosarray) {
+; CHECK-LABEL: @dependent_sincos_f64(
+; CHECK-NOT: @_ZGVdN4vvv_sincos
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 1, %entry ], [ %iv.next, %for.body ]
+  %iv.prev = sub nuw nsw i64 %iv, 1
+  %phaseptr = getelementptr inbounds double, double* %cosarray, i64 %iv.prev
+  %sinptr = getelementptr inbounds double, double* %sinarray, i64 %iv
+  %cosptr = getelementptr inbounds double, double* %cosarray, i64 %iv
+  %phase = load double, double* %phaseptr
+  call void @sincos(double %phase, double* %sinptr, double* %cosptr)
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !161
+
+for.end:
+  ret void
+}
+
+!161 = distinct !{!161, !162, !163}
+!162 = !{!"llvm.loop.vectorize.width", i32 4}
+!163 = !{!"llvm.loop.vectorize.enable", i1 true}
+
+define void @dependent_sincos_f32(float* nocapture noalias %sinarray, float* nocapture noalias %cosarray) {
+; CHECK-LABEL: @dependent_sincos_f32(
+; CHECK-NOT: @_ZGVbN4vvv_sincosf
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 1, %entry ], [ %iv.next, %for.body ]
+  %iv.prev = sub nuw nsw i64 %iv, 1
+  %phaseptr = getelementptr inbounds float, float* %sinarray, i64 %iv.prev
+  %sinptr = getelementptr inbounds float, float* %sinarray, i64 %iv
+  %cosptr = getelementptr inbounds float, float* %cosarray, i64 %iv
+  %phase = load float, float* %phaseptr
+  call void @sincosf(float %phase, float* %sinptr, float* %cosptr)
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !171
+
+for.end:
+  ret void
+}
+
+!171 = distinct !{!171, !172, !173}
+!172 = !{!"llvm.loop.vectorize.width", i32 4}
+!173 = !{!"llvm.loop.vectorize.enable", i1 true}
+
+; CHECK-LABEL: ; Function Attrs:
+
+; functions are in fact "readnone" but clang only emits the weaker "writeonly" as other math functions may write errno.
+attributes #0 = { nounwind writeonly }
 
 declare double @sin(double) #0
 declare float @sinf(float) #0
@@ -371,3 +486,8 @@
 declare float @llvm.exp.f32(float) #0
 declare float @logf(float) #0
 declare float @llvm.pow.f32(float, float) #0
+
+attributes #1 = { nounwind argmemonly }
+
+declare void @sincos(double, double*, double*) #1
+declare void @sincosf(float, float*, float*) #1