Index: llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp @@ -1062,6 +1062,10 @@ B.SetInsertPointPastAllocas(F); + DILocation *MergedDebugLoc = + DILocation::getMergedLocation(Sin->getDebugLoc(), Cos->getDebugLoc()); + B.SetCurrentDebugLocation(MergedDebugLoc); + AllocaInst *Alloc = B.CreateAlloca(Sin->getType(), nullptr, "__sincos_"); if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) { @@ -1070,6 +1074,7 @@ // if it's an argument or constant. B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator()); + B.SetCurrentDebugLocation(MergedDebugLoc); } Value *P = Alloc; @@ -1087,6 +1092,7 @@ CallInst *Call = CreateCallEx2(B, Fsincos, Arg, P); LoadInst *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc); + Reload->setDebugLoc(Cos->getDebugLoc()); LLVM_DEBUG(errs() << "AMDIC: fold_sincos (" << *Sin << ", " << *Cos << ") with " << *Call << '\n'); @@ -1117,7 +1123,6 @@ CallInst *CI = cast<CallInst>(FPOp); bool Changed = false; - Module *M = CI->getModule(); FuncInfo PartnerInfo(isSin ? 
AMDGPULibFunc::EI_COS : AMDGPULibFunc::EI_SIN, fInfo); const std::string PairName = PartnerInfo.mangle(); Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll +++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll @@ -1078,12 +1078,12 @@ ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5), !dbg [[DBG12:![0-9]+]] ; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr, !dbg [[DBG12]] ; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]), !dbg [[DBG12]] -; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4, !dbg [[DBG12]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata float [[TMP1]], metadata [[META9:![0-9]+]], metadata !DIExpression()), !dbg [[DBG13:![0-9]+]] -; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4, !dbg [[DBG14:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata float [[TMP2]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG12]] -; CHECK-NEXT: store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4, !dbg [[DBG15:![0-9]+]] -; CHECK-NEXT: ret void, !dbg [[DBG16:![0-9]+]] +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4, !dbg [[DBG13:![0-9]+]] +; CHECK-NEXT: call void @llvm.dbg.value(metadata float [[TMP1]], metadata [[META9:![0-9]+]], metadata !DIExpression()), !dbg [[DBG14:![0-9]+]] +; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4, !dbg [[DBG15:![0-9]+]] +; CHECK-NEXT: call void @llvm.dbg.value(metadata float [[TMP2]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG13]] +; CHECK-NEXT: store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4, !dbg [[DBG16:![0-9]+]] +; CHECK-NEXT: ret void, !dbg [[DBG17:![0-9]+]] ; entry: %call = tail call contract float 
@_Z3sinf(float %x), !dbg !19 @@ -1100,9 +1100,9 @@ ; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COS_TMP:%.*]] = alloca float, align 4, addrspace(5) -; CHECK-NEXT: [[SIN0:%.*]] = tail call nnan ninf nsz contract float @_Z3sinf(float [[X]]), !fpmath !17 +; CHECK-NEXT: [[SIN0:%.*]] = tail call nnan ninf nsz contract float @_Z3sinf(float [[X]]), !fpmath !18 ; CHECK-NEXT: store float [[SIN0]], ptr addrspace(1) [[SIN_OUT]], align 4 -; CHECK-NEXT: [[SIN1:%.*]] = call nnan contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[COS_TMP]]), !fpmath !18 +; CHECK-NEXT: [[SIN1:%.*]] = call nnan contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[COS_TMP]]), !fpmath !19 ; CHECK-NEXT: [[COS1:%.*]] = load float, ptr addrspace(5) [[COS_TMP]], align 4 ; CHECK-NEXT: store float [[COS1]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: ret float [[SIN1]] @@ -1122,10 +1122,10 @@ ; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COS_TMP:%.*]] = alloca float, align 4, addrspace(5) -; CHECK-NEXT: [[SIN0:%.*]] = tail call nsz contract float @_Z3sinf(float [[X]]), !fpmath !17 +; CHECK-NEXT: [[SIN0:%.*]] = tail call nsz contract float @_Z3sinf(float [[X]]), !fpmath !18 ; CHECK-NEXT: store float [[SIN0]], ptr addrspace(1) [[SIN_OUT]], align 4 ; CHECK-NEXT: [[COS_TMP_CAST:%.*]] = addrspacecast ptr addrspace(5) [[COS_TMP]] to ptr -; CHECK-NEXT: [[SIN1:%.*]] = call ninf nsz contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[COS_TMP_CAST]]), !fpmath !18 +; CHECK-NEXT: [[SIN1:%.*]] = call ninf nsz contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[COS_TMP_CAST]]), !fpmath !19 ; CHECK-NEXT: [[COS1:%.*]] = load float, ptr addrspace(5) [[COS_TMP]], 
align 4 ; CHECK-NEXT: store float [[COS1]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: ret float [[SIN1]] Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sqrt.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sqrt.ll @@ -0,0 +1,457 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s + +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" + +declare float @_Z4sqrtf(float) +declare <2 x float> @_Z4sqrtDv2_f(<2 x float>) +declare <3 x float> @_Z4sqrtDv3_f(<3 x float>) +declare <4 x float> @_Z4sqrtDv4_f(<4 x float>) +declare <8 x float> @_Z4sqrtDv8_f(<8 x float>) +declare <16 x float> @_Z4sqrtDv16_f(<16 x float>) + +declare double @_Z4sqrtd(double) +declare <2 x double> @_Z4sqrtDv2_d(<2 x double>) +declare <3 x double> @_Z4sqrtDv3_d(<3 x double>) +declare <4 x double> @_Z4sqrtDv4_d(<4 x double>) +declare <8 x double> @_Z4sqrtDv8_d(<8 x double>) +declare <16 x double> @_Z4sqrtDv16_d(<16 x double>) + +declare half @_Z4sqrtDh(half) +declare <2 x half> @_Z4sqrtDv2_Dh(<2 x half>) +declare <3 x half> @_Z4sqrtDv3_Dh(<3 x half>) +declare <4 x half> @_Z4sqrtDv4_Dh(<4 x half>) +declare <8 x half> @_Z4sqrtDv8_Dh(<8 x half>) +declare <16 x half> @_Z4sqrtDv16_Dh(<16 x half>) + +define float @test_sqrt_f32(float %arg) { +; CHECK-LABEL: define float @test_sqrt_f32 +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[SQRT]] +; + %sqrt = tail call float @_Z4sqrtf(float %arg), !fpmath !0 + ret float %sqrt +} + +define <2 x float> @test_sqrt_v2f32(<2 x float> %arg) { +; CHECK-LABEL: 
define <2 x float> @test_sqrt_v2f32 +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <2 x float> [[SQRT]] +; + %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg), !fpmath !0 + ret <2 x float> %sqrt +} + +define <3 x float> @test_sqrt_v3f32(<3 x float> %arg) { +; CHECK-LABEL: define <3 x float> @test_sqrt_v3f32 +; CHECK-SAME: (<3 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <3 x float> @_Z4sqrtDv3_f(<3 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <3 x float> [[SQRT]] +; + %sqrt = tail call <3 x float> @_Z4sqrtDv3_f(<3 x float> %arg), !fpmath !0 + ret <3 x float> %sqrt +} + +define <4 x float> @test_sqrt_v4f32(<4 x float> %arg) { +; CHECK-LABEL: define <4 x float> @test_sqrt_v4f32 +; CHECK-SAME: (<4 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <4 x float> @_Z4sqrtDv4_f(<4 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <4 x float> [[SQRT]] +; + %sqrt = tail call <4 x float> @_Z4sqrtDv4_f(<4 x float> %arg), !fpmath !0 + ret <4 x float> %sqrt +} + +define <8 x float> @test_sqrt_v8f32(<8 x float> %arg) { +; CHECK-LABEL: define <8 x float> @test_sqrt_v8f32 +; CHECK-SAME: (<8 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <8 x float> @_Z4sqrtDv8_f(<8 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <8 x float> [[SQRT]] +; + %sqrt = tail call <8 x float> @_Z4sqrtDv8_f(<8 x float> %arg), !fpmath !0 + ret <8 x float> %sqrt +} + +define <16 x float> @test_sqrt_v16f32(<16 x float> %arg) { +; CHECK-LABEL: define <16 x float> @test_sqrt_v16f32 +; CHECK-SAME: (<16 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <16 x float> @_Z4sqrtDv16_f(<16 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <16 x float> [[SQRT]] +; + %sqrt = tail call <16 x float> @_Z4sqrtDv16_f(<16 x float> %arg), !fpmath !0 + ret <16 x float> %sqrt +} + +define float @test_sqrt_cr_f32(float %arg) { +; CHECK-LABEL: 
define float @test_sqrt_cr_f32 +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) +; CHECK-NEXT: ret float [[SQRT]] +; + %sqrt = tail call float @_Z4sqrtf(float %arg) + ret float %sqrt +} + +define <2 x float> @test_sqrt_cr_v2f32(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_sqrt_cr_v2f32 +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) +; CHECK-NEXT: ret <2 x float> [[SQRT]] +; + %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) + ret <2 x float> %sqrt +} + +define <3 x float> @test_sqrt_cr_v3f32(<3 x float> %arg) { +; CHECK-LABEL: define <3 x float> @test_sqrt_cr_v3f32 +; CHECK-SAME: (<3 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <3 x float> @_Z4sqrtDv3_f(<3 x float> [[ARG]]) +; CHECK-NEXT: ret <3 x float> [[SQRT]] +; + %sqrt = tail call <3 x float> @_Z4sqrtDv3_f(<3 x float> %arg) + ret <3 x float> %sqrt +} + +define <4 x float> @test_sqrt_cr_v4f32(<4 x float> %arg) { +; CHECK-LABEL: define <4 x float> @test_sqrt_cr_v4f32 +; CHECK-SAME: (<4 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <4 x float> @_Z4sqrtDv4_f(<4 x float> [[ARG]]) +; CHECK-NEXT: ret <4 x float> [[SQRT]] +; + %sqrt = tail call <4 x float> @_Z4sqrtDv4_f(<4 x float> %arg) + ret <4 x float> %sqrt +} + +define <8 x float> @test_sqrt_cr_v8f32(<8 x float> %arg) { +; CHECK-LABEL: define <8 x float> @test_sqrt_cr_v8f32 +; CHECK-SAME: (<8 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <8 x float> @_Z4sqrtDv8_f(<8 x float> [[ARG]]) +; CHECK-NEXT: ret <8 x float> [[SQRT]] +; + %sqrt = tail call <8 x float> @_Z4sqrtDv8_f(<8 x float> %arg) + ret <8 x float> %sqrt +} + +define <16 x float> @test_sqrt_cr_v16f32(<16 x float> %arg) { +; CHECK-LABEL: define <16 x float> @test_sqrt_cr_v16f32 +; CHECK-SAME: (<16 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <16 x float> 
@_Z4sqrtDv16_f(<16 x float> [[ARG]]) +; CHECK-NEXT: ret <16 x float> [[SQRT]] +; + %sqrt = tail call <16 x float> @_Z4sqrtDv16_f(<16 x float> %arg) + ret <16 x float> %sqrt +} + +define double @test_sqrt_f64(double %arg) { +; CHECK-LABEL: define double @test_sqrt_f64 +; CHECK-SAME: (double [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call double @_Z4sqrtd(double [[ARG]]) +; CHECK-NEXT: ret double [[SQRT]] +; + %sqrt = tail call double @_Z4sqrtd(double %arg) + ret double %sqrt +} + +define <2 x double> @test_sqrt_v2f64(<2 x double> %arg) { +; CHECK-LABEL: define <2 x double> @test_sqrt_v2f64 +; CHECK-SAME: (<2 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <2 x double> @_Z4sqrtDv2_d(<2 x double> [[ARG]]) +; CHECK-NEXT: ret <2 x double> [[SQRT]] +; + %sqrt = tail call <2 x double> @_Z4sqrtDv2_d(<2 x double> %arg) + ret <2 x double> %sqrt +} + +define <3 x double> @test_sqrt_v3f64(<3 x double> %arg) { +; CHECK-LABEL: define <3 x double> @test_sqrt_v3f64 +; CHECK-SAME: (<3 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <3 x double> @_Z4sqrtDv3_d(<3 x double> [[ARG]]) +; CHECK-NEXT: ret <3 x double> [[SQRT]] +; + %sqrt = tail call <3 x double> @_Z4sqrtDv3_d(<3 x double> %arg) + ret <3 x double> %sqrt +} + +define <4 x double> @test_sqrt_v4f64(<4 x double> %arg) { +; CHECK-LABEL: define <4 x double> @test_sqrt_v4f64 +; CHECK-SAME: (<4 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <4 x double> @_Z4sqrtDv4_d(<4 x double> [[ARG]]) +; CHECK-NEXT: ret <4 x double> [[SQRT]] +; + %sqrt = tail call <4 x double> @_Z4sqrtDv4_d(<4 x double> %arg) + ret <4 x double> %sqrt +} + +define <8 x double> @test_sqrt_v8f64(<8 x double> %arg) { +; CHECK-LABEL: define <8 x double> @test_sqrt_v8f64 +; CHECK-SAME: (<8 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <8 x double> @_Z4sqrtDv8_d(<8 x double> [[ARG]]) +; CHECK-NEXT: ret <8 x double> [[SQRT]] +; + %sqrt = tail call <8 x double> @_Z4sqrtDv8_d(<8 x double> 
%arg) + ret <8 x double> %sqrt +} + +define <16 x double> @test_sqrt_v16f64(<16 x double> %arg) { +; CHECK-LABEL: define <16 x double> @test_sqrt_v16f64 +; CHECK-SAME: (<16 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <16 x double> @_Z4sqrtDv16_d(<16 x double> [[ARG]]) +; CHECK-NEXT: ret <16 x double> [[SQRT]] +; + %sqrt = tail call <16 x double> @_Z4sqrtDv16_d(<16 x double> %arg) + ret <16 x double> %sqrt +} + +define half @test_sqrt_f16(half %arg) { +; CHECK-LABEL: define half @test_sqrt_f16 +; CHECK-SAME: (half [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call half @_Z4sqrtDh(half [[ARG]]) +; CHECK-NEXT: ret half [[SQRT]] +; + %sqrt = tail call half @_Z4sqrtDh(half %arg) + ret half %sqrt +} + +define <2 x half> @test_sqrt_v2f16(<2 x half> %arg) { +; CHECK-LABEL: define <2 x half> @test_sqrt_v2f16 +; CHECK-SAME: (<2 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <2 x half> @_Z4sqrtDv2_Dh(<2 x half> [[ARG]]) +; CHECK-NEXT: ret <2 x half> [[SQRT]] +; + %sqrt = tail call <2 x half> @_Z4sqrtDv2_Dh(<2 x half> %arg) + ret <2 x half> %sqrt +} + +define <3 x half> @test_sqrt_v3f16(<3 x half> %arg) { +; CHECK-LABEL: define <3 x half> @test_sqrt_v3f16 +; CHECK-SAME: (<3 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <3 x half> @_Z4sqrtDv3_Dh(<3 x half> [[ARG]]) +; CHECK-NEXT: ret <3 x half> [[SQRT]] +; + %sqrt = tail call <3 x half> @_Z4sqrtDv3_Dh(<3 x half> %arg) + ret <3 x half> %sqrt +} + +define <4 x half> @test_sqrt_v4f16(<4 x half> %arg) { +; CHECK-LABEL: define <4 x half> @test_sqrt_v4f16 +; CHECK-SAME: (<4 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <4 x half> @_Z4sqrtDv4_Dh(<4 x half> [[ARG]]) +; CHECK-NEXT: ret <4 x half> [[SQRT]] +; + %sqrt = tail call <4 x half> @_Z4sqrtDv4_Dh(<4 x half> %arg) + ret <4 x half> %sqrt +} + +define <8 x half> @test_sqrt_v8f16(<8 x half> %arg) { +; CHECK-LABEL: define <8 x half> @test_sqrt_v8f16 +; CHECK-SAME: (<8 x half> [[ARG:%.*]]) { +; CHECK-NEXT: 
[[SQRT:%.*]] = tail call <8 x half> @_Z4sqrtDv8_Dh(<8 x half> [[ARG]]) +; CHECK-NEXT: ret <8 x half> [[SQRT]] +; + %sqrt = tail call <8 x half> @_Z4sqrtDv8_Dh(<8 x half> %arg) + ret <8 x half> %sqrt +} + +define <16 x half> @test_sqrt_v16f16(<16 x half> %arg) { +; CHECK-LABEL: define <16 x half> @test_sqrt_v16f16 +; CHECK-SAME: (<16 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <16 x half> @_Z4sqrtDv16_Dh(<16 x half> [[ARG]]) +; CHECK-NEXT: ret <16 x half> [[SQRT]] +; + %sqrt = tail call <16 x half> @_Z4sqrtDv16_Dh(<16 x half> %arg) + ret <16 x half> %sqrt +} + +define float @test_sqrt_f32_nobuiltin_callsite(float %arg) { +; CHECK-LABEL: define float @test_sqrt_f32_nobuiltin_callsite +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR2:[0-9]+]], !fpmath !0 +; CHECK-NEXT: ret float [[SQRT]] +; + %sqrt = tail call float @_Z4sqrtf(float %arg) #0, !fpmath !0 + ret float %sqrt +} + +define <2 x float> @test_sqrt_v2f32_nobuiltin_callsite(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_sqrt_v2f32_nobuiltin_callsite +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR2]], !fpmath !0 +; CHECK-NEXT: ret <2 x float> [[SQRT]] +; + %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) #0, !fpmath !0 + ret <2 x float> %sqrt +} + +define float @test_sqrt_cr_f32_nobuiltin_callsite(float %arg) { +; CHECK-LABEL: define float @test_sqrt_cr_f32_nobuiltin_callsite +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR2]] +; CHECK-NEXT: ret float [[SQRT]] +; + %sqrt = tail call float @_Z4sqrtf(float %arg) #0 + ret float %sqrt +} + +define <2 x float> @test_sqrt_cr_v2f32_nobuiltin_callsite(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_sqrt_cr_v2f32_nobuiltin_callsite +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: 
[[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR2]] +; CHECK-NEXT: ret <2 x float> [[SQRT]] +; + %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) #0 + ret <2 x float> %sqrt +} + +; "no-builtins" should be ignored +define float @test_sqrt_f32_nobuiltins(float %arg) #1 { +; CHECK-LABEL: define float @test_sqrt_f32_nobuiltins +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR2]], !fpmath !0 +; CHECK-NEXT: ret float [[SQRT]] +; + %sqrt = tail call float @_Z4sqrtf(float %arg) #0, !fpmath !0 + ret float %sqrt +} + +define <2 x float> @test_sqrt_v2f32_nobuiltins(<2 x float> %arg) #1 { +; CHECK-LABEL: define <2 x float> @test_sqrt_v2f32_nobuiltins +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR2]], !fpmath !0 +; CHECK-NEXT: ret <2 x float> [[SQRT]] +; + %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) #0, !fpmath !0 + ret <2 x float> %sqrt +} + +define float @test_sqrt_cr_f32_nobuiltins(float %arg) #1 { +; CHECK-LABEL: define float @test_sqrt_cr_f32_nobuiltins +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR2]] +; CHECK-NEXT: ret float [[SQRT]] +; + %sqrt = tail call float @_Z4sqrtf(float %arg) #0 + ret float %sqrt +} + +define <2 x float> @test_sqrt_cr_v2f32_nobuiltins(<2 x float> %arg) #1 { +; CHECK-LABEL: define <2 x float> @test_sqrt_cr_v2f32_nobuiltins +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR2]] +; CHECK-NEXT: ret <2 x float> [[SQRT]] +; + %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) #0 + ret <2 x float> %sqrt +} + +define float @test_sqrt_f32_preserve_flags(float %arg) { +; CHECK-LABEL: define float @test_sqrt_f32_preserve_flags +; 
CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call nnan ninf float @_Z4sqrtf(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[SQRT]] +; + %sqrt = tail call nnan ninf float @_Z4sqrtf(float %arg), !fpmath !0 + ret float %sqrt +} + +define <2 x float> @test_sqrt_v2f32_preserve_flags(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_sqrt_v2f32_preserve_flags +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call nnan nsz contract <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <2 x float> [[SQRT]] +; + %sqrt = tail call contract nsz nnan <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg), !fpmath !0 + ret <2 x float> %sqrt +} + +define float @test_sqrt_f32_preserve_flags_md(float %arg) { +; CHECK-LABEL: define float @test_sqrt_f32_preserve_flags_md +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call nnan ninf float @_Z4sqrtf(float [[ARG]]), !fpmath !0, !foo !1 +; CHECK-NEXT: ret float [[SQRT]] +; + %sqrt = tail call nnan ninf float @_Z4sqrtf(float %arg), !fpmath !0, !foo !1 + ret float %sqrt +} + +define <2 x float> @test_sqrt_v2f32_preserve_flags_md(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_sqrt_v2f32_preserve_flags_md +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call nnan nsz contract <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]), !fpmath !0, !foo !1 +; CHECK-NEXT: ret <2 x float> [[SQRT]] +; + %sqrt = tail call contract nsz nnan <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg), !fpmath !0, !foo !1 + ret <2 x float> %sqrt +} + +define float @test_sqrt_cr_f32_preserve_flags(float %arg) { +; CHECK-LABEL: define float @test_sqrt_cr_f32_preserve_flags +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call ninf contract float @_Z4sqrtf(float [[ARG]]) +; CHECK-NEXT: ret float [[SQRT]] +; + %sqrt = tail call ninf contract float @_Z4sqrtf(float %arg) + ret float %sqrt +} + +define <2 x float> 
@test_sqrt_cr_v2f32_preserve_flags(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_sqrt_cr_v2f32_preserve_flags +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call nnan nsz <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) +; CHECK-NEXT: ret <2 x float> [[SQRT]] +; + %sqrt = tail call nnan nsz <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) + ret <2 x float> %sqrt +} + +; Test the libm name, not a recognized opencl builtin. +declare float @sqrtf(float) #2 +declare double @sqrt(double) #2 + +define float @test_libm_sqrt_f32(float %arg) { +; CHECK-LABEL: define float @test_libm_sqrt_f32 +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call float @sqrtf(float [[ARG]]) +; CHECK-NEXT: ret float [[SQRT]] +; + %sqrt = tail call float @sqrtf(float %arg) + ret float %sqrt +} + +define float @test_libm_sqrt_f32_fpmath(float %arg) { +; CHECK-LABEL: define float @test_libm_sqrt_f32_fpmath +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call float @sqrtf(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[SQRT]] +; + %sqrt = tail call float @sqrtf(float %arg), !fpmath !0 + ret float %sqrt +} + +define double @test_libm_sqrt_f64(double %arg) { +; CHECK-LABEL: define double @test_libm_sqrt_f64 +; CHECK-SAME: (double [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call double @sqrt(double [[ARG]]) +; CHECK-NEXT: ret double [[SQRT]] +; + %sqrt = tail call double @sqrt(double %arg) + ret double %sqrt +} + +define double @test_libm_sqrt_f64_fpmath(double %arg) { +; CHECK-LABEL: define double @test_libm_sqrt_f64_fpmath +; CHECK-SAME: (double [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call double @sqrt(double [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret double [[SQRT]] +; + %sqrt = tail call double @sqrt(double %arg), !fpmath !0 + ret double %sqrt +} + +attributes #0 = { nobuiltin } +attributes #1 = { "no-builtins" } +attributes #2 = { nounwind memory(none) } + +!0 = !{float 3.000000e+00} +!1 = 
!{i32 1234}