Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-ceil.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-ceil.ll @@ -0,0 +1,327 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s + +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" + +declare float @_Z4ceilf(float) +declare <2 x float> @_Z4ceilDv2_f(<2 x float>) +declare <3 x float> @_Z4ceilDv3_f(<3 x float>) +declare <4 x float> @_Z4ceilDv4_f(<4 x float>) +declare <8 x float> @_Z4ceilDv8_f(<8 x float>) +declare <16 x float> @_Z4ceilDv16_f(<16 x float>) + +declare double @_Z4ceild(double) +declare <2 x double> @_Z4ceilDv2_d(<2 x double>) +declare <3 x double> @_Z4ceilDv3_d(<3 x double>) +declare <4 x double> @_Z4ceilDv4_d(<4 x double>) +declare <8 x double> @_Z4ceilDv8_d(<8 x double>) +declare <16 x double> @_Z4ceilDv16_d(<16 x double>) + +declare half @_Z4ceilDh(half) +declare <2 x half> @_Z4ceilDv2_Dh(<2 x half>) +declare <3 x half> @_Z4ceilDv3_Dh(<3 x half>) +declare <4 x half> @_Z4ceilDv4_Dh(<4 x half>) +declare <8 x half> @_Z4ceilDv8_Dh(<8 x half>) +declare <16 x half> @_Z4ceilDv16_Dh(<16 x half>) + +define float @test_ceil_f32(float %arg) { +; CHECK-LABEL: define float @test_ceil_f32 +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[CEIL:%.*]] = tail call float @_Z4ceilf(float [[ARG]]) +; CHECK-NEXT: ret float [[CEIL]] +; + %ceil = tail call float @_Z4ceilf(float %arg) + ret float %ceil +} + +define <2 x float> @test_ceil_v2f32(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_ceil_v2f32 +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[CEIL:%.*]] = tail call <2 x float> 
@_Z4ceilDv2_f(<2 x float> [[ARG]]) +; CHECK-NEXT: ret <2 x float> [[CEIL]] +; + %ceil = tail call <2 x float> @_Z4ceilDv2_f(<2 x float> %arg) + ret <2 x float> %ceil +} + +define <3 x float> @test_ceil_v3f32(<3 x float> %arg) { +; CHECK-LABEL: define <3 x float> @test_ceil_v3f32 +; CHECK-SAME: (<3 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[CEIL:%.*]] = tail call <3 x float> @_Z4ceilDv3_f(<3 x float> [[ARG]]) +; CHECK-NEXT: ret <3 x float> [[CEIL]] +; + %ceil = tail call <3 x float> @_Z4ceilDv3_f(<3 x float> %arg) + ret <3 x float> %ceil +} + +define <4 x float> @test_ceil_v4f32(<4 x float> %arg) { +; CHECK-LABEL: define <4 x float> @test_ceil_v4f32 +; CHECK-SAME: (<4 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[CEIL:%.*]] = tail call <4 x float> @_Z4ceilDv4_f(<4 x float> [[ARG]]) +; CHECK-NEXT: ret <4 x float> [[CEIL]] +; + %ceil = tail call <4 x float> @_Z4ceilDv4_f(<4 x float> %arg) + ret <4 x float> %ceil +} + +define <8 x float> @test_ceil_v8f32(<8 x float> %arg) { +; CHECK-LABEL: define <8 x float> @test_ceil_v8f32 +; CHECK-SAME: (<8 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[CEIL:%.*]] = tail call <8 x float> @_Z4ceilDv8_f(<8 x float> [[ARG]]) +; CHECK-NEXT: ret <8 x float> [[CEIL]] +; + %ceil = tail call <8 x float> @_Z4ceilDv8_f(<8 x float> %arg) + ret <8 x float> %ceil +} + +define <16 x float> @test_ceil_v16f32(<16 x float> %arg) { +; CHECK-LABEL: define <16 x float> @test_ceil_v16f32 +; CHECK-SAME: (<16 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[CEIL:%.*]] = tail call <16 x float> @_Z4ceilDv16_f(<16 x float> [[ARG]]) +; CHECK-NEXT: ret <16 x float> [[CEIL]] +; + %ceil = tail call <16 x float> @_Z4ceilDv16_f(<16 x float> %arg) + ret <16 x float> %ceil +} + +define double @test_ceil_f64(double %arg) { +; CHECK-LABEL: define double @test_ceil_f64 +; CHECK-SAME: (double [[ARG:%.*]]) { +; CHECK-NEXT: [[CEIL:%.*]] = tail call double @_Z4ceild(double [[ARG]]) +; CHECK-NEXT: ret double [[CEIL]] +; + %ceil = tail call double @_Z4ceild(double %arg) + ret double %ceil +} + 
+define <2 x double> @test_ceil_v2f64(<2 x double> %arg) { +; CHECK-LABEL: define <2 x double> @test_ceil_v2f64 +; CHECK-SAME: (<2 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[CEIL:%.*]] = tail call <2 x double> @_Z4ceilDv2_d(<2 x double> [[ARG]]) +; CHECK-NEXT: ret <2 x double> [[CEIL]] +; + %ceil = tail call <2 x double> @_Z4ceilDv2_d(<2 x double> %arg) + ret <2 x double> %ceil +} + +define <3 x double> @test_ceil_v3f64(<3 x double> %arg) { +; CHECK-LABEL: define <3 x double> @test_ceil_v3f64 +; CHECK-SAME: (<3 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[CEIL:%.*]] = tail call <3 x double> @_Z4ceilDv3_d(<3 x double> [[ARG]]) +; CHECK-NEXT: ret <3 x double> [[CEIL]] +; + %ceil = tail call <3 x double> @_Z4ceilDv3_d(<3 x double> %arg) + ret <3 x double> %ceil +} + +define <4 x double> @test_ceil_v4f64(<4 x double> %arg) { +; CHECK-LABEL: define <4 x double> @test_ceil_v4f64 +; CHECK-SAME: (<4 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[CEIL:%.*]] = tail call <4 x double> @_Z4ceilDv4_d(<4 x double> [[ARG]]) +; CHECK-NEXT: ret <4 x double> [[CEIL]] +; + %ceil = tail call <4 x double> @_Z4ceilDv4_d(<4 x double> %arg) + ret <4 x double> %ceil +} + +define <8 x double> @test_ceil_v8f64(<8 x double> %arg) { +; CHECK-LABEL: define <8 x double> @test_ceil_v8f64 +; CHECK-SAME: (<8 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[CEIL:%.*]] = tail call <8 x double> @_Z4ceilDv8_d(<8 x double> [[ARG]]) +; CHECK-NEXT: ret <8 x double> [[CEIL]] +; + %ceil = tail call <8 x double> @_Z4ceilDv8_d(<8 x double> %arg) + ret <8 x double> %ceil +} + +define <16 x double> @test_ceil_v16f64(<16 x double> %arg) { +; CHECK-LABEL: define <16 x double> @test_ceil_v16f64 +; CHECK-SAME: (<16 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[CEIL:%.*]] = tail call <16 x double> @_Z4ceilDv16_d(<16 x double> [[ARG]]) +; CHECK-NEXT: ret <16 x double> [[CEIL]] +; + %ceil = tail call <16 x double> @_Z4ceilDv16_d(<16 x double> %arg) + ret <16 x double> %ceil +} + +define half @test_ceil_f16(half %arg) { +; CHECK-LABEL: 
define half @test_ceil_f16 +; CHECK-SAME: (half [[ARG:%.*]]) { +; CHECK-NEXT: [[CEIL:%.*]] = tail call half @_Z4ceilDh(half [[ARG]]) +; CHECK-NEXT: ret half [[CEIL]] +; + %ceil = tail call half @_Z4ceilDh(half %arg) + ret half %ceil +} + +define <2 x half> @test_ceil_v2f16(<2 x half> %arg) { +; CHECK-LABEL: define <2 x half> @test_ceil_v2f16 +; CHECK-SAME: (<2 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[CEIL:%.*]] = tail call <2 x half> @_Z4ceilDv2_Dh(<2 x half> [[ARG]]) +; CHECK-NEXT: ret <2 x half> [[CEIL]] +; + %ceil = tail call <2 x half> @_Z4ceilDv2_Dh(<2 x half> %arg) + ret <2 x half> %ceil +} + +define <3 x half> @test_ceil_v3f16(<3 x half> %arg) { +; CHECK-LABEL: define <3 x half> @test_ceil_v3f16 +; CHECK-SAME: (<3 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[CEIL:%.*]] = tail call <3 x half> @_Z4ceilDv3_Dh(<3 x half> [[ARG]]) +; CHECK-NEXT: ret <3 x half> [[CEIL]] +; + %ceil = tail call <3 x half> @_Z4ceilDv3_Dh(<3 x half> %arg) + ret <3 x half> %ceil +} + +define <4 x half> @test_ceil_v4f16(<4 x half> %arg) { +; CHECK-LABEL: define <4 x half> @test_ceil_v4f16 +; CHECK-SAME: (<4 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[CEIL:%.*]] = tail call <4 x half> @_Z4ceilDv4_Dh(<4 x half> [[ARG]]) +; CHECK-NEXT: ret <4 x half> [[CEIL]] +; + %ceil = tail call <4 x half> @_Z4ceilDv4_Dh(<4 x half> %arg) + ret <4 x half> %ceil +} + +define <8 x half> @test_ceil_v8f16(<8 x half> %arg) { +; CHECK-LABEL: define <8 x half> @test_ceil_v8f16 +; CHECK-SAME: (<8 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[CEIL:%.*]] = tail call <8 x half> @_Z4ceilDv8_Dh(<8 x half> [[ARG]]) +; CHECK-NEXT: ret <8 x half> [[CEIL]] +; + %ceil = tail call <8 x half> @_Z4ceilDv8_Dh(<8 x half> %arg) + ret <8 x half> %ceil +} + +define <16 x half> @test_ceil_v16f16(<16 x half> %arg) { +; CHECK-LABEL: define <16 x half> @test_ceil_v16f16 +; CHECK-SAME: (<16 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[CEIL:%.*]] = tail call <16 x half> @_Z4ceilDv16_Dh(<16 x half> [[ARG]]) +; CHECK-NEXT: ret <16 x half> [[CEIL]] +; + %ceil 
= tail call <16 x half> @_Z4ceilDv16_Dh(<16 x half> %arg) + ret <16 x half> %ceil +} + +define float @test_ceil_f32_nobuiltin_callsite(float %arg) { +; CHECK-LABEL: define float @test_ceil_f32_nobuiltin_callsite +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[CEIL:%.*]] = tail call float @_Z4ceilf(float [[ARG]]) #[[ATTR3:[0-9]+]] +; CHECK-NEXT: ret float [[CEIL]] +; + %ceil = tail call float @_Z4ceilf(float %arg) #0 + ret float %ceil +} + +define <2 x float> @test_ceil_v2f32_nobuiltin_callsite(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_ceil_v2f32_nobuiltin_callsite +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[CEIL:%.*]] = tail call <2 x float> @_Z4ceilDv2_f(<2 x float> [[ARG]]) #[[ATTR3]] +; CHECK-NEXT: ret <2 x float> [[CEIL]] +; + %ceil = tail call <2 x float> @_Z4ceilDv2_f(<2 x float> %arg) #0 + ret <2 x float> %ceil +} + +; "no-builtins" should be ignored +define float @test_ceil_f32_nobuiltins(float %arg) #1 { +; CHECK-LABEL: define float @test_ceil_f32_nobuiltins +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[CEIL:%.*]] = tail call float @_Z4ceilf(float [[ARG]]) #[[ATTR3]] +; CHECK-NEXT: ret float [[CEIL]] +; + %ceil = tail call float @_Z4ceilf(float %arg) #0 + ret float %ceil +} + +define <2 x float> @test_ceil_v2f32_nobuiltins(<2 x float> %arg) #1 { +; CHECK-LABEL: define <2 x float> @test_ceil_v2f32_nobuiltins +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CEIL:%.*]] = tail call <2 x float> @_Z4ceilDv2_f(<2 x float> [[ARG]]) #[[ATTR3]] +; CHECK-NEXT: ret <2 x float> [[CEIL]] +; + %ceil = tail call <2 x float> @_Z4ceilDv2_f(<2 x float> %arg) #0 + ret <2 x float> %ceil +} + +define float @test_ceil_f32_preserve_flags(float %arg) { +; CHECK-LABEL: define float @test_ceil_f32_preserve_flags +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[CEIL:%.*]] = tail call nnan ninf float @_Z4ceilf(float [[ARG]]) +; CHECK-NEXT: ret float [[CEIL]] +; + %ceil = tail call nnan ninf 
float @_Z4ceilf(float %arg) + ret float %ceil +} + +define <2 x float> @test_ceil_v2f32_preserve_flags(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_ceil_v2f32_preserve_flags +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[CEIL:%.*]] = tail call nnan nsz contract <2 x float> @_Z4ceilDv2_f(<2 x float> [[ARG]]) +; CHECK-NEXT: ret <2 x float> [[CEIL]] +; + %ceil = tail call contract nsz nnan <2 x float> @_Z4ceilDv2_f(<2 x float> %arg) + ret <2 x float> %ceil +} + +define float @test_ceil_f32_preserve_flags_md(float %arg) { +; CHECK-LABEL: define float @test_ceil_f32_preserve_flags_md +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[CEIL:%.*]] = tail call nnan ninf float @_Z4ceilf(float [[ARG]]), !foo !0 +; CHECK-NEXT: ret float [[CEIL]] +; + %ceil = tail call nnan ninf float @_Z4ceilf(float %arg), !foo !0 + ret float %ceil +} + +define <2 x float> @test_ceil_v2f32_preserve_flags_md(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_ceil_v2f32_preserve_flags_md +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[CEIL:%.*]] = tail call nnan nsz contract <2 x float> @_Z4ceilDv2_f(<2 x float> [[ARG]]), !foo !0 +; CHECK-NEXT: ret <2 x float> [[CEIL]] +; + %ceil = tail call contract nsz nnan <2 x float> @_Z4ceilDv2_f(<2 x float> %arg), !foo !0 + ret <2 x float> %ceil +} + +; Test the libm name, not a recognized opencl builtin. 
+declare float @ceilf(float) #2 +declare double @ceil(double) #2 + +define float @test_libm_ceil_f32(float %arg) { +; CHECK-LABEL: define float @test_libm_ceil_f32 +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[CEIL:%.*]] = tail call float @ceilf(float [[ARG]]) +; CHECK-NEXT: ret float [[CEIL]] +; + %ceil = tail call float @ceilf(float %arg) + ret float %ceil +} + +define double @test_libm_ceil_f64(double %arg) { +; CHECK-LABEL: define double @test_libm_ceil_f64 +; CHECK-SAME: (double [[ARG:%.*]]) { +; CHECK-NEXT: [[CEIL:%.*]] = tail call double @ceil(double [[ARG]]) +; CHECK-NEXT: ret double [[CEIL]] +; + %ceil = tail call double @ceil(double %arg) + ret double %ceil +} + +define float @test_ceil_f32_strictfp(float %arg) #3 { +; CHECK-LABEL: define float @test_ceil_f32_strictfp +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: [[CEIL:%.*]] = tail call nnan float @_Z4ceilf(float [[ARG]]) #[[ATTR2]] +; CHECK-NEXT: ret float [[CEIL]] +; + %ceil = tail call nnan float @_Z4ceilf(float %arg) #3 + ret float %ceil +} + +attributes #0 = { nobuiltin } +attributes #1 = { "no-builtins" } +attributes #2 = { nounwind memory(none) } +attributes #3 = { strictfp } + +!0 = !{i32 1234} Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-copysign.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-copysign.ll @@ -0,0 +1,278 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s + +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" + +declare float @_Z8copysignff(float, float) +declare <2 x float> @_Z8copysignDv2_fS_(<2 x float>, <2 x float>) +declare <3 x float> 
@_Z8copysignDv3_fS_(<3 x float>, <3 x float>) +declare <4 x float> @_Z8copysignDv4_fS_(<4 x float>, <4 x float>) +declare <8 x float> @_Z8copysignDv8_fS_(<8 x float>, <8 x float>) +declare <16 x float> @_Z8copysignDv16_fS_(<16 x float>, <16 x float>) +declare double @_Z8copysigndd(double, double) +declare <2 x double> @_Z8copysignDv2_dS_(<2 x double>, <2 x double>) +declare <3 x double> @_Z8copysignDv3_dS_(<3 x double>, <3 x double>) +declare <4 x double> @_Z8copysignDv4_dS_(<4 x double>, <4 x double>) +declare <8 x double> @_Z8copysignDv8_dS_(<8 x double>, <8 x double>) +declare <16 x double> @_Z8copysignDv16_dS_(<16 x double>, <16 x double>) +declare half @_Z8copysignDhDh(half, half) +declare <2 x half> @_Z8copysignDv2_DhS_(<2 x half>, <2 x half>) +declare <3 x half> @_Z8copysignDv3_DhS_(<3 x half>, <3 x half>) +declare <4 x half> @_Z8copysignDv4_DhS_(<4 x half>, <4 x half>) +declare <8 x half> @_Z8copysignDv8_DhS_(<8 x half>, <8 x half>) +declare <16 x half> @_Z8copysignDv16_DhS_(<16 x half>, <16 x half>) + +define float @test_copysign_f32(float %x, float %y) { +; CHECK-LABEL: define float @test_copysign_f32 +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[COPYSIGN:%.*]] = tail call float @_Z8copysignff(float [[X]], float [[Y]]) +; CHECK-NEXT: ret float [[COPYSIGN]] +; + %copysign = tail call float @_Z8copysignff(float %x, float %y) + ret float %copysign +} + +define float @test_copysign_f32_nnan(float %x, float %y) { +; CHECK-LABEL: define float @test_copysign_f32_nnan +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[COPYSIGN:%.*]] = tail call nnan float @_Z8copysignff(float [[X]], float [[Y]]) +; CHECK-NEXT: ret float [[COPYSIGN]] +; + %copysign = tail call nnan float @_Z8copysignff(float %x, float %y) + ret float %copysign +} + +define <2 x float> @test_copysign_v2f32(<2 x float> %x, <2 x float> %y) { +; CHECK-LABEL: define <2 x float> @test_copysign_v2f32 +; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) { 
+; CHECK-NEXT: [[COPYSIGN:%.*]] = tail call <2 x float> @_Z8copysignDv2_fS_(<2 x float> [[X]], <2 x float> [[Y]]) +; CHECK-NEXT: ret <2 x float> [[COPYSIGN]] +; + %copysign = tail call <2 x float> @_Z8copysignDv2_fS_(<2 x float> %x, <2 x float> %y) + ret <2 x float> %copysign +} + +define <3 x float> @test_copysign_v3f32(<3 x float> %x, <3 x float> %y) { +; CHECK-LABEL: define <3 x float> @test_copysign_v3f32 +; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[COPYSIGN:%.*]] = tail call <3 x float> @_Z8copysignDv3_fS_(<3 x float> [[X]], <3 x float> [[Y]]) +; CHECK-NEXT: ret <3 x float> [[COPYSIGN]] +; + %copysign = tail call <3 x float> @_Z8copysignDv3_fS_(<3 x float> %x, <3 x float> %y) + ret <3 x float> %copysign +} + +define <4 x float> @test_copysign_v4f32(<4 x float> %x, <4 x float> %y) { +; CHECK-LABEL: define <4 x float> @test_copysign_v4f32 +; CHECK-SAME: (<4 x float> [[X:%.*]], <4 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[COPYSIGN:%.*]] = tail call <4 x float> @_Z8copysignDv4_fS_(<4 x float> [[X]], <4 x float> [[Y]]) +; CHECK-NEXT: ret <4 x float> [[COPYSIGN]] +; + %copysign = tail call <4 x float> @_Z8copysignDv4_fS_(<4 x float> %x, <4 x float> %y) + ret <4 x float> %copysign +} + +define <8 x float> @test_copysign_v8f32(<8 x float> %x, <8 x float> %y) { +; CHECK-LABEL: define <8 x float> @test_copysign_v8f32 +; CHECK-SAME: (<8 x float> [[X:%.*]], <8 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[COPYSIGN:%.*]] = tail call <8 x float> @_Z8copysignDv8_fS_(<8 x float> [[X]], <8 x float> [[Y]]) +; CHECK-NEXT: ret <8 x float> [[COPYSIGN]] +; + %copysign = tail call <8 x float> @_Z8copysignDv8_fS_(<8 x float> %x, <8 x float> %y) + ret <8 x float> %copysign +} + +define <16 x float> @test_copysign_v16f32(<16 x float> %x, <16 x float> %y) { +; CHECK-LABEL: define <16 x float> @test_copysign_v16f32 +; CHECK-SAME: (<16 x float> [[X:%.*]], <16 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[COPYSIGN:%.*]] = tail call <16 x float> @_Z8copysignDv16_fS_(<16 x 
float> [[X]], <16 x float> [[Y]]) +; CHECK-NEXT: ret <16 x float> [[COPYSIGN]] +; + %copysign = tail call <16 x float> @_Z8copysignDv16_fS_(<16 x float> %x, <16 x float> %y) + ret <16 x float> %copysign +} + +define double @test_copysign_f64(double %x, double %y) { +; CHECK-LABEL: define double @test_copysign_f64 +; CHECK-SAME: (double [[X:%.*]], double [[Y:%.*]]) { +; CHECK-NEXT: [[COPYSIGN:%.*]] = tail call double @_Z8copysigndd(double [[X]], double [[Y]]) +; CHECK-NEXT: ret double [[COPYSIGN]] +; + %copysign = tail call double @_Z8copysigndd(double %x, double %y) + ret double %copysign +} + +define <2 x double> @test_copysign_v2f64(<2 x double> %x, <2 x double> %y) { +; CHECK-LABEL: define <2 x double> @test_copysign_v2f64 +; CHECK-SAME: (<2 x double> [[X:%.*]], <2 x double> [[Y:%.*]]) { +; CHECK-NEXT: [[COPYSIGN:%.*]] = tail call <2 x double> @_Z8copysignDv2_dS_(<2 x double> [[X]], <2 x double> [[Y]]) +; CHECK-NEXT: ret <2 x double> [[COPYSIGN]] +; + %copysign = tail call <2 x double> @_Z8copysignDv2_dS_(<2 x double> %x, <2 x double> %y) + ret <2 x double> %copysign +} + +define <3 x double> @test_copysign_v3f64(<3 x double> %x, <3 x double> %y) { +; CHECK-LABEL: define <3 x double> @test_copysign_v3f64 +; CHECK-SAME: (<3 x double> [[X:%.*]], <3 x double> [[Y:%.*]]) { +; CHECK-NEXT: [[COPYSIGN:%.*]] = tail call <3 x double> @_Z8copysignDv3_dS_(<3 x double> [[X]], <3 x double> [[Y]]) +; CHECK-NEXT: ret <3 x double> [[COPYSIGN]] +; + %copysign = tail call <3 x double> @_Z8copysignDv3_dS_(<3 x double> %x, <3 x double> %y) + ret <3 x double> %copysign +} + +define <4 x double> @test_copysign_v4f64(<4 x double> %x, <4 x double> %y) { +; CHECK-LABEL: define <4 x double> @test_copysign_v4f64 +; CHECK-SAME: (<4 x double> [[X:%.*]], <4 x double> [[Y:%.*]]) { +; CHECK-NEXT: [[COPYSIGN:%.*]] = tail call <4 x double> @_Z8copysignDv4_dS_(<4 x double> [[X]], <4 x double> [[Y]]) +; CHECK-NEXT: ret <4 x double> [[COPYSIGN]] +; + %copysign = tail call <4 x double> 
@_Z8copysignDv4_dS_(<4 x double> %x, <4 x double> %y) + ret <4 x double> %copysign +} + +define <8 x double> @test_copysign_v8f64(<8 x double> %x, <8 x double> %y) { +; CHECK-LABEL: define <8 x double> @test_copysign_v8f64 +; CHECK-SAME: (<8 x double> [[X:%.*]], <8 x double> [[Y:%.*]]) { +; CHECK-NEXT: [[COPYSIGN:%.*]] = tail call <8 x double> @_Z8copysignDv8_dS_(<8 x double> [[X]], <8 x double> [[Y]]) +; CHECK-NEXT: ret <8 x double> [[COPYSIGN]] +; + %copysign = tail call <8 x double> @_Z8copysignDv8_dS_(<8 x double> %x, <8 x double> %y) + ret <8 x double> %copysign +} + +define <16 x double> @test_copysign_v16f64(<16 x double> %x, <16 x double> %y) { +; CHECK-LABEL: define <16 x double> @test_copysign_v16f64 +; CHECK-SAME: (<16 x double> [[X:%.*]], <16 x double> [[Y:%.*]]) { +; CHECK-NEXT: [[COPYSIGN:%.*]] = tail call <16 x double> @_Z8copysignDv16_dS_(<16 x double> [[X]], <16 x double> [[Y]]) +; CHECK-NEXT: ret <16 x double> [[COPYSIGN]] +; + %copysign = tail call <16 x double> @_Z8copysignDv16_dS_(<16 x double> %x, <16 x double> %y) + ret <16 x double> %copysign +} + +define half @test_copysign_f16(half %x, half %y) { +; CHECK-LABEL: define half @test_copysign_f16 +; CHECK-SAME: (half [[X:%.*]], half [[Y:%.*]]) { +; CHECK-NEXT: [[COPYSIGN:%.*]] = tail call half @_Z8copysignDhDh(half [[X]], half [[Y]]) +; CHECK-NEXT: ret half [[COPYSIGN]] +; + %copysign = tail call half @_Z8copysignDhDh(half %x, half %y) + ret half %copysign +} + +define <2 x half> @test_copysign_v2f16(<2 x half> %x, <2 x half> %y) { +; CHECK-LABEL: define <2 x half> @test_copysign_v2f16 +; CHECK-SAME: (<2 x half> [[X:%.*]], <2 x half> [[Y:%.*]]) { +; CHECK-NEXT: [[COPYSIGN:%.*]] = tail call <2 x half> @_Z8copysignDv2_DhS_(<2 x half> [[X]], <2 x half> [[Y]]) +; CHECK-NEXT: ret <2 x half> [[COPYSIGN]] +; + %copysign = tail call <2 x half> @_Z8copysignDv2_DhS_(<2 x half> %x, <2 x half> %y) + ret <2 x half> %copysign +} + +define <3 x half> @test_copysign_v3f16(<3 x half> %x, <3 x half> %y) { +; 
CHECK-LABEL: define <3 x half> @test_copysign_v3f16 +; CHECK-SAME: (<3 x half> [[X:%.*]], <3 x half> [[Y:%.*]]) { +; CHECK-NEXT: [[COPYSIGN:%.*]] = tail call <3 x half> @_Z8copysignDv3_DhS_(<3 x half> [[X]], <3 x half> [[Y]]) +; CHECK-NEXT: ret <3 x half> [[COPYSIGN]] +; + %copysign = tail call <3 x half> @_Z8copysignDv3_DhS_(<3 x half> %x, <3 x half> %y) + ret <3 x half> %copysign +} + +define <4 x half> @test_copysign_v4f16(<4 x half> %x, <4 x half> %y) { +; CHECK-LABEL: define <4 x half> @test_copysign_v4f16 +; CHECK-SAME: (<4 x half> [[X:%.*]], <4 x half> [[Y:%.*]]) { +; CHECK-NEXT: [[COPYSIGN:%.*]] = tail call <4 x half> @_Z8copysignDv4_DhS_(<4 x half> [[X]], <4 x half> [[Y]]) +; CHECK-NEXT: ret <4 x half> [[COPYSIGN]] +; + %copysign = tail call <4 x half> @_Z8copysignDv4_DhS_(<4 x half> %x, <4 x half> %y) + ret <4 x half> %copysign +} + +define <8 x half> @test_copysign_v8f16(<8 x half> %x, <8 x half> %y) { +; CHECK-LABEL: define <8 x half> @test_copysign_v8f16 +; CHECK-SAME: (<8 x half> [[X:%.*]], <8 x half> [[Y:%.*]]) { +; CHECK-NEXT: [[COPYSIGN:%.*]] = tail call <8 x half> @_Z8copysignDv8_DhS_(<8 x half> [[X]], <8 x half> [[Y]]) +; CHECK-NEXT: ret <8 x half> [[COPYSIGN]] +; + %copysign = tail call <8 x half> @_Z8copysignDv8_DhS_(<8 x half> %x, <8 x half> %y) + ret <8 x half> %copysign +} + +define <16 x half> @test_copysign_v16f16(<16 x half> %x, <16 x half> %y) { +; CHECK-LABEL: define <16 x half> @test_copysign_v16f16 +; CHECK-SAME: (<16 x half> [[X:%.*]], <16 x half> [[Y:%.*]]) { +; CHECK-NEXT: [[COPYSIGN:%.*]] = tail call <16 x half> @_Z8copysignDv16_DhS_(<16 x half> [[X]], <16 x half> [[Y]]) +; CHECK-NEXT: ret <16 x half> [[COPYSIGN]] +; + %copysign = tail call <16 x half> @_Z8copysignDv16_DhS_(<16 x half> %x, <16 x half> %y) + ret <16 x half> %copysign +} + +define float @test_copysign_f32_minsize(float %x, float %y) #0 { +; CHECK-LABEL: define float @test_copysign_f32_minsize +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR0:[0-9]+]] { +; 
CHECK-NEXT: [[COPYSIGN:%.*]] = tail call float @_Z8copysignff(float [[X]], float [[Y]]) +; CHECK-NEXT: ret float [[COPYSIGN]] +; + %copysign = tail call float @_Z8copysignff(float %x, float %y) + ret float %copysign +} + +define float @test_copysign_f32_nnan_minsize(float %x, float %y) #0 { +; CHECK-LABEL: define float @test_copysign_f32_nnan_minsize +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[COPYSIGN:%.*]] = tail call nnan float @_Z8copysignff(float [[X]], float [[Y]]) +; CHECK-NEXT: ret float [[COPYSIGN]] +; + %copysign = tail call nnan float @_Z8copysignff(float %x, float %y) + ret float %copysign +} + +define float @test_copysign_f32_noinline(float %x, float %y) { +; CHECK-LABEL: define float @test_copysign_f32_noinline +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[COPYSIGN:%.*]] = tail call float @_Z8copysignff(float [[X]], float [[Y]]) #[[ATTR2:[0-9]+]] +; CHECK-NEXT: ret float [[COPYSIGN]] +; + %copysign = tail call float @_Z8copysignff(float %x, float %y) #1 + ret float %copysign +} + +define float @test_copysign_f32_nnan_noinline(float %x, float %y) { +; CHECK-LABEL: define float @test_copysign_f32_nnan_noinline +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[COPYSIGN:%.*]] = tail call nnan float @_Z8copysignff(float [[X]], float [[Y]]) #[[ATTR2]] +; CHECK-NEXT: ret float [[COPYSIGN]] +; + %copysign = tail call nnan float @_Z8copysignff(float %x, float %y) #1 + ret float %copysign +} + +define float @test_copysign_f32_strictfp(float %x, float %y) #2 { +; CHECK-LABEL: define float @test_copysign_f32_strictfp +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[COPYSIGN:%.*]] = tail call nnan nsz float @_Z8copysignff(float [[X]], float [[Y]]) #[[ATTR1]] +; CHECK-NEXT: ret float [[COPYSIGN]] +; + %copysign = tail call nsz nnan float @_Z8copysignff(float %x, float %y) #2 + ret float %copysign +} + +define float 
@test_copysign_f32_fast_nobuiltin(float %x, float %y) { +; CHECK-LABEL: define float @test_copysign_f32_fast_nobuiltin +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[COPYSIGN:%.*]] = tail call fast float @_Z8copysignff(float [[X]], float [[Y]]) #[[ATTR3:[0-9]+]] +; CHECK-NEXT: ret float [[COPYSIGN]] +; + %copysign = tail call fast float @_Z8copysignff(float %x, float %y) #3 + ret float %copysign +} + +attributes #0 = { minsize } +attributes #1 = { noinline } +attributes #2 = { strictfp } +attributes #3 = { nobuiltin } Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-exp.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-exp.ll @@ -0,0 +1,561 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s + +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" + +declare float @_Z3expf(float) +declare <2 x float> @_Z3expDv2_f(<2 x float>) +declare <3 x float> @_Z3expDv3_f(<3 x float>) +declare <4 x float> @_Z3expDv4_f(<4 x float>) +declare <8 x float> @_Z3expDv8_f(<8 x float>) +declare <16 x float> @_Z3expDv16_f(<16 x float>) + +declare double @_Z3expd(double) +declare <2 x double> @_Z3expDv2_d(<2 x double>) +declare <3 x double> @_Z3expDv3_d(<3 x double>) +declare <4 x double> @_Z3expDv4_d(<4 x double>) +declare <8 x double> @_Z3expDv8_d(<8 x double>) +declare <16 x double> @_Z3expDv16_d(<16 x double>) + +declare half @_Z3expDh(half) +declare <2 x half> @_Z3expDv2_Dh(<2 x half>) +declare <3 x half> @_Z3expDv3_Dh(<3 x half>) +declare <4 x half> @_Z3expDv4_Dh(<4 x half>) +declare <8 x half> @_Z3expDv8_Dh(<8 x half>) +declare <16 x half> @_Z3expDv16_Dh(<16 x 
half>) + +define float @test_exp_f32(float %arg) { +; CHECK-LABEL: define float @test_exp_f32 +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call float @_Z3expf(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[EXP]] +; + %exp = tail call float @_Z3expf(float %arg), !fpmath !0 + ret float %exp +} + +define <2 x float> @test_exp_v2f32(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_exp_v2f32 +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call <2 x float> @_Z3expDv2_f(<2 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <2 x float> [[EXP]] +; + %exp = tail call <2 x float> @_Z3expDv2_f(<2 x float> %arg), !fpmath !0 + ret <2 x float> %exp +} + +define <3 x float> @test_exp_v3f32(<3 x float> %arg) { +; CHECK-LABEL: define <3 x float> @test_exp_v3f32 +; CHECK-SAME: (<3 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call <3 x float> @_Z3expDv3_f(<3 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <3 x float> [[EXP]] +; + %exp = tail call <3 x float> @_Z3expDv3_f(<3 x float> %arg), !fpmath !0 + ret <3 x float> %exp +} + +define <4 x float> @test_exp_v4f32(<4 x float> %arg) { +; CHECK-LABEL: define <4 x float> @test_exp_v4f32 +; CHECK-SAME: (<4 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call <4 x float> @_Z3expDv4_f(<4 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <4 x float> [[EXP]] +; + %exp = tail call <4 x float> @_Z3expDv4_f(<4 x float> %arg), !fpmath !0 + ret <4 x float> %exp +} + +define <8 x float> @test_exp_v8f32(<8 x float> %arg) { +; CHECK-LABEL: define <8 x float> @test_exp_v8f32 +; CHECK-SAME: (<8 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call <8 x float> @_Z3expDv8_f(<8 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <8 x float> [[EXP]] +; + %exp = tail call <8 x float> @_Z3expDv8_f(<8 x float> %arg), !fpmath !0 + ret <8 x float> %exp +} + +define <16 x float> @test_exp_v16f32(<16 x float> %arg) { +; CHECK-LABEL: define <16 x float> 
@test_exp_v16f32 +; CHECK-SAME: (<16 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call <16 x float> @_Z3expDv16_f(<16 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <16 x float> [[EXP]] +; + %exp = tail call <16 x float> @_Z3expDv16_f(<16 x float> %arg), !fpmath !0 + ret <16 x float> %exp +} + +define float @test_exp_cr_f32(float %arg) { +; CHECK-LABEL: define float @test_exp_cr_f32 +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call float @_Z3expf(float [[ARG]]) +; CHECK-NEXT: ret float [[EXP]] +; + %exp = tail call float @_Z3expf(float %arg) + ret float %exp +} + +define <2 x float> @test_exp_cr_v2f32(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_exp_cr_v2f32 +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call <2 x float> @_Z3expDv2_f(<2 x float> [[ARG]]) +; CHECK-NEXT: ret <2 x float> [[EXP]] +; + %exp = tail call <2 x float> @_Z3expDv2_f(<2 x float> %arg) + ret <2 x float> %exp +} + +define <3 x float> @test_exp_cr_v3f32(<3 x float> %arg) { +; CHECK-LABEL: define <3 x float> @test_exp_cr_v3f32 +; CHECK-SAME: (<3 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call <3 x float> @_Z3expDv3_f(<3 x float> [[ARG]]) +; CHECK-NEXT: ret <3 x float> [[EXP]] +; + %exp = tail call <3 x float> @_Z3expDv3_f(<3 x float> %arg) + ret <3 x float> %exp +} + +define <4 x float> @test_exp_cr_v4f32(<4 x float> %arg) { +; CHECK-LABEL: define <4 x float> @test_exp_cr_v4f32 +; CHECK-SAME: (<4 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call <4 x float> @_Z3expDv4_f(<4 x float> [[ARG]]) +; CHECK-NEXT: ret <4 x float> [[EXP]] +; + %exp = tail call <4 x float> @_Z3expDv4_f(<4 x float> %arg) + ret <4 x float> %exp +} + +define <8 x float> @test_exp_cr_v8f32(<8 x float> %arg) { +; CHECK-LABEL: define <8 x float> @test_exp_cr_v8f32 +; CHECK-SAME: (<8 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call <8 x float> @_Z3expDv8_f(<8 x float> [[ARG]]) +; CHECK-NEXT: ret <8 x 
float> [[EXP]] +; + %exp = tail call <8 x float> @_Z3expDv8_f(<8 x float> %arg) + ret <8 x float> %exp +} + +define <16 x float> @test_exp_cr_v16f32(<16 x float> %arg) { +; CHECK-LABEL: define <16 x float> @test_exp_cr_v16f32 +; CHECK-SAME: (<16 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call <16 x float> @_Z3expDv16_f(<16 x float> [[ARG]]) +; CHECK-NEXT: ret <16 x float> [[EXP]] +; + %exp = tail call <16 x float> @_Z3expDv16_f(<16 x float> %arg) + ret <16 x float> %exp +} + +define double @test_exp_f64(double %arg) { +; CHECK-LABEL: define double @test_exp_f64 +; CHECK-SAME: (double [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call double @_Z3expd(double [[ARG]]) +; CHECK-NEXT: ret double [[EXP]] +; + %exp = tail call double @_Z3expd(double %arg) + ret double %exp +} + +define <2 x double> @test_exp_v2f64(<2 x double> %arg) { +; CHECK-LABEL: define <2 x double> @test_exp_v2f64 +; CHECK-SAME: (<2 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call <2 x double> @_Z3expDv2_d(<2 x double> [[ARG]]) +; CHECK-NEXT: ret <2 x double> [[EXP]] +; + %exp = tail call <2 x double> @_Z3expDv2_d(<2 x double> %arg) + ret <2 x double> %exp +} + +define <3 x double> @test_exp_v3f64(<3 x double> %arg) { +; CHECK-LABEL: define <3 x double> @test_exp_v3f64 +; CHECK-SAME: (<3 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call <3 x double> @_Z3expDv3_d(<3 x double> [[ARG]]) +; CHECK-NEXT: ret <3 x double> [[EXP]] +; + %exp = tail call <3 x double> @_Z3expDv3_d(<3 x double> %arg) + ret <3 x double> %exp +} + +define <4 x double> @test_exp_v4f64(<4 x double> %arg) { +; CHECK-LABEL: define <4 x double> @test_exp_v4f64 +; CHECK-SAME: (<4 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call <4 x double> @_Z3expDv4_d(<4 x double> [[ARG]]) +; CHECK-NEXT: ret <4 x double> [[EXP]] +; + %exp = tail call <4 x double> @_Z3expDv4_d(<4 x double> %arg) + ret <4 x double> %exp +} + +define <8 x double> @test_exp_v8f64(<8 x double> %arg) { +; 
CHECK-LABEL: define <8 x double> @test_exp_v8f64 +; CHECK-SAME: (<8 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call <8 x double> @_Z3expDv8_d(<8 x double> [[ARG]]) +; CHECK-NEXT: ret <8 x double> [[EXP]] +; + %exp = tail call <8 x double> @_Z3expDv8_d(<8 x double> %arg) + ret <8 x double> %exp +} + +define <16 x double> @test_exp_v16f64(<16 x double> %arg) { +; CHECK-LABEL: define <16 x double> @test_exp_v16f64 +; CHECK-SAME: (<16 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call <16 x double> @_Z3expDv16_d(<16 x double> [[ARG]]) +; CHECK-NEXT: ret <16 x double> [[EXP]] +; + %exp = tail call <16 x double> @_Z3expDv16_d(<16 x double> %arg) + ret <16 x double> %exp +} + +define half @test_exp_f16(half %arg) { +; CHECK-LABEL: define half @test_exp_f16 +; CHECK-SAME: (half [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call half @_Z3expDh(half [[ARG]]) +; CHECK-NEXT: ret half [[EXP]] +; + %exp = tail call half @_Z3expDh(half %arg) + ret half %exp +} + +define half @test_exp_f16_fast(half %arg) { +; CHECK-LABEL: define half @test_exp_f16_fast +; CHECK-SAME: (half [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call fast half @_Z3expDh(half [[ARG]]) +; CHECK-NEXT: ret half [[EXP]] +; + %exp = tail call fast half @_Z3expDh(half %arg) + ret half %exp +} + +define <2 x half> @test_exp_v2f16(<2 x half> %arg) { +; CHECK-LABEL: define <2 x half> @test_exp_v2f16 +; CHECK-SAME: (<2 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call <2 x half> @_Z3expDv2_Dh(<2 x half> [[ARG]]) +; CHECK-NEXT: ret <2 x half> [[EXP]] +; + %exp = tail call <2 x half> @_Z3expDv2_Dh(<2 x half> %arg) + ret <2 x half> %exp +} + +define <3 x half> @test_exp_v3f16(<3 x half> %arg) { +; CHECK-LABEL: define <3 x half> @test_exp_v3f16 +; CHECK-SAME: (<3 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call <3 x half> @_Z3expDv3_Dh(<3 x half> [[ARG]]) +; CHECK-NEXT: ret <3 x half> [[EXP]] +; + %exp = tail call <3 x half> @_Z3expDv3_Dh(<3 x half> %arg) + ret 
<3 x half> %exp +} + +define <4 x half> @test_exp_v4f16(<4 x half> %arg) { +; CHECK-LABEL: define <4 x half> @test_exp_v4f16 +; CHECK-SAME: (<4 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call <4 x half> @_Z3expDv4_Dh(<4 x half> [[ARG]]) +; CHECK-NEXT: ret <4 x half> [[EXP]] +; + %exp = tail call <4 x half> @_Z3expDv4_Dh(<4 x half> %arg) + ret <4 x half> %exp +} + +define <8 x half> @test_exp_v8f16(<8 x half> %arg) { +; CHECK-LABEL: define <8 x half> @test_exp_v8f16 +; CHECK-SAME: (<8 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call <8 x half> @_Z3expDv8_Dh(<8 x half> [[ARG]]) +; CHECK-NEXT: ret <8 x half> [[EXP]] +; + %exp = tail call <8 x half> @_Z3expDv8_Dh(<8 x half> %arg) + ret <8 x half> %exp +} + +define <16 x half> @test_exp_v16f16(<16 x half> %arg) { +; CHECK-LABEL: define <16 x half> @test_exp_v16f16 +; CHECK-SAME: (<16 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call <16 x half> @_Z3expDv16_Dh(<16 x half> [[ARG]]) +; CHECK-NEXT: ret <16 x half> [[EXP]] +; + %exp = tail call <16 x half> @_Z3expDv16_Dh(<16 x half> %arg) + ret <16 x half> %exp +} + +define float @test_exp_f32_nobuiltin_callsite(float %arg) { +; CHECK-LABEL: define float @test_exp_f32_nobuiltin_callsite +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call float @_Z3expf(float [[ARG]]) #[[ATTR5:[0-9]+]], !fpmath !0 +; CHECK-NEXT: ret float [[EXP]] +; + %exp = tail call float @_Z3expf(float %arg) #0, !fpmath !0 + ret float %exp +} + +define <2 x float> @test_exp_v2f32_nobuiltin_callsite(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_exp_v2f32_nobuiltin_callsite +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call <2 x float> @_Z3expDv2_f(<2 x float> [[ARG]]) #[[ATTR5]], !fpmath !0 +; CHECK-NEXT: ret <2 x float> [[EXP]] +; + %exp = tail call <2 x float> @_Z3expDv2_f(<2 x float> %arg) #0, !fpmath !0 + ret <2 x float> %exp +} + +define float @test_exp_cr_f32_nobuiltin_callsite(float %arg) { +; 
CHECK-LABEL: define float @test_exp_cr_f32_nobuiltin_callsite +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call float @_Z3expf(float [[ARG]]) #[[ATTR5]] +; CHECK-NEXT: ret float [[EXP]] +; + %exp = tail call float @_Z3expf(float %arg) #0 + ret float %exp +} + +define <2 x float> @test_exp_cr_v2f32_nobuiltin_callsite(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_exp_cr_v2f32_nobuiltin_callsite +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call <2 x float> @_Z3expDv2_f(<2 x float> [[ARG]]) #[[ATTR5]] +; CHECK-NEXT: ret <2 x float> [[EXP]] +; + %exp = tail call <2 x float> @_Z3expDv2_f(<2 x float> %arg) #0 + ret <2 x float> %exp +} + +; The "no-builtins" function attribute should be ignored; these calls carry no call-site nobuiltin so that only the function attribute is exercised. +define float @test_exp_f32_nobuiltins(float %arg) #1 { +; CHECK-LABEL: define float @test_exp_f32_nobuiltins +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[EXP:%.*]] = tail call float @_Z3expf(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[EXP]] +; + %exp = tail call float @_Z3expf(float %arg), !fpmath !0 + ret float %exp +} + +define <2 x float> @test_exp_v2f32_nobuiltins(<2 x float> %arg) #1 { +; CHECK-LABEL: define <2 x float> @test_exp_v2f32_nobuiltins +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[EXP:%.*]] = tail call <2 x float> @_Z3expDv2_f(<2 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <2 x float> [[EXP]] +; + %exp = tail call <2 x float> @_Z3expDv2_f(<2 x float> %arg), !fpmath !0 + ret <2 x float> %exp +} + +define float @test_exp_cr_f32_nobuiltins(float %arg) #1 { +; CHECK-LABEL: define float @test_exp_cr_f32_nobuiltins +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[EXP:%.*]] = tail call float @_Z3expf(float [[ARG]]) +; CHECK-NEXT: ret float [[EXP]] +; + %exp = tail call float @_Z3expf(float %arg) + ret float %exp +} + +define <2 x float> @test_exp_cr_v2f32_nobuiltins(<2 x float> %arg) #1 { +;
CHECK-LABEL: define <2 x float> @test_exp_cr_v2f32_nobuiltins +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[EXP:%.*]] = tail call <2 x float> @_Z3expDv2_f(<2 x float> [[ARG]]) +; CHECK-NEXT: ret <2 x float> [[EXP]] +; + %exp = tail call <2 x float> @_Z3expDv2_f(<2 x float> %arg) + ret <2 x float> %exp +} + +define float @test_exp_f32_preserve_flags(float %arg) { +; CHECK-LABEL: define float @test_exp_f32_preserve_flags +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call nnan ninf float @_Z3expf(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[EXP]] +; + %exp = tail call nnan ninf float @_Z3expf(float %arg), !fpmath !0 + ret float %exp +} + +define <2 x float> @test_exp_v2f32_preserve_flags(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_exp_v2f32_preserve_flags +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call nnan nsz contract <2 x float> @_Z3expDv2_f(<2 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <2 x float> [[EXP]] +; + %exp = tail call contract nsz nnan <2 x float> @_Z3expDv2_f(<2 x float> %arg), !fpmath !0 + ret <2 x float> %exp +} + +define float @test_exp_f32_preserve_flags_md(float %arg) { +; CHECK-LABEL: define float @test_exp_f32_preserve_flags_md +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call nnan ninf float @_Z3expf(float [[ARG]]), !fpmath !0, !foo !1 +; CHECK-NEXT: ret float [[EXP]] +; + %exp = tail call nnan ninf float @_Z3expf(float %arg), !fpmath !0, !foo !1 + ret float %exp +} + +define <2 x float> @test_exp_v2f32_preserve_flags_md(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_exp_v2f32_preserve_flags_md +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call nnan nsz contract <2 x float> @_Z3expDv2_f(<2 x float> [[ARG]]), !fpmath !0, !foo !1 +; CHECK-NEXT: ret <2 x float> [[EXP]] +; + %exp = tail call contract nsz nnan <2 x float> @_Z3expDv2_f(<2 x float> %arg),
!fpmath !0, !foo !1 + ret <2 x float> %exp +} + +define float @test_exp_cr_f32_preserve_flags(float %arg) { +; CHECK-LABEL: define float @test_exp_cr_f32_preserve_flags +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call ninf contract float @_Z3expf(float [[ARG]]) +; CHECK-NEXT: ret float [[EXP]] +; + %exp = tail call ninf contract float @_Z3expf(float %arg) + ret float %exp +} + +define <2 x float> @test_exp_cr_v2f32_preserve_flags(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_exp_cr_v2f32_preserve_flags +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call nnan nsz <2 x float> @_Z3expDv2_f(<2 x float> [[ARG]]) +; CHECK-NEXT: ret <2 x float> [[EXP]] +; + %exp = tail call nnan nsz <2 x float> @_Z3expDv2_f(<2 x float> %arg) + ret <2 x float> %exp +} + +; Test the libm name, not a recognized opencl builtin. +declare float @expf(float) #2 +declare double @exp(double) #2 + +define float @test_libm_exp_f32(float %arg) { +; CHECK-LABEL: define float @test_libm_exp_f32 +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call float @expf(float [[ARG]]) +; CHECK-NEXT: ret float [[EXP]] +; + %exp = tail call float @expf(float %arg) + ret float %exp +} + +define float @test_libm_exp_f32_fast(float %arg) { +; CHECK-LABEL: define float @test_libm_exp_f32_fast +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call fast float @expf(float [[ARG]]) +; CHECK-NEXT: ret float [[EXP]] +; + %exp = tail call fast float @expf(float %arg) + ret float %exp +} + +define float @test_libm_exp_f32_fpmath(float %arg) { +; CHECK-LABEL: define float @test_libm_exp_f32_fpmath +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call float @expf(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[EXP]] +; + %exp = tail call float @expf(float %arg), !fpmath !0 + ret float %exp +} + +define double @test_libm_exp_f64(double %arg) { +; CHECK-LABEL: define double 
@test_libm_exp_f64 +; CHECK-SAME: (double [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call double @exp(double [[ARG]]) +; CHECK-NEXT: ret double [[EXP]] +; + %exp = tail call double @exp(double %arg) + ret double %exp +} + +define double @test_libm_exp_f64_fast(double %arg) { +; CHECK-LABEL: define double @test_libm_exp_f64_fast +; CHECK-SAME: (double [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call fast double @exp(double [[ARG]]) +; CHECK-NEXT: ret double [[EXP]] +; + %exp = tail call fast double @exp(double %arg) + ret double %exp +} + +define double @test_libm_exp_f64_fpmath(double %arg) { +; CHECK-LABEL: define double @test_libm_exp_f64_fpmath +; CHECK-SAME: (double [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call double @exp(double [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret double [[EXP]] +; + %exp = tail call double @exp(double %arg), !fpmath !0 + ret double %exp +} + +define float @test_exp_f32_fast_noinline(float %arg) { +; CHECK-LABEL: define float @test_exp_f32_fast_noinline +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP:%.*]] = tail call fast float @_Z3expf(float [[ARG]]) #[[ATTR6:[0-9]+]], !fpmath !0 +; CHECK-NEXT: ret float [[EXP]] +; + %exp = tail call fast float @_Z3expf(float %arg) #3, !fpmath !0 + ret float %exp +} + +define float @test_exp_f32_fast_optsize(float %arg) #4 { +; CHECK-LABEL: define float @test_exp_f32_fast_optsize +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: [[EXP:%.*]] = tail call fast float @_Z3expf(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[EXP]] +; + %exp = tail call fast float @_Z3expf(float %arg), !fpmath !0 + ret float %exp +} + +define float @test_exp_f32_fast_minsize(float %arg) #5 { +; CHECK-LABEL: define float @test_exp_f32_fast_minsize +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR3:[0-9]+]] { +; CHECK-NEXT: [[EXP:%.*]] = tail call fast float @_Z3expf(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[EXP]] +; + %exp = tail call fast float @_Z3expf(float %arg), 
!fpmath !0 + ret float %exp +} + +define float @test_exp_f32_nsz_contract_optsize(float %arg) #4 { +; CHECK-LABEL: define float @test_exp_f32_nsz_contract_optsize +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[EXP:%.*]] = tail call nsz contract float @_Z3expf(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[EXP]] +; + %exp = tail call nsz contract float @_Z3expf(float %arg), !fpmath !0 + ret float %exp +} + +define float @test_exp_f32_nsz_contract_minsize(float %arg) #5 { +; CHECK-LABEL: define float @test_exp_f32_nsz_contract_minsize +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[EXP:%.*]] = tail call nsz contract float @_Z3expf(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[EXP]] +; + %exp = tail call nsz contract float @_Z3expf(float %arg), !fpmath !0 + ret float %exp +} + +define half @test_exp_f16_fast_minsize(half %arg) #5 { +; CHECK-LABEL: define half @test_exp_f16_fast_minsize +; CHECK-SAME: (half [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[EXP:%.*]] = tail call fast half @_Z3expDh(half [[ARG]]) +; CHECK-NEXT: ret half [[EXP]] +; + %exp = tail call fast half @_Z3expDh(half %arg) + ret half %exp +} + +define float @test_exp_f32_strictfp(float %arg) #6 { +; CHECK-LABEL: define float @test_exp_f32_strictfp +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR4:[0-9]+]] { +; CHECK-NEXT: [[EXP:%.*]] = tail call nsz float @_Z3expf(float [[ARG]]) #[[ATTR4]] +; CHECK-NEXT: ret float [[EXP]] +; + %exp = tail call nsz float @_Z3expf(float %arg) #6 + ret float %exp +} + +attributes #0 = { nobuiltin } +attributes #1 = { "no-builtins" } +attributes #2 = { nounwind memory(none) } +attributes #3 = { noinline } +attributes #4 = { optsize } +attributes #5 = { minsize } +attributes #6 = { strictfp } + +!0 = !{float 3.000000e+00} +!1 = !{i32 1234} Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-exp2.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-exp2.ll
@@ -0,0 +1,561 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s + +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" + +declare float @_Z4exp2f(float) +declare <2 x float> @_Z4exp2Dv2_f(<2 x float>) +declare <3 x float> @_Z4exp2Dv3_f(<3 x float>) +declare <4 x float> @_Z4exp2Dv4_f(<4 x float>) +declare <8 x float> @_Z4exp2Dv8_f(<8 x float>) +declare <16 x float> @_Z4exp2Dv16_f(<16 x float>) + +declare double @_Z4exp2d(double) +declare <2 x double> @_Z4exp2Dv2_d(<2 x double>) +declare <3 x double> @_Z4exp2Dv3_d(<3 x double>) +declare <4 x double> @_Z4exp2Dv4_d(<4 x double>) +declare <8 x double> @_Z4exp2Dv8_d(<8 x double>) +declare <16 x double> @_Z4exp2Dv16_d(<16 x double>) + +declare half @_Z4exp2Dh(half) +declare <2 x half> @_Z4exp2Dv2_Dh(<2 x half>) +declare <3 x half> @_Z4exp2Dv3_Dh(<3 x half>) +declare <4 x half> @_Z4exp2Dv4_Dh(<4 x half>) +declare <8 x half> @_Z4exp2Dv8_Dh(<8 x half>) +declare <16 x half> @_Z4exp2Dv16_Dh(<16 x half>) + +define float @test_exp2_f32(float %arg) { +; CHECK-LABEL: define float @test_exp2_f32 +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call float @_Z4exp2f(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[EXP2]] +; + %exp2 = tail call float @_Z4exp2f(float %arg), !fpmath !0 + ret float %exp2 +} + +define <2 x float> @test_exp2_v2f32(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_exp2_v2f32 +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call <2 x float> @_Z4exp2Dv2_f(<2 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <2 x float> [[EXP2]] +; + %exp2 = tail call <2 x float> @_Z4exp2Dv2_f(<2 x float> %arg), !fpmath !0 + ret <2 x float> %exp2 +} 
+ +define <3 x float> @test_exp2_v3f32(<3 x float> %arg) { +; CHECK-LABEL: define <3 x float> @test_exp2_v3f32 +; CHECK-SAME: (<3 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call <3 x float> @_Z4exp2Dv3_f(<3 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <3 x float> [[EXP2]] +; + %exp2 = tail call <3 x float> @_Z4exp2Dv3_f(<3 x float> %arg), !fpmath !0 + ret <3 x float> %exp2 +} + +define <4 x float> @test_exp2_v4f32(<4 x float> %arg) { +; CHECK-LABEL: define <4 x float> @test_exp2_v4f32 +; CHECK-SAME: (<4 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call <4 x float> @_Z4exp2Dv4_f(<4 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <4 x float> [[EXP2]] +; + %exp2 = tail call <4 x float> @_Z4exp2Dv4_f(<4 x float> %arg), !fpmath !0 + ret <4 x float> %exp2 +} + +define <8 x float> @test_exp2_v8f32(<8 x float> %arg) { +; CHECK-LABEL: define <8 x float> @test_exp2_v8f32 +; CHECK-SAME: (<8 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call <8 x float> @_Z4exp2Dv8_f(<8 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <8 x float> [[EXP2]] +; + %exp2 = tail call <8 x float> @_Z4exp2Dv8_f(<8 x float> %arg), !fpmath !0 + ret <8 x float> %exp2 +} + +define <16 x float> @test_exp2_v16f32(<16 x float> %arg) { +; CHECK-LABEL: define <16 x float> @test_exp2_v16f32 +; CHECK-SAME: (<16 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call <16 x float> @_Z4exp2Dv16_f(<16 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <16 x float> [[EXP2]] +; + %exp2 = tail call <16 x float> @_Z4exp2Dv16_f(<16 x float> %arg), !fpmath !0 + ret <16 x float> %exp2 +} + +define float @test_exp2_cr_f32(float %arg) { +; CHECK-LABEL: define float @test_exp2_cr_f32 +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call float @_Z4exp2f(float [[ARG]]) +; CHECK-NEXT: ret float [[EXP2]] +; + %exp2 = tail call float @_Z4exp2f(float %arg) + ret float %exp2 +} + +define <2 x float> @test_exp2_cr_v2f32(<2 x float> %arg) { +; CHECK-LABEL: 
define <2 x float> @test_exp2_cr_v2f32 +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call <2 x float> @_Z4exp2Dv2_f(<2 x float> [[ARG]]) +; CHECK-NEXT: ret <2 x float> [[EXP2]] +; + %exp2 = tail call <2 x float> @_Z4exp2Dv2_f(<2 x float> %arg) + ret <2 x float> %exp2 +} + +define <3 x float> @test_exp2_cr_v3f32(<3 x float> %arg) { +; CHECK-LABEL: define <3 x float> @test_exp2_cr_v3f32 +; CHECK-SAME: (<3 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call <3 x float> @_Z4exp2Dv3_f(<3 x float> [[ARG]]) +; CHECK-NEXT: ret <3 x float> [[EXP2]] +; + %exp2 = tail call <3 x float> @_Z4exp2Dv3_f(<3 x float> %arg) + ret <3 x float> %exp2 +} + +define <4 x float> @test_exp2_cr_v4f32(<4 x float> %arg) { +; CHECK-LABEL: define <4 x float> @test_exp2_cr_v4f32 +; CHECK-SAME: (<4 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call <4 x float> @_Z4exp2Dv4_f(<4 x float> [[ARG]]) +; CHECK-NEXT: ret <4 x float> [[EXP2]] +; + %exp2 = tail call <4 x float> @_Z4exp2Dv4_f(<4 x float> %arg) + ret <4 x float> %exp2 +} + +define <8 x float> @test_exp2_cr_v8f32(<8 x float> %arg) { +; CHECK-LABEL: define <8 x float> @test_exp2_cr_v8f32 +; CHECK-SAME: (<8 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call <8 x float> @_Z4exp2Dv8_f(<8 x float> [[ARG]]) +; CHECK-NEXT: ret <8 x float> [[EXP2]] +; + %exp2 = tail call <8 x float> @_Z4exp2Dv8_f(<8 x float> %arg) + ret <8 x float> %exp2 +} + +define <16 x float> @test_exp2_cr_v16f32(<16 x float> %arg) { +; CHECK-LABEL: define <16 x float> @test_exp2_cr_v16f32 +; CHECK-SAME: (<16 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call <16 x float> @_Z4exp2Dv16_f(<16 x float> [[ARG]]) +; CHECK-NEXT: ret <16 x float> [[EXP2]] +; + %exp2 = tail call <16 x float> @_Z4exp2Dv16_f(<16 x float> %arg) + ret <16 x float> %exp2 +} + +define double @test_exp2_f64(double %arg) { +; CHECK-LABEL: define double @test_exp2_f64 +; CHECK-SAME: (double [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] 
= tail call double @_Z4exp2d(double [[ARG]]) +; CHECK-NEXT: ret double [[EXP2]] +; + %exp2 = tail call double @_Z4exp2d(double %arg) + ret double %exp2 +} + +define <2 x double> @test_exp2_v2f64(<2 x double> %arg) { +; CHECK-LABEL: define <2 x double> @test_exp2_v2f64 +; CHECK-SAME: (<2 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call <2 x double> @_Z4exp2Dv2_d(<2 x double> [[ARG]]) +; CHECK-NEXT: ret <2 x double> [[EXP2]] +; + %exp2 = tail call <2 x double> @_Z4exp2Dv2_d(<2 x double> %arg) + ret <2 x double> %exp2 +} + +define <3 x double> @test_exp2_v3f64(<3 x double> %arg) { +; CHECK-LABEL: define <3 x double> @test_exp2_v3f64 +; CHECK-SAME: (<3 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call <3 x double> @_Z4exp2Dv3_d(<3 x double> [[ARG]]) +; CHECK-NEXT: ret <3 x double> [[EXP2]] +; + %exp2 = tail call <3 x double> @_Z4exp2Dv3_d(<3 x double> %arg) + ret <3 x double> %exp2 +} + +define <4 x double> @test_exp2_v4f64(<4 x double> %arg) { +; CHECK-LABEL: define <4 x double> @test_exp2_v4f64 +; CHECK-SAME: (<4 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call <4 x double> @_Z4exp2Dv4_d(<4 x double> [[ARG]]) +; CHECK-NEXT: ret <4 x double> [[EXP2]] +; + %exp2 = tail call <4 x double> @_Z4exp2Dv4_d(<4 x double> %arg) + ret <4 x double> %exp2 +} + +define <8 x double> @test_exp2_v8f64(<8 x double> %arg) { +; CHECK-LABEL: define <8 x double> @test_exp2_v8f64 +; CHECK-SAME: (<8 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call <8 x double> @_Z4exp2Dv8_d(<8 x double> [[ARG]]) +; CHECK-NEXT: ret <8 x double> [[EXP2]] +; + %exp2 = tail call <8 x double> @_Z4exp2Dv8_d(<8 x double> %arg) + ret <8 x double> %exp2 +} + +define <16 x double> @test_exp2_v16f64(<16 x double> %arg) { +; CHECK-LABEL: define <16 x double> @test_exp2_v16f64 +; CHECK-SAME: (<16 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call <16 x double> @_Z4exp2Dv16_d(<16 x double> [[ARG]]) +; CHECK-NEXT: ret <16 x double> [[EXP2]] +; + 
%exp2 = tail call <16 x double> @_Z4exp2Dv16_d(<16 x double> %arg) + ret <16 x double> %exp2 +} + +define half @test_exp2_f16(half %arg) { +; CHECK-LABEL: define half @test_exp2_f16 +; CHECK-SAME: (half [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call half @_Z4exp2Dh(half [[ARG]]) +; CHECK-NEXT: ret half [[EXP2]] +; + %exp2 = tail call half @_Z4exp2Dh(half %arg) + ret half %exp2 +} + +define half @test_exp2_f16_fast(half %arg) { +; CHECK-LABEL: define half @test_exp2_f16_fast +; CHECK-SAME: (half [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call fast half @_Z4exp2Dh(half [[ARG]]) +; CHECK-NEXT: ret half [[EXP2]] +; + %exp2 = tail call fast half @_Z4exp2Dh(half %arg) + ret half %exp2 +} + +define <2 x half> @test_exp2_v2f16(<2 x half> %arg) { +; CHECK-LABEL: define <2 x half> @test_exp2_v2f16 +; CHECK-SAME: (<2 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call <2 x half> @_Z4exp2Dv2_Dh(<2 x half> [[ARG]]) +; CHECK-NEXT: ret <2 x half> [[EXP2]] +; + %exp2 = tail call <2 x half> @_Z4exp2Dv2_Dh(<2 x half> %arg) + ret <2 x half> %exp2 +} + +define <3 x half> @test_exp2_v3f16(<3 x half> %arg) { +; CHECK-LABEL: define <3 x half> @test_exp2_v3f16 +; CHECK-SAME: (<3 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call <3 x half> @_Z4exp2Dv3_Dh(<3 x half> [[ARG]]) +; CHECK-NEXT: ret <3 x half> [[EXP2]] +; + %exp2 = tail call <3 x half> @_Z4exp2Dv3_Dh(<3 x half> %arg) + ret <3 x half> %exp2 +} + +define <4 x half> @test_exp2_v4f16(<4 x half> %arg) { +; CHECK-LABEL: define <4 x half> @test_exp2_v4f16 +; CHECK-SAME: (<4 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call <4 x half> @_Z4exp2Dv4_Dh(<4 x half> [[ARG]]) +; CHECK-NEXT: ret <4 x half> [[EXP2]] +; + %exp2 = tail call <4 x half> @_Z4exp2Dv4_Dh(<4 x half> %arg) + ret <4 x half> %exp2 +} + +define <8 x half> @test_exp2_v8f16(<8 x half> %arg) { +; CHECK-LABEL: define <8 x half> @test_exp2_v8f16 +; CHECK-SAME: (<8 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call <8 
x half> @_Z4exp2Dv8_Dh(<8 x half> [[ARG]]) +; CHECK-NEXT: ret <8 x half> [[EXP2]] +; + %exp2 = tail call <8 x half> @_Z4exp2Dv8_Dh(<8 x half> %arg) + ret <8 x half> %exp2 +} + +define <16 x half> @test_exp2_v16f16(<16 x half> %arg) { +; CHECK-LABEL: define <16 x half> @test_exp2_v16f16 +; CHECK-SAME: (<16 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call <16 x half> @_Z4exp2Dv16_Dh(<16 x half> [[ARG]]) +; CHECK-NEXT: ret <16 x half> [[EXP2]] +; + %exp2 = tail call <16 x half> @_Z4exp2Dv16_Dh(<16 x half> %arg) + ret <16 x half> %exp2 +} + +define float @test_exp2_f32_nobuiltin_callsite(float %arg) { +; CHECK-LABEL: define float @test_exp2_f32_nobuiltin_callsite +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call float @_Z4exp2f(float [[ARG]]) #[[ATTR5:[0-9]+]], !fpmath !0 +; CHECK-NEXT: ret float [[EXP2]] +; + %exp2 = tail call float @_Z4exp2f(float %arg) #0, !fpmath !0 + ret float %exp2 +} + +define <2 x float> @test_exp2_v2f32_nobuiltin_callsite(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_exp2_v2f32_nobuiltin_callsite +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call <2 x float> @_Z4exp2Dv2_f(<2 x float> [[ARG]]) #[[ATTR5]], !fpmath !0 +; CHECK-NEXT: ret <2 x float> [[EXP2]] +; + %exp2 = tail call <2 x float> @_Z4exp2Dv2_f(<2 x float> %arg) #0, !fpmath !0 + ret <2 x float> %exp2 +} + +define float @test_exp2_cr_f32_nobuiltin_callsite(float %arg) { +; CHECK-LABEL: define float @test_exp2_cr_f32_nobuiltin_callsite +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call float @_Z4exp2f(float [[ARG]]) #[[ATTR5]] +; CHECK-NEXT: ret float [[EXP2]] +; + %exp2 = tail call float @_Z4exp2f(float %arg) #0 + ret float %exp2 +} + +define <2 x float> @test_exp2_cr_v2f32_nobuiltin_callsite(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_exp2_cr_v2f32_nobuiltin_callsite +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call <2 x 
float> @_Z4exp2Dv2_f(<2 x float> [[ARG]]) #[[ATTR5]] +; CHECK-NEXT: ret <2 x float> [[EXP2]] +; + %exp2 = tail call <2 x float> @_Z4exp2Dv2_f(<2 x float> %arg) #0 + ret <2 x float> %exp2 +} + +; The "no-builtins" function attribute should be ignored; these calls carry no call-site nobuiltin so that only the function attribute is exercised. +define float @test_exp2_f32_nobuiltins(float %arg) #1 { +; CHECK-LABEL: define float @test_exp2_f32_nobuiltins +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[EXP2:%.*]] = tail call float @_Z4exp2f(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[EXP2]] +; + %exp2 = tail call float @_Z4exp2f(float %arg), !fpmath !0 + ret float %exp2 +} + +define <2 x float> @test_exp2_v2f32_nobuiltins(<2 x float> %arg) #1 { +; CHECK-LABEL: define <2 x float> @test_exp2_v2f32_nobuiltins +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[EXP2:%.*]] = tail call <2 x float> @_Z4exp2Dv2_f(<2 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <2 x float> [[EXP2]] +; + %exp2 = tail call <2 x float> @_Z4exp2Dv2_f(<2 x float> %arg), !fpmath !0 + ret <2 x float> %exp2 +} + +define float @test_exp2_cr_f32_nobuiltins(float %arg) #1 { +; CHECK-LABEL: define float @test_exp2_cr_f32_nobuiltins +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[EXP2:%.*]] = tail call float @_Z4exp2f(float [[ARG]]) +; CHECK-NEXT: ret float [[EXP2]] +; + %exp2 = tail call float @_Z4exp2f(float %arg) + ret float %exp2 +} + +define <2 x float> @test_exp2_cr_v2f32_nobuiltins(<2 x float> %arg) #1 { +; CHECK-LABEL: define <2 x float> @test_exp2_cr_v2f32_nobuiltins +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[EXP2:%.*]] = tail call <2 x float> @_Z4exp2Dv2_f(<2 x float> [[ARG]]) +; CHECK-NEXT: ret <2 x float> [[EXP2]] +; + %exp2 = tail call <2 x float> @_Z4exp2Dv2_f(<2 x float> %arg) + ret <2 x float> %exp2 +} + +define float @test_exp2_f32_preserve_flags(float %arg) { +; CHECK-LABEL: define float @test_exp2_f32_preserve_flags +; CHECK-SAME: (float
[[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call nnan ninf float @_Z4exp2f(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[EXP2]] +; + %exp2 = tail call nnan ninf float @_Z4exp2f(float %arg), !fpmath !0 + ret float %exp2 +} + +define <2 x float> @test_exp2_v2f32_preserve_flags(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_exp2_v2f32_preserve_flags +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call nnan nsz contract <2 x float> @_Z4exp2Dv2_f(<2 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <2 x float> [[EXP2]] +; + %exp2 = tail call contract nsz nnan <2 x float> @_Z4exp2Dv2_f(<2 x float> %arg), !fpmath !0 + ret <2 x float> %exp2 +} + +define float @test_exp2_f32_preserve_flags_md(float %arg) { +; CHECK-LABEL: define float @test_exp2_f32_preserve_flags_md +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call nnan ninf float @_Z4exp2f(float [[ARG]]), !fpmath !0, !foo !1 +; CHECK-NEXT: ret float [[EXP2]] +; + %exp2 = tail call nnan ninf float @_Z4exp2f(float %arg), !fpmath !0, !foo !1 + ret float %exp2 +} + +define <2 x float> @test_exp2_v2f32_preserve_flags_md(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_exp2_v2f32_preserve_flags_md +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call nnan nsz contract <2 x float> @_Z4exp2Dv2_f(<2 x float> [[ARG]]), !fpmath !0, !foo !1 +; CHECK-NEXT: ret <2 x float> [[EXP2]] +; + %exp2 = tail call contract nsz nnan <2 x float> @_Z4exp2Dv2_f(<2 x float> %arg), !fpmath !0, !foo !1 + ret <2 x float> %exp2 +} + +define float @test_exp2_cr_f32_preserve_flags(float %arg) { +; CHECK-LABEL: define float @test_exp2_cr_f32_preserve_flags +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call ninf contract float @_Z4exp2f(float [[ARG]]) +; CHECK-NEXT: ret float [[EXP2]] +; + %exp2 = tail call ninf contract float @_Z4exp2f(float %arg) + ret float %exp2 +} + +define <2 x float> 
@test_exp2_cr_v2f32_preserve_flags(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_exp2_cr_v2f32_preserve_flags +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call nnan nsz <2 x float> @_Z4exp2Dv2_f(<2 x float> [[ARG]]) +; CHECK-NEXT: ret <2 x float> [[EXP2]] +; + %exp2 = tail call nnan nsz <2 x float> @_Z4exp2Dv2_f(<2 x float> %arg) + ret <2 x float> %exp2 +} + +; Test the libm name, not a recognized opencl builtin. +declare float @exp2f(float) #2 +declare double @exp2(double) #2 + +define float @test_libm_exp2_f32(float %arg) { +; CHECK-LABEL: define float @test_libm_exp2_f32 +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call float @exp2f(float [[ARG]]) +; CHECK-NEXT: ret float [[EXP2]] +; + %exp2 = tail call float @exp2f(float %arg) + ret float %exp2 +} + +define float @test_libm_exp2_f32_fast(float %arg) { +; CHECK-LABEL: define float @test_libm_exp2_f32_fast +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call fast float @exp2f(float [[ARG]]) +; CHECK-NEXT: ret float [[EXP2]] +; + %exp2 = tail call fast float @exp2f(float %arg) + ret float %exp2 +} + +define float @test_libm_exp2_f32_fpmath(float %arg) { +; CHECK-LABEL: define float @test_libm_exp2_f32_fpmath +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call float @exp2f(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[EXP2]] +; + %exp2 = tail call float @exp2f(float %arg), !fpmath !0 + ret float %exp2 +} + +define double @test_libm_exp2_f64(double %arg) { +; CHECK-LABEL: define double @test_libm_exp2_f64 +; CHECK-SAME: (double [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call double @exp2(double [[ARG]]) +; CHECK-NEXT: ret double [[EXP2]] +; + %exp2 = tail call double @exp2(double %arg) + ret double %exp2 +} + +define double @test_libm_exp2_f64_fast(double %arg) { +; CHECK-LABEL: define double @test_libm_exp2_f64_fast +; CHECK-SAME: (double [[ARG:%.*]]) { +; CHECK-NEXT: 
[[EXP2:%.*]] = tail call fast double @exp2(double [[ARG]]) +; CHECK-NEXT: ret double [[EXP2]] +; + %exp2 = tail call fast double @exp2(double %arg) + ret double %exp2 +} + +define double @test_libm_exp2_f64_fpmath(double %arg) { +; CHECK-LABEL: define double @test_libm_exp2_f64_fpmath +; CHECK-SAME: (double [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call double @exp2(double [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret double [[EXP2]] +; + %exp2 = tail call double @exp2(double %arg), !fpmath !0 + ret double %exp2 +} + +define float @test_exp2_f32_fast_noinline(float %arg) { +; CHECK-LABEL: define float @test_exp2_f32_fast_noinline +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call fast float @_Z4exp2f(float [[ARG]]) #[[ATTR6:[0-9]+]], !fpmath !0 +; CHECK-NEXT: ret float [[EXP2]] +; + %exp2 = tail call fast float @_Z4exp2f(float %arg) #3, !fpmath !0 + ret float %exp2 +} + +define float @test_exp2_f32_fast_optsize(float %arg) #4 { +; CHECK-LABEL: define float @test_exp2_f32_fast_optsize +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: [[EXP2:%.*]] = tail call fast float @_Z4exp2f(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[EXP2]] +; + %exp2 = tail call fast float @_Z4exp2f(float %arg), !fpmath !0 + ret float %exp2 +} + +define float @test_exp2_f32_fast_minsize(float %arg) #5 { +; CHECK-LABEL: define float @test_exp2_f32_fast_minsize +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR3:[0-9]+]] { +; CHECK-NEXT: [[EXP2:%.*]] = tail call fast float @_Z4exp2f(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[EXP2]] +; + %exp2 = tail call fast float @_Z4exp2f(float %arg), !fpmath !0 + ret float %exp2 +} + +define float @test_exp2_f32_nsz_contract_optsize(float %arg) #4 { +; CHECK-LABEL: define float @test_exp2_f32_nsz_contract_optsize +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[EXP2:%.*]] = tail call nsz contract float @_Z4exp2f(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[EXP2]] +; + %exp2 
= tail call nsz contract float @_Z4exp2f(float %arg), !fpmath !0 + ret float %exp2 +} + +define float @test_exp2_f32_nsz_contract_minsize(float %arg) #5 { +; CHECK-LABEL: define float @test_exp2_f32_nsz_contract_minsize +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[EXP2:%.*]] = tail call nsz contract float @_Z4exp2f(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[EXP2]] +; + %exp2 = tail call nsz contract float @_Z4exp2f(float %arg), !fpmath !0 + ret float %exp2 +} + +define half @test_exp2_f16_fast_minsize(half %arg) #5 { +; CHECK-LABEL: define half @test_exp2_f16_fast_minsize +; CHECK-SAME: (half [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[EXP2:%.*]] = tail call fast half @_Z4exp2Dh(half [[ARG]]) +; CHECK-NEXT: ret half [[EXP2]] +; + %exp2 = tail call fast half @_Z4exp2Dh(half %arg) + ret half %exp2 +} + +define float @test_exp2_f32_strictfp(float %arg) #6 { +; CHECK-LABEL: define float @test_exp2_f32_strictfp +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR4:[0-9]+]] { +; CHECK-NEXT: [[EXP:%.*]] = tail call nsz float @_Z4exp2f(float [[ARG]]) #[[ATTR4]] +; CHECK-NEXT: ret float [[EXP]] +; + %exp = tail call nsz float @_Z4exp2f(float %arg) #6 + ret float %exp +} + +attributes #0 = { nobuiltin } +attributes #1 = { "no-builtins" } +attributes #2 = { nounwind memory(none) } +attributes #3 = { noinline } +attributes #4 = { optsize } +attributes #5 = { minsize } +attributes #6 = { strictfp } + +!0 = !{float 3.000000e+00} +!1 = !{i32 1234} Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fabs.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fabs.ll @@ -0,0 +1,327 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s + +target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" + +declare float @_Z4fabsf(float) +declare <2 x float> @_Z4fabsDv2_f(<2 x float>) +declare <3 x float> @_Z4fabsDv3_f(<3 x float>) +declare <4 x float> @_Z4fabsDv4_f(<4 x float>) +declare <8 x float> @_Z4fabsDv8_f(<8 x float>) +declare <16 x float> @_Z4fabsDv16_f(<16 x float>) + +declare double @_Z4fabsd(double) +declare <2 x double> @_Z4fabsDv2_d(<2 x double>) +declare <3 x double> @_Z4fabsDv3_d(<3 x double>) +declare <4 x double> @_Z4fabsDv4_d(<4 x double>) +declare <8 x double> @_Z4fabsDv8_d(<8 x double>) +declare <16 x double> @_Z4fabsDv16_d(<16 x double>) + +declare half @_Z4fabsDh(half) +declare <2 x half> @_Z4fabsDv2_Dh(<2 x half>) +declare <3 x half> @_Z4fabsDv3_Dh(<3 x half>) +declare <4 x half> @_Z4fabsDv4_Dh(<4 x half>) +declare <8 x half> @_Z4fabsDv8_Dh(<8 x half>) +declare <16 x half> @_Z4fabsDv16_Dh(<16 x half>) + +define float @test_fabs_f32(float %arg) { +; CHECK-LABEL: define float @test_fabs_f32 +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[FABS:%.*]] = tail call float @_Z4fabsf(float [[ARG]]) +; CHECK-NEXT: ret float [[FABS]] +; + %fabs = tail call float @_Z4fabsf(float %arg) + ret float %fabs +} + +define <2 x float> @test_fabs_v2f32(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_fabs_v2f32 +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[FABS:%.*]] = tail call <2 x float> @_Z4fabsDv2_f(<2 x float> [[ARG]]) +; CHECK-NEXT: ret <2 x float> [[FABS]] +; + %fabs = tail call <2 x float> @_Z4fabsDv2_f(<2 x float> %arg) + ret <2 x float> %fabs +} + +define <3 x float> @test_fabs_v3f32(<3 x float> %arg) { +; CHECK-LABEL: define <3 x float> @test_fabs_v3f32 +; CHECK-SAME: (<3 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[FABS:%.*]] = tail call <3 x float> @_Z4fabsDv3_f(<3 x float> [[ARG]]) +; CHECK-NEXT: ret <3 x float> 
[[FABS]] +; + %fabs = tail call <3 x float> @_Z4fabsDv3_f(<3 x float> %arg) + ret <3 x float> %fabs +} + +define <4 x float> @test_fabs_v4f32(<4 x float> %arg) { +; CHECK-LABEL: define <4 x float> @test_fabs_v4f32 +; CHECK-SAME: (<4 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[FABS:%.*]] = tail call <4 x float> @_Z4fabsDv4_f(<4 x float> [[ARG]]) +; CHECK-NEXT: ret <4 x float> [[FABS]] +; + %fabs = tail call <4 x float> @_Z4fabsDv4_f(<4 x float> %arg) + ret <4 x float> %fabs +} + +define <8 x float> @test_fabs_v8f32(<8 x float> %arg) { +; CHECK-LABEL: define <8 x float> @test_fabs_v8f32 +; CHECK-SAME: (<8 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[FABS:%.*]] = tail call <8 x float> @_Z4fabsDv8_f(<8 x float> [[ARG]]) +; CHECK-NEXT: ret <8 x float> [[FABS]] +; + %fabs = tail call <8 x float> @_Z4fabsDv8_f(<8 x float> %arg) + ret <8 x float> %fabs +} + +define <16 x float> @test_fabs_v16f32(<16 x float> %arg) { +; CHECK-LABEL: define <16 x float> @test_fabs_v16f32 +; CHECK-SAME: (<16 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[FABS:%.*]] = tail call <16 x float> @_Z4fabsDv16_f(<16 x float> [[ARG]]) +; CHECK-NEXT: ret <16 x float> [[FABS]] +; + %fabs = tail call <16 x float> @_Z4fabsDv16_f(<16 x float> %arg) + ret <16 x float> %fabs +} + +define double @test_fabs_f64(double %arg) { +; CHECK-LABEL: define double @test_fabs_f64 +; CHECK-SAME: (double [[ARG:%.*]]) { +; CHECK-NEXT: [[FABS:%.*]] = tail call double @_Z4fabsd(double [[ARG]]) +; CHECK-NEXT: ret double [[FABS]] +; + %fabs = tail call double @_Z4fabsd(double %arg) + ret double %fabs +} + +define <2 x double> @test_fabs_v2f64(<2 x double> %arg) { +; CHECK-LABEL: define <2 x double> @test_fabs_v2f64 +; CHECK-SAME: (<2 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[FABS:%.*]] = tail call <2 x double> @_Z4fabsDv2_d(<2 x double> [[ARG]]) +; CHECK-NEXT: ret <2 x double> [[FABS]] +; + %fabs = tail call <2 x double> @_Z4fabsDv2_d(<2 x double> %arg) + ret <2 x double> %fabs +} + +define <3 x double> @test_fabs_v3f64(<3 x double> %arg) 
{ +; CHECK-LABEL: define <3 x double> @test_fabs_v3f64 +; CHECK-SAME: (<3 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[FABS:%.*]] = tail call <3 x double> @_Z4fabsDv3_d(<3 x double> [[ARG]]) +; CHECK-NEXT: ret <3 x double> [[FABS]] +; + %fabs = tail call <3 x double> @_Z4fabsDv3_d(<3 x double> %arg) + ret <3 x double> %fabs +} + +define <4 x double> @test_fabs_v4f64(<4 x double> %arg) { +; CHECK-LABEL: define <4 x double> @test_fabs_v4f64 +; CHECK-SAME: (<4 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[FABS:%.*]] = tail call <4 x double> @_Z4fabsDv4_d(<4 x double> [[ARG]]) +; CHECK-NEXT: ret <4 x double> [[FABS]] +; + %fabs = tail call <4 x double> @_Z4fabsDv4_d(<4 x double> %arg) + ret <4 x double> %fabs +} + +define <8 x double> @test_fabs_v8f64(<8 x double> %arg) { +; CHECK-LABEL: define <8 x double> @test_fabs_v8f64 +; CHECK-SAME: (<8 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[FABS:%.*]] = tail call <8 x double> @_Z4fabsDv8_d(<8 x double> [[ARG]]) +; CHECK-NEXT: ret <8 x double> [[FABS]] +; + %fabs = tail call <8 x double> @_Z4fabsDv8_d(<8 x double> %arg) + ret <8 x double> %fabs +} + +define <16 x double> @test_fabs_v16f64(<16 x double> %arg) { +; CHECK-LABEL: define <16 x double> @test_fabs_v16f64 +; CHECK-SAME: (<16 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[FABS:%.*]] = tail call <16 x double> @_Z4fabsDv16_d(<16 x double> [[ARG]]) +; CHECK-NEXT: ret <16 x double> [[FABS]] +; + %fabs = tail call <16 x double> @_Z4fabsDv16_d(<16 x double> %arg) + ret <16 x double> %fabs +} + +define half @test_fabs_f16(half %arg) { +; CHECK-LABEL: define half @test_fabs_f16 +; CHECK-SAME: (half [[ARG:%.*]]) { +; CHECK-NEXT: [[FABS:%.*]] = tail call half @_Z4fabsDh(half [[ARG]]) +; CHECK-NEXT: ret half [[FABS]] +; + %fabs = tail call half @_Z4fabsDh(half %arg) + ret half %fabs +} + +define <2 x half> @test_fabs_v2f16(<2 x half> %arg) { +; CHECK-LABEL: define <2 x half> @test_fabs_v2f16 +; CHECK-SAME: (<2 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[FABS:%.*]] = tail call <2 x half> 
@_Z4fabsDv2_Dh(<2 x half> [[ARG]]) +; CHECK-NEXT: ret <2 x half> [[FABS]] +; + %fabs = tail call <2 x half> @_Z4fabsDv2_Dh(<2 x half> %arg) + ret <2 x half> %fabs +} + +define <3 x half> @test_fabs_v3f16(<3 x half> %arg) { +; CHECK-LABEL: define <3 x half> @test_fabs_v3f16 +; CHECK-SAME: (<3 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[FABS:%.*]] = tail call <3 x half> @_Z4fabsDv3_Dh(<3 x half> [[ARG]]) +; CHECK-NEXT: ret <3 x half> [[FABS]] +; + %fabs = tail call <3 x half> @_Z4fabsDv3_Dh(<3 x half> %arg) + ret <3 x half> %fabs +} + +define <4 x half> @test_fabs_v4f16(<4 x half> %arg) { +; CHECK-LABEL: define <4 x half> @test_fabs_v4f16 +; CHECK-SAME: (<4 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[FABS:%.*]] = tail call <4 x half> @_Z4fabsDv4_Dh(<4 x half> [[ARG]]) +; CHECK-NEXT: ret <4 x half> [[FABS]] +; + %fabs = tail call <4 x half> @_Z4fabsDv4_Dh(<4 x half> %arg) + ret <4 x half> %fabs +} + +define <8 x half> @test_fabs_v8f16(<8 x half> %arg) { +; CHECK-LABEL: define <8 x half> @test_fabs_v8f16 +; CHECK-SAME: (<8 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[FABS:%.*]] = tail call <8 x half> @_Z4fabsDv8_Dh(<8 x half> [[ARG]]) +; CHECK-NEXT: ret <8 x half> [[FABS]] +; + %fabs = tail call <8 x half> @_Z4fabsDv8_Dh(<8 x half> %arg) + ret <8 x half> %fabs +} + +define <16 x half> @test_fabs_v16f16(<16 x half> %arg) { +; CHECK-LABEL: define <16 x half> @test_fabs_v16f16 +; CHECK-SAME: (<16 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[FABS:%.*]] = tail call <16 x half> @_Z4fabsDv16_Dh(<16 x half> [[ARG]]) +; CHECK-NEXT: ret <16 x half> [[FABS]] +; + %fabs = tail call <16 x half> @_Z4fabsDv16_Dh(<16 x half> %arg) + ret <16 x half> %fabs +} + +define float @test_fabs_f32_nobuiltin_callsite(float %arg) { +; CHECK-LABEL: define float @test_fabs_f32_nobuiltin_callsite +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[FABS:%.*]] = tail call float @_Z4fabsf(float [[ARG]]) #[[ATTR3:[0-9]+]] +; CHECK-NEXT: ret float [[FABS]] +; + %fabs = tail call float @_Z4fabsf(float %arg) #0 + ret 
float %fabs +} + +define <2 x float> @test_fabs_v2f32_nobuiltin_callsite(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_fabs_v2f32_nobuiltin_callsite +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[FABS:%.*]] = tail call <2 x float> @_Z4fabsDv2_f(<2 x float> [[ARG]]) #[[ATTR3]] +; CHECK-NEXT: ret <2 x float> [[FABS]] +; + %fabs = tail call <2 x float> @_Z4fabsDv2_f(<2 x float> %arg) #0 + ret <2 x float> %fabs +} + +; The "no-builtins" function attribute (#1) should be ignored. +define float @test_fabs_f32_nobuiltins(float %arg) #1 { +; CHECK-LABEL: define float @test_fabs_f32_nobuiltins +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[FABS:%.*]] = tail call float @_Z4fabsf(float [[ARG]]) #[[ATTR3]] +; CHECK-NEXT: ret float [[FABS]] +; + %fabs = tail call float @_Z4fabsf(float %arg) #0 + ret float %fabs +} + +define <2 x float> @test_fabs_v2f32_nobuiltins(<2 x float> %arg) #1 { +; CHECK-LABEL: define <2 x float> @test_fabs_v2f32_nobuiltins +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[FABS:%.*]] = tail call <2 x float> @_Z4fabsDv2_f(<2 x float> [[ARG]]) #[[ATTR3]] +; CHECK-NEXT: ret <2 x float> [[FABS]] +; + %fabs = tail call <2 x float> @_Z4fabsDv2_f(<2 x float> %arg) #0 + ret <2 x float> %fabs +} + +define float @test_fabs_f32_preserve_flags(float %arg) { +; CHECK-LABEL: define float @test_fabs_f32_preserve_flags +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[FABS:%.*]] = tail call nnan ninf float @_Z4fabsf(float [[ARG]]) +; CHECK-NEXT: ret float [[FABS]] +; + %fabs = tail call nnan ninf float @_Z4fabsf(float %arg) + ret float %fabs +} + +define <2 x float> @test_fabs_v2f32_preserve_flags(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_fabs_v2f32_preserve_flags +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[FABS:%.*]] = tail call nnan nsz contract <2 x float> @_Z4fabsDv2_f(<2 x float> [[ARG]]) +; CHECK-NEXT: ret <2 x float> [[FABS]] +; + %fabs = tail call contract nsz nnan <2 x float>
@_Z4fabsDv2_f(<2 x float> %arg) + ret <2 x float> %fabs +} + +define float @test_fabs_f32_preserve_flags_md(float %arg) { +; CHECK-LABEL: define float @test_fabs_f32_preserve_flags_md +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[FABS:%.*]] = tail call nnan ninf float @_Z4fabsf(float [[ARG]]), !foo !0 +; CHECK-NEXT: ret float [[FABS]] +; + %fabs = tail call nnan ninf float @_Z4fabsf(float %arg), !foo !0 + ret float %fabs +} + +define <2 x float> @test_fabs_v2f32_preserve_flags_md(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_fabs_v2f32_preserve_flags_md +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[FABS:%.*]] = tail call nnan nsz contract <2 x float> @_Z4fabsDv2_f(<2 x float> [[ARG]]), !foo !0 +; CHECK-NEXT: ret <2 x float> [[FABS]] +; + %fabs = tail call contract nsz nnan <2 x float> @_Z4fabsDv2_f(<2 x float> %arg), !foo !0 + ret <2 x float> %fabs +} + +; Test the libm names (fabsf/fabs), which are not recognized OpenCL builtins. +declare float @fabsf(float) #2 +declare double @fabs(double) #2 + +define float @test_libm_fabs_f32(float %arg) { +; CHECK-LABEL: define float @test_libm_fabs_f32 +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[FABS:%.*]] = tail call float @fabsf(float [[ARG]]) +; CHECK-NEXT: ret float [[FABS]] +; + %fabs = tail call float @fabsf(float %arg) + ret float %fabs +} + +define double @test_libm_fabs_f64(double %arg) { +; CHECK-LABEL: define double @test_libm_fabs_f64 +; CHECK-SAME: (double [[ARG:%.*]]) { +; CHECK-NEXT: [[FABS:%.*]] = tail call double @fabs(double [[ARG]]) +; CHECK-NEXT: ret double [[FABS]] +; + %fabs = tail call double @fabs(double %arg) + ret double %fabs +} + +define float @test_fabs_f32_strictfp(float %arg) #3 { +; CHECK-LABEL: define float @test_fabs_f32_strictfp +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: [[FABS:%.*]] = tail call nnan float @_Z4fabsf(float [[ARG]]) #[[ATTR2]] +; CHECK-NEXT: ret float [[FABS]] +; + %fabs = tail call nnan float @_Z4fabsf(float %arg) #3 + ret
float %fabs +} + +attributes #0 = { nobuiltin } +attributes #1 = { "no-builtins" } +attributes #2 = { nounwind memory(none) } +attributes #3 = { strictfp } + +!0 = !{i32 1234} Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-floor.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-floor.ll @@ -0,0 +1,327 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s + +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" + +declare float @_Z5floorf(float) +declare <2 x float> @_Z5floorDv2_f(<2 x float>) +declare <3 x float> @_Z5floorDv3_f(<3 x float>) +declare <4 x float> @_Z5floorDv4_f(<4 x float>) +declare <8 x float> @_Z5floorDv8_f(<8 x float>) +declare <16 x float> @_Z5floorDv16_f(<16 x float>) + +declare double @_Z5floord(double) +declare <2 x double> @_Z5floorDv2_d(<2 x double>) +declare <3 x double> @_Z5floorDv3_d(<3 x double>) +declare <4 x double> @_Z5floorDv4_d(<4 x double>) +declare <8 x double> @_Z5floorDv8_d(<8 x double>) +declare <16 x double> @_Z5floorDv16_d(<16 x double>) + +declare half @_Z5floorDh(half) +declare <2 x half> @_Z5floorDv2_Dh(<2 x half>) +declare <3 x half> @_Z5floorDv3_Dh(<3 x half>) +declare <4 x half> @_Z5floorDv4_Dh(<4 x half>) +declare <8 x half> @_Z5floorDv8_Dh(<8 x half>) +declare <16 x half> @_Z5floorDv16_Dh(<16 x half>) + +define float @test_floor_f32(float %arg) { +; CHECK-LABEL: define float @test_floor_f32 +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[FLOOR:%.*]] = tail call float @_Z5floorf(float [[ARG]]) +; CHECK-NEXT: ret float [[FLOOR]] +; + %floor = tail call float @_Z5floorf(float %arg) + ret float %floor +} + +define <2 x
float> @test_floor_v2f32(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_floor_v2f32 +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[FLOOR:%.*]] = tail call <2 x float> @_Z5floorDv2_f(<2 x float> [[ARG]]) +; CHECK-NEXT: ret <2 x float> [[FLOOR]] +; + %floor = tail call <2 x float> @_Z5floorDv2_f(<2 x float> %arg) + ret <2 x float> %floor +} + +define <3 x float> @test_floor_v3f32(<3 x float> %arg) { +; CHECK-LABEL: define <3 x float> @test_floor_v3f32 +; CHECK-SAME: (<3 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[FLOOR:%.*]] = tail call <3 x float> @_Z5floorDv3_f(<3 x float> [[ARG]]) +; CHECK-NEXT: ret <3 x float> [[FLOOR]] +; + %floor = tail call <3 x float> @_Z5floorDv3_f(<3 x float> %arg) + ret <3 x float> %floor +} + +define <4 x float> @test_floor_v4f32(<4 x float> %arg) { +; CHECK-LABEL: define <4 x float> @test_floor_v4f32 +; CHECK-SAME: (<4 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[FLOOR:%.*]] = tail call <4 x float> @_Z5floorDv4_f(<4 x float> [[ARG]]) +; CHECK-NEXT: ret <4 x float> [[FLOOR]] +; + %floor = tail call <4 x float> @_Z5floorDv4_f(<4 x float> %arg) + ret <4 x float> %floor +} + +define <8 x float> @test_floor_v8f32(<8 x float> %arg) { +; CHECK-LABEL: define <8 x float> @test_floor_v8f32 +; CHECK-SAME: (<8 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[FLOOR:%.*]] = tail call <8 x float> @_Z5floorDv8_f(<8 x float> [[ARG]]) +; CHECK-NEXT: ret <8 x float> [[FLOOR]] +; + %floor = tail call <8 x float> @_Z5floorDv8_f(<8 x float> %arg) + ret <8 x float> %floor +} + +define <16 x float> @test_floor_v16f32(<16 x float> %arg) { +; CHECK-LABEL: define <16 x float> @test_floor_v16f32 +; CHECK-SAME: (<16 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[FLOOR:%.*]] = tail call <16 x float> @_Z5floorDv16_f(<16 x float> [[ARG]]) +; CHECK-NEXT: ret <16 x float> [[FLOOR]] +; + %floor = tail call <16 x float> @_Z5floorDv16_f(<16 x float> %arg) + ret <16 x float> %floor +} + +define double @test_floor_f64(double %arg) { +; CHECK-LABEL: define double @test_floor_f64 +; CHECK-SAME: (double
[[ARG:%.*]]) { +; CHECK-NEXT: [[FLOOR:%.*]] = tail call double @_Z5floord(double [[ARG]]) +; CHECK-NEXT: ret double [[FLOOR]] +; + %floor = tail call double @_Z5floord(double %arg) + ret double %floor +} + +define <2 x double> @test_floor_v2f64(<2 x double> %arg) { +; CHECK-LABEL: define <2 x double> @test_floor_v2f64 +; CHECK-SAME: (<2 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[FLOOR:%.*]] = tail call <2 x double> @_Z5floorDv2_d(<2 x double> [[ARG]]) +; CHECK-NEXT: ret <2 x double> [[FLOOR]] +; + %floor = tail call <2 x double> @_Z5floorDv2_d(<2 x double> %arg) + ret <2 x double> %floor +} + +define <3 x double> @test_floor_v3f64(<3 x double> %arg) { +; CHECK-LABEL: define <3 x double> @test_floor_v3f64 +; CHECK-SAME: (<3 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[FLOOR:%.*]] = tail call <3 x double> @_Z5floorDv3_d(<3 x double> [[ARG]]) +; CHECK-NEXT: ret <3 x double> [[FLOOR]] +; + %floor = tail call <3 x double> @_Z5floorDv3_d(<3 x double> %arg) + ret <3 x double> %floor +} + +define <4 x double> @test_floor_v4f64(<4 x double> %arg) { +; CHECK-LABEL: define <4 x double> @test_floor_v4f64 +; CHECK-SAME: (<4 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[FLOOR:%.*]] = tail call <4 x double> @_Z5floorDv4_d(<4 x double> [[ARG]]) +; CHECK-NEXT: ret <4 x double> [[FLOOR]] +; + %floor = tail call <4 x double> @_Z5floorDv4_d(<4 x double> %arg) + ret <4 x double> %floor +} + +define <8 x double> @test_floor_v8f64(<8 x double> %arg) { +; CHECK-LABEL: define <8 x double> @test_floor_v8f64 +; CHECK-SAME: (<8 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[FLOOR:%.*]] = tail call <8 x double> @_Z5floorDv8_d(<8 x double> [[ARG]]) +; CHECK-NEXT: ret <8 x double> [[FLOOR]] +; + %floor = tail call <8 x double> @_Z5floorDv8_d(<8 x double> %arg) + ret <8 x double> %floor +} + +define <16 x double> @test_floor_v16f64(<16 x double> %arg) { +; CHECK-LABEL: define <16 x double> @test_floor_v16f64 +; CHECK-SAME: (<16 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[FLOOR:%.*]] = tail call <16 x double> @_Z5floorDv16_d(<16 x double>
[[ARG]]) +; CHECK-NEXT: ret <16 x double> [[FLOOR]] +; + %floor = tail call <16 x double> @_Z5floorDv16_d(<16 x double> %arg) + ret <16 x double> %floor +} + +define half @test_floor_f16(half %arg) { +; CHECK-LABEL: define half @test_floor_f16 +; CHECK-SAME: (half [[ARG:%.*]]) { +; CHECK-NEXT: [[FLOOR:%.*]] = tail call half @_Z5floorDh(half [[ARG]]) +; CHECK-NEXT: ret half [[FLOOR]] +; + %floor = tail call half @_Z5floorDh(half %arg) + ret half %floor +} + +define <2 x half> @test_floor_v2f16(<2 x half> %arg) { +; CHECK-LABEL: define <2 x half> @test_floor_v2f16 +; CHECK-SAME: (<2 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[FLOOR:%.*]] = tail call <2 x half> @_Z5floorDv2_Dh(<2 x half> [[ARG]]) +; CHECK-NEXT: ret <2 x half> [[FLOOR]] +; + %floor = tail call <2 x half> @_Z5floorDv2_Dh(<2 x half> %arg) + ret <2 x half> %floor +} + +define <3 x half> @test_floor_v3f16(<3 x half> %arg) { +; CHECK-LABEL: define <3 x half> @test_floor_v3f16 +; CHECK-SAME: (<3 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[FLOOR:%.*]] = tail call <3 x half> @_Z5floorDv3_Dh(<3 x half> [[ARG]]) +; CHECK-NEXT: ret <3 x half> [[FLOOR]] +; + %floor = tail call <3 x half> @_Z5floorDv3_Dh(<3 x half> %arg) + ret <3 x half> %floor +} + +define <4 x half> @test_floor_v4f16(<4 x half> %arg) { +; CHECK-LABEL: define <4 x half> @test_floor_v4f16 +; CHECK-SAME: (<4 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[FLOOR:%.*]] = tail call <4 x half> @_Z5floorDv4_Dh(<4 x half> [[ARG]]) +; CHECK-NEXT: ret <4 x half> [[FLOOR]] +; + %floor = tail call <4 x half> @_Z5floorDv4_Dh(<4 x half> %arg) + ret <4 x half> %floor +} + +define <8 x half> @test_floor_v8f16(<8 x half> %arg) { +; CHECK-LABEL: define <8 x half> @test_floor_v8f16 +; CHECK-SAME: (<8 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[FLOOR:%.*]] = tail call <8 x half> @_Z5floorDv8_Dh(<8 x half> [[ARG]]) +; CHECK-NEXT: ret <8 x half> [[FLOOR]] +; + %floor = tail call <8 x half> @_Z5floorDv8_Dh(<8 x half> %arg) + ret <8 x half> %floor +} + +define <16 x half> @test_floor_v16f16(<16 x half> %arg) { +;
CHECK-LABEL: define <16 x half> @test_floor_v16f16 +; CHECK-SAME: (<16 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[FLOOR:%.*]] = tail call <16 x half> @_Z5floorDv16_Dh(<16 x half> [[ARG]]) +; CHECK-NEXT: ret <16 x half> [[FLOOR]] +; + %floor = tail call <16 x half> @_Z5floorDv16_Dh(<16 x half> %arg) + ret <16 x half> %floor +} + +define float @test_floor_f32_nobuiltin_callsite(float %arg) { +; CHECK-LABEL: define float @test_floor_f32_nobuiltin_callsite +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[FLOOR:%.*]] = tail call float @_Z5floorf(float [[ARG]]) #[[ATTR3:[0-9]+]] +; CHECK-NEXT: ret float [[FLOOR]] +; + %floor = tail call float @_Z5floorf(float %arg) #0 + ret float %floor +} + +define <2 x float> @test_floor_v2f32_nobuiltin_callsite(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_floor_v2f32_nobuiltin_callsite +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[FLOOR:%.*]] = tail call <2 x float> @_Z5floorDv2_f(<2 x float> [[ARG]]) #[[ATTR3]] +; CHECK-NEXT: ret <2 x float> [[FLOOR]] +; + %floor = tail call <2 x float> @_Z5floorDv2_f(<2 x float> %arg) #0 + ret <2 x float> %floor +} + +; The "no-builtins" function attribute (#1) should be ignored. +define float @test_floor_f32_nobuiltins(float %arg) #1 { +; CHECK-LABEL: define float @test_floor_f32_nobuiltins +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[FLOOR:%.*]] = tail call float @_Z5floorf(float [[ARG]]) #[[ATTR3]] +; CHECK-NEXT: ret float [[FLOOR]] +; + %floor = tail call float @_Z5floorf(float %arg) #0 + ret float %floor +} + +define <2 x float> @test_floor_v2f32_nobuiltins(<2 x float> %arg) #1 { +; CHECK-LABEL: define <2 x float> @test_floor_v2f32_nobuiltins +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[FLOOR:%.*]] = tail call <2 x float> @_Z5floorDv2_f(<2 x float> [[ARG]]) #[[ATTR3]] +; CHECK-NEXT: ret <2 x float> [[FLOOR]] +; + %floor = tail call <2 x float> @_Z5floorDv2_f(<2 x float> %arg) #0 + ret <2 x float> %floor +} + +define float @test_floor_f32_preserve_flags(float %arg) { +;
CHECK-LABEL: define float @test_floor_f32_preserve_flags +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[FLOOR:%.*]] = tail call nnan ninf float @_Z5floorf(float [[ARG]]) +; CHECK-NEXT: ret float [[FLOOR]] +; + %floor = tail call nnan ninf float @_Z5floorf(float %arg) + ret float %floor +} + +define <2 x float> @test_floor_v2f32_preserve_flags(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_floor_v2f32_preserve_flags +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[FLOOR:%.*]] = tail call nnan nsz contract <2 x float> @_Z5floorDv2_f(<2 x float> [[ARG]]) +; CHECK-NEXT: ret <2 x float> [[FLOOR]] +; + %floor = tail call contract nsz nnan <2 x float> @_Z5floorDv2_f(<2 x float> %arg) + ret <2 x float> %floor +} + +define float @test_floor_f32_preserve_flags_md(float %arg) { +; CHECK-LABEL: define float @test_floor_f32_preserve_flags_md +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[FLOOR:%.*]] = tail call nnan ninf float @_Z5floorf(float [[ARG]]), !foo !0 +; CHECK-NEXT: ret float [[FLOOR]] +; + %floor = tail call nnan ninf float @_Z5floorf(float %arg), !foo !0 + ret float %floor +} + +define <2 x float> @test_floor_v2f32_preserve_flags_md(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_floor_v2f32_preserve_flags_md +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[FLOOR:%.*]] = tail call nnan nsz contract <2 x float> @_Z5floorDv2_f(<2 x float> [[ARG]]), !foo !0 +; CHECK-NEXT: ret <2 x float> [[FLOOR]] +; + %floor = tail call contract nsz nnan <2 x float> @_Z5floorDv2_f(<2 x float> %arg), !foo !0 + ret <2 x float> %floor +} + +; Test the libm names (floorf/floor), which are not recognized OpenCL builtins.
+declare float @floorf(float) #2 +declare double @floor(double) #2 + +define float @test_libm_floor_f32(float %arg) { +; CHECK-LABEL: define float @test_libm_floor_f32 +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[FLOOR:%.*]] = tail call float @floorf(float [[ARG]]) +; CHECK-NEXT: ret float [[FLOOR]] +; + %floor = tail call float @floorf(float %arg) + ret float %floor +} + +define double @test_libm_floor_f64(double %arg) { +; CHECK-LABEL: define double @test_libm_floor_f64 +; CHECK-SAME: (double [[ARG:%.*]]) { +; CHECK-NEXT: [[FLOOR:%.*]] = tail call double @floor(double [[ARG]]) +; CHECK-NEXT: ret double [[FLOOR]] +; + %floor = tail call double @floor(double %arg) + ret double %floor +} + +define float @test_floor_f32_strictfp(float %arg) #3 { +; CHECK-LABEL: define float @test_floor_f32_strictfp +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: [[FLOOR:%.*]] = tail call nnan float @_Z5floorf(float [[ARG]]) #[[ATTR2]] +; CHECK-NEXT: ret float [[FLOOR]] +; + %floor = tail call nnan float @_Z5floorf(float %arg) #3 + ret float %floor +} + +attributes #0 = { nobuiltin } +attributes #1 = { "no-builtins" } +attributes #2 = { nounwind memory(none) } +attributes #3 = { strictfp } + +!0 = !{i32 1234} Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fma.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fma.ll @@ -0,0 +1,258 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s + +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" + +declare float @_Z3fmafff(float, float, float) +declare <2 x float> @_Z3fmaDv2_fS_S_(<2 x float>, <2 x float>, <2 x float>) +declare <3 x
float> @_Z3fmaDv3_fS_S_(<3 x float>, <3 x float>, <3 x float>) +declare <4 x float> @_Z3fmaDv4_fS_S_(<4 x float>, <4 x float>, <4 x float>) +declare <8 x float> @_Z3fmaDv8_fS_S_(<8 x float>, <8 x float>, <8 x float>) +declare <16 x float> @_Z3fmaDv16_fS_S_(<16 x float>, <16 x float>, <16 x float>) +declare double @_Z3fmaddd(double, double, double) +declare <2 x double> @_Z3fmaDv2_dS_S_(<2 x double>, <2 x double>, <2 x double>) +declare <3 x double> @_Z3fmaDv3_dS_S_(<3 x double>, <3 x double>, <3 x double>) +declare <4 x double> @_Z3fmaDv4_dS_S_(<4 x double>, <4 x double>, <4 x double>) +declare <8 x double> @_Z3fmaDv8_dS_S_(<8 x double>, <8 x double>, <8 x double>) +declare <16 x double> @_Z3fmaDv16_dS_S_(<16 x double>, <16 x double>, <16 x double>) +declare half @_Z3fmaDhDhDh(half, half, half) +declare <2 x half> @_Z3fmaDv2_DhS_S_(<2 x half>, <2 x half>, <2 x half>) +declare <3 x half> @_Z3fmaDv3_DhS_S_(<3 x half>, <3 x half>, <3 x half>) +declare <4 x half> @_Z3fmaDv4_DhS_S_(<4 x half>, <4 x half>, <4 x half>) +declare <8 x half> @_Z3fmaDv8_DhS_S_(<8 x half>, <8 x half>, <8 x half>) +declare <16 x half> @_Z3fmaDv16_DhS_S_(<16 x half>, <16 x half>, <16 x half>) + +define float @test_fma_f32(float %x, float %y, float %z) { +; CHECK-LABEL: define float @test_fma_f32 +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) { +; CHECK-NEXT: [[FMA:%.*]] = tail call float @_Z3fmafff(float [[X]], float [[Y]], float [[Z]]) +; CHECK-NEXT: ret float [[FMA]] +; + %fma = tail call float @_Z3fmafff(float %x, float %y, float %z) + ret float %fma +} + +define <2 x float> @test_fma_v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z) { +; CHECK-LABEL: define <2 x float> @test_fma_v2f32 +; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) { +; CHECK-NEXT: [[FMA:%.*]] = tail call <2 x float> @_Z3fmaDv2_fS_S_(<2 x float> [[X]], <2 x float> [[Y]], <2 x float> [[Z]]) +; CHECK-NEXT: ret <2 x float> [[FMA]] +; + %fma = tail call <2 x float> 
@_Z3fmaDv2_fS_S_(<2 x float> %x, <2 x float> %y, <2 x float> %z) + ret <2 x float> %fma +} + +define <3 x float> @test_fma_v3f32(<3 x float> %x, <3 x float> %y, <3 x float> %z) { +; CHECK-LABEL: define <3 x float> @test_fma_v3f32 +; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]], <3 x float> [[Z:%.*]]) { +; CHECK-NEXT: [[FMA:%.*]] = tail call <3 x float> @_Z3fmaDv3_fS_S_(<3 x float> [[X]], <3 x float> [[Y]], <3 x float> [[Z]]) +; CHECK-NEXT: ret <3 x float> [[FMA]] +; + %fma = tail call <3 x float> @_Z3fmaDv3_fS_S_(<3 x float> %x, <3 x float> %y, <3 x float> %z) + ret <3 x float> %fma +} + +define <4 x float> @test_fma_v4f32(<4 x float> %x, <4 x float> %y, <4 x float> %z) { +; CHECK-LABEL: define <4 x float> @test_fma_v4f32 +; CHECK-SAME: (<4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) { +; CHECK-NEXT: [[FMA:%.*]] = tail call <4 x float> @_Z3fmaDv4_fS_S_(<4 x float> [[X]], <4 x float> [[Y]], <4 x float> [[Z]]) +; CHECK-NEXT: ret <4 x float> [[FMA]] +; + %fma = tail call <4 x float> @_Z3fmaDv4_fS_S_(<4 x float> %x, <4 x float> %y, <4 x float> %z) + ret <4 x float> %fma +} + +define <8 x float> @test_fma_v8f32(<8 x float> %x, <8 x float> %y, <8 x float> %z) { +; CHECK-LABEL: define <8 x float> @test_fma_v8f32 +; CHECK-SAME: (<8 x float> [[X:%.*]], <8 x float> [[Y:%.*]], <8 x float> [[Z:%.*]]) { +; CHECK-NEXT: [[FMA:%.*]] = tail call <8 x float> @_Z3fmaDv8_fS_S_(<8 x float> [[X]], <8 x float> [[Y]], <8 x float> [[Z]]) +; CHECK-NEXT: ret <8 x float> [[FMA]] +; + %fma = tail call <8 x float> @_Z3fmaDv8_fS_S_(<8 x float> %x, <8 x float> %y, <8 x float> %z) + ret <8 x float> %fma +} + +define <16 x float> @test_fma_v16f32(<16 x float> %x, <16 x float> %y, <16 x float> %z) { +; CHECK-LABEL: define <16 x float> @test_fma_v16f32 +; CHECK-SAME: (<16 x float> [[X:%.*]], <16 x float> [[Y:%.*]], <16 x float> [[Z:%.*]]) { +; CHECK-NEXT: [[FMA:%.*]] = tail call <16 x float> @_Z3fmaDv16_fS_S_(<16 x float> [[X]], <16 x float> [[Y]], <16 x float> 
[[Z]]) +; CHECK-NEXT: ret <16 x float> [[FMA]] +; + %fma = tail call <16 x float> @_Z3fmaDv16_fS_S_(<16 x float> %x, <16 x float> %y, <16 x float> %z) + ret <16 x float> %fma +} + +define double @test_fma_f64(double %x, double %y, double %z) { +; CHECK-LABEL: define double @test_fma_f64 +; CHECK-SAME: (double [[X:%.*]], double [[Y:%.*]], double [[Z:%.*]]) { +; CHECK-NEXT: [[FMA:%.*]] = tail call double @_Z3fmaddd(double [[X]], double [[Y]], double [[Z]]) +; CHECK-NEXT: ret double [[FMA]] +; + %fma = tail call double @_Z3fmaddd(double %x, double %y, double %z) + ret double %fma +} + +define <2 x double> @test_fma_v2f64(<2 x double> %x, <2 x double> %y, <2 x double> %z) { +; CHECK-LABEL: define <2 x double> @test_fma_v2f64 +; CHECK-SAME: (<2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) { +; CHECK-NEXT: [[FMA:%.*]] = tail call <2 x double> @_Z3fmaDv2_dS_S_(<2 x double> [[X]], <2 x double> [[Y]], <2 x double> [[Z]]) +; CHECK-NEXT: ret <2 x double> [[FMA]] +; + %fma = tail call <2 x double> @_Z3fmaDv2_dS_S_(<2 x double> %x, <2 x double> %y, <2 x double> %z) + ret <2 x double> %fma +} + +define <3 x double> @test_fma_v3f64(<3 x double> %x, <3 x double> %y, <3 x double> %z) { +; CHECK-LABEL: define <3 x double> @test_fma_v3f64 +; CHECK-SAME: (<3 x double> [[X:%.*]], <3 x double> [[Y:%.*]], <3 x double> [[Z:%.*]]) { +; CHECK-NEXT: [[FMA:%.*]] = tail call <3 x double> @_Z3fmaDv3_dS_S_(<3 x double> [[X]], <3 x double> [[Y]], <3 x double> [[Z]]) +; CHECK-NEXT: ret <3 x double> [[FMA]] +; + %fma = tail call <3 x double> @_Z3fmaDv3_dS_S_(<3 x double> %x, <3 x double> %y, <3 x double> %z) + ret <3 x double> %fma +} + +define <4 x double> @test_fma_v4f64(<4 x double> %x, <4 x double> %y, <4 x double> %z) { +; CHECK-LABEL: define <4 x double> @test_fma_v4f64 +; CHECK-SAME: (<4 x double> [[X:%.*]], <4 x double> [[Y:%.*]], <4 x double> [[Z:%.*]]) { +; CHECK-NEXT: [[FMA:%.*]] = tail call <4 x double> @_Z3fmaDv4_dS_S_(<4 x double> [[X]], <4 x double> [[Y]], <4 
x double> [[Z]]) +; CHECK-NEXT: ret <4 x double> [[FMA]] +; + %fma = tail call <4 x double> @_Z3fmaDv4_dS_S_(<4 x double> %x, <4 x double> %y, <4 x double> %z) + ret <4 x double> %fma +} + +define <8 x double> @test_fma_v8f64(<8 x double> %x, <8 x double> %y, <8 x double> %z) { +; CHECK-LABEL: define <8 x double> @test_fma_v8f64 +; CHECK-SAME: (<8 x double> [[X:%.*]], <8 x double> [[Y:%.*]], <8 x double> [[Z:%.*]]) { +; CHECK-NEXT: [[FMA:%.*]] = tail call <8 x double> @_Z3fmaDv8_dS_S_(<8 x double> [[X]], <8 x double> [[Y]], <8 x double> [[Z]]) +; CHECK-NEXT: ret <8 x double> [[FMA]] +; + %fma = tail call <8 x double> @_Z3fmaDv8_dS_S_(<8 x double> %x, <8 x double> %y, <8 x double> %z) + ret <8 x double> %fma +} + +define <16 x double> @test_fma_v16f64(<16 x double> %x, <16 x double> %y, <16 x double> %z) { +; CHECK-LABEL: define <16 x double> @test_fma_v16f64 +; CHECK-SAME: (<16 x double> [[X:%.*]], <16 x double> [[Y:%.*]], <16 x double> [[Z:%.*]]) { +; CHECK-NEXT: [[FMA:%.*]] = tail call <16 x double> @_Z3fmaDv16_dS_S_(<16 x double> [[X]], <16 x double> [[Y]], <16 x double> [[Z]]) +; CHECK-NEXT: ret <16 x double> [[FMA]] +; + %fma = tail call <16 x double> @_Z3fmaDv16_dS_S_(<16 x double> %x, <16 x double> %y, <16 x double> %z) + ret <16 x double> %fma +} + +define half @test_fma_f16(half %x, half %y, half %z) { +; CHECK-LABEL: define half @test_fma_f16 +; CHECK-SAME: (half [[X:%.*]], half [[Y:%.*]], half [[Z:%.*]]) { +; CHECK-NEXT: [[FMA:%.*]] = tail call half @_Z3fmaDhDhDh(half [[X]], half [[Y]], half [[Z]]) +; CHECK-NEXT: ret half [[FMA]] +; + %fma = tail call half @_Z3fmaDhDhDh(half %x, half %y, half %z) + ret half %fma +} + +define <2 x half> @test_fma_v2f16(<2 x half> %x, <2 x half> %y, <2 x half> %z) { +; CHECK-LABEL: define <2 x half> @test_fma_v2f16 +; CHECK-SAME: (<2 x half> [[X:%.*]], <2 x half> [[Y:%.*]], <2 x half> [[Z:%.*]]) { +; CHECK-NEXT: [[FMA:%.*]] = tail call <2 x half> @_Z3fmaDv2_DhS_S_(<2 x half> [[X]], <2 x half> [[Y]], <2 x half> [[Z]]) +; 
CHECK-NEXT: ret <2 x half> [[FMA]] +; + %fma = tail call <2 x half> @_Z3fmaDv2_DhS_S_(<2 x half> %x, <2 x half> %y, <2 x half> %z) + ret <2 x half> %fma +} + +define <3 x half> @test_fma_v3f16(<3 x half> %x, <3 x half> %y, <3 x half> %z) { +; CHECK-LABEL: define <3 x half> @test_fma_v3f16 +; CHECK-SAME: (<3 x half> [[X:%.*]], <3 x half> [[Y:%.*]], <3 x half> [[Z:%.*]]) { +; CHECK-NEXT: [[FMA:%.*]] = tail call <3 x half> @_Z3fmaDv3_DhS_S_(<3 x half> [[X]], <3 x half> [[Y]], <3 x half> [[Z]]) +; CHECK-NEXT: ret <3 x half> [[FMA]] +; + %fma = tail call <3 x half> @_Z3fmaDv3_DhS_S_(<3 x half> %x, <3 x half> %y, <3 x half> %z) + ret <3 x half> %fma +} + +define <4 x half> @test_fma_v4f16(<4 x half> %x, <4 x half> %y, <4 x half> %z) { +; CHECK-LABEL: define <4 x half> @test_fma_v4f16 +; CHECK-SAME: (<4 x half> [[X:%.*]], <4 x half> [[Y:%.*]], <4 x half> [[Z:%.*]]) { +; CHECK-NEXT: [[FMA:%.*]] = tail call <4 x half> @_Z3fmaDv4_DhS_S_(<4 x half> [[X]], <4 x half> [[Y]], <4 x half> [[Z]]) +; CHECK-NEXT: ret <4 x half> [[FMA]] +; + %fma = tail call <4 x half> @_Z3fmaDv4_DhS_S_(<4 x half> %x, <4 x half> %y, <4 x half> %z) + ret <4 x half> %fma +} + +define <8 x half> @test_fma_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z) { +; CHECK-LABEL: define <8 x half> @test_fma_v8f16 +; CHECK-SAME: (<8 x half> [[X:%.*]], <8 x half> [[Y:%.*]], <8 x half> [[Z:%.*]]) { +; CHECK-NEXT: [[FMA:%.*]] = tail call <8 x half> @_Z3fmaDv8_DhS_S_(<8 x half> [[X]], <8 x half> [[Y]], <8 x half> [[Z]]) +; CHECK-NEXT: ret <8 x half> [[FMA]] +; + %fma = tail call <8 x half> @_Z3fmaDv8_DhS_S_(<8 x half> %x, <8 x half> %y, <8 x half> %z) + ret <8 x half> %fma +} + +define <16 x half> @test_fma_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z) { +; CHECK-LABEL: define <16 x half> @test_fma_v16f16 +; CHECK-SAME: (<16 x half> [[X:%.*]], <16 x half> [[Y:%.*]], <16 x half> [[Z:%.*]]) { +; CHECK-NEXT: [[FMA:%.*]] = tail call <16 x half> @_Z3fmaDv16_DhS_S_(<16 x half> [[X]], <16 x half> [[Y]], <16 x 
half> [[Z]]) +; CHECK-NEXT: ret <16 x half> [[FMA]] +; + %fma = tail call <16 x half> @_Z3fmaDv16_DhS_S_(<16 x half> %x, <16 x half> %y, <16 x half> %z) + ret <16 x half> %fma +} + +define float @test_fma_f32_fast(float %x, float %y, float %z) { +; CHECK-LABEL: define float @test_fma_f32_fast +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) { +; CHECK-NEXT: [[FMA:%.*]] = tail call fast float @_Z3fmafff(float [[X]], float [[Y]], float [[Z]]) +; CHECK-NEXT: ret float [[FMA]] +; + %fma = tail call fast float @_Z3fmafff(float %x, float %y, float %z) + ret float %fma +} + +define float @test_fma_f32_noinline(float %x, float %y, float %z) { +; CHECK-LABEL: define float @test_fma_f32_noinline +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) { +; CHECK-NEXT: [[FMA:%.*]] = tail call fast float @_Z3fmafff(float [[X]], float [[Y]], float [[Z]]) #[[ATTR2:[0-9]+]] +; CHECK-NEXT: ret float [[FMA]] +; + %fma = tail call fast float @_Z3fmafff(float %x, float %y, float %z) #1 + ret float %fma +} + +define float @test_fma_f32_fast_minsize(float %x, float %y, float %z) #0 { +; CHECK-LABEL: define float @test_fma_f32_fast_minsize +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[FMA:%.*]] = tail call fast float @_Z3fmafff(float [[X]], float [[Y]], float [[Z]]) +; CHECK-NEXT: ret float [[FMA]] +; + %fma = tail call fast float @_Z3fmafff(float %x, float %y, float %z) + ret float %fma +} + +define float @test_fma_f32_fast_strictfp(float %x, float %y, float %z) #2 { +; CHECK-LABEL: define float @test_fma_f32_fast_strictfp +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[FMA:%.*]] = tail call nnan nsz float @_Z3fmafff(float [[X]], float [[Y]], float [[Z]]) #[[ATTR1]] +; CHECK-NEXT: ret float [[FMA]] +; + %fma = tail call nsz nnan float @_Z3fmafff(float %x, float %y, float %z) #2 + ret float %fma +} + +define float 
@test_fma_f32_fast_nobuiltin(float %x, float %y, float %z) { +; CHECK-LABEL: define float @test_fma_f32_fast_nobuiltin +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) { +; CHECK-NEXT: [[FMA:%.*]] = tail call fast float @_Z3fmafff(float [[X]], float [[Y]], float [[Z]]) #[[ATTR3:[0-9]+]] +; CHECK-NEXT: ret float [[FMA]] +; + %fma = tail call fast float @_Z3fmafff(float %x, float %y, float %z) #3 + ret float %fma +} + +attributes #0 = { minsize } +attributes #1 = { noinline } +attributes #2 = { strictfp } +attributes #3 = { nobuiltin } Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fmax.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fmax.ll @@ -0,0 +1,278 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s + +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" + +declare float @_Z4fmaxff(float, float) +declare <2 x float> @_Z4fmaxDv2_fS_(<2 x float>, <2 x float>) +declare <3 x float> @_Z4fmaxDv3_fS_(<3 x float>, <3 x float>) +declare <4 x float> @_Z4fmaxDv4_fS_(<4 x float>, <4 x float>) +declare <8 x float> @_Z4fmaxDv8_fS_(<8 x float>, <8 x float>) +declare <16 x float> @_Z4fmaxDv16_fS_(<16 x float>, <16 x float>) +declare double @_Z4fmaxdd(double, double) +declare <2 x double> @_Z4fmaxDv2_dS_(<2 x double>, <2 x double>) +declare <3 x double> @_Z4fmaxDv3_dS_(<3 x double>, <3 x double>) +declare <4 x double> @_Z4fmaxDv4_dS_(<4 x double>, <4 x double>) +declare <8 x double> @_Z4fmaxDv8_dS_(<8 x double>, <8 x double>) +declare <16 x double> @_Z4fmaxDv16_dS_(<16 x double>, <16 x double>) +declare half @_Z4fmaxDhDh(half, half) +declare <2 x half> 
@_Z4fmaxDv2_DhS_(<2 x half>, <2 x half>) +declare <3 x half> @_Z4fmaxDv3_DhS_(<3 x half>, <3 x half>) +declare <4 x half> @_Z4fmaxDv4_DhS_(<4 x half>, <4 x half>) +declare <8 x half> @_Z4fmaxDv8_DhS_(<8 x half>, <8 x half>) +declare <16 x half> @_Z4fmaxDv16_DhS_(<16 x half>, <16 x half>) + +define float @test_fmax_f32(float %x, float %y) { +; CHECK-LABEL: define float @test_fmax_f32 +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[FMAX:%.*]] = tail call float @_Z4fmaxff(float [[X]], float [[Y]]) +; CHECK-NEXT: ret float [[FMAX]] +; + %fmax = tail call float @_Z4fmaxff(float %x, float %y) + ret float %fmax +} + +define float @test_fmax_f32_nnan(float %x, float %y) { +; CHECK-LABEL: define float @test_fmax_f32_nnan +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[FMAX:%.*]] = tail call nnan float @_Z4fmaxff(float [[X]], float [[Y]]) +; CHECK-NEXT: ret float [[FMAX]] +; + %fmax = tail call nnan float @_Z4fmaxff(float %x, float %y) + ret float %fmax +} + +define <2 x float> @test_fmax_v2f32(<2 x float> %x, <2 x float> %y) { +; CHECK-LABEL: define <2 x float> @test_fmax_v2f32 +; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[FMAX:%.*]] = tail call <2 x float> @_Z4fmaxDv2_fS_(<2 x float> [[X]], <2 x float> [[Y]]) +; CHECK-NEXT: ret <2 x float> [[FMAX]] +; + %fmax = tail call <2 x float> @_Z4fmaxDv2_fS_(<2 x float> %x, <2 x float> %y) + ret <2 x float> %fmax +} + +define <3 x float> @test_fmax_v3f32(<3 x float> %x, <3 x float> %y) { +; CHECK-LABEL: define <3 x float> @test_fmax_v3f32 +; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[FMAX:%.*]] = tail call <3 x float> @_Z4fmaxDv3_fS_(<3 x float> [[X]], <3 x float> [[Y]]) +; CHECK-NEXT: ret <3 x float> [[FMAX]] +; + %fmax = tail call <3 x float> @_Z4fmaxDv3_fS_(<3 x float> %x, <3 x float> %y) + ret <3 x float> %fmax +} + +define <4 x float> @test_fmax_v4f32(<4 x float> %x, <4 x float> %y) { +; CHECK-LABEL: define <4 x 
float> @test_fmax_v4f32 +; CHECK-SAME: (<4 x float> [[X:%.*]], <4 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[FMAX:%.*]] = tail call <4 x float> @_Z4fmaxDv4_fS_(<4 x float> [[X]], <4 x float> [[Y]]) +; CHECK-NEXT: ret <4 x float> [[FMAX]] +; + %fmax = tail call <4 x float> @_Z4fmaxDv4_fS_(<4 x float> %x, <4 x float> %y) + ret <4 x float> %fmax +} + +define <8 x float> @test_fmax_v8f32(<8 x float> %x, <8 x float> %y) { +; CHECK-LABEL: define <8 x float> @test_fmax_v8f32 +; CHECK-SAME: (<8 x float> [[X:%.*]], <8 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[FMAX:%.*]] = tail call <8 x float> @_Z4fmaxDv8_fS_(<8 x float> [[X]], <8 x float> [[Y]]) +; CHECK-NEXT: ret <8 x float> [[FMAX]] +; + %fmax = tail call <8 x float> @_Z4fmaxDv8_fS_(<8 x float> %x, <8 x float> %y) + ret <8 x float> %fmax +} + +define <16 x float> @test_fmax_v16f32(<16 x float> %x, <16 x float> %y) { +; CHECK-LABEL: define <16 x float> @test_fmax_v16f32 +; CHECK-SAME: (<16 x float> [[X:%.*]], <16 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[FMAX:%.*]] = tail call <16 x float> @_Z4fmaxDv16_fS_(<16 x float> [[X]], <16 x float> [[Y]]) +; CHECK-NEXT: ret <16 x float> [[FMAX]] +; + %fmax = tail call <16 x float> @_Z4fmaxDv16_fS_(<16 x float> %x, <16 x float> %y) + ret <16 x float> %fmax +} + +define double @test_fmax_f64(double %x, double %y) { +; CHECK-LABEL: define double @test_fmax_f64 +; CHECK-SAME: (double [[X:%.*]], double [[Y:%.*]]) { +; CHECK-NEXT: [[FMAX:%.*]] = tail call double @_Z4fmaxdd(double [[X]], double [[Y]]) +; CHECK-NEXT: ret double [[FMAX]] +; + %fmax = tail call double @_Z4fmaxdd(double %x, double %y) + ret double %fmax +} + +define <2 x double> @test_fmax_v2f64(<2 x double> %x, <2 x double> %y) { +; CHECK-LABEL: define <2 x double> @test_fmax_v2f64 +; CHECK-SAME: (<2 x double> [[X:%.*]], <2 x double> [[Y:%.*]]) { +; CHECK-NEXT: [[FMAX:%.*]] = tail call <2 x double> @_Z4fmaxDv2_dS_(<2 x double> [[X]], <2 x double> [[Y]]) +; CHECK-NEXT: ret <2 x double> [[FMAX]] +; + %fmax = tail call <2 x double> 
@_Z4fmaxDv2_dS_(<2 x double> %x, <2 x double> %y) + ret <2 x double> %fmax +} + +define <3 x double> @test_fmax_v3f64(<3 x double> %x, <3 x double> %y) { +; CHECK-LABEL: define <3 x double> @test_fmax_v3f64 +; CHECK-SAME: (<3 x double> [[X:%.*]], <3 x double> [[Y:%.*]]) { +; CHECK-NEXT: [[FMAX:%.*]] = tail call <3 x double> @_Z4fmaxDv3_dS_(<3 x double> [[X]], <3 x double> [[Y]]) +; CHECK-NEXT: ret <3 x double> [[FMAX]] +; + %fmax = tail call <3 x double> @_Z4fmaxDv3_dS_(<3 x double> %x, <3 x double> %y) + ret <3 x double> %fmax +} + +define <4 x double> @test_fmax_v4f64(<4 x double> %x, <4 x double> %y) { +; CHECK-LABEL: define <4 x double> @test_fmax_v4f64 +; CHECK-SAME: (<4 x double> [[X:%.*]], <4 x double> [[Y:%.*]]) { +; CHECK-NEXT: [[FMAX:%.*]] = tail call <4 x double> @_Z4fmaxDv4_dS_(<4 x double> [[X]], <4 x double> [[Y]]) +; CHECK-NEXT: ret <4 x double> [[FMAX]] +; + %fmax = tail call <4 x double> @_Z4fmaxDv4_dS_(<4 x double> %x, <4 x double> %y) + ret <4 x double> %fmax +} + +define <8 x double> @test_fmax_v8f64(<8 x double> %x, <8 x double> %y) { +; CHECK-LABEL: define <8 x double> @test_fmax_v8f64 +; CHECK-SAME: (<8 x double> [[X:%.*]], <8 x double> [[Y:%.*]]) { +; CHECK-NEXT: [[FMAX:%.*]] = tail call <8 x double> @_Z4fmaxDv8_dS_(<8 x double> [[X]], <8 x double> [[Y]]) +; CHECK-NEXT: ret <8 x double> [[FMAX]] +; + %fmax = tail call <8 x double> @_Z4fmaxDv8_dS_(<8 x double> %x, <8 x double> %y) + ret <8 x double> %fmax +} + +define <16 x double> @test_fmax_v16f64(<16 x double> %x, <16 x double> %y) { +; CHECK-LABEL: define <16 x double> @test_fmax_v16f64 +; CHECK-SAME: (<16 x double> [[X:%.*]], <16 x double> [[Y:%.*]]) { +; CHECK-NEXT: [[FMAX:%.*]] = tail call <16 x double> @_Z4fmaxDv16_dS_(<16 x double> [[X]], <16 x double> [[Y]]) +; CHECK-NEXT: ret <16 x double> [[FMAX]] +; + %fmax = tail call <16 x double> @_Z4fmaxDv16_dS_(<16 x double> %x, <16 x double> %y) + ret <16 x double> %fmax +} + +define half @test_fmax_f16(half %x, half %y) { +; CHECK-LABEL: 
define half @test_fmax_f16 +; CHECK-SAME: (half [[X:%.*]], half [[Y:%.*]]) { +; CHECK-NEXT: [[FMAX:%.*]] = tail call half @_Z4fmaxDhDh(half [[X]], half [[Y]]) +; CHECK-NEXT: ret half [[FMAX]] +; + %fmax = tail call half @_Z4fmaxDhDh(half %x, half %y) + ret half %fmax +} + +define <2 x half> @test_fmax_v2f16(<2 x half> %x, <2 x half> %y) { +; CHECK-LABEL: define <2 x half> @test_fmax_v2f16 +; CHECK-SAME: (<2 x half> [[X:%.*]], <2 x half> [[Y:%.*]]) { +; CHECK-NEXT: [[FMAX:%.*]] = tail call <2 x half> @_Z4fmaxDv2_DhS_(<2 x half> [[X]], <2 x half> [[Y]]) +; CHECK-NEXT: ret <2 x half> [[FMAX]] +; + %fmax = tail call <2 x half> @_Z4fmaxDv2_DhS_(<2 x half> %x, <2 x half> %y) + ret <2 x half> %fmax +} + +define <3 x half> @test_fmax_v3f16(<3 x half> %x, <3 x half> %y) { +; CHECK-LABEL: define <3 x half> @test_fmax_v3f16 +; CHECK-SAME: (<3 x half> [[X:%.*]], <3 x half> [[Y:%.*]]) { +; CHECK-NEXT: [[FMAX:%.*]] = tail call <3 x half> @_Z4fmaxDv3_DhS_(<3 x half> [[X]], <3 x half> [[Y]]) +; CHECK-NEXT: ret <3 x half> [[FMAX]] +; + %fmax = tail call <3 x half> @_Z4fmaxDv3_DhS_(<3 x half> %x, <3 x half> %y) + ret <3 x half> %fmax +} + +define <4 x half> @test_fmax_v4f16(<4 x half> %x, <4 x half> %y) { +; CHECK-LABEL: define <4 x half> @test_fmax_v4f16 +; CHECK-SAME: (<4 x half> [[X:%.*]], <4 x half> [[Y:%.*]]) { +; CHECK-NEXT: [[FMAX:%.*]] = tail call <4 x half> @_Z4fmaxDv4_DhS_(<4 x half> [[X]], <4 x half> [[Y]]) +; CHECK-NEXT: ret <4 x half> [[FMAX]] +; + %fmax = tail call <4 x half> @_Z4fmaxDv4_DhS_(<4 x half> %x, <4 x half> %y) + ret <4 x half> %fmax +} + +define <8 x half> @test_fmax_v8f16(<8 x half> %x, <8 x half> %y) { +; CHECK-LABEL: define <8 x half> @test_fmax_v8f16 +; CHECK-SAME: (<8 x half> [[X:%.*]], <8 x half> [[Y:%.*]]) { +; CHECK-NEXT: [[FMAX:%.*]] = tail call <8 x half> @_Z4fmaxDv8_DhS_(<8 x half> [[X]], <8 x half> [[Y]]) +; CHECK-NEXT: ret <8 x half> [[FMAX]] +; + %fmax = tail call <8 x half> @_Z4fmaxDv8_DhS_(<8 x half> %x, <8 x half> %y) + ret <8 x half> %fmax 
+} + +define <16 x half> @test_fmax_v16f16(<16 x half> %x, <16 x half> %y) { +; CHECK-LABEL: define <16 x half> @test_fmax_v16f16 +; CHECK-SAME: (<16 x half> [[X:%.*]], <16 x half> [[Y:%.*]]) { +; CHECK-NEXT: [[FMAX:%.*]] = tail call <16 x half> @_Z4fmaxDv16_DhS_(<16 x half> [[X]], <16 x half> [[Y]]) +; CHECK-NEXT: ret <16 x half> [[FMAX]] +; + %fmax = tail call <16 x half> @_Z4fmaxDv16_DhS_(<16 x half> %x, <16 x half> %y) + ret <16 x half> %fmax +} + +define float @test_fmax_f32_minsize(float %x, float %y) #0 { +; CHECK-LABEL: define float @test_fmax_f32_minsize +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[FMAX:%.*]] = tail call float @_Z4fmaxff(float [[X]], float [[Y]]) +; CHECK-NEXT: ret float [[FMAX]] +; + %fmax = tail call float @_Z4fmaxff(float %x, float %y) + ret float %fmax +} + +define float @test_fmax_f32_nnan_minsize(float %x, float %y) #0 { +; CHECK-LABEL: define float @test_fmax_f32_nnan_minsize +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[FMAX:%.*]] = tail call nnan float @_Z4fmaxff(float [[X]], float [[Y]]) +; CHECK-NEXT: ret float [[FMAX]] +; + %fmax = tail call nnan float @_Z4fmaxff(float %x, float %y) + ret float %fmax +} + +define float @test_fmax_f32_noinline(float %x, float %y) { +; CHECK-LABEL: define float @test_fmax_f32_noinline +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[FMAX:%.*]] = tail call float @_Z4fmaxff(float [[X]], float [[Y]]) #[[ATTR2:[0-9]+]] +; CHECK-NEXT: ret float [[FMAX]] +; + %fmax = tail call float @_Z4fmaxff(float %x, float %y) #1 + ret float %fmax +} + +define float @test_fmax_f32_nnan_noinline(float %x, float %y) { +; CHECK-LABEL: define float @test_fmax_f32_nnan_noinline +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[FMAX:%.*]] = tail call nnan float @_Z4fmaxff(float [[X]], float [[Y]]) #[[ATTR2]] +; CHECK-NEXT: ret float [[FMAX]] +; + %fmax = tail call nnan float @_Z4fmaxff(float %x, float %y) #1 
+ ret float %fmax +} + +define float @test_fmax_f32_strictfp(float %x, float %y) #2 { +; CHECK-LABEL: define float @test_fmax_f32_strictfp +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[FMAX:%.*]] = tail call nnan nsz float @_Z4fmaxff(float [[X]], float [[Y]]) #[[ATTR1]] +; CHECK-NEXT: ret float [[FMAX]] +; + %fmax = tail call nsz nnan float @_Z4fmaxff(float %x, float %y) #2 + ret float %fmax +} + +define float @test_fmax_f32_fast_nobuiltin(float %x, float %y) { +; CHECK-LABEL: define float @test_fmax_f32_fast_nobuiltin +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[FMAX:%.*]] = tail call fast float @_Z4fmaxff(float [[X]], float [[Y]]) #[[ATTR3:[0-9]+]] +; CHECK-NEXT: ret float [[FMAX]] +; + %fmax = tail call fast float @_Z4fmaxff(float %x, float %y) #3 + ret float %fmax +} + +attributes #0 = { minsize } +attributes #1 = { noinline } +attributes #2 = { strictfp } +attributes #3 = { nobuiltin } Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fmin.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fmin.ll @@ -0,0 +1,278 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s + +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" + +declare float @_Z4fminff(float, float) +declare <2 x float> @_Z4fminDv2_fS_(<2 x float>, <2 x float>) +declare <3 x float> @_Z4fminDv3_fS_(<3 x float>, <3 x float>) +declare <4 x float> @_Z4fminDv4_fS_(<4 x float>, <4 x float>) +declare <8 x float> @_Z4fminDv8_fS_(<8 x float>, <8 x float>) +declare <16 x float> @_Z4fminDv16_fS_(<16 x float>, <16 x float>) +declare double 
@_Z4fmindd(double, double) +declare <2 x double> @_Z4fminDv2_dS_(<2 x double>, <2 x double>) +declare <3 x double> @_Z4fminDv3_dS_(<3 x double>, <3 x double>) +declare <4 x double> @_Z4fminDv4_dS_(<4 x double>, <4 x double>) +declare <8 x double> @_Z4fminDv8_dS_(<8 x double>, <8 x double>) +declare <16 x double> @_Z4fminDv16_dS_(<16 x double>, <16 x double>) +declare half @_Z4fminDhDh(half, half) +declare <2 x half> @_Z4fminDv2_DhS_(<2 x half>, <2 x half>) +declare <3 x half> @_Z4fminDv3_DhS_(<3 x half>, <3 x half>) +declare <4 x half> @_Z4fminDv4_DhS_(<4 x half>, <4 x half>) +declare <8 x half> @_Z4fminDv8_DhS_(<8 x half>, <8 x half>) +declare <16 x half> @_Z4fminDv16_DhS_(<16 x half>, <16 x half>) + +define float @test_fmin_f32(float %x, float %y) { +; CHECK-LABEL: define float @test_fmin_f32 +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[FMIN:%.*]] = tail call float @_Z4fminff(float [[X]], float [[Y]]) +; CHECK-NEXT: ret float [[FMIN]] +; + %fmin = tail call float @_Z4fminff(float %x, float %y) + ret float %fmin +} + +define float @test_fmin_f32_nnan(float %x, float %y) { +; CHECK-LABEL: define float @test_fmin_f32_nnan +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[FMIN:%.*]] = tail call nnan float @_Z4fminff(float [[X]], float [[Y]]) +; CHECK-NEXT: ret float [[FMIN]] +; + %fmin = tail call nnan float @_Z4fminff(float %x, float %y) + ret float %fmin +} + +define <2 x float> @test_fmin_v2f32(<2 x float> %x, <2 x float> %y) { +; CHECK-LABEL: define <2 x float> @test_fmin_v2f32 +; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[FMIN:%.*]] = tail call <2 x float> @_Z4fminDv2_fS_(<2 x float> [[X]], <2 x float> [[Y]]) +; CHECK-NEXT: ret <2 x float> [[FMIN]] +; + %fmin = tail call <2 x float> @_Z4fminDv2_fS_(<2 x float> %x, <2 x float> %y) + ret <2 x float> %fmin +} + +define <3 x float> @test_fmin_v3f32(<3 x float> %x, <3 x float> %y) { +; CHECK-LABEL: define <3 x float> @test_fmin_v3f32 +; 
CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[FMIN:%.*]] = tail call <3 x float> @_Z4fminDv3_fS_(<3 x float> [[X]], <3 x float> [[Y]]) +; CHECK-NEXT: ret <3 x float> [[FMIN]] +; + %fmin = tail call <3 x float> @_Z4fminDv3_fS_(<3 x float> %x, <3 x float> %y) + ret <3 x float> %fmin +} + +define <4 x float> @test_fmin_v4f32(<4 x float> %x, <4 x float> %y) { +; CHECK-LABEL: define <4 x float> @test_fmin_v4f32 +; CHECK-SAME: (<4 x float> [[X:%.*]], <4 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[FMIN:%.*]] = tail call <4 x float> @_Z4fminDv4_fS_(<4 x float> [[X]], <4 x float> [[Y]]) +; CHECK-NEXT: ret <4 x float> [[FMIN]] +; + %fmin = tail call <4 x float> @_Z4fminDv4_fS_(<4 x float> %x, <4 x float> %y) + ret <4 x float> %fmin +} + +define <8 x float> @test_fmin_v8f32(<8 x float> %x, <8 x float> %y) { +; CHECK-LABEL: define <8 x float> @test_fmin_v8f32 +; CHECK-SAME: (<8 x float> [[X:%.*]], <8 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[FMIN:%.*]] = tail call <8 x float> @_Z4fminDv8_fS_(<8 x float> [[X]], <8 x float> [[Y]]) +; CHECK-NEXT: ret <8 x float> [[FMIN]] +; + %fmin = tail call <8 x float> @_Z4fminDv8_fS_(<8 x float> %x, <8 x float> %y) + ret <8 x float> %fmin +} + +define <16 x float> @test_fmin_v16f32(<16 x float> %x, <16 x float> %y) { +; CHECK-LABEL: define <16 x float> @test_fmin_v16f32 +; CHECK-SAME: (<16 x float> [[X:%.*]], <16 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[FMIN:%.*]] = tail call <16 x float> @_Z4fminDv16_fS_(<16 x float> [[X]], <16 x float> [[Y]]) +; CHECK-NEXT: ret <16 x float> [[FMIN]] +; + %fmin = tail call <16 x float> @_Z4fminDv16_fS_(<16 x float> %x, <16 x float> %y) + ret <16 x float> %fmin +} + +define double @test_fmin_f64(double %x, double %y) { +; CHECK-LABEL: define double @test_fmin_f64 +; CHECK-SAME: (double [[X:%.*]], double [[Y:%.*]]) { +; CHECK-NEXT: [[FMIN:%.*]] = tail call double @_Z4fmindd(double [[X]], double [[Y]]) +; CHECK-NEXT: ret double [[FMIN]] +; + %fmin = tail call double @_Z4fmindd(double %x, 
double %y) + ret double %fmin +} + +define <2 x double> @test_fmin_v2f64(<2 x double> %x, <2 x double> %y) { +; CHECK-LABEL: define <2 x double> @test_fmin_v2f64 +; CHECK-SAME: (<2 x double> [[X:%.*]], <2 x double> [[Y:%.*]]) { +; CHECK-NEXT: [[FMIN:%.*]] = tail call <2 x double> @_Z4fminDv2_dS_(<2 x double> [[X]], <2 x double> [[Y]]) +; CHECK-NEXT: ret <2 x double> [[FMIN]] +; + %fmin = tail call <2 x double> @_Z4fminDv2_dS_(<2 x double> %x, <2 x double> %y) + ret <2 x double> %fmin +} + +define <3 x double> @test_fmin_v3f64(<3 x double> %x, <3 x double> %y) { +; CHECK-LABEL: define <3 x double> @test_fmin_v3f64 +; CHECK-SAME: (<3 x double> [[X:%.*]], <3 x double> [[Y:%.*]]) { +; CHECK-NEXT: [[FMIN:%.*]] = tail call <3 x double> @_Z4fminDv3_dS_(<3 x double> [[X]], <3 x double> [[Y]]) +; CHECK-NEXT: ret <3 x double> [[FMIN]] +; + %fmin = tail call <3 x double> @_Z4fminDv3_dS_(<3 x double> %x, <3 x double> %y) + ret <3 x double> %fmin +} + +define <4 x double> @test_fmin_v4f64(<4 x double> %x, <4 x double> %y) { +; CHECK-LABEL: define <4 x double> @test_fmin_v4f64 +; CHECK-SAME: (<4 x double> [[X:%.*]], <4 x double> [[Y:%.*]]) { +; CHECK-NEXT: [[FMIN:%.*]] = tail call <4 x double> @_Z4fminDv4_dS_(<4 x double> [[X]], <4 x double> [[Y]]) +; CHECK-NEXT: ret <4 x double> [[FMIN]] +; + %fmin = tail call <4 x double> @_Z4fminDv4_dS_(<4 x double> %x, <4 x double> %y) + ret <4 x double> %fmin +} + +define <8 x double> @test_fmin_v8f64(<8 x double> %x, <8 x double> %y) { +; CHECK-LABEL: define <8 x double> @test_fmin_v8f64 +; CHECK-SAME: (<8 x double> [[X:%.*]], <8 x double> [[Y:%.*]]) { +; CHECK-NEXT: [[FMIN:%.*]] = tail call <8 x double> @_Z4fminDv8_dS_(<8 x double> [[X]], <8 x double> [[Y]]) +; CHECK-NEXT: ret <8 x double> [[FMIN]] +; + %fmin = tail call <8 x double> @_Z4fminDv8_dS_(<8 x double> %x, <8 x double> %y) + ret <8 x double> %fmin +} + +define <16 x double> @test_fmin_v16f64(<16 x double> %x, <16 x double> %y) { +; CHECK-LABEL: define <16 x double> 
@test_fmin_v16f64 +; CHECK-SAME: (<16 x double> [[X:%.*]], <16 x double> [[Y:%.*]]) { +; CHECK-NEXT: [[FMIN:%.*]] = tail call <16 x double> @_Z4fminDv16_dS_(<16 x double> [[X]], <16 x double> [[Y]]) +; CHECK-NEXT: ret <16 x double> [[FMIN]] +; + %fmin = tail call <16 x double> @_Z4fminDv16_dS_(<16 x double> %x, <16 x double> %y) + ret <16 x double> %fmin +} + +define half @test_fmin_f16(half %x, half %y) { +; CHECK-LABEL: define half @test_fmin_f16 +; CHECK-SAME: (half [[X:%.*]], half [[Y:%.*]]) { +; CHECK-NEXT: [[FMIN:%.*]] = tail call half @_Z4fminDhDh(half [[X]], half [[Y]]) +; CHECK-NEXT: ret half [[FMIN]] +; + %fmin = tail call half @_Z4fminDhDh(half %x, half %y) + ret half %fmin +} + +define <2 x half> @test_fmin_v2f16(<2 x half> %x, <2 x half> %y) { +; CHECK-LABEL: define <2 x half> @test_fmin_v2f16 +; CHECK-SAME: (<2 x half> [[X:%.*]], <2 x half> [[Y:%.*]]) { +; CHECK-NEXT: [[FMIN:%.*]] = tail call <2 x half> @_Z4fminDv2_DhS_(<2 x half> [[X]], <2 x half> [[Y]]) +; CHECK-NEXT: ret <2 x half> [[FMIN]] +; + %fmin = tail call <2 x half> @_Z4fminDv2_DhS_(<2 x half> %x, <2 x half> %y) + ret <2 x half> %fmin +} + +define <3 x half> @test_fmin_v3f16(<3 x half> %x, <3 x half> %y) { +; CHECK-LABEL: define <3 x half> @test_fmin_v3f16 +; CHECK-SAME: (<3 x half> [[X:%.*]], <3 x half> [[Y:%.*]]) { +; CHECK-NEXT: [[FMIN:%.*]] = tail call <3 x half> @_Z4fminDv3_DhS_(<3 x half> [[X]], <3 x half> [[Y]]) +; CHECK-NEXT: ret <3 x half> [[FMIN]] +; + %fmin = tail call <3 x half> @_Z4fminDv3_DhS_(<3 x half> %x, <3 x half> %y) + ret <3 x half> %fmin +} + +define <4 x half> @test_fmin_v4f16(<4 x half> %x, <4 x half> %y) { +; CHECK-LABEL: define <4 x half> @test_fmin_v4f16 +; CHECK-SAME: (<4 x half> [[X:%.*]], <4 x half> [[Y:%.*]]) { +; CHECK-NEXT: [[FMIN:%.*]] = tail call <4 x half> @_Z4fminDv4_DhS_(<4 x half> [[X]], <4 x half> [[Y]]) +; CHECK-NEXT: ret <4 x half> [[FMIN]] +; + %fmin = tail call <4 x half> @_Z4fminDv4_DhS_(<4 x half> %x, <4 x half> %y) + ret <4 x half> %fmin +} + 
+define <8 x half> @test_fmin_v8f16(<8 x half> %x, <8 x half> %y) { +; CHECK-LABEL: define <8 x half> @test_fmin_v8f16 +; CHECK-SAME: (<8 x half> [[X:%.*]], <8 x half> [[Y:%.*]]) { +; CHECK-NEXT: [[FMIN:%.*]] = tail call <8 x half> @_Z4fminDv8_DhS_(<8 x half> [[X]], <8 x half> [[Y]]) +; CHECK-NEXT: ret <8 x half> [[FMIN]] +; + %fmin = tail call <8 x half> @_Z4fminDv8_DhS_(<8 x half> %x, <8 x half> %y) + ret <8 x half> %fmin +} + +define <16 x half> @test_fmin_v16f16(<16 x half> %x, <16 x half> %y) { +; CHECK-LABEL: define <16 x half> @test_fmin_v16f16 +; CHECK-SAME: (<16 x half> [[X:%.*]], <16 x half> [[Y:%.*]]) { +; CHECK-NEXT: [[FMIN:%.*]] = tail call <16 x half> @_Z4fminDv16_DhS_(<16 x half> [[X]], <16 x half> [[Y]]) +; CHECK-NEXT: ret <16 x half> [[FMIN]] +; + %fmin = tail call <16 x half> @_Z4fminDv16_DhS_(<16 x half> %x, <16 x half> %y) + ret <16 x half> %fmin +} + +define float @test_fmin_f32_minsize(float %x, float %y) #0 { +; CHECK-LABEL: define float @test_fmin_f32_minsize +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[FMIN:%.*]] = tail call float @_Z4fminff(float [[X]], float [[Y]]) +; CHECK-NEXT: ret float [[FMIN]] +; + %fmin = tail call float @_Z4fminff(float %x, float %y) + ret float %fmin +} + +define float @test_fmin_f32_nnan_minsize(float %x, float %y) #0 { +; CHECK-LABEL: define float @test_fmin_f32_nnan_minsize +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[FMIN:%.*]] = tail call nnan float @_Z4fminff(float [[X]], float [[Y]]) +; CHECK-NEXT: ret float [[FMIN]] +; + %fmin = tail call nnan float @_Z4fminff(float %x, float %y) + ret float %fmin +} + +define float @test_fmin_f32_noinline(float %x, float %y) { +; CHECK-LABEL: define float @test_fmin_f32_noinline +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[FMIN:%.*]] = tail call float @_Z4fminff(float [[X]], float [[Y]]) #[[ATTR2:[0-9]+]] +; CHECK-NEXT: ret float [[FMIN]] +; + %fmin = tail call float 
@_Z4fminff(float %x, float %y) #1 + ret float %fmin +} + +define float @test_fmin_f32_nnan_noinline(float %x, float %y) { +; CHECK-LABEL: define float @test_fmin_f32_nnan_noinline +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[FMIN:%.*]] = tail call nnan float @_Z4fminff(float [[X]], float [[Y]]) #[[ATTR2]] +; CHECK-NEXT: ret float [[FMIN]] +; + %fmin = tail call nnan float @_Z4fminff(float %x, float %y) #1 + ret float %fmin +} + +define float @test_fmin_f32_strictfp(float %x, float %y) #2 { +; CHECK-LABEL: define float @test_fmin_f32_strictfp +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[FMIN:%.*]] = tail call nnan nsz float @_Z4fminff(float [[X]], float [[Y]]) #[[ATTR1]] +; CHECK-NEXT: ret float [[FMIN]] +; + %fmin = tail call nsz nnan float @_Z4fminff(float %x, float %y) #2 + ret float %fmin +} + +define float @test_fmin_f32_fast_nobuiltin(float %x, float %y) { +; CHECK-LABEL: define float @test_fmin_f32_fast_nobuiltin +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[FMIN:%.*]] = tail call fast float @_Z4fminff(float [[X]], float [[Y]]) #[[ATTR3:[0-9]+]] +; CHECK-NEXT: ret float [[FMIN]] +; + %fmin = tail call fast float @_Z4fminff(float %x, float %y) #3 + ret float %fmin +} + +attributes #0 = { minsize } +attributes #1 = { noinline } +attributes #2 = { strictfp } +attributes #3 = { nobuiltin } Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-ldexp.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-ldexp.ll @@ -0,0 +1,249 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s + +target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" + +declare float @_Z5ldexpfi(float, i32) +declare <2 x float> @_Z5ldexpDv2_fDv2_i(<2 x float>, <2 x i32>) +declare <3 x float> @_Z5ldexpDv3_fDv3_i(<3 x float>, <3 x i32>) +declare <4 x float> @_Z5ldexpDv4_fDv4_i(<4 x float>, <4 x i32>) +declare <8 x float> @_Z5ldexpDv8_fDv8_i(<8 x float>, <8 x i32>) +declare <16 x float> @_Z5ldexpDv16_fDv16_i(<16 x float>, <16 x i32>) +declare double @_Z5ldexpdi(double, i32) +declare <2 x double> @_Z5ldexpDv2_dDv2_i(<2 x double>, <2 x i32>) +declare <3 x double> @_Z5ldexpDv3_dDv3_i(<3 x double>, <3 x i32>) +declare <4 x double> @_Z5ldexpDv4_dDv4_i(<4 x double>, <4 x i32>) +declare <8 x double> @_Z5ldexpDv8_dDv8_i(<8 x double>, <8 x i32>) +declare <16 x double> @_Z5ldexpDv16_dDv16_i(<16 x double>, <16 x i32>) +declare half @_Z5ldexpDhi(half, i32) +declare <2 x half> @_Z5ldexpDv2_DhDv2_i(<2 x half>, <2 x i32>) +declare <3 x half> @_Z5ldexpDv3_DhDv3_i(<3 x half>, <3 x i32>) +declare <4 x half> @_Z5ldexpDv4_DhDv4_i(<4 x half>, <4 x i32>) +declare <8 x half> @_Z5ldexpDv8_DhDv8_i(<8 x half>, <8 x i32>) +declare <16 x half> @_Z5ldexpDv16_DhDv16_i(<16 x half>, <16 x i32>) + +define float @test_ldexp_f32(float %x, i32 %y) { +; CHECK-LABEL: define float @test_ldexp_f32 +; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[LDEXP:%.*]] = tail call float @_Z5ldexpfi(float [[X]], i32 [[Y]]) +; CHECK-NEXT: ret float [[LDEXP]] +; + %ldexp = tail call float @_Z5ldexpfi(float %x, i32 %y) + ret float %ldexp +} + +define float @test_ldexp_f32_fast(float %x, i32 %y) { +; CHECK-LABEL: define float @test_ldexp_f32_fast +; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[LDEXP:%.*]] = tail call fast float @_Z5ldexpfi(float [[X]], i32 [[Y]]) +; CHECK-NEXT: ret float [[LDEXP]] +; + %ldexp = tail call fast float 
@_Z5ldexpfi(float %x, i32 %y) + ret float %ldexp +} + +define <2 x float> @test_ldexp_v2f32(<2 x float> %x, <2 x i32> %y) { +; CHECK-LABEL: define <2 x float> @test_ldexp_v2f32 +; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x i32> [[Y:%.*]]) { +; CHECK-NEXT: [[LDEXP:%.*]] = tail call <2 x float> @_Z5ldexpDv2_fDv2_i(<2 x float> [[X]], <2 x i32> [[Y]]) +; CHECK-NEXT: ret <2 x float> [[LDEXP]] +; + %ldexp = tail call <2 x float> @_Z5ldexpDv2_fDv2_i(<2 x float> %x, <2 x i32> %y) + ret <2 x float> %ldexp +} + +define <3 x float> @test_ldexp_v3f32(<3 x float> %x, <3 x i32> %y) { +; CHECK-LABEL: define <3 x float> @test_ldexp_v3f32 +; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x i32> [[Y:%.*]]) { +; CHECK-NEXT: [[LDEXP:%.*]] = tail call <3 x float> @_Z5ldexpDv3_fDv3_i(<3 x float> [[X]], <3 x i32> [[Y]]) +; CHECK-NEXT: ret <3 x float> [[LDEXP]] +; + %ldexp = tail call <3 x float> @_Z5ldexpDv3_fDv3_i(<3 x float> %x, <3 x i32> %y) + ret <3 x float> %ldexp +} + +define <4 x float> @test_ldexp_v4f32(<4 x float> %x, <4 x i32> %y) { +; CHECK-LABEL: define <4 x float> @test_ldexp_v4f32 +; CHECK-SAME: (<4 x float> [[X:%.*]], <4 x i32> [[Y:%.*]]) { +; CHECK-NEXT: [[LDEXP:%.*]] = tail call <4 x float> @_Z5ldexpDv4_fDv4_i(<4 x float> [[X]], <4 x i32> [[Y]]) +; CHECK-NEXT: ret <4 x float> [[LDEXP]] +; + %ldexp = tail call <4 x float> @_Z5ldexpDv4_fDv4_i(<4 x float> %x, <4 x i32> %y) + ret <4 x float> %ldexp +} + +define <8 x float> @test_ldexp_v8f32(<8 x float> %x, <8 x i32> %y) { +; CHECK-LABEL: define <8 x float> @test_ldexp_v8f32 +; CHECK-SAME: (<8 x float> [[X:%.*]], <8 x i32> [[Y:%.*]]) { +; CHECK-NEXT: [[LDEXP:%.*]] = tail call <8 x float> @_Z5ldexpDv8_fDv8_i(<8 x float> [[X]], <8 x i32> [[Y]]) +; CHECK-NEXT: ret <8 x float> [[LDEXP]] +; + %ldexp = tail call <8 x float> @_Z5ldexpDv8_fDv8_i(<8 x float> %x, <8 x i32> %y) + ret <8 x float> %ldexp +} + +define <16 x float> @test_ldexp_v16f32(<16 x float> %x, <16 x i32> %y) { +; CHECK-LABEL: define <16 x float> @test_ldexp_v16f32 +; 
CHECK-SAME: (<16 x float> [[X:%.*]], <16 x i32> [[Y:%.*]]) { +; CHECK-NEXT: [[LDEXP:%.*]] = tail call <16 x float> @_Z5ldexpDv16_fDv16_i(<16 x float> [[X]], <16 x i32> [[Y]]) +; CHECK-NEXT: ret <16 x float> [[LDEXP]] +; + %ldexp = tail call <16 x float> @_Z5ldexpDv16_fDv16_i(<16 x float> %x, <16 x i32> %y) + ret <16 x float> %ldexp +} + +define double @test_ldexp_f64(double %x, i32 %y) { +; CHECK-LABEL: define double @test_ldexp_f64 +; CHECK-SAME: (double [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[LDEXP:%.*]] = tail call double @_Z5ldexpdi(double [[X]], i32 [[Y]]) +; CHECK-NEXT: ret double [[LDEXP]] +; + %ldexp = tail call double @_Z5ldexpdi(double %x, i32 %y) + ret double %ldexp +} + +define <2 x double> @test_ldexp_v2f64(<2 x double> %x, <2 x i32> %y) { +; CHECK-LABEL: define <2 x double> @test_ldexp_v2f64 +; CHECK-SAME: (<2 x double> [[X:%.*]], <2 x i32> [[Y:%.*]]) { +; CHECK-NEXT: [[LDEXP:%.*]] = tail call <2 x double> @_Z5ldexpDv2_dDv2_i(<2 x double> [[X]], <2 x i32> [[Y]]) +; CHECK-NEXT: ret <2 x double> [[LDEXP]] +; + %ldexp = tail call <2 x double> @_Z5ldexpDv2_dDv2_i(<2 x double> %x, <2 x i32> %y) + ret <2 x double> %ldexp +} + +define <3 x double> @test_ldexp_v3f64(<3 x double> %x, <3 x i32> %y) { +; CHECK-LABEL: define <3 x double> @test_ldexp_v3f64 +; CHECK-SAME: (<3 x double> [[X:%.*]], <3 x i32> [[Y:%.*]]) { +; CHECK-NEXT: [[LDEXP:%.*]] = tail call <3 x double> @_Z5ldexpDv3_dDv3_i(<3 x double> [[X]], <3 x i32> [[Y]]) +; CHECK-NEXT: ret <3 x double> [[LDEXP]] +; + %ldexp = tail call <3 x double> @_Z5ldexpDv3_dDv3_i(<3 x double> %x, <3 x i32> %y) + ret <3 x double> %ldexp +} + +define <4 x double> @test_ldexp_v4f64(<4 x double> %x, <4 x i32> %y) { +; CHECK-LABEL: define <4 x double> @test_ldexp_v4f64 +; CHECK-SAME: (<4 x double> [[X:%.*]], <4 x i32> [[Y:%.*]]) { +; CHECK-NEXT: [[LDEXP:%.*]] = tail call <4 x double> @_Z5ldexpDv4_dDv4_i(<4 x double> [[X]], <4 x i32> [[Y]]) +; CHECK-NEXT: ret <4 x double> [[LDEXP]] +; + %ldexp = tail call <4 x double> 
@_Z5ldexpDv4_dDv4_i(<4 x double> %x, <4 x i32> %y) + ret <4 x double> %ldexp +} + +define <8 x double> @test_ldexp_v8f64(<8 x double> %x, <8 x i32> %y) { +; CHECK-LABEL: define <8 x double> @test_ldexp_v8f64 +; CHECK-SAME: (<8 x double> [[X:%.*]], <8 x i32> [[Y:%.*]]) { +; CHECK-NEXT: [[LDEXP:%.*]] = tail call <8 x double> @_Z5ldexpDv8_dDv8_i(<8 x double> [[X]], <8 x i32> [[Y]]) +; CHECK-NEXT: ret <8 x double> [[LDEXP]] +; + %ldexp = tail call <8 x double> @_Z5ldexpDv8_dDv8_i(<8 x double> %x, <8 x i32> %y) + ret <8 x double> %ldexp +} + +define <16 x double> @test_ldexp_v16f64(<16 x double> %x, <16 x i32> %y) { +; CHECK-LABEL: define <16 x double> @test_ldexp_v16f64 +; CHECK-SAME: (<16 x double> [[X:%.*]], <16 x i32> [[Y:%.*]]) { +; CHECK-NEXT: [[LDEXP:%.*]] = tail call <16 x double> @_Z5ldexpDv16_dDv16_i(<16 x double> [[X]], <16 x i32> [[Y]]) +; CHECK-NEXT: ret <16 x double> [[LDEXP]] +; + %ldexp = tail call <16 x double> @_Z5ldexpDv16_dDv16_i(<16 x double> %x, <16 x i32> %y) + ret <16 x double> %ldexp +} + +define half @test_ldexp_f16(half %x, i32 %y) { +; CHECK-LABEL: define half @test_ldexp_f16 +; CHECK-SAME: (half [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[LDEXP:%.*]] = tail call half @_Z5ldexpDhi(half [[X]], i32 [[Y]]) +; CHECK-NEXT: ret half [[LDEXP]] +; + %ldexp = tail call half @_Z5ldexpDhi(half %x, i32 %y) + ret half %ldexp +} + +define <2 x half> @test_ldexp_v2f16(<2 x half> %x, <2 x i32> %y) { +; CHECK-LABEL: define <2 x half> @test_ldexp_v2f16 +; CHECK-SAME: (<2 x half> [[X:%.*]], <2 x i32> [[Y:%.*]]) { +; CHECK-NEXT: [[LDEXP:%.*]] = tail call <2 x half> @_Z5ldexpDv2_DhDv2_i(<2 x half> [[X]], <2 x i32> [[Y]]) +; CHECK-NEXT: ret <2 x half> [[LDEXP]] +; + %ldexp = tail call <2 x half> @_Z5ldexpDv2_DhDv2_i(<2 x half> %x, <2 x i32> %y) + ret <2 x half> %ldexp +} + +define <3 x half> @test_ldexp_v3f16(<3 x half> %x, <3 x i32> %y) { +; CHECK-LABEL: define <3 x half> @test_ldexp_v3f16 +; CHECK-SAME: (<3 x half> [[X:%.*]], <3 x i32> [[Y:%.*]]) { +; 
CHECK-NEXT: [[LDEXP:%.*]] = tail call <3 x half> @_Z5ldexpDv3_DhDv3_i(<3 x half> [[X]], <3 x i32> [[Y]]) +; CHECK-NEXT: ret <3 x half> [[LDEXP]] +; + %ldexp = tail call <3 x half> @_Z5ldexpDv3_DhDv3_i(<3 x half> %x, <3 x i32> %y) + ret <3 x half> %ldexp +} + +define <4 x half> @test_ldexp_v4f16(<4 x half> %x, <4 x i32> %y) { +; CHECK-LABEL: define <4 x half> @test_ldexp_v4f16 +; CHECK-SAME: (<4 x half> [[X:%.*]], <4 x i32> [[Y:%.*]]) { +; CHECK-NEXT: [[LDEXP:%.*]] = tail call <4 x half> @_Z5ldexpDv4_DhDv4_i(<4 x half> [[X]], <4 x i32> [[Y]]) +; CHECK-NEXT: ret <4 x half> [[LDEXP]] +; + %ldexp = tail call <4 x half> @_Z5ldexpDv4_DhDv4_i(<4 x half> %x, <4 x i32> %y) + ret <4 x half> %ldexp +} + +define <8 x half> @test_ldexp_v8f16(<8 x half> %x, <8 x i32> %y) { +; CHECK-LABEL: define <8 x half> @test_ldexp_v8f16 +; CHECK-SAME: (<8 x half> [[X:%.*]], <8 x i32> [[Y:%.*]]) { +; CHECK-NEXT: [[LDEXP:%.*]] = tail call <8 x half> @_Z5ldexpDv8_DhDv8_i(<8 x half> [[X]], <8 x i32> [[Y]]) +; CHECK-NEXT: ret <8 x half> [[LDEXP]] +; + %ldexp = tail call <8 x half> @_Z5ldexpDv8_DhDv8_i(<8 x half> %x, <8 x i32> %y) + ret <8 x half> %ldexp +} + +define <16 x half> @test_ldexp_v16f16(<16 x half> %x, <16 x i32> %y) { +; CHECK-LABEL: define <16 x half> @test_ldexp_v16f16 +; CHECK-SAME: (<16 x half> [[X:%.*]], <16 x i32> [[Y:%.*]]) { +; CHECK-NEXT: [[LDEXP:%.*]] = tail call <16 x half> @_Z5ldexpDv16_DhDv16_i(<16 x half> [[X]], <16 x i32> [[Y]]) +; CHECK-NEXT: ret <16 x half> [[LDEXP]] +; + %ldexp = tail call <16 x half> @_Z5ldexpDv16_DhDv16_i(<16 x half> %x, <16 x i32> %y) + ret <16 x half> %ldexp +} + +define float @test_ldexp_f32_minsize(float %x, i32 %y) #3 { +; CHECK-LABEL: define float @test_ldexp_f32_minsize +; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[LDEXP:%.*]] = tail call float @_Z5ldexpfi(float [[X]], i32 [[Y]]) +; CHECK-NEXT: ret float [[LDEXP]] +; + %ldexp = tail call float @_Z5ldexpfi(float %x, i32 %y) + ret float %ldexp +} + +define 
float @test_ldexp_f32_nobuiltin(float %x, i32 %y) { +; CHECK-LABEL: define float @test_ldexp_f32_nobuiltin +; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[LDEXP:%.*]] = tail call float @_Z5ldexpfi(float [[X]], i32 [[Y]]) #[[ATTR2:[0-9]+]] +; CHECK-NEXT: ret float [[LDEXP]] +; + %ldexp = tail call float @_Z5ldexpfi(float %x, i32 %y) #0 + ret float %ldexp +} + +define float @test_ldexp_f32_strictfp(float %x, i32 %y) #4 { +; CHECK-LABEL: define float @test_ldexp_f32_strictfp +; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[LDEXP:%.*]] = tail call nnan float @_Z5ldexpfi(float [[X]], i32 [[Y]]) #[[ATTR1]] +; CHECK-NEXT: ret float [[LDEXP]] +; + %ldexp = tail call nnan float @_Z5ldexpfi(float %x, i32 %y) #4 + ret float %ldexp +} + +attributes #0 = { nobuiltin } +attributes #1 = { "no-builtins" } +attributes #2 = { nounwind memory(none) } +attributes #3 = { minsize } +attributes #4 = { strictfp } Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-log.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-log.ll @@ -0,0 +1,561 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s + +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" + +declare float @_Z3logf(float) +declare <2 x float> @_Z3logDv2_f(<2 x float>) +declare <3 x float> @_Z3logDv3_f(<3 x float>) +declare <4 x float> @_Z3logDv4_f(<4 x float>) +declare <8 x float> @_Z3logDv8_f(<8 x float>) +declare <16 x float> @_Z3logDv16_f(<16 x float>) + +declare double @_Z3logd(double) +declare <2 x double> @_Z3logDv2_d(<2 x double>) +declare <3 x double> @_Z3logDv3_d(<3 x 
double>) +declare <4 x double> @_Z3logDv4_d(<4 x double>) +declare <8 x double> @_Z3logDv8_d(<8 x double>) +declare <16 x double> @_Z3logDv16_d(<16 x double>) + +declare half @_Z3logDh(half) +declare <2 x half> @_Z3logDv2_Dh(<2 x half>) +declare <3 x half> @_Z3logDv3_Dh(<3 x half>) +declare <4 x half> @_Z3logDv4_Dh(<4 x half>) +declare <8 x half> @_Z3logDv8_Dh(<8 x half>) +declare <16 x half> @_Z3logDv16_Dh(<16 x half>) + +define float @test_log_f32(float %arg) { +; CHECK-LABEL: define float @test_log_f32 +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call float @_Z3logf(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[LOG]] +; + %log = tail call float @_Z3logf(float %arg), !fpmath !0 + ret float %log +} + +define <2 x float> @test_log_v2f32(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_log_v2f32 +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call <2 x float> @_Z3logDv2_f(<2 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <2 x float> [[LOG]] +; + %log = tail call <2 x float> @_Z3logDv2_f(<2 x float> %arg), !fpmath !0 + ret <2 x float> %log +} + +define <3 x float> @test_log_v3f32(<3 x float> %arg) { +; CHECK-LABEL: define <3 x float> @test_log_v3f32 +; CHECK-SAME: (<3 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call <3 x float> @_Z3logDv3_f(<3 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <3 x float> [[LOG]] +; + %log = tail call <3 x float> @_Z3logDv3_f(<3 x float> %arg), !fpmath !0 + ret <3 x float> %log +} + +define <4 x float> @test_log_v4f32(<4 x float> %arg) { +; CHECK-LABEL: define <4 x float> @test_log_v4f32 +; CHECK-SAME: (<4 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call <4 x float> @_Z3logDv4_f(<4 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <4 x float> [[LOG]] +; + %log = tail call <4 x float> @_Z3logDv4_f(<4 x float> %arg), !fpmath !0 + ret <4 x float> %log +} + +define <8 x float> @test_log_v8f32(<8 x float> %arg) { +; CHECK-LABEL: define <8 
x float> @test_log_v8f32 +; CHECK-SAME: (<8 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call <8 x float> @_Z3logDv8_f(<8 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <8 x float> [[LOG]] +; + %log = tail call <8 x float> @_Z3logDv8_f(<8 x float> %arg), !fpmath !0 + ret <8 x float> %log +} + +define <16 x float> @test_log_v16f32(<16 x float> %arg) { +; CHECK-LABEL: define <16 x float> @test_log_v16f32 +; CHECK-SAME: (<16 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call <16 x float> @_Z3logDv16_f(<16 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <16 x float> [[LOG]] +; + %log = tail call <16 x float> @_Z3logDv16_f(<16 x float> %arg), !fpmath !0 + ret <16 x float> %log +} + +define float @test_log_cr_f32(float %arg) { +; CHECK-LABEL: define float @test_log_cr_f32 +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call float @_Z3logf(float [[ARG]]) +; CHECK-NEXT: ret float [[LOG]] +; + %log = tail call float @_Z3logf(float %arg) + ret float %log +} + +define <2 x float> @test_log_cr_v2f32(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_log_cr_v2f32 +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call <2 x float> @_Z3logDv2_f(<2 x float> [[ARG]]) +; CHECK-NEXT: ret <2 x float> [[LOG]] +; + %log = tail call <2 x float> @_Z3logDv2_f(<2 x float> %arg) + ret <2 x float> %log +} + +define <3 x float> @test_log_cr_v3f32(<3 x float> %arg) { +; CHECK-LABEL: define <3 x float> @test_log_cr_v3f32 +; CHECK-SAME: (<3 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call <3 x float> @_Z3logDv3_f(<3 x float> [[ARG]]) +; CHECK-NEXT: ret <3 x float> [[LOG]] +; + %log = tail call <3 x float> @_Z3logDv3_f(<3 x float> %arg) + ret <3 x float> %log +} + +define <4 x float> @test_log_cr_v4f32(<4 x float> %arg) { +; CHECK-LABEL: define <4 x float> @test_log_cr_v4f32 +; CHECK-SAME: (<4 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call <4 x float> @_Z3logDv4_f(<4 x float> 
[[ARG]]) +; CHECK-NEXT: ret <4 x float> [[LOG]] +; + %log = tail call <4 x float> @_Z3logDv4_f(<4 x float> %arg) + ret <4 x float> %log +} + +define <8 x float> @test_log_cr_v8f32(<8 x float> %arg) { +; CHECK-LABEL: define <8 x float> @test_log_cr_v8f32 +; CHECK-SAME: (<8 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call <8 x float> @_Z3logDv8_f(<8 x float> [[ARG]]) +; CHECK-NEXT: ret <8 x float> [[LOG]] +; + %log = tail call <8 x float> @_Z3logDv8_f(<8 x float> %arg) + ret <8 x float> %log +} + +define <16 x float> @test_log_cr_v16f32(<16 x float> %arg) { +; CHECK-LABEL: define <16 x float> @test_log_cr_v16f32 +; CHECK-SAME: (<16 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call <16 x float> @_Z3logDv16_f(<16 x float> [[ARG]]) +; CHECK-NEXT: ret <16 x float> [[LOG]] +; + %log = tail call <16 x float> @_Z3logDv16_f(<16 x float> %arg) + ret <16 x float> %log +} + +define double @test_log_f64(double %arg) { +; CHECK-LABEL: define double @test_log_f64 +; CHECK-SAME: (double [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call double @_Z3logd(double [[ARG]]) +; CHECK-NEXT: ret double [[LOG]] +; + %log = tail call double @_Z3logd(double %arg) + ret double %log +} + +define <2 x double> @test_log_v2f64(<2 x double> %arg) { +; CHECK-LABEL: define <2 x double> @test_log_v2f64 +; CHECK-SAME: (<2 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call <2 x double> @_Z3logDv2_d(<2 x double> [[ARG]]) +; CHECK-NEXT: ret <2 x double> [[LOG]] +; + %log = tail call <2 x double> @_Z3logDv2_d(<2 x double> %arg) + ret <2 x double> %log +} + +define <3 x double> @test_log_v3f64(<3 x double> %arg) { +; CHECK-LABEL: define <3 x double> @test_log_v3f64 +; CHECK-SAME: (<3 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call <3 x double> @_Z3logDv3_d(<3 x double> [[ARG]]) +; CHECK-NEXT: ret <3 x double> [[LOG]] +; + %log = tail call <3 x double> @_Z3logDv3_d(<3 x double> %arg) + ret <3 x double> %log +} + +define <4 x double> @test_log_v4f64(<4 
x double> %arg) { +; CHECK-LABEL: define <4 x double> @test_log_v4f64 +; CHECK-SAME: (<4 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call <4 x double> @_Z3logDv4_d(<4 x double> [[ARG]]) +; CHECK-NEXT: ret <4 x double> [[LOG]] +; + %log = tail call <4 x double> @_Z3logDv4_d(<4 x double> %arg) + ret <4 x double> %log +} + +define <8 x double> @test_log_v8f64(<8 x double> %arg) { +; CHECK-LABEL: define <8 x double> @test_log_v8f64 +; CHECK-SAME: (<8 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call <8 x double> @_Z3logDv8_d(<8 x double> [[ARG]]) +; CHECK-NEXT: ret <8 x double> [[LOG]] +; + %log = tail call <8 x double> @_Z3logDv8_d(<8 x double> %arg) + ret <8 x double> %log +} + +define <16 x double> @test_log_v16f64(<16 x double> %arg) { +; CHECK-LABEL: define <16 x double> @test_log_v16f64 +; CHECK-SAME: (<16 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call <16 x double> @_Z3logDv16_d(<16 x double> [[ARG]]) +; CHECK-NEXT: ret <16 x double> [[LOG]] +; + %log = tail call <16 x double> @_Z3logDv16_d(<16 x double> %arg) + ret <16 x double> %log +} + +define half @test_log_f16(half %arg) { +; CHECK-LABEL: define half @test_log_f16 +; CHECK-SAME: (half [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call half @_Z3logDh(half [[ARG]]) +; CHECK-NEXT: ret half [[LOG]] +; + %log = tail call half @_Z3logDh(half %arg) + ret half %log +} + +define half @test_log_f16_fast(half %arg) { +; CHECK-LABEL: define half @test_log_f16_fast +; CHECK-SAME: (half [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call fast half @_Z3logDh(half [[ARG]]) +; CHECK-NEXT: ret half [[LOG]] +; + %log = tail call fast half @_Z3logDh(half %arg) + ret half %log +} + +define <2 x half> @test_log_v2f16(<2 x half> %arg) { +; CHECK-LABEL: define <2 x half> @test_log_v2f16 +; CHECK-SAME: (<2 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call <2 x half> @_Z3logDv2_Dh(<2 x half> [[ARG]]) +; CHECK-NEXT: ret <2 x half> [[LOG]] +; + %log = tail call <2 x half> 
@_Z3logDv2_Dh(<2 x half> %arg) + ret <2 x half> %log +} + +define <3 x half> @test_log_v3f16(<3 x half> %arg) { +; CHECK-LABEL: define <3 x half> @test_log_v3f16 +; CHECK-SAME: (<3 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call <3 x half> @_Z3logDv3_Dh(<3 x half> [[ARG]]) +; CHECK-NEXT: ret <3 x half> [[LOG]] +; + %log = tail call <3 x half> @_Z3logDv3_Dh(<3 x half> %arg) + ret <3 x half> %log +} + +define <4 x half> @test_log_v4f16(<4 x half> %arg) { +; CHECK-LABEL: define <4 x half> @test_log_v4f16 +; CHECK-SAME: (<4 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call <4 x half> @_Z3logDv4_Dh(<4 x half> [[ARG]]) +; CHECK-NEXT: ret <4 x half> [[LOG]] +; + %log = tail call <4 x half> @_Z3logDv4_Dh(<4 x half> %arg) + ret <4 x half> %log +} + +define <8 x half> @test_log_v8f16(<8 x half> %arg) { +; CHECK-LABEL: define <8 x half> @test_log_v8f16 +; CHECK-SAME: (<8 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call <8 x half> @_Z3logDv8_Dh(<8 x half> [[ARG]]) +; CHECK-NEXT: ret <8 x half> [[LOG]] +; + %log = tail call <8 x half> @_Z3logDv8_Dh(<8 x half> %arg) + ret <8 x half> %log +} + +define <16 x half> @test_log_v16f16(<16 x half> %arg) { +; CHECK-LABEL: define <16 x half> @test_log_v16f16 +; CHECK-SAME: (<16 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call <16 x half> @_Z3logDv16_Dh(<16 x half> [[ARG]]) +; CHECK-NEXT: ret <16 x half> [[LOG]] +; + %log = tail call <16 x half> @_Z3logDv16_Dh(<16 x half> %arg) + ret <16 x half> %log +} + +define float @test_log_f32_nobuiltin_callsite(float %arg) { +; CHECK-LABEL: define float @test_log_f32_nobuiltin_callsite +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call float @_Z3logf(float [[ARG]]) #[[ATTR5:[0-9]+]], !fpmath !0 +; CHECK-NEXT: ret float [[LOG]] +; + %log = tail call float @_Z3logf(float %arg) #0, !fpmath !0 + ret float %log +} + +define <2 x float> @test_log_v2f32_nobuiltin_callsite(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> 
@test_log_v2f32_nobuiltin_callsite +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call <2 x float> @_Z3logDv2_f(<2 x float> [[ARG]]) #[[ATTR5]], !fpmath !0 +; CHECK-NEXT: ret <2 x float> [[LOG]] +; + %log = tail call <2 x float> @_Z3logDv2_f(<2 x float> %arg) #0, !fpmath !0 + ret <2 x float> %log +} + +define float @test_log_cr_f32_nobuiltin_callsite(float %arg) { +; CHECK-LABEL: define float @test_log_cr_f32_nobuiltin_callsite +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call float @_Z3logf(float [[ARG]]) #[[ATTR5]] +; CHECK-NEXT: ret float [[LOG]] +; + %log = tail call float @_Z3logf(float %arg) #0 + ret float %log +} + +define <2 x float> @test_log_cr_v2f32_nobuiltin_callsite(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_log_cr_v2f32_nobuiltin_callsite +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call <2 x float> @_Z3logDv2_f(<2 x float> [[ARG]]) #[[ATTR5]] +; CHECK-NEXT: ret <2 x float> [[LOG]] +; + %log = tail call <2 x float> @_Z3logDv2_f(<2 x float> %arg) #0 + ret <2 x float> %log +} + +; "no-builtins" should be ignored +define float @test_log_f32_nobuiltins(float %arg) #1 { +; CHECK-LABEL: define float @test_log_f32_nobuiltins +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[LOG:%.*]] = tail call float @_Z3logf(float [[ARG]]) #[[ATTR5]], !fpmath !0 +; CHECK-NEXT: ret float [[LOG]] +; + %log = tail call float @_Z3logf(float %arg) #0, !fpmath !0 + ret float %log +} + +define <2 x float> @test_log_v2f32_nobuiltins(<2 x float> %arg) #1 { +; CHECK-LABEL: define <2 x float> @test_log_v2f32_nobuiltins +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[LOG:%.*]] = tail call <2 x float> @_Z3logDv2_f(<2 x float> [[ARG]]) #[[ATTR5]], !fpmath !0 +; CHECK-NEXT: ret <2 x float> [[LOG]] +; + %log = tail call <2 x float> @_Z3logDv2_f(<2 x float> %arg) #0, !fpmath !0 + ret <2 x float> %log +} + +define float 
@test_log_cr_f32_nobuiltins(float %arg) #1 { +; CHECK-LABEL: define float @test_log_cr_f32_nobuiltins +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[LOG:%.*]] = tail call float @_Z3logf(float [[ARG]]) #[[ATTR5]] +; CHECK-NEXT: ret float [[LOG]] +; + %log = tail call float @_Z3logf(float %arg) #0 + ret float %log +} + +define <2 x float> @test_log_cr_v2f32_nobuiltins(<2 x float> %arg) #1 { +; CHECK-LABEL: define <2 x float> @test_log_cr_v2f32_nobuiltins +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[LOG:%.*]] = tail call <2 x float> @_Z3logDv2_f(<2 x float> [[ARG]]) #[[ATTR5]] +; CHECK-NEXT: ret <2 x float> [[LOG]] +; + %log = tail call <2 x float> @_Z3logDv2_f(<2 x float> %arg) #0 + ret <2 x float> %log +} + +define float @test_log_f32_preserve_flags(float %arg) { +; CHECK-LABEL: define float @test_log_f32_preserve_flags +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call nnan ninf float @_Z3logf(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[LOG]] +; + %log = tail call nnan ninf float @_Z3logf(float %arg), !fpmath !0 + ret float %log +} + +define <2 x float> @test_log_v2f32_preserve_flags(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_log_v2f32_preserve_flags +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call nnan nsz contract <2 x float> @_Z3logDv2_f(<2 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <2 x float> [[LOG]] +; + %log = tail call contract nsz nnan <2 x float> @_Z3logDv2_f(<2 x float> %arg), !fpmath !0 + ret <2 x float> %log +} + +define float @test_log_f32_preserve_flags_md(float %arg) { +; CHECK-LABEL: define float @test_log_f32_preserve_flags_md +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call nnan ninf float @_Z3logf(float [[ARG]]), !fpmath !0, !foo !1 +; CHECK-NEXT: ret float [[LOG]] +; + %log = tail call nnan ninf float @_Z3logf(float %arg), !fpmath !0, !foo !1 + ret float %log +} + +define <2 x float> 
@test_log_v2f32_preserve_flags_md(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_log_v2f32_preserve_flags_md +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call nnan nsz contract <2 x float> @_Z3logDv2_f(<2 x float> [[ARG]]), !fpmath !0, !foo !1 +; CHECK-NEXT: ret <2 x float> [[LOG]] +; + %log = tail call contract nsz nnan <2 x float> @_Z3logDv2_f(<2 x float> %arg), !fpmath !0, !foo !1 + ret <2 x float> %log +} + +define float @test_log_cr_f32_preserve_flags(float %arg) { +; CHECK-LABEL: define float @test_log_cr_f32_preserve_flags +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call ninf contract float @_Z3logf(float [[ARG]]) +; CHECK-NEXT: ret float [[LOG]] +; + %log = tail call ninf contract float @_Z3logf(float %arg) + ret float %log +} + +define <2 x float> @test_log_cr_v2f32_preserve_flags(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_log_cr_v2f32_preserve_flags +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call nnan nsz <2 x float> @_Z3logDv2_f(<2 x float> [[ARG]]) +; CHECK-NEXT: ret <2 x float> [[LOG]] +; + %log = tail call nnan nsz <2 x float> @_Z3logDv2_f(<2 x float> %arg) + ret <2 x float> %log +} + +; Test the libm name, not a recognized opencl builtin. 
+declare float @logf(float) #2 +declare double @log(double) #2 + +define float @test_libm_log_f32(float %arg) { +; CHECK-LABEL: define float @test_libm_log_f32 +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call float @logf(float [[ARG]]) +; CHECK-NEXT: ret float [[LOG]] +; + %log = tail call float @logf(float %arg) + ret float %log +} + +define float @test_libm_log_f32_fast(float %arg) { +; CHECK-LABEL: define float @test_libm_log_f32_fast +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call fast float @logf(float [[ARG]]) +; CHECK-NEXT: ret float [[LOG]] +; + %log = tail call fast float @logf(float %arg) + ret float %log +} + +define float @test_libm_log_f32_fpmath(float %arg) { +; CHECK-LABEL: define float @test_libm_log_f32_fpmath +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call float @logf(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[LOG]] +; + %log = tail call float @logf(float %arg), !fpmath !0 + ret float %log +} + +define double @test_libm_log_f64(double %arg) { +; CHECK-LABEL: define double @test_libm_log_f64 +; CHECK-SAME: (double [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call double @log(double [[ARG]]) +; CHECK-NEXT: ret double [[LOG]] +; + %log = tail call double @log(double %arg) + ret double %log +} + +define double @test_libm_log_f64_fast(double %arg) { +; CHECK-LABEL: define double @test_libm_log_f64_fast +; CHECK-SAME: (double [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call fast double @log(double [[ARG]]) +; CHECK-NEXT: ret double [[LOG]] +; + %log = tail call fast double @log(double %arg) + ret double %log +} + +define double @test_libm_log_f64_fpmath(double %arg) { +; CHECK-LABEL: define double @test_libm_log_f64_fpmath +; CHECK-SAME: (double [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call double @log(double [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret double [[LOG]] +; + %log = tail call double @log(double %arg), !fpmath !0 + ret double %log +} + 
+define float @test_log_f32_fast_noinline(float %arg) { +; CHECK-LABEL: define float @test_log_f32_fast_noinline +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG:%.*]] = tail call fast float @_Z3logf(float [[ARG]]) #[[ATTR6:[0-9]+]], !fpmath !0 +; CHECK-NEXT: ret float [[LOG]] +; + %log = tail call fast float @_Z3logf(float %arg) #3, !fpmath !0 + ret float %log +} + +define float @test_log_f32_fast_optsize(float %arg) #4 { +; CHECK-LABEL: define float @test_log_f32_fast_optsize +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: [[LOG:%.*]] = tail call fast float @_Z3logf(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[LOG]] +; + %log = tail call fast float @_Z3logf(float %arg), !fpmath !0 + ret float %log +} + +define float @test_log_f32_fast_minsize(float %arg) #5 { +; CHECK-LABEL: define float @test_log_f32_fast_minsize +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR3:[0-9]+]] { +; CHECK-NEXT: [[LOG:%.*]] = tail call fast float @_Z3logf(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[LOG]] +; + %log = tail call fast float @_Z3logf(float %arg), !fpmath !0 + ret float %log +} + +define float @test_log_f32_nsz_contract_optsize(float %arg) #4 { +; CHECK-LABEL: define float @test_log_f32_nsz_contract_optsize +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[LOG:%.*]] = tail call nsz contract float @_Z3logf(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[LOG]] +; + %log = tail call nsz contract float @_Z3logf(float %arg), !fpmath !0 + ret float %log +} + +define float @test_log_f32_nsz_contract_minsize(float %arg) #5 { +; CHECK-LABEL: define float @test_log_f32_nsz_contract_minsize +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[LOG:%.*]] = tail call nsz contract float @_Z3logf(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[LOG]] +; + %log = tail call nsz contract float @_Z3logf(float %arg), !fpmath !0 + ret float %log +} + +define half @test_log_f16_fast_minsize(half %arg) #5 { +; 
CHECK-LABEL: define half @test_log_f16_fast_minsize +; CHECK-SAME: (half [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[LOG:%.*]] = tail call fast half @_Z3logDh(half [[ARG]]) +; CHECK-NEXT: ret half [[LOG]] +; + %log = tail call fast half @_Z3logDh(half %arg) + ret half %log +} + +define float @test_log_f32_strictfp(float %arg) #6 { +; CHECK-LABEL: define float @test_log_f32_strictfp +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR4:[0-9]+]] { +; CHECK-NEXT: [[LOG:%.*]] = tail call nsz float @_Z3logf(float [[ARG]]) #[[ATTR4]] +; CHECK-NEXT: ret float [[LOG]] +; + %log = tail call nsz float @_Z3logf(float %arg) #6 + ret float %log +} + +attributes #0 = { nobuiltin } +attributes #1 = { "no-builtins" } +attributes #2 = { nounwind memory(none) } +attributes #3 = { noinline } +attributes #4 = { optsize } +attributes #5 = { minsize } +attributes #6 = { strictfp } + +!0 = !{float 3.000000e+00} +!1 = !{i32 1234} Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-log10.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-log10.ll @@ -0,0 +1,561 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s + +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" + +declare float @_Z5log10f(float) +declare <2 x float> @_Z5log10Dv2_f(<2 x float>) +declare <3 x float> @_Z5log10Dv3_f(<3 x float>) +declare <4 x float> @_Z5log10Dv4_f(<4 x float>) +declare <8 x float> @_Z5log10Dv8_f(<8 x float>) +declare <16 x float> @_Z5log10Dv16_f(<16 x float>) + +declare double @_Z5log10d(double) +declare <2 x double> @_Z5log10Dv2_d(<2 x double>) +declare <3 x double> @_Z5log10Dv3_d(<3 x double>) +declare <4 x double> 
@_Z5log10Dv4_d(<4 x double>) +declare <8 x double> @_Z5log10Dv8_d(<8 x double>) +declare <16 x double> @_Z5log10Dv16_d(<16 x double>) + +declare half @_Z5log10Dh(half) +declare <2 x half> @_Z5log10Dv2_Dh(<2 x half>) +declare <3 x half> @_Z5log10Dv3_Dh(<3 x half>) +declare <4 x half> @_Z5log10Dv4_Dh(<4 x half>) +declare <8 x half> @_Z5log10Dv8_Dh(<8 x half>) +declare <16 x half> @_Z5log10Dv16_Dh(<16 x half>) + +define float @test_log10_f32(float %arg) { +; CHECK-LABEL: define float @test_log10_f32 +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call float @_Z5log10f(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[LOG10]] +; + %log10 = tail call float @_Z5log10f(float %arg), !fpmath !0 + ret float %log10 +} + +define <2 x float> @test_log10_v2f32(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_log10_v2f32 +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call <2 x float> @_Z5log10Dv2_f(<2 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <2 x float> [[LOG10]] +; + %log10 = tail call <2 x float> @_Z5log10Dv2_f(<2 x float> %arg), !fpmath !0 + ret <2 x float> %log10 +} + +define <3 x float> @test_log10_v3f32(<3 x float> %arg) { +; CHECK-LABEL: define <3 x float> @test_log10_v3f32 +; CHECK-SAME: (<3 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call <3 x float> @_Z5log10Dv3_f(<3 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <3 x float> [[LOG10]] +; + %log10 = tail call <3 x float> @_Z5log10Dv3_f(<3 x float> %arg), !fpmath !0 + ret <3 x float> %log10 +} + +define <4 x float> @test_log10_v4f32(<4 x float> %arg) { +; CHECK-LABEL: define <4 x float> @test_log10_v4f32 +; CHECK-SAME: (<4 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call <4 x float> @_Z5log10Dv4_f(<4 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <4 x float> [[LOG10]] +; + %log10 = tail call <4 x float> @_Z5log10Dv4_f(<4 x float> %arg), !fpmath !0 + ret <4 x float> %log10 +} + +define <8 x float> 
@test_log10_v8f32(<8 x float> %arg) { +; CHECK-LABEL: define <8 x float> @test_log10_v8f32 +; CHECK-SAME: (<8 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call <8 x float> @_Z5log10Dv8_f(<8 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <8 x float> [[LOG10]] +; + %log10 = tail call <8 x float> @_Z5log10Dv8_f(<8 x float> %arg), !fpmath !0 + ret <8 x float> %log10 +} + +define <16 x float> @test_log10_v16f32(<16 x float> %arg) { +; CHECK-LABEL: define <16 x float> @test_log10_v16f32 +; CHECK-SAME: (<16 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call <16 x float> @_Z5log10Dv16_f(<16 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <16 x float> [[LOG10]] +; + %log10 = tail call <16 x float> @_Z5log10Dv16_f(<16 x float> %arg), !fpmath !0 + ret <16 x float> %log10 +} + +define float @test_log10_cr_f32(float %arg) { +; CHECK-LABEL: define float @test_log10_cr_f32 +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call float @_Z5log10f(float [[ARG]]) +; CHECK-NEXT: ret float [[LOG10]] +; + %log10 = tail call float @_Z5log10f(float %arg) + ret float %log10 +} + +define <2 x float> @test_log10_cr_v2f32(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_log10_cr_v2f32 +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call <2 x float> @_Z5log10Dv2_f(<2 x float> [[ARG]]) +; CHECK-NEXT: ret <2 x float> [[LOG10]] +; + %log10 = tail call <2 x float> @_Z5log10Dv2_f(<2 x float> %arg) + ret <2 x float> %log10 +} + +define <3 x float> @test_log10_cr_v3f32(<3 x float> %arg) { +; CHECK-LABEL: define <3 x float> @test_log10_cr_v3f32 +; CHECK-SAME: (<3 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call <3 x float> @_Z5log10Dv3_f(<3 x float> [[ARG]]) +; CHECK-NEXT: ret <3 x float> [[LOG10]] +; + %log10 = tail call <3 x float> @_Z5log10Dv3_f(<3 x float> %arg) + ret <3 x float> %log10 +} + +define <4 x float> @test_log10_cr_v4f32(<4 x float> %arg) { +; CHECK-LABEL: define <4 x float> 
@test_log10_cr_v4f32 +; CHECK-SAME: (<4 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call <4 x float> @_Z5log10Dv4_f(<4 x float> [[ARG]]) +; CHECK-NEXT: ret <4 x float> [[LOG10]] +; + %log10 = tail call <4 x float> @_Z5log10Dv4_f(<4 x float> %arg) + ret <4 x float> %log10 +} + +define <8 x float> @test_log10_cr_v8f32(<8 x float> %arg) { +; CHECK-LABEL: define <8 x float> @test_log10_cr_v8f32 +; CHECK-SAME: (<8 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call <8 x float> @_Z5log10Dv8_f(<8 x float> [[ARG]]) +; CHECK-NEXT: ret <8 x float> [[LOG10]] +; + %log10 = tail call <8 x float> @_Z5log10Dv8_f(<8 x float> %arg) + ret <8 x float> %log10 +} + +define <16 x float> @test_log10_cr_v16f32(<16 x float> %arg) { +; CHECK-LABEL: define <16 x float> @test_log10_cr_v16f32 +; CHECK-SAME: (<16 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call <16 x float> @_Z5log10Dv16_f(<16 x float> [[ARG]]) +; CHECK-NEXT: ret <16 x float> [[LOG10]] +; + %log10 = tail call <16 x float> @_Z5log10Dv16_f(<16 x float> %arg) + ret <16 x float> %log10 +} + +define double @test_log10_f64(double %arg) { +; CHECK-LABEL: define double @test_log10_f64 +; CHECK-SAME: (double [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call double @_Z5log10d(double [[ARG]]) +; CHECK-NEXT: ret double [[LOG10]] +; + %log10 = tail call double @_Z5log10d(double %arg) + ret double %log10 +} + +define <2 x double> @test_log10_v2f64(<2 x double> %arg) { +; CHECK-LABEL: define <2 x double> @test_log10_v2f64 +; CHECK-SAME: (<2 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call <2 x double> @_Z5log10Dv2_d(<2 x double> [[ARG]]) +; CHECK-NEXT: ret <2 x double> [[LOG10]] +; + %log10 = tail call <2 x double> @_Z5log10Dv2_d(<2 x double> %arg) + ret <2 x double> %log10 +} + +define <3 x double> @test_log10_v3f64(<3 x double> %arg) { +; CHECK-LABEL: define <3 x double> @test_log10_v3f64 +; CHECK-SAME: (<3 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call 
<3 x double> @_Z5log10Dv3_d(<3 x double> [[ARG]]) +; CHECK-NEXT: ret <3 x double> [[LOG10]] +; + %log10 = tail call <3 x double> @_Z5log10Dv3_d(<3 x double> %arg) + ret <3 x double> %log10 +} + +define <4 x double> @test_log10_v4f64(<4 x double> %arg) { +; CHECK-LABEL: define <4 x double> @test_log10_v4f64 +; CHECK-SAME: (<4 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call <4 x double> @_Z5log10Dv4_d(<4 x double> [[ARG]]) +; CHECK-NEXT: ret <4 x double> [[LOG10]] +; + %log10 = tail call <4 x double> @_Z5log10Dv4_d(<4 x double> %arg) + ret <4 x double> %log10 +} + +define <8 x double> @test_log10_v8f64(<8 x double> %arg) { +; CHECK-LABEL: define <8 x double> @test_log10_v8f64 +; CHECK-SAME: (<8 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call <8 x double> @_Z5log10Dv8_d(<8 x double> [[ARG]]) +; CHECK-NEXT: ret <8 x double> [[LOG10]] +; + %log10 = tail call <8 x double> @_Z5log10Dv8_d(<8 x double> %arg) + ret <8 x double> %log10 +} + +define <16 x double> @test_log10_v16f64(<16 x double> %arg) { +; CHECK-LABEL: define <16 x double> @test_log10_v16f64 +; CHECK-SAME: (<16 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call <16 x double> @_Z5log10Dv16_d(<16 x double> [[ARG]]) +; CHECK-NEXT: ret <16 x double> [[LOG10]] +; + %log10 = tail call <16 x double> @_Z5log10Dv16_d(<16 x double> %arg) + ret <16 x double> %log10 +} + +define half @test_log10_f16(half %arg) { +; CHECK-LABEL: define half @test_log10_f16 +; CHECK-SAME: (half [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call half @_Z5log10Dh(half [[ARG]]) +; CHECK-NEXT: ret half [[LOG10]] +; + %log10 = tail call half @_Z5log10Dh(half %arg) + ret half %log10 +} + +define half @test_log10_f16_fast(half %arg) { +; CHECK-LABEL: define half @test_log10_f16_fast +; CHECK-SAME: (half [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call fast half @_Z5log10Dh(half [[ARG]]) +; CHECK-NEXT: ret half [[LOG10]] +; + %log10 = tail call fast half @_Z5log10Dh(half %arg) + ret 
half %log10 +} + +define <2 x half> @test_log10_v2f16(<2 x half> %arg) { +; CHECK-LABEL: define <2 x half> @test_log10_v2f16 +; CHECK-SAME: (<2 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call <2 x half> @_Z5log10Dv2_Dh(<2 x half> [[ARG]]) +; CHECK-NEXT: ret <2 x half> [[LOG10]] +; + %log10 = tail call <2 x half> @_Z5log10Dv2_Dh(<2 x half> %arg) + ret <2 x half> %log10 +} + +define <3 x half> @test_log10_v3f16(<3 x half> %arg) { +; CHECK-LABEL: define <3 x half> @test_log10_v3f16 +; CHECK-SAME: (<3 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call <3 x half> @_Z5log10Dv3_Dh(<3 x half> [[ARG]]) +; CHECK-NEXT: ret <3 x half> [[LOG10]] +; + %log10 = tail call <3 x half> @_Z5log10Dv3_Dh(<3 x half> %arg) + ret <3 x half> %log10 +} + +define <4 x half> @test_log10_v4f16(<4 x half> %arg) { +; CHECK-LABEL: define <4 x half> @test_log10_v4f16 +; CHECK-SAME: (<4 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call <4 x half> @_Z5log10Dv4_Dh(<4 x half> [[ARG]]) +; CHECK-NEXT: ret <4 x half> [[LOG10]] +; + %log10 = tail call <4 x half> @_Z5log10Dv4_Dh(<4 x half> %arg) + ret <4 x half> %log10 +} + +define <8 x half> @test_log10_v8f16(<8 x half> %arg) { +; CHECK-LABEL: define <8 x half> @test_log10_v8f16 +; CHECK-SAME: (<8 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call <8 x half> @_Z5log10Dv8_Dh(<8 x half> [[ARG]]) +; CHECK-NEXT: ret <8 x half> [[LOG10]] +; + %log10 = tail call <8 x half> @_Z5log10Dv8_Dh(<8 x half> %arg) + ret <8 x half> %log10 +} + +define <16 x half> @test_log10_v16f16(<16 x half> %arg) { +; CHECK-LABEL: define <16 x half> @test_log10_v16f16 +; CHECK-SAME: (<16 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call <16 x half> @_Z5log10Dv16_Dh(<16 x half> [[ARG]]) +; CHECK-NEXT: ret <16 x half> [[LOG10]] +; + %log10 = tail call <16 x half> @_Z5log10Dv16_Dh(<16 x half> %arg) + ret <16 x half> %log10 +} + +define float @test_log10_f32_nobuiltin_callsite(float %arg) { +; CHECK-LABEL: define float 
@test_log10_f32_nobuiltin_callsite +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call float @_Z5log10f(float [[ARG]]) #[[ATTR5:[0-9]+]], !fpmath !0 +; CHECK-NEXT: ret float [[LOG10]] +; + %log10 = tail call float @_Z5log10f(float %arg) #0, !fpmath !0 + ret float %log10 +} + +define <2 x float> @test_log10_v2f32_nobuiltin_callsite(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_log10_v2f32_nobuiltin_callsite +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call <2 x float> @_Z5log10Dv2_f(<2 x float> [[ARG]]) #[[ATTR5]], !fpmath !0 +; CHECK-NEXT: ret <2 x float> [[LOG10]] +; + %log10 = tail call <2 x float> @_Z5log10Dv2_f(<2 x float> %arg) #0, !fpmath !0 + ret <2 x float> %log10 +} + +define float @test_log10_cr_f32_nobuiltin_callsite(float %arg) { +; CHECK-LABEL: define float @test_log10_cr_f32_nobuiltin_callsite +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call float @_Z5log10f(float [[ARG]]) #[[ATTR5]] +; CHECK-NEXT: ret float [[LOG10]] +; + %log10 = tail call float @_Z5log10f(float %arg) #0 + ret float %log10 +} + +define <2 x float> @test_log10_cr_v2f32_nobuiltin_callsite(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_log10_cr_v2f32_nobuiltin_callsite +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call <2 x float> @_Z5log10Dv2_f(<2 x float> [[ARG]]) #[[ATTR5]] +; CHECK-NEXT: ret <2 x float> [[LOG10]] +; + %log10 = tail call <2 x float> @_Z5log10Dv2_f(<2 x float> %arg) #0 + ret <2 x float> %log10 +} + +; "no-builtins" should be ignored +define float @test_log10_f32_nobuiltins(float %arg) #1 { +; CHECK-LABEL: define float @test_log10_f32_nobuiltins +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[LOG10:%.*]] = tail call float @_Z5log10f(float [[ARG]]) #[[ATTR5]], !fpmath !0 +; CHECK-NEXT: ret float [[LOG10]] +; + %log10 = tail call float @_Z5log10f(float %arg) #0, !fpmath !0 + ret float %log10 +} + 
+define <2 x float> @test_log10_v2f32_nobuiltins(<2 x float> %arg) #1 { +; CHECK-LABEL: define <2 x float> @test_log10_v2f32_nobuiltins +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[LOG10:%.*]] = tail call <2 x float> @_Z5log10Dv2_f(<2 x float> [[ARG]]) #[[ATTR5]], !fpmath !0 +; CHECK-NEXT: ret <2 x float> [[LOG10]] +; + %log10 = tail call <2 x float> @_Z5log10Dv2_f(<2 x float> %arg) #0, !fpmath !0 + ret <2 x float> %log10 +} + +define float @test_log10_cr_f32_nobuiltins(float %arg) #1 { +; CHECK-LABEL: define float @test_log10_cr_f32_nobuiltins +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[LOG10:%.*]] = tail call float @_Z5log10f(float [[ARG]]) #[[ATTR5]] +; CHECK-NEXT: ret float [[LOG10]] +; + %log10 = tail call float @_Z5log10f(float %arg) #0 + ret float %log10 +} + +define <2 x float> @test_log10_cr_v2f32_nobuiltins(<2 x float> %arg) #1 { +; CHECK-LABEL: define <2 x float> @test_log10_cr_v2f32_nobuiltins +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[LOG10:%.*]] = tail call <2 x float> @_Z5log10Dv2_f(<2 x float> [[ARG]]) #[[ATTR5]] +; CHECK-NEXT: ret <2 x float> [[LOG10]] +; + %log10 = tail call <2 x float> @_Z5log10Dv2_f(<2 x float> %arg) #0 + ret <2 x float> %log10 +} + +define float @test_log10_f32_preserve_flags(float %arg) { +; CHECK-LABEL: define float @test_log10_f32_preserve_flags +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call nnan ninf float @_Z5log10f(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[LOG10]] +; + %log10 = tail call nnan ninf float @_Z5log10f(float %arg), !fpmath !0 + ret float %log10 +} + +define <2 x float> @test_log10_v2f32_preserve_flags(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_log10_v2f32_preserve_flags +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call nnan nsz contract <2 x float> @_Z5log10Dv2_f(<2 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <2 x float> [[LOG10]] +; + 
%log10 = tail call contract nsz nnan <2 x float> @_Z5log10Dv2_f(<2 x float> %arg), !fpmath !0 + ret <2 x float> %log10 +} + +define float @test_log10_f32_preserve_flags_md(float %arg) { +; CHECK-LABEL: define float @test_log10_f32_preserve_flags_md +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call nnan ninf float @_Z5log10f(float [[ARG]]), !fpmath !0, !foo !1 +; CHECK-NEXT: ret float [[LOG10]] +; + %log10 = tail call nnan ninf float @_Z5log10f(float %arg), !fpmath !0, !foo !1 + ret float %log10 +} + +define <2 x float> @test_log10_v2f32_preserve_flags_md(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_log10_v2f32_preserve_flags_md +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call nnan nsz contract <2 x float> @_Z5log10Dv2_f(<2 x float> [[ARG]]), !fpmath !0, !foo !1 +; CHECK-NEXT: ret <2 x float> [[LOG10]] +; + %log10 = tail call contract nsz nnan <2 x float> @_Z5log10Dv2_f(<2 x float> %arg), !fpmath !0, !foo !1 + ret <2 x float> %log10 +} + +define float @test_log10_cr_f32_preserve_flags(float %arg) { +; CHECK-LABEL: define float @test_log10_cr_f32_preserve_flags +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call ninf contract float @_Z5log10f(float [[ARG]]) +; CHECK-NEXT: ret float [[LOG10]] +; + %log10 = tail call ninf contract float @_Z5log10f(float %arg) + ret float %log10 +} + +define <2 x float> @test_log10_cr_v2f32_preserve_flags(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_log10_cr_v2f32_preserve_flags +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call nnan nsz <2 x float> @_Z5log10Dv2_f(<2 x float> [[ARG]]) +; CHECK-NEXT: ret <2 x float> [[LOG10]] +; + %log10 = tail call nnan nsz <2 x float> @_Z5log10Dv2_f(<2 x float> %arg) + ret <2 x float> %log10 +} + +; Test the libm name, not a recognized opencl builtin. 
+declare float @log10f(float) #2 +declare double @log10(double) #2 + +define float @test_libm_log10_f32(float %arg) { +; CHECK-LABEL: define float @test_libm_log10_f32 +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call float @log10f(float [[ARG]]) +; CHECK-NEXT: ret float [[LOG10]] +; + %log10 = tail call float @log10f(float %arg) + ret float %log10 +} + +define float @test_libm_log10_f32_fast(float %arg) { +; CHECK-LABEL: define float @test_libm_log10_f32_fast +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call fast float @log10f(float [[ARG]]) +; CHECK-NEXT: ret float [[LOG10]] +; + %log10 = tail call fast float @log10f(float %arg) + ret float %log10 +} + +define float @test_libm_log10_f32_fpmath(float %arg) { +; CHECK-LABEL: define float @test_libm_log10_f32_fpmath +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call float @log10f(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[LOG10]] +; + %log10 = tail call float @log10f(float %arg), !fpmath !0 + ret float %log10 +} + +define double @test_libm_log10_f64(double %arg) { +; CHECK-LABEL: define double @test_libm_log10_f64 +; CHECK-SAME: (double [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call double @log10(double [[ARG]]) +; CHECK-NEXT: ret double [[LOG10]] +; + %log10 = tail call double @log10(double %arg) + ret double %log10 +} + +define double @test_libm_log10_f64_fast(double %arg) { +; CHECK-LABEL: define double @test_libm_log10_f64_fast +; CHECK-SAME: (double [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call fast double @log10(double [[ARG]]) +; CHECK-NEXT: ret double [[LOG10]] +; + %log10 = tail call fast double @log10(double %arg) + ret double %log10 +} + +define double @test_libm_log10_f64_fpmath(double %arg) { +; CHECK-LABEL: define double @test_libm_log10_f64_fpmath +; CHECK-SAME: (double [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call double @log10(double [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret 
double [[LOG10]] +; + %log10 = tail call double @log10(double %arg), !fpmath !0 + ret double %log10 +} + +define float @test_log10_f32_fast_noinline(float %arg) { +; CHECK-LABEL: define float @test_log10_f32_fast_noinline +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG10:%.*]] = tail call fast float @_Z5log10f(float [[ARG]]) #[[ATTR6:[0-9]+]], !fpmath !0 +; CHECK-NEXT: ret float [[LOG10]] +; + %log10 = tail call fast float @_Z5log10f(float %arg) #3, !fpmath !0 + ret float %log10 +} + +define float @test_log10_f32_fast_optsize(float %arg) #4 { +; CHECK-LABEL: define float @test_log10_f32_fast_optsize +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: [[LOG10:%.*]] = tail call fast float @_Z5log10f(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[LOG10]] +; + %log10 = tail call fast float @_Z5log10f(float %arg), !fpmath !0 + ret float %log10 +} + +define float @test_log10_f32_fast_minsize(float %arg) #5 { +; CHECK-LABEL: define float @test_log10_f32_fast_minsize +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR3:[0-9]+]] { +; CHECK-NEXT: [[LOG10:%.*]] = tail call fast float @_Z5log10f(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[LOG10]] +; + %log10 = tail call fast float @_Z5log10f(float %arg), !fpmath !0 + ret float %log10 +} + +define float @test_log10_f32_nsz_contract_optsize(float %arg) #4 { +; CHECK-LABEL: define float @test_log10_f32_nsz_contract_optsize +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[LOG10:%.*]] = tail call nsz contract float @_Z5log10f(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[LOG10]] +; + %log10 = tail call nsz contract float @_Z5log10f(float %arg), !fpmath !0 + ret float %log10 +} + +define float @test_log10_f32_nsz_contract_minsize(float %arg) #5 { +; CHECK-LABEL: define float @test_log10_f32_nsz_contract_minsize +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[LOG10:%.*]] = tail call nsz contract float @_Z5log10f(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret 
float [[LOG10]] +; + %log10 = tail call nsz contract float @_Z5log10f(float %arg), !fpmath !0 + ret float %log10 +} + +define half @test_log10_f16_fast_minsize(half %arg) #5 { +; CHECK-LABEL: define half @test_log10_f16_fast_minsize +; CHECK-SAME: (half [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[LOG10:%.*]] = tail call fast half @_Z5log10Dh(half [[ARG]]) +; CHECK-NEXT: ret half [[LOG10]] +; + %log10 = tail call fast half @_Z5log10Dh(half %arg) + ret half %log10 +} + +define float @test_log10_f32_strictfp(float %arg) #6 { +; CHECK-LABEL: define float @test_log10_f32_strictfp +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR4:[0-9]+]] { +; CHECK-NEXT: [[LOG10:%.*]] = tail call nsz float @_Z5log10f(float [[ARG]]) #[[ATTR4]] +; CHECK-NEXT: ret float [[LOG10]] +; + %log10 = tail call nsz float @_Z5log10f(float %arg) #6 + ret float %log10 +} + +attributes #0 = { nobuiltin } +attributes #1 = { "no-builtins" } +attributes #2 = { nounwind memory(none) } +attributes #3 = { noinline } +attributes #4 = { optsize } +attributes #5 = { minsize } +attributes #6 = { strictfp } + +!0 = !{float 3.000000e+00} +!1 = !{i32 1234} Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-log2.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-log2.ll @@ -0,0 +1,561 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s + +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" + +declare float @_Z4log2f(float) +declare <2 x float> @_Z4log2Dv2_f(<2 x float>) +declare <3 x float> @_Z4log2Dv3_f(<3 x float>) +declare <4 x float> @_Z4log2Dv4_f(<4 x float>) +declare <8 x float> @_Z4log2Dv8_f(<8 x float>) +declare <16 x 
float> @_Z4log2Dv16_f(<16 x float>) + +declare double @_Z4log2d(double) +declare <2 x double> @_Z4log2Dv2_d(<2 x double>) +declare <3 x double> @_Z4log2Dv3_d(<3 x double>) +declare <4 x double> @_Z4log2Dv4_d(<4 x double>) +declare <8 x double> @_Z4log2Dv8_d(<8 x double>) +declare <16 x double> @_Z4log2Dv16_d(<16 x double>) + +declare half @_Z4log2Dh(half) +declare <2 x half> @_Z4log2Dv2_Dh(<2 x half>) +declare <3 x half> @_Z4log2Dv3_Dh(<3 x half>) +declare <4 x half> @_Z4log2Dv4_Dh(<4 x half>) +declare <8 x half> @_Z4log2Dv8_Dh(<8 x half>) +declare <16 x half> @_Z4log2Dv16_Dh(<16 x half>) + +define float @test_log2_f32(float %arg) { +; CHECK-LABEL: define float @test_log2_f32 +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call float @_Z4log2f(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[LOG2]] +; + %log2 = tail call float @_Z4log2f(float %arg), !fpmath !0 + ret float %log2 +} + +define <2 x float> @test_log2_v2f32(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_log2_v2f32 +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call <2 x float> @_Z4log2Dv2_f(<2 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <2 x float> [[LOG2]] +; + %log2 = tail call <2 x float> @_Z4log2Dv2_f(<2 x float> %arg), !fpmath !0 + ret <2 x float> %log2 +} + +define <3 x float> @test_log2_v3f32(<3 x float> %arg) { +; CHECK-LABEL: define <3 x float> @test_log2_v3f32 +; CHECK-SAME: (<3 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call <3 x float> @_Z4log2Dv3_f(<3 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <3 x float> [[LOG2]] +; + %log2 = tail call <3 x float> @_Z4log2Dv3_f(<3 x float> %arg), !fpmath !0 + ret <3 x float> %log2 +} + +define <4 x float> @test_log2_v4f32(<4 x float> %arg) { +; CHECK-LABEL: define <4 x float> @test_log2_v4f32 +; CHECK-SAME: (<4 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call <4 x float> @_Z4log2Dv4_f(<4 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <4 x 
float> [[LOG2]] +; + %log2 = tail call <4 x float> @_Z4log2Dv4_f(<4 x float> %arg), !fpmath !0 + ret <4 x float> %log2 +} + +define <8 x float> @test_log2_v8f32(<8 x float> %arg) { +; CHECK-LABEL: define <8 x float> @test_log2_v8f32 +; CHECK-SAME: (<8 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call <8 x float> @_Z4log2Dv8_f(<8 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <8 x float> [[LOG2]] +; + %log2 = tail call <8 x float> @_Z4log2Dv8_f(<8 x float> %arg), !fpmath !0 + ret <8 x float> %log2 +} + +define <16 x float> @test_log2_v16f32(<16 x float> %arg) { +; CHECK-LABEL: define <16 x float> @test_log2_v16f32 +; CHECK-SAME: (<16 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call <16 x float> @_Z4log2Dv16_f(<16 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <16 x float> [[LOG2]] +; + %log2 = tail call <16 x float> @_Z4log2Dv16_f(<16 x float> %arg), !fpmath !0 + ret <16 x float> %log2 +} + +define float @test_log2_cr_f32(float %arg) { +; CHECK-LABEL: define float @test_log2_cr_f32 +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call float @_Z4log2f(float [[ARG]]) +; CHECK-NEXT: ret float [[LOG2]] +; + %log2 = tail call float @_Z4log2f(float %arg) + ret float %log2 +} + +define <2 x float> @test_log2_cr_v2f32(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_log2_cr_v2f32 +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call <2 x float> @_Z4log2Dv2_f(<2 x float> [[ARG]]) +; CHECK-NEXT: ret <2 x float> [[LOG2]] +; + %log2 = tail call <2 x float> @_Z4log2Dv2_f(<2 x float> %arg) + ret <2 x float> %log2 +} + +define <3 x float> @test_log2_cr_v3f32(<3 x float> %arg) { +; CHECK-LABEL: define <3 x float> @test_log2_cr_v3f32 +; CHECK-SAME: (<3 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call <3 x float> @_Z4log2Dv3_f(<3 x float> [[ARG]]) +; CHECK-NEXT: ret <3 x float> [[LOG2]] +; + %log2 = tail call <3 x float> @_Z4log2Dv3_f(<3 x float> %arg) + ret <3 x float> 
%log2 +} + +define <4 x float> @test_log2_cr_v4f32(<4 x float> %arg) { +; CHECK-LABEL: define <4 x float> @test_log2_cr_v4f32 +; CHECK-SAME: (<4 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call <4 x float> @_Z4log2Dv4_f(<4 x float> [[ARG]]) +; CHECK-NEXT: ret <4 x float> [[LOG2]] +; + %log2 = tail call <4 x float> @_Z4log2Dv4_f(<4 x float> %arg) + ret <4 x float> %log2 +} + +define <8 x float> @test_log2_cr_v8f32(<8 x float> %arg) { +; CHECK-LABEL: define <8 x float> @test_log2_cr_v8f32 +; CHECK-SAME: (<8 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call <8 x float> @_Z4log2Dv8_f(<8 x float> [[ARG]]) +; CHECK-NEXT: ret <8 x float> [[LOG2]] +; + %log2 = tail call <8 x float> @_Z4log2Dv8_f(<8 x float> %arg) + ret <8 x float> %log2 +} + +define <16 x float> @test_log2_cr_v16f32(<16 x float> %arg) { +; CHECK-LABEL: define <16 x float> @test_log2_cr_v16f32 +; CHECK-SAME: (<16 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call <16 x float> @_Z4log2Dv16_f(<16 x float> [[ARG]]) +; CHECK-NEXT: ret <16 x float> [[LOG2]] +; + %log2 = tail call <16 x float> @_Z4log2Dv16_f(<16 x float> %arg) + ret <16 x float> %log2 +} + +define double @test_log2_f64(double %arg) { +; CHECK-LABEL: define double @test_log2_f64 +; CHECK-SAME: (double [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call double @_Z4log2d(double [[ARG]]) +; CHECK-NEXT: ret double [[LOG2]] +; + %log2 = tail call double @_Z4log2d(double %arg) + ret double %log2 +} + +define <2 x double> @test_log2_v2f64(<2 x double> %arg) { +; CHECK-LABEL: define <2 x double> @test_log2_v2f64 +; CHECK-SAME: (<2 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call <2 x double> @_Z4log2Dv2_d(<2 x double> [[ARG]]) +; CHECK-NEXT: ret <2 x double> [[LOG2]] +; + %log2 = tail call <2 x double> @_Z4log2Dv2_d(<2 x double> %arg) + ret <2 x double> %log2 +} + +define <3 x double> @test_log2_v3f64(<3 x double> %arg) { +; CHECK-LABEL: define <3 x double> @test_log2_v3f64 +; CHECK-SAME: (<3 x 
double> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call <3 x double> @_Z4log2Dv3_d(<3 x double> [[ARG]]) +; CHECK-NEXT: ret <3 x double> [[LOG2]] +; + %log2 = tail call <3 x double> @_Z4log2Dv3_d(<3 x double> %arg) + ret <3 x double> %log2 +} + +define <4 x double> @test_log2_v4f64(<4 x double> %arg) { +; CHECK-LABEL: define <4 x double> @test_log2_v4f64 +; CHECK-SAME: (<4 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call <4 x double> @_Z4log2Dv4_d(<4 x double> [[ARG]]) +; CHECK-NEXT: ret <4 x double> [[LOG2]] +; + %log2 = tail call <4 x double> @_Z4log2Dv4_d(<4 x double> %arg) + ret <4 x double> %log2 +} + +define <8 x double> @test_log2_v8f64(<8 x double> %arg) { +; CHECK-LABEL: define <8 x double> @test_log2_v8f64 +; CHECK-SAME: (<8 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call <8 x double> @_Z4log2Dv8_d(<8 x double> [[ARG]]) +; CHECK-NEXT: ret <8 x double> [[LOG2]] +; + %log2 = tail call <8 x double> @_Z4log2Dv8_d(<8 x double> %arg) + ret <8 x double> %log2 +} + +define <16 x double> @test_log2_v16f64(<16 x double> %arg) { +; CHECK-LABEL: define <16 x double> @test_log2_v16f64 +; CHECK-SAME: (<16 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call <16 x double> @_Z4log2Dv16_d(<16 x double> [[ARG]]) +; CHECK-NEXT: ret <16 x double> [[LOG2]] +; + %log2 = tail call <16 x double> @_Z4log2Dv16_d(<16 x double> %arg) + ret <16 x double> %log2 +} + +define half @test_log2_f16(half %arg) { +; CHECK-LABEL: define half @test_log2_f16 +; CHECK-SAME: (half [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call half @_Z4log2Dh(half [[ARG]]) +; CHECK-NEXT: ret half [[LOG2]] +; + %log2 = tail call half @_Z4log2Dh(half %arg) + ret half %log2 +} + +define half @test_log2_f16_fast(half %arg) { +; CHECK-LABEL: define half @test_log2_f16_fast +; CHECK-SAME: (half [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call fast half @_Z4log2Dh(half [[ARG]]) +; CHECK-NEXT: ret half [[LOG2]] +; + %log2 = tail call fast half 
@_Z4log2Dh(half %arg) + ret half %log2 +} + +define <2 x half> @test_log2_v2f16(<2 x half> %arg) { +; CHECK-LABEL: define <2 x half> @test_log2_v2f16 +; CHECK-SAME: (<2 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call <2 x half> @_Z4log2Dv2_Dh(<2 x half> [[ARG]]) +; CHECK-NEXT: ret <2 x half> [[LOG2]] +; + %log2 = tail call <2 x half> @_Z4log2Dv2_Dh(<2 x half> %arg) + ret <2 x half> %log2 +} + +define <3 x half> @test_log2_v3f16(<3 x half> %arg) { +; CHECK-LABEL: define <3 x half> @test_log2_v3f16 +; CHECK-SAME: (<3 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call <3 x half> @_Z4log2Dv3_Dh(<3 x half> [[ARG]]) +; CHECK-NEXT: ret <3 x half> [[LOG2]] +; + %log2 = tail call <3 x half> @_Z4log2Dv3_Dh(<3 x half> %arg) + ret <3 x half> %log2 +} + +define <4 x half> @test_log2_v4f16(<4 x half> %arg) { +; CHECK-LABEL: define <4 x half> @test_log2_v4f16 +; CHECK-SAME: (<4 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call <4 x half> @_Z4log2Dv4_Dh(<4 x half> [[ARG]]) +; CHECK-NEXT: ret <4 x half> [[LOG2]] +; + %log2 = tail call <4 x half> @_Z4log2Dv4_Dh(<4 x half> %arg) + ret <4 x half> %log2 +} + +define <8 x half> @test_log2_v8f16(<8 x half> %arg) { +; CHECK-LABEL: define <8 x half> @test_log2_v8f16 +; CHECK-SAME: (<8 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call <8 x half> @_Z4log2Dv8_Dh(<8 x half> [[ARG]]) +; CHECK-NEXT: ret <8 x half> [[LOG2]] +; + %log2 = tail call <8 x half> @_Z4log2Dv8_Dh(<8 x half> %arg) + ret <8 x half> %log2 +} + +define <16 x half> @test_log2_v16f16(<16 x half> %arg) { +; CHECK-LABEL: define <16 x half> @test_log2_v16f16 +; CHECK-SAME: (<16 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call <16 x half> @_Z4log2Dv16_Dh(<16 x half> [[ARG]]) +; CHECK-NEXT: ret <16 x half> [[LOG2]] +; + %log2 = tail call <16 x half> @_Z4log2Dv16_Dh(<16 x half> %arg) + ret <16 x half> %log2 +} + +define float @test_log2_f32_nobuiltin_callsite(float %arg) { +; CHECK-LABEL: define float 
@test_log2_f32_nobuiltin_callsite +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call float @_Z4log2f(float [[ARG]]) #[[ATTR5:[0-9]+]], !fpmath !0 +; CHECK-NEXT: ret float [[LOG2]] +; + %log2 = tail call float @_Z4log2f(float %arg) #0, !fpmath !0 + ret float %log2 +} + +define <2 x float> @test_log2_v2f32_nobuiltin_callsite(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_log2_v2f32_nobuiltin_callsite +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call <2 x float> @_Z4log2Dv2_f(<2 x float> [[ARG]]) #[[ATTR5]], !fpmath !0 +; CHECK-NEXT: ret <2 x float> [[LOG2]] +; + %log2 = tail call <2 x float> @_Z4log2Dv2_f(<2 x float> %arg) #0, !fpmath !0 + ret <2 x float> %log2 +} + +define float @test_log2_cr_f32_nobuiltin_callsite(float %arg) { +; CHECK-LABEL: define float @test_log2_cr_f32_nobuiltin_callsite +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call float @_Z4log2f(float [[ARG]]) #[[ATTR5]] +; CHECK-NEXT: ret float [[LOG2]] +; + %log2 = tail call float @_Z4log2f(float %arg) #0 + ret float %log2 +} + +define <2 x float> @test_log2_cr_v2f32_nobuiltin_callsite(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_log2_cr_v2f32_nobuiltin_callsite +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call <2 x float> @_Z4log2Dv2_f(<2 x float> [[ARG]]) #[[ATTR5]] +; CHECK-NEXT: ret <2 x float> [[LOG2]] +; + %log2 = tail call <2 x float> @_Z4log2Dv2_f(<2 x float> %arg) #0 + ret <2 x float> %log2 +} + +; "no-builtins" should be ignored +define float @test_log2_f32_nobuiltins(float %arg) #1 { +; CHECK-LABEL: define float @test_log2_f32_nobuiltins +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[LOG2:%.*]] = tail call float @_Z4log2f(float [[ARG]]) #[[ATTR5]], !fpmath !0 +; CHECK-NEXT: ret float [[LOG2]] +; + %log2 = tail call float @_Z4log2f(float %arg) #0, !fpmath !0 + ret float %log2 +} + +define <2 x float> 
@test_log2_v2f32_nobuiltins(<2 x float> %arg) #1 { +; CHECK-LABEL: define <2 x float> @test_log2_v2f32_nobuiltins +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[LOG2:%.*]] = tail call <2 x float> @_Z4log2Dv2_f(<2 x float> [[ARG]]) #[[ATTR5]], !fpmath !0 +; CHECK-NEXT: ret <2 x float> [[LOG2]] +; + %log2 = tail call <2 x float> @_Z4log2Dv2_f(<2 x float> %arg) #0, !fpmath !0 + ret <2 x float> %log2 +} + +define float @test_log2_cr_f32_nobuiltins(float %arg) #1 { +; CHECK-LABEL: define float @test_log2_cr_f32_nobuiltins +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[LOG2:%.*]] = tail call float @_Z4log2f(float [[ARG]]) #[[ATTR5]] +; CHECK-NEXT: ret float [[LOG2]] +; + %log2 = tail call float @_Z4log2f(float %arg) #0 + ret float %log2 +} + +define <2 x float> @test_log2_cr_v2f32_nobuiltins(<2 x float> %arg) #1 { +; CHECK-LABEL: define <2 x float> @test_log2_cr_v2f32_nobuiltins +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[LOG2:%.*]] = tail call <2 x float> @_Z4log2Dv2_f(<2 x float> [[ARG]]) #[[ATTR5]] +; CHECK-NEXT: ret <2 x float> [[LOG2]] +; + %log2 = tail call <2 x float> @_Z4log2Dv2_f(<2 x float> %arg) #0 + ret <2 x float> %log2 +} + +define float @test_log2_f32_preserve_flags(float %arg) { +; CHECK-LABEL: define float @test_log2_f32_preserve_flags +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call nnan ninf float @_Z4log2f(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[LOG2]] +; + %log2 = tail call nnan ninf float @_Z4log2f(float %arg), !fpmath !0 + ret float %log2 +} + +define <2 x float> @test_log2_v2f32_preserve_flags(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_log2_v2f32_preserve_flags +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call nnan nsz contract <2 x float> @_Z4log2Dv2_f(<2 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <2 x float> [[LOG2]] +; + %log2 = tail call contract nsz nnan <2 x float> 
@_Z4log2Dv2_f(<2 x float> %arg), !fpmath !0 + ret <2 x float> %log2 +} + +define float @test_log2_f32_preserve_flags_md(float %arg) { +; CHECK-LABEL: define float @test_log2_f32_preserve_flags_md +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call nnan ninf float @_Z4log2f(float [[ARG]]), !fpmath !0, !foo !1 +; CHECK-NEXT: ret float [[LOG2]] +; + %log2 = tail call nnan ninf float @_Z4log2f(float %arg), !fpmath !0, !foo !1 + ret float %log2 +} + +define <2 x float> @test_log2_v2f32_preserve_flags_md(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_log2_v2f32_preserve_flags_md +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call nnan nsz contract <2 x float> @_Z4log2Dv2_f(<2 x float> [[ARG]]), !fpmath !0, !foo !1 +; CHECK-NEXT: ret <2 x float> [[LOG2]] +; + %log2 = tail call contract nsz nnan <2 x float> @_Z4log2Dv2_f(<2 x float> %arg), !fpmath !0, !foo !1 + ret <2 x float> %log2 +} + +define float @test_log2_cr_f32_preserve_flags(float %arg) { +; CHECK-LABEL: define float @test_log2_cr_f32_preserve_flags +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call ninf contract float @_Z4log2f(float [[ARG]]) +; CHECK-NEXT: ret float [[LOG2]] +; + %log2 = tail call ninf contract float @_Z4log2f(float %arg) + ret float %log2 +} + +define <2 x float> @test_log2_cr_v2f32_preserve_flags(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_log2_cr_v2f32_preserve_flags +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call nnan nsz <2 x float> @_Z4log2Dv2_f(<2 x float> [[ARG]]) +; CHECK-NEXT: ret <2 x float> [[LOG2]] +; + %log2 = tail call nnan nsz <2 x float> @_Z4log2Dv2_f(<2 x float> %arg) + ret <2 x float> %log2 +} + +; Test the libm name, not a recognized opencl builtin. 
+declare float @log2f(float) #2 +declare double @log2(double) #2 + +define float @test_libm_log2_f32(float %arg) { +; CHECK-LABEL: define float @test_libm_log2_f32 +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call float @log2f(float [[ARG]]) +; CHECK-NEXT: ret float [[LOG2]] +; + %log2 = tail call float @log2f(float %arg) + ret float %log2 +} + +define float @test_libm_log2_f32_fast(float %arg) { +; CHECK-LABEL: define float @test_libm_log2_f32_fast +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call fast float @log2f(float [[ARG]]) +; CHECK-NEXT: ret float [[LOG2]] +; + %log2 = tail call fast float @log2f(float %arg) + ret float %log2 +} + +define float @test_libm_log2_f32_fpmath(float %arg) { +; CHECK-LABEL: define float @test_libm_log2_f32_fpmath +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call float @log2f(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[LOG2]] +; + %log2 = tail call float @log2f(float %arg), !fpmath !0 + ret float %log2 +} + +define double @test_libm_log2_f64(double %arg) { +; CHECK-LABEL: define double @test_libm_log2_f64 +; CHECK-SAME: (double [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call double @log2(double [[ARG]]) +; CHECK-NEXT: ret double [[LOG2]] +; + %log2 = tail call double @log2(double %arg) + ret double %log2 +} + +define double @test_libm_log2_f64_fast(double %arg) { +; CHECK-LABEL: define double @test_libm_log2_f64_fast +; CHECK-SAME: (double [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call fast double @log2(double [[ARG]]) +; CHECK-NEXT: ret double [[LOG2]] +; + %log2 = tail call fast double @log2(double %arg) + ret double %log2 +} + +define double @test_libm_log2_f64_fpmath(double %arg) { +; CHECK-LABEL: define double @test_libm_log2_f64_fpmath +; CHECK-SAME: (double [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call double @log2(double [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret double [[LOG2]] +; + %log2 = tail call double 
@log2(double %arg), !fpmath !0 + ret double %log2 +} + +define float @test_log2_f32_fast_noinline(float %arg) { +; CHECK-LABEL: define float @test_log2_f32_fast_noinline +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[LOG2:%.*]] = tail call fast float @_Z4log2f(float [[ARG]]) #[[ATTR6:[0-9]+]], !fpmath !0 +; CHECK-NEXT: ret float [[LOG2]] +; + %log2 = tail call fast float @_Z4log2f(float %arg) #3, !fpmath !0 + ret float %log2 +} + +define float @test_log2_f32_fast_optsize(float %arg) #4 { +; CHECK-LABEL: define float @test_log2_f32_fast_optsize +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: [[LOG2:%.*]] = tail call fast float @_Z4log2f(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[LOG2]] +; + %log2 = tail call fast float @_Z4log2f(float %arg), !fpmath !0 + ret float %log2 +} + +define float @test_log2_f32_fast_minsize(float %arg) #5 { +; CHECK-LABEL: define float @test_log2_f32_fast_minsize +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR3:[0-9]+]] { +; CHECK-NEXT: [[LOG2:%.*]] = tail call fast float @_Z4log2f(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[LOG2]] +; + %log2 = tail call fast float @_Z4log2f(float %arg), !fpmath !0 + ret float %log2 +} + +define float @test_log2_f32_nsz_contract_optsize(float %arg) #4 { +; CHECK-LABEL: define float @test_log2_f32_nsz_contract_optsize +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[LOG2:%.*]] = tail call nsz contract float @_Z4log2f(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[LOG2]] +; + %log2 = tail call nsz contract float @_Z4log2f(float %arg), !fpmath !0 + ret float %log2 +} + +define float @test_log2_f32_nsz_contract_minsize(float %arg) #5 { +; CHECK-LABEL: define float @test_log2_f32_nsz_contract_minsize +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[LOG2:%.*]] = tail call nsz contract float @_Z4log2f(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[LOG2]] +; + %log2 = tail call nsz contract float @_Z4log2f(float %arg), 
!fpmath !0 + ret float %log2 +} + +define half @test_log2_f16_fast_minsize(half %arg) #5 { +; CHECK-LABEL: define half @test_log2_f16_fast_minsize +; CHECK-SAME: (half [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[LOG2:%.*]] = tail call fast half @_Z4log2Dh(half [[ARG]]) +; CHECK-NEXT: ret half [[LOG2]] +; + %log2 = tail call fast half @_Z4log2Dh(half %arg) + ret half %log2 +} + +define float @test_log2_f32_strictfp(float %arg) #6 { +; CHECK-LABEL: define float @test_log2_f32_strictfp +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR4:[0-9]+]] { +; CHECK-NEXT: [[LOG:%.*]] = tail call nsz float @_Z4log2f(float [[ARG]]) #[[ATTR4]] +; CHECK-NEXT: ret float [[LOG]] +; + %log = tail call nsz float @_Z4log2f(float %arg) #6 + ret float %log +} + +attributes #0 = { nobuiltin } +attributes #1 = { "no-builtins" } +attributes #2 = { nounwind memory(none) } +attributes #3 = { noinline } +attributes #4 = { optsize } +attributes #5 = { minsize } +attributes #6 = { strictfp } + +!0 = !{float 3.000000e+00} +!1 = !{i32 1234} Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-mad.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-mad.ll @@ -0,0 +1,258 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s + +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" + +declare float @_Z3madfff(float, float, float) +declare <2 x float> @_Z3madDv2_fS_S_(<2 x float>, <2 x float>, <2 x float>) +declare <3 x float> @_Z3madDv3_fS_S_(<3 x float>, <3 x float>, <3 x float>) +declare <4 x float> @_Z3madDv4_fS_S_(<4 x float>, <4 x float>, <4 x float>) +declare <8 x float> @_Z3madDv8_fS_S_(<8 x float>, <8 x float>, 
<8 x float>) +declare <16 x float> @_Z3madDv16_fS_S_(<16 x float>, <16 x float>, <16 x float>) +declare double @_Z3madddd(double, double, double) +declare <2 x double> @_Z3madDv2_dS_S_(<2 x double>, <2 x double>, <2 x double>) +declare <3 x double> @_Z3madDv3_dS_S_(<3 x double>, <3 x double>, <3 x double>) +declare <4 x double> @_Z3madDv4_dS_S_(<4 x double>, <4 x double>, <4 x double>) +declare <8 x double> @_Z3madDv8_dS_S_(<8 x double>, <8 x double>, <8 x double>) +declare <16 x double> @_Z3madDv16_dS_S_(<16 x double>, <16 x double>, <16 x double>) +declare half @_Z3madDhDhDh(half, half, half) +declare <2 x half> @_Z3madDv2_DhS_S_(<2 x half>, <2 x half>, <2 x half>) +declare <3 x half> @_Z3madDv3_DhS_S_(<3 x half>, <3 x half>, <3 x half>) +declare <4 x half> @_Z3madDv4_DhS_S_(<4 x half>, <4 x half>, <4 x half>) +declare <8 x half> @_Z3madDv8_DhS_S_(<8 x half>, <8 x half>, <8 x half>) +declare <16 x half> @_Z3madDv16_DhS_S_(<16 x half>, <16 x half>, <16 x half>) + +define float @test_mad_f32(float %x, float %y, float %z) { +; CHECK-LABEL: define float @test_mad_f32 +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) { +; CHECK-NEXT: [[MAD:%.*]] = tail call float @_Z3madfff(float [[X]], float [[Y]], float [[Z]]) +; CHECK-NEXT: ret float [[MAD]] +; + %mad = tail call float @_Z3madfff(float %x, float %y, float %z) + ret float %mad +} + +define <2 x float> @test_mad_v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z) { +; CHECK-LABEL: define <2 x float> @test_mad_v2f32 +; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) { +; CHECK-NEXT: [[MAD:%.*]] = tail call <2 x float> @_Z3madDv2_fS_S_(<2 x float> [[X]], <2 x float> [[Y]], <2 x float> [[Z]]) +; CHECK-NEXT: ret <2 x float> [[MAD]] +; + %mad = tail call <2 x float> @_Z3madDv2_fS_S_(<2 x float> %x, <2 x float> %y, <2 x float> %z) + ret <2 x float> %mad +} + +define <3 x float> @test_mad_v3f32(<3 x float> %x, <3 x float> %y, <3 x float> %z) { +; CHECK-LABEL: define <3 x 
float> @test_mad_v3f32 +; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]], <3 x float> [[Z:%.*]]) { +; CHECK-NEXT: [[MAD:%.*]] = tail call <3 x float> @_Z3madDv3_fS_S_(<3 x float> [[X]], <3 x float> [[Y]], <3 x float> [[Z]]) +; CHECK-NEXT: ret <3 x float> [[MAD]] +; + %mad = tail call <3 x float> @_Z3madDv3_fS_S_(<3 x float> %x, <3 x float> %y, <3 x float> %z) + ret <3 x float> %mad +} + +define <4 x float> @test_mad_v4f32(<4 x float> %x, <4 x float> %y, <4 x float> %z) { +; CHECK-LABEL: define <4 x float> @test_mad_v4f32 +; CHECK-SAME: (<4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) { +; CHECK-NEXT: [[MAD:%.*]] = tail call <4 x float> @_Z3madDv4_fS_S_(<4 x float> [[X]], <4 x float> [[Y]], <4 x float> [[Z]]) +; CHECK-NEXT: ret <4 x float> [[MAD]] +; + %mad = tail call <4 x float> @_Z3madDv4_fS_S_(<4 x float> %x, <4 x float> %y, <4 x float> %z) + ret <4 x float> %mad +} + +define <8 x float> @test_mad_v8f32(<8 x float> %x, <8 x float> %y, <8 x float> %z) { +; CHECK-LABEL: define <8 x float> @test_mad_v8f32 +; CHECK-SAME: (<8 x float> [[X:%.*]], <8 x float> [[Y:%.*]], <8 x float> [[Z:%.*]]) { +; CHECK-NEXT: [[MAD:%.*]] = tail call <8 x float> @_Z3madDv8_fS_S_(<8 x float> [[X]], <8 x float> [[Y]], <8 x float> [[Z]]) +; CHECK-NEXT: ret <8 x float> [[MAD]] +; + %mad = tail call <8 x float> @_Z3madDv8_fS_S_(<8 x float> %x, <8 x float> %y, <8 x float> %z) + ret <8 x float> %mad +} + +define <16 x float> @test_mad_v16f32(<16 x float> %x, <16 x float> %y, <16 x float> %z) { +; CHECK-LABEL: define <16 x float> @test_mad_v16f32 +; CHECK-SAME: (<16 x float> [[X:%.*]], <16 x float> [[Y:%.*]], <16 x float> [[Z:%.*]]) { +; CHECK-NEXT: [[MAD:%.*]] = tail call <16 x float> @_Z3madDv16_fS_S_(<16 x float> [[X]], <16 x float> [[Y]], <16 x float> [[Z]]) +; CHECK-NEXT: ret <16 x float> [[MAD]] +; + %mad = tail call <16 x float> @_Z3madDv16_fS_S_(<16 x float> %x, <16 x float> %y, <16 x float> %z) + ret <16 x float> %mad +} + +define double 
@test_mad_f64(double %x, double %y, double %z) { +; CHECK-LABEL: define double @test_mad_f64 +; CHECK-SAME: (double [[X:%.*]], double [[Y:%.*]], double [[Z:%.*]]) { +; CHECK-NEXT: [[MAD:%.*]] = tail call double @_Z3madddd(double [[X]], double [[Y]], double [[Z]]) +; CHECK-NEXT: ret double [[MAD]] +; + %mad = tail call double @_Z3madddd(double %x, double %y, double %z) + ret double %mad +} + +define <2 x double> @test_mad_v2f64(<2 x double> %x, <2 x double> %y, <2 x double> %z) { +; CHECK-LABEL: define <2 x double> @test_mad_v2f64 +; CHECK-SAME: (<2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) { +; CHECK-NEXT: [[MAD:%.*]] = tail call <2 x double> @_Z3madDv2_dS_S_(<2 x double> [[X]], <2 x double> [[Y]], <2 x double> [[Z]]) +; CHECK-NEXT: ret <2 x double> [[MAD]] +; + %mad = tail call <2 x double> @_Z3madDv2_dS_S_(<2 x double> %x, <2 x double> %y, <2 x double> %z) + ret <2 x double> %mad +} + +define <3 x double> @test_mad_v3f64(<3 x double> %x, <3 x double> %y, <3 x double> %z) { +; CHECK-LABEL: define <3 x double> @test_mad_v3f64 +; CHECK-SAME: (<3 x double> [[X:%.*]], <3 x double> [[Y:%.*]], <3 x double> [[Z:%.*]]) { +; CHECK-NEXT: [[MAD:%.*]] = tail call <3 x double> @_Z3madDv3_dS_S_(<3 x double> [[X]], <3 x double> [[Y]], <3 x double> [[Z]]) +; CHECK-NEXT: ret <3 x double> [[MAD]] +; + %mad = tail call <3 x double> @_Z3madDv3_dS_S_(<3 x double> %x, <3 x double> %y, <3 x double> %z) + ret <3 x double> %mad +} + +define <4 x double> @test_mad_v4f64(<4 x double> %x, <4 x double> %y, <4 x double> %z) { +; CHECK-LABEL: define <4 x double> @test_mad_v4f64 +; CHECK-SAME: (<4 x double> [[X:%.*]], <4 x double> [[Y:%.*]], <4 x double> [[Z:%.*]]) { +; CHECK-NEXT: [[MAD:%.*]] = tail call <4 x double> @_Z3madDv4_dS_S_(<4 x double> [[X]], <4 x double> [[Y]], <4 x double> [[Z]]) +; CHECK-NEXT: ret <4 x double> [[MAD]] +; + %mad = tail call <4 x double> @_Z3madDv4_dS_S_(<4 x double> %x, <4 x double> %y, <4 x double> %z) + ret <4 x double> %mad +} + 
+define <8 x double> @test_mad_v8f64(<8 x double> %x, <8 x double> %y, <8 x double> %z) { +; CHECK-LABEL: define <8 x double> @test_mad_v8f64 +; CHECK-SAME: (<8 x double> [[X:%.*]], <8 x double> [[Y:%.*]], <8 x double> [[Z:%.*]]) { +; CHECK-NEXT: [[MAD:%.*]] = tail call <8 x double> @_Z3madDv8_dS_S_(<8 x double> [[X]], <8 x double> [[Y]], <8 x double> [[Z]]) +; CHECK-NEXT: ret <8 x double> [[MAD]] +; + %mad = tail call <8 x double> @_Z3madDv8_dS_S_(<8 x double> %x, <8 x double> %y, <8 x double> %z) + ret <8 x double> %mad +} + +define <16 x double> @test_mad_v16f64(<16 x double> %x, <16 x double> %y, <16 x double> %z) { +; CHECK-LABEL: define <16 x double> @test_mad_v16f64 +; CHECK-SAME: (<16 x double> [[X:%.*]], <16 x double> [[Y:%.*]], <16 x double> [[Z:%.*]]) { +; CHECK-NEXT: [[MAD:%.*]] = tail call <16 x double> @_Z3madDv16_dS_S_(<16 x double> [[X]], <16 x double> [[Y]], <16 x double> [[Z]]) +; CHECK-NEXT: ret <16 x double> [[MAD]] +; + %mad = tail call <16 x double> @_Z3madDv16_dS_S_(<16 x double> %x, <16 x double> %y, <16 x double> %z) + ret <16 x double> %mad +} + +define half @test_mad_f16(half %x, half %y, half %z) { +; CHECK-LABEL: define half @test_mad_f16 +; CHECK-SAME: (half [[X:%.*]], half [[Y:%.*]], half [[Z:%.*]]) { +; CHECK-NEXT: [[MAD:%.*]] = tail call half @_Z3madDhDhDh(half [[X]], half [[Y]], half [[Z]]) +; CHECK-NEXT: ret half [[MAD]] +; + %mad = tail call half @_Z3madDhDhDh(half %x, half %y, half %z) + ret half %mad +} + +define <2 x half> @test_mad_v2f16(<2 x half> %x, <2 x half> %y, <2 x half> %z) { +; CHECK-LABEL: define <2 x half> @test_mad_v2f16 +; CHECK-SAME: (<2 x half> [[X:%.*]], <2 x half> [[Y:%.*]], <2 x half> [[Z:%.*]]) { +; CHECK-NEXT: [[MAD:%.*]] = tail call <2 x half> @_Z3madDv2_DhS_S_(<2 x half> [[X]], <2 x half> [[Y]], <2 x half> [[Z]]) +; CHECK-NEXT: ret <2 x half> [[MAD]] +; + %mad = tail call <2 x half> @_Z3madDv2_DhS_S_(<2 x half> %x, <2 x half> %y, <2 x half> %z) + ret <2 x half> %mad +} + +define <3 x half> 
@test_mad_v3f16(<3 x half> %x, <3 x half> %y, <3 x half> %z) { +; CHECK-LABEL: define <3 x half> @test_mad_v3f16 +; CHECK-SAME: (<3 x half> [[X:%.*]], <3 x half> [[Y:%.*]], <3 x half> [[Z:%.*]]) { +; CHECK-NEXT: [[MAD:%.*]] = tail call <3 x half> @_Z3madDv3_DhS_S_(<3 x half> [[X]], <3 x half> [[Y]], <3 x half> [[Z]]) +; CHECK-NEXT: ret <3 x half> [[MAD]] +; + %mad = tail call <3 x half> @_Z3madDv3_DhS_S_(<3 x half> %x, <3 x half> %y, <3 x half> %z) + ret <3 x half> %mad +} + +define <4 x half> @test_mad_v4f16(<4 x half> %x, <4 x half> %y, <4 x half> %z) { +; CHECK-LABEL: define <4 x half> @test_mad_v4f16 +; CHECK-SAME: (<4 x half> [[X:%.*]], <4 x half> [[Y:%.*]], <4 x half> [[Z:%.*]]) { +; CHECK-NEXT: [[MAD:%.*]] = tail call <4 x half> @_Z3madDv4_DhS_S_(<4 x half> [[X]], <4 x half> [[Y]], <4 x half> [[Z]]) +; CHECK-NEXT: ret <4 x half> [[MAD]] +; + %mad = tail call <4 x half> @_Z3madDv4_DhS_S_(<4 x half> %x, <4 x half> %y, <4 x half> %z) + ret <4 x half> %mad +} + +define <8 x half> @test_mad_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z) { +; CHECK-LABEL: define <8 x half> @test_mad_v8f16 +; CHECK-SAME: (<8 x half> [[X:%.*]], <8 x half> [[Y:%.*]], <8 x half> [[Z:%.*]]) { +; CHECK-NEXT: [[MAD:%.*]] = tail call <8 x half> @_Z3madDv8_DhS_S_(<8 x half> [[X]], <8 x half> [[Y]], <8 x half> [[Z]]) +; CHECK-NEXT: ret <8 x half> [[MAD]] +; + %mad = tail call <8 x half> @_Z3madDv8_DhS_S_(<8 x half> %x, <8 x half> %y, <8 x half> %z) + ret <8 x half> %mad +} + +define <16 x half> @test_mad_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z) { +; CHECK-LABEL: define <16 x half> @test_mad_v16f16 +; CHECK-SAME: (<16 x half> [[X:%.*]], <16 x half> [[Y:%.*]], <16 x half> [[Z:%.*]]) { +; CHECK-NEXT: [[MAD:%.*]] = tail call <16 x half> @_Z3madDv16_DhS_S_(<16 x half> [[X]], <16 x half> [[Y]], <16 x half> [[Z]]) +; CHECK-NEXT: ret <16 x half> [[MAD]] +; + %mad = tail call <16 x half> @_Z3madDv16_DhS_S_(<16 x half> %x, <16 x half> %y, <16 x half> %z) + ret <16 x half> %mad +} + 
+define float @test_mad_f32_fast(float %x, float %y, float %z) { +; CHECK-LABEL: define float @test_mad_f32_fast +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) { +; CHECK-NEXT: [[MAD:%.*]] = tail call fast float @_Z3madfff(float [[X]], float [[Y]], float [[Z]]) +; CHECK-NEXT: ret float [[MAD]] +; + %mad = tail call fast float @_Z3madfff(float %x, float %y, float %z) + ret float %mad +} + +define float @test_mad_f32_noinline(float %x, float %y, float %z) { +; CHECK-LABEL: define float @test_mad_f32_noinline +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) { +; CHECK-NEXT: [[MAD:%.*]] = tail call fast float @_Z3madfff(float [[X]], float [[Y]], float [[Z]]) #[[ATTR2:[0-9]+]] +; CHECK-NEXT: ret float [[MAD]] +; + %mad = tail call fast float @_Z3madfff(float %x, float %y, float %z) #1 + ret float %mad +} + +define float @test_mad_f32_fast_minsize(float %x, float %y, float %z) #0 { +; CHECK-LABEL: define float @test_mad_f32_fast_minsize +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[MAD:%.*]] = tail call fast float @_Z3madfff(float [[X]], float [[Y]], float [[Z]]) +; CHECK-NEXT: ret float [[MAD]] +; + %mad = tail call fast float @_Z3madfff(float %x, float %y, float %z) + ret float %mad +} + +define float @test_mad_f32_fast_strictfp(float %x, float %y, float %z) #2 { +; CHECK-LABEL: define float @test_mad_f32_fast_strictfp +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[MAD:%.*]] = tail call nnan nsz float @_Z3madfff(float [[X]], float [[Y]], float [[Z]]) #[[ATTR1]] +; CHECK-NEXT: ret float [[MAD]] +; + %mad = tail call nsz nnan float @_Z3madfff(float %x, float %y, float %z) #2 + ret float %mad +} + +define float @test_mad_f32_fast_nobuiltin(float %x, float %y, float %z) { +; CHECK-LABEL: define float @test_mad_f32_fast_nobuiltin +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) { +; CHECK-NEXT: [[MAD:%.*]] 
= tail call fast float @_Z3madfff(float [[X]], float [[Y]], float [[Z]]) #[[ATTR3:[0-9]+]] +; CHECK-NEXT: ret float [[MAD]] +; + %mad = tail call fast float @_Z3madfff(float %x, float %y, float %z) #3 + ret float %mad +} + +attributes #0 = { minsize } +attributes #1 = { noinline } +attributes #2 = { strictfp } +attributes #3 = { nobuiltin } Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rint.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rint.ll @@ -0,0 +1,327 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s + +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" + +declare float @_Z4rintf(float) +declare <2 x float> @_Z4rintDv2_f(<2 x float>) +declare <3 x float> @_Z4rintDv3_f(<3 x float>) +declare <4 x float> @_Z4rintDv4_f(<4 x float>) +declare <8 x float> @_Z4rintDv8_f(<8 x float>) +declare <16 x float> @_Z4rintDv16_f(<16 x float>) + +declare double @_Z4rintd(double) +declare <2 x double> @_Z4rintDv2_d(<2 x double>) +declare <3 x double> @_Z4rintDv3_d(<3 x double>) +declare <4 x double> @_Z4rintDv4_d(<4 x double>) +declare <8 x double> @_Z4rintDv8_d(<8 x double>) +declare <16 x double> @_Z4rintDv16_d(<16 x double>) + +declare half @_Z4rintDh(half) +declare <2 x half> @_Z4rintDv2_Dh(<2 x half>) +declare <3 x half> @_Z4rintDv3_Dh(<3 x half>) +declare <4 x half> @_Z4rintDv4_Dh(<4 x half>) +declare <8 x half> @_Z4rintDv8_Dh(<8 x half>) +declare <16 x half> @_Z4rintDv16_Dh(<16 x half>) + +define float @test_rint_f32(float %arg) { +; CHECK-LABEL: define float @test_rint_f32 +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call float 
@_Z4rintf(float [[ARG]]) +; CHECK-NEXT: ret float [[RINT]] +; + %rint = tail call float @_Z4rintf(float %arg) + ret float %rint +} + +define <2 x float> @test_rint_v2f32(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_rint_v2f32 +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call <2 x float> @_Z4rintDv2_f(<2 x float> [[ARG]]) +; CHECK-NEXT: ret <2 x float> [[RINT]] +; + %rint = tail call <2 x float> @_Z4rintDv2_f(<2 x float> %arg) + ret <2 x float> %rint +} + +define <3 x float> @test_rint_v3f32(<3 x float> %arg) { +; CHECK-LABEL: define <3 x float> @test_rint_v3f32 +; CHECK-SAME: (<3 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call <3 x float> @_Z4rintDv3_f(<3 x float> [[ARG]]) +; CHECK-NEXT: ret <3 x float> [[RINT]] +; + %rint = tail call <3 x float> @_Z4rintDv3_f(<3 x float> %arg) + ret <3 x float> %rint +} + +define <4 x float> @test_rint_v4f32(<4 x float> %arg) { +; CHECK-LABEL: define <4 x float> @test_rint_v4f32 +; CHECK-SAME: (<4 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call <4 x float> @_Z4rintDv4_f(<4 x float> [[ARG]]) +; CHECK-NEXT: ret <4 x float> [[RINT]] +; + %rint = tail call <4 x float> @_Z4rintDv4_f(<4 x float> %arg) + ret <4 x float> %rint +} + +define <8 x float> @test_rint_v8f32(<8 x float> %arg) { +; CHECK-LABEL: define <8 x float> @test_rint_v8f32 +; CHECK-SAME: (<8 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call <8 x float> @_Z4rintDv8_f(<8 x float> [[ARG]]) +; CHECK-NEXT: ret <8 x float> [[RINT]] +; + %rint = tail call <8 x float> @_Z4rintDv8_f(<8 x float> %arg) + ret <8 x float> %rint +} + +define <16 x float> @test_rint_v16f32(<16 x float> %arg) { +; CHECK-LABEL: define <16 x float> @test_rint_v16f32 +; CHECK-SAME: (<16 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call <16 x float> @_Z4rintDv16_f(<16 x float> [[ARG]]) +; CHECK-NEXT: ret <16 x float> [[RINT]] +; + %rint = tail call <16 x float> @_Z4rintDv16_f(<16 x float> %arg) + ret 
<16 x float> %rint +} + +define double @test_rint_f64(double %arg) { +; CHECK-LABEL: define double @test_rint_f64 +; CHECK-SAME: (double [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call double @_Z4rintd(double [[ARG]]) +; CHECK-NEXT: ret double [[RINT]] +; + %rint = tail call double @_Z4rintd(double %arg) + ret double %rint +} + +define <2 x double> @test_rint_v2f64(<2 x double> %arg) { +; CHECK-LABEL: define <2 x double> @test_rint_v2f64 +; CHECK-SAME: (<2 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call <2 x double> @_Z4rintDv2_d(<2 x double> [[ARG]]) +; CHECK-NEXT: ret <2 x double> [[RINT]] +; + %rint = tail call <2 x double> @_Z4rintDv2_d(<2 x double> %arg) + ret <2 x double> %rint +} + +define <3 x double> @test_rint_v3f64(<3 x double> %arg) { +; CHECK-LABEL: define <3 x double> @test_rint_v3f64 +; CHECK-SAME: (<3 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call <3 x double> @_Z4rintDv3_d(<3 x double> [[ARG]]) +; CHECK-NEXT: ret <3 x double> [[RINT]] +; + %rint = tail call <3 x double> @_Z4rintDv3_d(<3 x double> %arg) + ret <3 x double> %rint +} + +define <4 x double> @test_rint_v4f64(<4 x double> %arg) { +; CHECK-LABEL: define <4 x double> @test_rint_v4f64 +; CHECK-SAME: (<4 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call <4 x double> @_Z4rintDv4_d(<4 x double> [[ARG]]) +; CHECK-NEXT: ret <4 x double> [[RINT]] +; + %rint = tail call <4 x double> @_Z4rintDv4_d(<4 x double> %arg) + ret <4 x double> %rint +} + +define <8 x double> @test_rint_v8f64(<8 x double> %arg) { +; CHECK-LABEL: define <8 x double> @test_rint_v8f64 +; CHECK-SAME: (<8 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call <8 x double> @_Z4rintDv8_d(<8 x double> [[ARG]]) +; CHECK-NEXT: ret <8 x double> [[RINT]] +; + %rint = tail call <8 x double> @_Z4rintDv8_d(<8 x double> %arg) + ret <8 x double> %rint +} + +define <16 x double> @test_rint_v16f64(<16 x double> %arg) { +; CHECK-LABEL: define <16 x double> @test_rint_v16f64 +; 
CHECK-SAME: (<16 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call <16 x double> @_Z4rintDv16_d(<16 x double> [[ARG]]) +; CHECK-NEXT: ret <16 x double> [[RINT]] +; + %rint = tail call <16 x double> @_Z4rintDv16_d(<16 x double> %arg) + ret <16 x double> %rint +} + +define half @test_rint_f16(half %arg) { +; CHECK-LABEL: define half @test_rint_f16 +; CHECK-SAME: (half [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call half @_Z4rintDh(half [[ARG]]) +; CHECK-NEXT: ret half [[RINT]] +; + %rint = tail call half @_Z4rintDh(half %arg) + ret half %rint +} + +define <2 x half> @test_rint_v2f16(<2 x half> %arg) { +; CHECK-LABEL: define <2 x half> @test_rint_v2f16 +; CHECK-SAME: (<2 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call <2 x half> @_Z4rintDv2_Dh(<2 x half> [[ARG]]) +; CHECK-NEXT: ret <2 x half> [[RINT]] +; + %rint = tail call <2 x half> @_Z4rintDv2_Dh(<2 x half> %arg) + ret <2 x half> %rint +} + +define <3 x half> @test_rint_v3f16(<3 x half> %arg) { +; CHECK-LABEL: define <3 x half> @test_rint_v3f16 +; CHECK-SAME: (<3 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call <3 x half> @_Z4rintDv3_Dh(<3 x half> [[ARG]]) +; CHECK-NEXT: ret <3 x half> [[RINT]] +; + %rint = tail call <3 x half> @_Z4rintDv3_Dh(<3 x half> %arg) + ret <3 x half> %rint +} + +define <4 x half> @test_rint_v4f16(<4 x half> %arg) { +; CHECK-LABEL: define <4 x half> @test_rint_v4f16 +; CHECK-SAME: (<4 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call <4 x half> @_Z4rintDv4_Dh(<4 x half> [[ARG]]) +; CHECK-NEXT: ret <4 x half> [[RINT]] +; + %rint = tail call <4 x half> @_Z4rintDv4_Dh(<4 x half> %arg) + ret <4 x half> %rint +} + +define <8 x half> @test_rint_v8f16(<8 x half> %arg) { +; CHECK-LABEL: define <8 x half> @test_rint_v8f16 +; CHECK-SAME: (<8 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call <8 x half> @_Z4rintDv8_Dh(<8 x half> [[ARG]]) +; CHECK-NEXT: ret <8 x half> [[RINT]] +; + %rint = tail call <8 x half> @_Z4rintDv8_Dh(<8 
x half> %arg) + ret <8 x half> %rint +} + +define <16 x half> @test_rint_v16f16(<16 x half> %arg) { +; CHECK-LABEL: define <16 x half> @test_rint_v16f16 +; CHECK-SAME: (<16 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call <16 x half> @_Z4rintDv16_Dh(<16 x half> [[ARG]]) +; CHECK-NEXT: ret <16 x half> [[RINT]] +; + %rint = tail call <16 x half> @_Z4rintDv16_Dh(<16 x half> %arg) + ret <16 x half> %rint +} + +define float @test_rint_f32_nobuiltin_callsite(float %arg) { +; CHECK-LABEL: define float @test_rint_f32_nobuiltin_callsite +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call float @_Z4rintf(float [[ARG]]) #[[ATTR3:[0-9]+]] +; CHECK-NEXT: ret float [[RINT]] +; + %rint = tail call float @_Z4rintf(float %arg) #0 + ret float %rint +} + +define <2 x float> @test_rint_v2f32_nobuiltin_callsite(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_rint_v2f32_nobuiltin_callsite +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call <2 x float> @_Z4rintDv2_f(<2 x float> [[ARG]]) #[[ATTR3]] +; CHECK-NEXT: ret <2 x float> [[RINT]] +; + %rint = tail call <2 x float> @_Z4rintDv2_f(<2 x float> %arg) #0 + ret <2 x float> %rint +} + +; "no-builtins" should be ignored +define float @test_rint_f32_nobuiltins(float %arg) #1 { +; CHECK-LABEL: define float @test_rint_f32_nobuiltins +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[RINT:%.*]] = tail call float @_Z4rintf(float [[ARG]]) #[[ATTR3]] +; CHECK-NEXT: ret float [[RINT]] +; + %rint = tail call float @_Z4rintf(float %arg) #0 + ret float %rint +} + +define <2 x float> @test_rint_v2f32_nobuiltins(<2 x float> %arg) #1 { +; CHECK-LABEL: define <2 x float> @test_rint_v2f32_nobuiltins +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RINT:%.*]] = tail call <2 x float> @_Z4rintDv2_f(<2 x float> [[ARG]]) #[[ATTR3]] +; CHECK-NEXT: ret <2 x float> [[RINT]] +; + %rint = tail call <2 x float> @_Z4rintDv2_f(<2 x float> %arg) 
#0 + ret <2 x float> %rint +} + +define float @test_rint_f32_preserve_flags(float %arg) { +; CHECK-LABEL: define float @test_rint_f32_preserve_flags +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call nnan ninf float @_Z4rintf(float [[ARG]]) +; CHECK-NEXT: ret float [[RINT]] +; + %rint = tail call nnan ninf float @_Z4rintf(float %arg) + ret float %rint +} + +define <2 x float> @test_rint_v2f32_preserve_flags(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_rint_v2f32_preserve_flags +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call nnan nsz contract <2 x float> @_Z4rintDv2_f(<2 x float> [[ARG]]) +; CHECK-NEXT: ret <2 x float> [[RINT]] +; + %rint = tail call contract nsz nnan <2 x float> @_Z4rintDv2_f(<2 x float> %arg) + ret <2 x float> %rint +} + +define float @test_rint_f32_preserve_flags_md(float %arg) { +; CHECK-LABEL: define float @test_rint_f32_preserve_flags_md +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call nnan ninf float @_Z4rintf(float [[ARG]]), !foo !0 +; CHECK-NEXT: ret float [[RINT]] +; + %rint = tail call nnan ninf float @_Z4rintf(float %arg), !foo !0 + ret float %rint +} + +define <2 x float> @test_rint_v2f32_preserve_flags_md(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_rint_v2f32_preserve_flags_md +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call nnan nsz contract <2 x float> @_Z4rintDv2_f(<2 x float> [[ARG]]), !foo !0 +; CHECK-NEXT: ret <2 x float> [[RINT]] +; + %rint = tail call contract nsz nnan <2 x float> @_Z4rintDv2_f(<2 x float> %arg), !foo !0 + ret <2 x float> %rint +} + +; Test the libm name, not a recognized opencl builtin. 
+declare float @rintf(float) #2 +declare double @rint(double) #2 + +define float @test_libm_rint_f32(float %arg) { +; CHECK-LABEL: define float @test_libm_rint_f32 +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call float @rintf(float [[ARG]]) +; CHECK-NEXT: ret float [[RINT]] +; + %rint = tail call float @rintf(float %arg) + ret float %rint +} + +define double @test_libm_rint_f64(double %arg) { +; CHECK-LABEL: define double @test_libm_rint_f64 +; CHECK-SAME: (double [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call double @rint(double [[ARG]]) +; CHECK-NEXT: ret double [[RINT]] +; + %rint = tail call double @rint(double %arg) + ret double %rint +} + +define float @test_rint_f32_strictfp(float %arg) #3 { +; CHECK-LABEL: define float @test_rint_f32_strictfp +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: [[RINT:%.*]] = tail call nnan float @_Z4rintf(float [[ARG]]) #[[ATTR2]] +; CHECK-NEXT: ret float [[RINT]] +; + %rint = tail call nnan float @_Z4rintf(float %arg) #3 + ret float %rint +} + +attributes #0 = { nobuiltin } +attributes #1 = { "no-builtins" } +attributes #2 = { nounwind memory(none) } +attributes #3 = { strictfp } + +!0 = !{i32 1234} Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-round.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-round.ll @@ -0,0 +1,327 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s + +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" + +declare float @_Z5roundf(float) +declare <2 x float> @_Z5roundDv2_f(<2 x float>) +declare <3 x float> @_Z5roundDv3_f(<3 x float>) 
+declare <4 x float> @_Z5roundDv4_f(<4 x float>) +declare <8 x float> @_Z5roundDv8_f(<8 x float>) +declare <16 x float> @_Z5roundDv16_f(<16 x float>) + +declare double @_Z5roundd(double) +declare <2 x double> @_Z5roundDv2_d(<2 x double>) +declare <3 x double> @_Z5roundDv3_d(<3 x double>) +declare <4 x double> @_Z5roundDv4_d(<4 x double>) +declare <8 x double> @_Z5roundDv8_d(<8 x double>) +declare <16 x double> @_Z5roundDv16_d(<16 x double>) + +declare half @_Z5roundDh(half) +declare <2 x half> @_Z5roundDv2_Dh(<2 x half>) +declare <3 x half> @_Z5roundDv3_Dh(<3 x half>) +declare <4 x half> @_Z5roundDv4_Dh(<4 x half>) +declare <8 x half> @_Z5roundDv8_Dh(<8 x half>) +declare <16 x half> @_Z5roundDv16_Dh(<16 x half>) + +define float @test_rint_f32(float %arg) { +; CHECK-LABEL: define float @test_rint_f32 +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call float @_Z5roundf(float [[ARG]]) +; CHECK-NEXT: ret float [[RINT]] +; + %rint = tail call float @_Z5roundf(float %arg) + ret float %rint +} + +define <2 x float> @test_rint_v2f32(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_rint_v2f32 +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call <2 x float> @_Z5roundDv2_f(<2 x float> [[ARG]]) +; CHECK-NEXT: ret <2 x float> [[RINT]] +; + %rint = tail call <2 x float> @_Z5roundDv2_f(<2 x float> %arg) + ret <2 x float> %rint +} + +define <3 x float> @test_rint_v3f32(<3 x float> %arg) { +; CHECK-LABEL: define <3 x float> @test_rint_v3f32 +; CHECK-SAME: (<3 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call <3 x float> @_Z5roundDv3_f(<3 x float> [[ARG]]) +; CHECK-NEXT: ret <3 x float> [[RINT]] +; + %rint = tail call <3 x float> @_Z5roundDv3_f(<3 x float> %arg) + ret <3 x float> %rint +} + +define <4 x float> @test_rint_v4f32(<4 x float> %arg) { +; CHECK-LABEL: define <4 x float> @test_rint_v4f32 +; CHECK-SAME: (<4 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call <4 x float> 
@_Z5roundDv4_f(<4 x float> [[ARG]]) +; CHECK-NEXT: ret <4 x float> [[RINT]] +; + %rint = tail call <4 x float> @_Z5roundDv4_f(<4 x float> %arg) + ret <4 x float> %rint +} + +define <8 x float> @test_rint_v8f32(<8 x float> %arg) { +; CHECK-LABEL: define <8 x float> @test_rint_v8f32 +; CHECK-SAME: (<8 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call <8 x float> @_Z5roundDv8_f(<8 x float> [[ARG]]) +; CHECK-NEXT: ret <8 x float> [[RINT]] +; + %rint = tail call <8 x float> @_Z5roundDv8_f(<8 x float> %arg) + ret <8 x float> %rint +} + +define <16 x float> @test_rint_v16f32(<16 x float> %arg) { +; CHECK-LABEL: define <16 x float> @test_rint_v16f32 +; CHECK-SAME: (<16 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call <16 x float> @_Z5roundDv16_f(<16 x float> [[ARG]]) +; CHECK-NEXT: ret <16 x float> [[RINT]] +; + %rint = tail call <16 x float> @_Z5roundDv16_f(<16 x float> %arg) + ret <16 x float> %rint +} + +define double @test_rint_f64(double %arg) { +; CHECK-LABEL: define double @test_rint_f64 +; CHECK-SAME: (double [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call double @_Z5roundd(double [[ARG]]) +; CHECK-NEXT: ret double [[RINT]] +; + %rint = tail call double @_Z5roundd(double %arg) + ret double %rint +} + +define <2 x double> @test_rint_v2f64(<2 x double> %arg) { +; CHECK-LABEL: define <2 x double> @test_rint_v2f64 +; CHECK-SAME: (<2 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call <2 x double> @_Z5roundDv2_d(<2 x double> [[ARG]]) +; CHECK-NEXT: ret <2 x double> [[RINT]] +; + %rint = tail call <2 x double> @_Z5roundDv2_d(<2 x double> %arg) + ret <2 x double> %rint +} + +define <3 x double> @test_rint_v3f64(<3 x double> %arg) { +; CHECK-LABEL: define <3 x double> @test_rint_v3f64 +; CHECK-SAME: (<3 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call <3 x double> @_Z5roundDv3_d(<3 x double> [[ARG]]) +; CHECK-NEXT: ret <3 x double> [[RINT]] +; + %rint = tail call <3 x double> @_Z5roundDv3_d(<3 x double> %arg) 
+ ret <3 x double> %rint +} + +define <4 x double> @test_rint_v4f64(<4 x double> %arg) { +; CHECK-LABEL: define <4 x double> @test_rint_v4f64 +; CHECK-SAME: (<4 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call <4 x double> @_Z5roundDv4_d(<4 x double> [[ARG]]) +; CHECK-NEXT: ret <4 x double> [[RINT]] +; + %rint = tail call <4 x double> @_Z5roundDv4_d(<4 x double> %arg) + ret <4 x double> %rint +} + +define <8 x double> @test_rint_v8f64(<8 x double> %arg) { +; CHECK-LABEL: define <8 x double> @test_rint_v8f64 +; CHECK-SAME: (<8 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call <8 x double> @_Z5roundDv8_d(<8 x double> [[ARG]]) +; CHECK-NEXT: ret <8 x double> [[RINT]] +; + %rint = tail call <8 x double> @_Z5roundDv8_d(<8 x double> %arg) + ret <8 x double> %rint +} + +define <16 x double> @test_rint_v16f64(<16 x double> %arg) { +; CHECK-LABEL: define <16 x double> @test_rint_v16f64 +; CHECK-SAME: (<16 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call <16 x double> @_Z5roundDv16_d(<16 x double> [[ARG]]) +; CHECK-NEXT: ret <16 x double> [[RINT]] +; + %rint = tail call <16 x double> @_Z5roundDv16_d(<16 x double> %arg) + ret <16 x double> %rint +} + +define half @test_rint_f16(half %arg) { +; CHECK-LABEL: define half @test_rint_f16 +; CHECK-SAME: (half [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call half @_Z5roundDh(half [[ARG]]) +; CHECK-NEXT: ret half [[RINT]] +; + %rint = tail call half @_Z5roundDh(half %arg) + ret half %rint +} + +define <2 x half> @test_rint_v2f16(<2 x half> %arg) { +; CHECK-LABEL: define <2 x half> @test_rint_v2f16 +; CHECK-SAME: (<2 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call <2 x half> @_Z5roundDv2_Dh(<2 x half> [[ARG]]) +; CHECK-NEXT: ret <2 x half> [[RINT]] +; + %rint = tail call <2 x half> @_Z5roundDv2_Dh(<2 x half> %arg) + ret <2 x half> %rint +} + +define <3 x half> @test_rint_v3f16(<3 x half> %arg) { +; CHECK-LABEL: define <3 x half> @test_rint_v3f16 +; CHECK-SAME: (<3 x 
half> [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call <3 x half> @_Z5roundDv3_Dh(<3 x half> [[ARG]]) +; CHECK-NEXT: ret <3 x half> [[RINT]] +; + %rint = tail call <3 x half> @_Z5roundDv3_Dh(<3 x half> %arg) + ret <3 x half> %rint +} + +define <4 x half> @test_rint_v4f16(<4 x half> %arg) { +; CHECK-LABEL: define <4 x half> @test_rint_v4f16 +; CHECK-SAME: (<4 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call <4 x half> @_Z5roundDv4_Dh(<4 x half> [[ARG]]) +; CHECK-NEXT: ret <4 x half> [[RINT]] +; + %rint = tail call <4 x half> @_Z5roundDv4_Dh(<4 x half> %arg) + ret <4 x half> %rint +} + +define <8 x half> @test_rint_v8f16(<8 x half> %arg) { +; CHECK-LABEL: define <8 x half> @test_rint_v8f16 +; CHECK-SAME: (<8 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call <8 x half> @_Z5roundDv8_Dh(<8 x half> [[ARG]]) +; CHECK-NEXT: ret <8 x half> [[RINT]] +; + %rint = tail call <8 x half> @_Z5roundDv8_Dh(<8 x half> %arg) + ret <8 x half> %rint +} + +define <16 x half> @test_rint_v16f16(<16 x half> %arg) { +; CHECK-LABEL: define <16 x half> @test_rint_v16f16 +; CHECK-SAME: (<16 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call <16 x half> @_Z5roundDv16_Dh(<16 x half> [[ARG]]) +; CHECK-NEXT: ret <16 x half> [[RINT]] +; + %rint = tail call <16 x half> @_Z5roundDv16_Dh(<16 x half> %arg) + ret <16 x half> %rint +} + +define float @test_rint_f32_nobuiltin_callsite(float %arg) { +; CHECK-LABEL: define float @test_rint_f32_nobuiltin_callsite +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call float @_Z5roundf(float [[ARG]]) #[[ATTR3:[0-9]+]] +; CHECK-NEXT: ret float [[RINT]] +; + %rint = tail call float @_Z5roundf(float %arg) #0 + ret float %rint +} + +define <2 x float> @test_rint_v2f32_nobuiltin_callsite(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_rint_v2f32_nobuiltin_callsite +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call <2 x float> @_Z5roundDv2_f(<2 x float> 
[[ARG]]) #[[ATTR3]] +; CHECK-NEXT: ret <2 x float> [[RINT]] +; + %rint = tail call <2 x float> @_Z5roundDv2_f(<2 x float> %arg) #0 + ret <2 x float> %rint +} + +; "no-builtins" should be ignored +define float @test_rint_f32_nobuiltins(float %arg) #1 { +; CHECK-LABEL: define float @test_rint_f32_nobuiltins +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[RINT:%.*]] = tail call float @_Z5roundf(float [[ARG]]) #[[ATTR3]] +; CHECK-NEXT: ret float [[RINT]] +; + %rint = tail call float @_Z5roundf(float %arg) #0 + ret float %rint +} + +define <2 x float> @test_rint_v2f32_nobuiltins(<2 x float> %arg) #1 { +; CHECK-LABEL: define <2 x float> @test_rint_v2f32_nobuiltins +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RINT:%.*]] = tail call <2 x float> @_Z5roundDv2_f(<2 x float> [[ARG]]) #[[ATTR3]] +; CHECK-NEXT: ret <2 x float> [[RINT]] +; + %rint = tail call <2 x float> @_Z5roundDv2_f(<2 x float> %arg) #0 + ret <2 x float> %rint +} + +define float @test_rint_f32_preserve_flags(float %arg) { +; CHECK-LABEL: define float @test_rint_f32_preserve_flags +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call nnan ninf float @_Z5roundf(float [[ARG]]) +; CHECK-NEXT: ret float [[RINT]] +; + %rint = tail call nnan ninf float @_Z5roundf(float %arg) + ret float %rint +} + +define <2 x float> @test_rint_v2f32_preserve_flags(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_rint_v2f32_preserve_flags +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call nnan nsz contract <2 x float> @_Z5roundDv2_f(<2 x float> [[ARG]]) +; CHECK-NEXT: ret <2 x float> [[RINT]] +; + %rint = tail call contract nsz nnan <2 x float> @_Z5roundDv2_f(<2 x float> %arg) + ret <2 x float> %rint +} + +define float @test_rint_f32_preserve_flags_md(float %arg) { +; CHECK-LABEL: define float @test_rint_f32_preserve_flags_md +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call nnan ninf float 
@_Z5roundf(float [[ARG]]), !foo !0 +; CHECK-NEXT: ret float [[RINT]] +; + %rint = tail call nnan ninf float @_Z5roundf(float %arg), !foo !0 + ret float %rint +} + +define <2 x float> @test_rint_v2f32_preserve_flags_md(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_rint_v2f32_preserve_flags_md +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call nnan nsz contract <2 x float> @_Z5roundDv2_f(<2 x float> [[ARG]]), !foo !0 +; CHECK-NEXT: ret <2 x float> [[RINT]] +; + %rint = tail call contract nsz nnan <2 x float> @_Z5roundDv2_f(<2 x float> %arg), !foo !0 + ret <2 x float> %rint +} + +; Test the libm names (roundf/round), which are not recognized OpenCL builtins; the calls are expected to be left unchanged. +declare float @roundf(float) #2 +declare double @round(double) #2 + +define float @test_libm_rint_f32(float %arg) { +; CHECK-LABEL: define float @test_libm_rint_f32 +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call float @roundf(float [[ARG]]) +; CHECK-NEXT: ret float [[RINT]] +; + %rint = tail call float @roundf(float %arg) + ret float %rint +} + +define double @test_libm_rint_f64(double %arg) { +; CHECK-LABEL: define double @test_libm_rint_f64 +; CHECK-SAME: (double [[ARG:%.*]]) { +; CHECK-NEXT: [[RINT:%.*]] = tail call double @round(double [[ARG]]) +; CHECK-NEXT: ret double [[RINT]] +; + %rint = tail call double @round(double %arg) + ret double %rint +} + +define float @test_rint_f32_strictfp(float %arg) #3 { +; CHECK-LABEL: define float @test_rint_f32_strictfp +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: [[RINT:%.*]] = tail call nnan float @_Z5roundf(float [[ARG]]) #[[ATTR2]] +; CHECK-NEXT: ret float [[RINT]] +; + %rint = tail call nnan float @_Z5roundf(float %arg) #3 + ret float %rint +} + +attributes #0 = { nobuiltin } +attributes #1 = { "no-builtins" } +attributes #2 = { nounwind memory(none) } +attributes #3 = { strictfp } + +!0 = !{i32 1234} Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sqrt.ll 
=================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sqrt.ll @@ -0,0 +1,457 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s + +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" + +declare float @_Z4sqrtf(float) +declare <2 x float> @_Z4sqrtDv2_f(<2 x float>) +declare <3 x float> @_Z4sqrtDv3_f(<3 x float>) +declare <4 x float> @_Z4sqrtDv4_f(<4 x float>) +declare <8 x float> @_Z4sqrtDv8_f(<8 x float>) +declare <16 x float> @_Z4sqrtDv16_f(<16 x float>) + +declare double @_Z4sqrtd(double) +declare <2 x double> @_Z4sqrtDv2_d(<2 x double>) +declare <3 x double> @_Z4sqrtDv3_d(<3 x double>) +declare <4 x double> @_Z4sqrtDv4_d(<4 x double>) +declare <8 x double> @_Z4sqrtDv8_d(<8 x double>) +declare <16 x double> @_Z4sqrtDv16_d(<16 x double>) + +declare half @_Z4sqrtDh(half) +declare <2 x half> @_Z4sqrtDv2_Dh(<2 x half>) +declare <3 x half> @_Z4sqrtDv3_Dh(<3 x half>) +declare <4 x half> @_Z4sqrtDv4_Dh(<4 x half>) +declare <8 x half> @_Z4sqrtDv8_Dh(<8 x half>) +declare <16 x half> @_Z4sqrtDv16_Dh(<16 x half>) + +define float @test_sqrt_f32(float %arg) { +; CHECK-LABEL: define float @test_sqrt_f32 +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[SQRT]] +; + %sqrt = tail call float @_Z4sqrtf(float %arg), !fpmath !0 + ret float %sqrt +} + +define <2 x float> @test_sqrt_v2f32(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_sqrt_v2f32 +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]), !fpmath !0 +; 
CHECK-NEXT: ret <2 x float> [[SQRT]] +; + %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg), !fpmath !0 + ret <2 x float> %sqrt +} + +define <3 x float> @test_sqrt_v3f32(<3 x float> %arg) { +; CHECK-LABEL: define <3 x float> @test_sqrt_v3f32 +; CHECK-SAME: (<3 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <3 x float> @_Z4sqrtDv3_f(<3 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <3 x float> [[SQRT]] +; + %sqrt = tail call <3 x float> @_Z4sqrtDv3_f(<3 x float> %arg), !fpmath !0 + ret <3 x float> %sqrt +} + +define <4 x float> @test_sqrt_v4f32(<4 x float> %arg) { +; CHECK-LABEL: define <4 x float> @test_sqrt_v4f32 +; CHECK-SAME: (<4 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <4 x float> @_Z4sqrtDv4_f(<4 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <4 x float> [[SQRT]] +; + %sqrt = tail call <4 x float> @_Z4sqrtDv4_f(<4 x float> %arg), !fpmath !0 + ret <4 x float> %sqrt +} + +define <8 x float> @test_sqrt_v8f32(<8 x float> %arg) { +; CHECK-LABEL: define <8 x float> @test_sqrt_v8f32 +; CHECK-SAME: (<8 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <8 x float> @_Z4sqrtDv8_f(<8 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <8 x float> [[SQRT]] +; + %sqrt = tail call <8 x float> @_Z4sqrtDv8_f(<8 x float> %arg), !fpmath !0 + ret <8 x float> %sqrt +} + +define <16 x float> @test_sqrt_v16f32(<16 x float> %arg) { +; CHECK-LABEL: define <16 x float> @test_sqrt_v16f32 +; CHECK-SAME: (<16 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <16 x float> @_Z4sqrtDv16_f(<16 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <16 x float> [[SQRT]] +; + %sqrt = tail call <16 x float> @_Z4sqrtDv16_f(<16 x float> %arg), !fpmath !0 + ret <16 x float> %sqrt +} + +define float @test_sqrt_cr_f32(float %arg) { +; CHECK-LABEL: define float @test_sqrt_cr_f32 +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) +; CHECK-NEXT: ret float [[SQRT]] +; + 
%sqrt = tail call float @_Z4sqrtf(float %arg) + ret float %sqrt +} + +define <2 x float> @test_sqrt_cr_v2f32(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_sqrt_cr_v2f32 +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) +; CHECK-NEXT: ret <2 x float> [[SQRT]] +; + %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) + ret <2 x float> %sqrt +} + +define <3 x float> @test_sqrt_cr_v3f32(<3 x float> %arg) { +; CHECK-LABEL: define <3 x float> @test_sqrt_cr_v3f32 +; CHECK-SAME: (<3 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <3 x float> @_Z4sqrtDv3_f(<3 x float> [[ARG]]) +; CHECK-NEXT: ret <3 x float> [[SQRT]] +; + %sqrt = tail call <3 x float> @_Z4sqrtDv3_f(<3 x float> %arg) + ret <3 x float> %sqrt +} + +define <4 x float> @test_sqrt_cr_v4f32(<4 x float> %arg) { +; CHECK-LABEL: define <4 x float> @test_sqrt_cr_v4f32 +; CHECK-SAME: (<4 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <4 x float> @_Z4sqrtDv4_f(<4 x float> [[ARG]]) +; CHECK-NEXT: ret <4 x float> [[SQRT]] +; + %sqrt = tail call <4 x float> @_Z4sqrtDv4_f(<4 x float> %arg) + ret <4 x float> %sqrt +} + +define <8 x float> @test_sqrt_cr_v8f32(<8 x float> %arg) { +; CHECK-LABEL: define <8 x float> @test_sqrt_cr_v8f32 +; CHECK-SAME: (<8 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <8 x float> @_Z4sqrtDv8_f(<8 x float> [[ARG]]) +; CHECK-NEXT: ret <8 x float> [[SQRT]] +; + %sqrt = tail call <8 x float> @_Z4sqrtDv8_f(<8 x float> %arg) + ret <8 x float> %sqrt +} + +define <16 x float> @test_sqrt_cr_v16f32(<16 x float> %arg) { +; CHECK-LABEL: define <16 x float> @test_sqrt_cr_v16f32 +; CHECK-SAME: (<16 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <16 x float> @_Z4sqrtDv16_f(<16 x float> [[ARG]]) +; CHECK-NEXT: ret <16 x float> [[SQRT]] +; + %sqrt = tail call <16 x float> @_Z4sqrtDv16_f(<16 x float> %arg) + ret <16 x float> %sqrt +} + +define 
double @test_sqrt_f64(double %arg) { +; CHECK-LABEL: define double @test_sqrt_f64 +; CHECK-SAME: (double [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call double @_Z4sqrtd(double [[ARG]]) +; CHECK-NEXT: ret double [[SQRT]] +; + %sqrt = tail call double @_Z4sqrtd(double %arg) + ret double %sqrt +} + +define <2 x double> @test_sqrt_v2f64(<2 x double> %arg) { +; CHECK-LABEL: define <2 x double> @test_sqrt_v2f64 +; CHECK-SAME: (<2 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <2 x double> @_Z4sqrtDv2_d(<2 x double> [[ARG]]) +; CHECK-NEXT: ret <2 x double> [[SQRT]] +; + %sqrt = tail call <2 x double> @_Z4sqrtDv2_d(<2 x double> %arg) + ret <2 x double> %sqrt +} + +define <3 x double> @test_sqrt_v3f64(<3 x double> %arg) { +; CHECK-LABEL: define <3 x double> @test_sqrt_v3f64 +; CHECK-SAME: (<3 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <3 x double> @_Z4sqrtDv3_d(<3 x double> [[ARG]]) +; CHECK-NEXT: ret <3 x double> [[SQRT]] +; + %sqrt = tail call <3 x double> @_Z4sqrtDv3_d(<3 x double> %arg) + ret <3 x double> %sqrt +} + +define <4 x double> @test_sqrt_v4f64(<4 x double> %arg) { +; CHECK-LABEL: define <4 x double> @test_sqrt_v4f64 +; CHECK-SAME: (<4 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <4 x double> @_Z4sqrtDv4_d(<4 x double> [[ARG]]) +; CHECK-NEXT: ret <4 x double> [[SQRT]] +; + %sqrt = tail call <4 x double> @_Z4sqrtDv4_d(<4 x double> %arg) + ret <4 x double> %sqrt +} + +define <8 x double> @test_sqrt_v8f64(<8 x double> %arg) { +; CHECK-LABEL: define <8 x double> @test_sqrt_v8f64 +; CHECK-SAME: (<8 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <8 x double> @_Z4sqrtDv8_d(<8 x double> [[ARG]]) +; CHECK-NEXT: ret <8 x double> [[SQRT]] +; + %sqrt = tail call <8 x double> @_Z4sqrtDv8_d(<8 x double> %arg) + ret <8 x double> %sqrt +} + +define <16 x double> @test_sqrt_v16f64(<16 x double> %arg) { +; CHECK-LABEL: define <16 x double> @test_sqrt_v16f64 +; CHECK-SAME: (<16 x double> 
[[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <16 x double> @_Z4sqrtDv16_d(<16 x double> [[ARG]]) +; CHECK-NEXT: ret <16 x double> [[SQRT]] +; + %sqrt = tail call <16 x double> @_Z4sqrtDv16_d(<16 x double> %arg) + ret <16 x double> %sqrt +} + +define half @test_sqrt_f16(half %arg) { +; CHECK-LABEL: define half @test_sqrt_f16 +; CHECK-SAME: (half [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call half @_Z4sqrtDh(half [[ARG]]) +; CHECK-NEXT: ret half [[SQRT]] +; + %sqrt = tail call half @_Z4sqrtDh(half %arg) + ret half %sqrt +} + +define <2 x half> @test_sqrt_v2f16(<2 x half> %arg) { +; CHECK-LABEL: define <2 x half> @test_sqrt_v2f16 +; CHECK-SAME: (<2 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <2 x half> @_Z4sqrtDv2_Dh(<2 x half> [[ARG]]) +; CHECK-NEXT: ret <2 x half> [[SQRT]] +; + %sqrt = tail call <2 x half> @_Z4sqrtDv2_Dh(<2 x half> %arg) + ret <2 x half> %sqrt +} + +define <3 x half> @test_sqrt_v3f16(<3 x half> %arg) { +; CHECK-LABEL: define <3 x half> @test_sqrt_v3f16 +; CHECK-SAME: (<3 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <3 x half> @_Z4sqrtDv3_Dh(<3 x half> [[ARG]]) +; CHECK-NEXT: ret <3 x half> [[SQRT]] +; + %sqrt = tail call <3 x half> @_Z4sqrtDv3_Dh(<3 x half> %arg) + ret <3 x half> %sqrt +} + +define <4 x half> @test_sqrt_v4f16(<4 x half> %arg) { +; CHECK-LABEL: define <4 x half> @test_sqrt_v4f16 +; CHECK-SAME: (<4 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <4 x half> @_Z4sqrtDv4_Dh(<4 x half> [[ARG]]) +; CHECK-NEXT: ret <4 x half> [[SQRT]] +; + %sqrt = tail call <4 x half> @_Z4sqrtDv4_Dh(<4 x half> %arg) + ret <4 x half> %sqrt +} + +define <8 x half> @test_sqrt_v8f16(<8 x half> %arg) { +; CHECK-LABEL: define <8 x half> @test_sqrt_v8f16 +; CHECK-SAME: (<8 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <8 x half> @_Z4sqrtDv8_Dh(<8 x half> [[ARG]]) +; CHECK-NEXT: ret <8 x half> [[SQRT]] +; + %sqrt = tail call <8 x half> @_Z4sqrtDv8_Dh(<8 x half> %arg) + ret <8 x 
half> %sqrt +} + +define <16 x half> @test_sqrt_v16f16(<16 x half> %arg) { +; CHECK-LABEL: define <16 x half> @test_sqrt_v16f16 +; CHECK-SAME: (<16 x half> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <16 x half> @_Z4sqrtDv16_Dh(<16 x half> [[ARG]]) +; CHECK-NEXT: ret <16 x half> [[SQRT]] +; + %sqrt = tail call <16 x half> @_Z4sqrtDv16_Dh(<16 x half> %arg) + ret <16 x half> %sqrt +} + +define float @test_sqrt_f32_nobuiltin_callsite(float %arg) { +; CHECK-LABEL: define float @test_sqrt_f32_nobuiltin_callsite +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR2:[0-9]+]], !fpmath !0 +; CHECK-NEXT: ret float [[SQRT]] +; + %sqrt = tail call float @_Z4sqrtf(float %arg) #0, !fpmath !0 + ret float %sqrt +} + +define <2 x float> @test_sqrt_v2f32_nobuiltin_callsite(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_sqrt_v2f32_nobuiltin_callsite +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR2]], !fpmath !0 +; CHECK-NEXT: ret <2 x float> [[SQRT]] +; + %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) #0, !fpmath !0 + ret <2 x float> %sqrt +} + +define float @test_sqrt_cr_f32_nobuiltin_callsite(float %arg) { +; CHECK-LABEL: define float @test_sqrt_cr_f32_nobuiltin_callsite +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR2]] +; CHECK-NEXT: ret float [[SQRT]] +; + %sqrt = tail call float @_Z4sqrtf(float %arg) #0 + ret float %sqrt +} + +define <2 x float> @test_sqrt_cr_v2f32_nobuiltin_callsite(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_sqrt_cr_v2f32_nobuiltin_callsite +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR2]] +; CHECK-NEXT: ret <2 x float> [[SQRT]] +; + %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) #0 
+ ret <2 x float> %sqrt +} + +; "no-builtins" should be ignored +define float @test_sqrt_f32_nobuiltins(float %arg) #1 { +; CHECK-LABEL: define float @test_sqrt_f32_nobuiltins +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR2]], !fpmath !0 +; CHECK-NEXT: ret float [[SQRT]] +; + %sqrt = tail call float @_Z4sqrtf(float %arg) #0, !fpmath !0 + ret float %sqrt +} + +define <2 x float> @test_sqrt_v2f32_nobuiltins(<2 x float> %arg) #1 { +; CHECK-LABEL: define <2 x float> @test_sqrt_v2f32_nobuiltins +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR2]], !fpmath !0 +; CHECK-NEXT: ret <2 x float> [[SQRT]] +; + %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) #0, !fpmath !0 + ret <2 x float> %sqrt +} + +define float @test_sqrt_cr_f32_nobuiltins(float %arg) #1 { +; CHECK-LABEL: define float @test_sqrt_cr_f32_nobuiltins +; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR2]] +; CHECK-NEXT: ret float [[SQRT]] +; + %sqrt = tail call float @_Z4sqrtf(float %arg) #0 + ret float %sqrt +} + +define <2 x float> @test_sqrt_cr_v2f32_nobuiltins(<2 x float> %arg) #1 { +; CHECK-LABEL: define <2 x float> @test_sqrt_cr_v2f32_nobuiltins +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR2]] +; CHECK-NEXT: ret <2 x float> [[SQRT]] +; + %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) #0 + ret <2 x float> %sqrt +} + +define float @test_sqrt_f32_preserve_flags(float %arg) { +; CHECK-LABEL: define float @test_sqrt_f32_preserve_flags +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call nnan ninf float @_Z4sqrtf(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[SQRT]] +; + %sqrt = tail call nnan 
ninf float @_Z4sqrtf(float %arg), !fpmath !0 + ret float %sqrt +} + +define <2 x float> @test_sqrt_v2f32_preserve_flags(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_sqrt_v2f32_preserve_flags +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call nnan nsz contract <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret <2 x float> [[SQRT]] +; + %sqrt = tail call contract nsz nnan <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg), !fpmath !0 + ret <2 x float> %sqrt +} + +define float @test_sqrt_f32_preserve_flags_md(float %arg) { +; CHECK-LABEL: define float @test_sqrt_f32_preserve_flags_md +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call nnan ninf float @_Z4sqrtf(float [[ARG]]), !fpmath !0, !foo !1 +; CHECK-NEXT: ret float [[SQRT]] +; + %sqrt = tail call nnan ninf float @_Z4sqrtf(float %arg), !fpmath !0, !foo !1 + ret float %sqrt +} + +define <2 x float> @test_sqrt_v2f32_preserve_flags_md(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_sqrt_v2f32_preserve_flags_md +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call nnan nsz contract <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]), !fpmath !0, !foo !1 +; CHECK-NEXT: ret <2 x float> [[SQRT]] +; + %sqrt = tail call contract nsz nnan <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg), !fpmath !0, !foo !1 + ret <2 x float> %sqrt +} + +define float @test_sqrt_cr_f32_preserve_flags(float %arg) { +; CHECK-LABEL: define float @test_sqrt_cr_f32_preserve_flags +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call ninf contract float @_Z4sqrtf(float [[ARG]]) +; CHECK-NEXT: ret float [[SQRT]] +; + %sqrt = tail call ninf contract float @_Z4sqrtf(float %arg) + ret float %sqrt +} + +define <2 x float> @test_sqrt_cr_v2f32_preserve_flags(<2 x float> %arg) { +; CHECK-LABEL: define <2 x float> @test_sqrt_cr_v2f32_preserve_flags +; CHECK-SAME: (<2 x float> [[ARG:%.*]]) { +; CHECK-NEXT: 
[[SQRT:%.*]] = tail call nnan nsz <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) +; CHECK-NEXT: ret <2 x float> [[SQRT]] +; + %sqrt = tail call nnan nsz <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) + ret <2 x float> %sqrt +} + +; Test the libm name, not a recognized opencl builtin. +declare float @sqrtf(float) #2 +declare double @sqrt(double) #2 + +define float @test_libm_sqrt_f32(float %arg) { +; CHECK-LABEL: define float @test_libm_sqrt_f32 +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call float @sqrtf(float [[ARG]]) +; CHECK-NEXT: ret float [[SQRT]] +; + %sqrt = tail call float @sqrtf(float %arg) + ret float %sqrt +} + +define float @test_libm_sqrt_f32_fpmath(float %arg) { +; CHECK-LABEL: define float @test_libm_sqrt_f32_fpmath +; CHECK-SAME: (float [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call float @sqrtf(float [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret float [[SQRT]] +; + %sqrt = tail call float @sqrtf(float %arg), !fpmath !0 + ret float %sqrt +} + +define double @test_libm_sqrt_f64(double %arg) { +; CHECK-LABEL: define double @test_libm_sqrt_f64 +; CHECK-SAME: (double [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call double @sqrt(double [[ARG]]) +; CHECK-NEXT: ret double [[SQRT]] +; + %sqrt = tail call double @sqrt(double %arg) + ret double %sqrt +} + +define double @test_libm_sqrt_f64_fpmath(double %arg) { +; CHECK-LABEL: define double @test_libm_sqrt_f64_fpmath +; CHECK-SAME: (double [[ARG:%.*]]) { +; CHECK-NEXT: [[SQRT:%.*]] = tail call double @sqrt(double [[ARG]]), !fpmath !0 +; CHECK-NEXT: ret double [[SQRT]] +; + %sqrt = tail call double @sqrt(double %arg), !fpmath !0 + ret double %sqrt +} + +attributes #0 = { nobuiltin } +attributes #1 = { "no-builtins" } +attributes #2 = { nounwind memory(none) } + +!0 = !{float 3.000000e+00} +!1 = !{i32 1234} Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-trunc.ll =================================================================== --- /dev/null +++ 
llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-trunc.ll
@@ -0,0 +1,335 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+; Exercises amdgpu-simplifylib on calls to the mangled OpenCL trunc builtin
+; (_Z5trunc*) for scalar and 2/3/4/8/16-element float, double and half
+; operands. The checks below expect every call to survive the pass as a call.
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
+
+declare float @_Z5truncf(float)
+declare <2 x float> @_Z5truncDv2_f(<2 x float>)
+declare <3 x float> @_Z5truncDv3_f(<3 x float>)
+declare <4 x float> @_Z5truncDv4_f(<4 x float>)
+declare <8 x float> @_Z5truncDv8_f(<8 x float>)
+declare <16 x float> @_Z5truncDv16_f(<16 x float>)
+
+declare double @_Z5truncd(double)
+declare <2 x double> @_Z5truncDv2_d(<2 x double>)
+declare <3 x double> @_Z5truncDv3_d(<3 x double>)
+declare <4 x double> @_Z5truncDv4_d(<4 x double>)
+declare <8 x double> @_Z5truncDv8_d(<8 x double>)
+declare <16 x double> @_Z5truncDv16_d(<16 x double>)
+
+declare half @_Z5truncDh(half)
+declare <2 x half> @_Z5truncDv2_Dh(<2 x half>)
+declare <3 x half> @_Z5truncDv3_Dh(<3 x half>)
+declare <4 x half> @_Z5truncDv4_Dh(<4 x half>)
+declare <8 x half> @_Z5truncDv8_Dh(<8 x half>)
+declare <16 x half> @_Z5truncDv16_Dh(<16 x half>)
+
+define float @test_trunc_f32(float %arg) {
+; CHECK-LABEL: define float @test_trunc_f32
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call float @_Z5truncf(float [[ARG]])
+; CHECK-NEXT:    ret float [[TRUNC]]
+;
+  %trunc = tail call float @_Z5truncf(float %arg)
+  ret float %trunc
+}
+
+define <2 x float> @test_trunc_v2f32(<2 x float> %arg) {
+; CHECK-LABEL: define <2 x float> @test_trunc_v2f32
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call <2 x float> @_Z5truncDv2_f(<2 x float> [[ARG]])
+; CHECK-NEXT:    ret <2 x float> [[TRUNC]]
+;
+  %trunc = tail call <2 x float> @_Z5truncDv2_f(<2 x float> %arg)
+  ret <2 x float> %trunc
+}
+
+define <3 x float> @test_trunc_v3f32(<3 x float> %arg) {
+; CHECK-LABEL: define <3 x float> @test_trunc_v3f32
+; CHECK-SAME: (<3 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call <3 x float> @_Z5truncDv3_f(<3 x float> [[ARG]])
+; CHECK-NEXT:    ret <3 x float> [[TRUNC]]
+;
+  %trunc = tail call <3 x float> @_Z5truncDv3_f(<3 x float> %arg)
+  ret <3 x float> %trunc
+}
+
+define <4 x float> @test_trunc_v4f32(<4 x float> %arg) {
+; CHECK-LABEL: define <4 x float> @test_trunc_v4f32
+; CHECK-SAME: (<4 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call <4 x float> @_Z5truncDv4_f(<4 x float> [[ARG]])
+; CHECK-NEXT:    ret <4 x float> [[TRUNC]]
+;
+  %trunc = tail call <4 x float> @_Z5truncDv4_f(<4 x float> %arg)
+  ret <4 x float> %trunc
+}
+
+define <8 x float> @test_trunc_v8f32(<8 x float> %arg) {
+; CHECK-LABEL: define <8 x float> @test_trunc_v8f32
+; CHECK-SAME: (<8 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call <8 x float> @_Z5truncDv8_f(<8 x float> [[ARG]])
+; CHECK-NEXT:    ret <8 x float> [[TRUNC]]
+;
+  %trunc = tail call <8 x float> @_Z5truncDv8_f(<8 x float> %arg)
+  ret <8 x float> %trunc
+}
+
+define <16 x float> @test_trunc_v16f32(<16 x float> %arg) {
+; CHECK-LABEL: define <16 x float> @test_trunc_v16f32
+; CHECK-SAME: (<16 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call <16 x float> @_Z5truncDv16_f(<16 x float> [[ARG]])
+; CHECK-NEXT:    ret <16 x float> [[TRUNC]]
+;
+  %trunc = tail call <16 x float> @_Z5truncDv16_f(<16 x float> %arg)
+  ret <16 x float> %trunc
+}
+
+define double @test_trunc_f64(double %arg) {
+; CHECK-LABEL: define double @test_trunc_f64
+; CHECK-SAME: (double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call double @_Z5truncd(double [[ARG]])
+; CHECK-NEXT:    ret double [[TRUNC]]
+;
+  %trunc = tail call double @_Z5truncd(double %arg)
+  ret double %trunc
+}
+
+define <2 x double> @test_trunc_v2f64(<2 x double> %arg) {
+; CHECK-LABEL: define <2 x double> @test_trunc_v2f64
+; CHECK-SAME: (<2 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call <2 x double> @_Z5truncDv2_d(<2 x double> [[ARG]])
+; CHECK-NEXT:    ret <2 x double> [[TRUNC]]
+;
+  %trunc = tail call <2 x double> @_Z5truncDv2_d(<2 x double> %arg)
+  ret <2 x double> %trunc
+}
+
+define <3 x double> @test_trunc_v3f64(<3 x double> %arg) {
+; CHECK-LABEL: define <3 x double> @test_trunc_v3f64
+; CHECK-SAME: (<3 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call <3 x double> @_Z5truncDv3_d(<3 x double> [[ARG]])
+; CHECK-NEXT:    ret <3 x double> [[TRUNC]]
+;
+  %trunc = tail call <3 x double> @_Z5truncDv3_d(<3 x double> %arg)
+  ret <3 x double> %trunc
+}
+
+define <4 x double> @test_trunc_v4f64(<4 x double> %arg) {
+; CHECK-LABEL: define <4 x double> @test_trunc_v4f64
+; CHECK-SAME: (<4 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call <4 x double> @_Z5truncDv4_d(<4 x double> [[ARG]])
+; CHECK-NEXT:    ret <4 x double> [[TRUNC]]
+;
+  %trunc = tail call <4 x double> @_Z5truncDv4_d(<4 x double> %arg)
+  ret <4 x double> %trunc
+}
+
+define <8 x double> @test_trunc_v8f64(<8 x double> %arg) {
+; CHECK-LABEL: define <8 x double> @test_trunc_v8f64
+; CHECK-SAME: (<8 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call <8 x double> @_Z5truncDv8_d(<8 x double> [[ARG]])
+; CHECK-NEXT:    ret <8 x double> [[TRUNC]]
+;
+  %trunc = tail call <8 x double> @_Z5truncDv8_d(<8 x double> %arg)
+  ret <8 x double> %trunc
+}
+
+define <16 x double> @test_trunc_v16f64(<16 x double> %arg) {
+; CHECK-LABEL: define <16 x double> @test_trunc_v16f64
+; CHECK-SAME: (<16 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call <16 x double> @_Z5truncDv16_d(<16 x double> [[ARG]])
+; CHECK-NEXT:    ret <16 x double> [[TRUNC]]
+;
+  %trunc = tail call <16 x double> @_Z5truncDv16_d(<16 x double> %arg)
+  ret <16 x double> %trunc
+}
+
+define half @test_trunc_f16(half %arg) {
+; CHECK-LABEL: define half @test_trunc_f16
+; CHECK-SAME: (half [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call half @_Z5truncDh(half [[ARG]])
+; CHECK-NEXT:    ret half [[TRUNC]]
+;
+  %trunc = tail call half @_Z5truncDh(half %arg)
+  ret half %trunc
+}
+
+define <2 x half> @test_trunc_v2f16(<2 x half> %arg) {
+; CHECK-LABEL: define <2 x half> @test_trunc_v2f16
+; CHECK-SAME: (<2 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call <2 x half> @_Z5truncDv2_Dh(<2 x half> [[ARG]])
+; CHECK-NEXT:    ret <2 x half> [[TRUNC]]
+;
+  %trunc = tail call <2 x half> @_Z5truncDv2_Dh(<2 x half> %arg)
+  ret <2 x half> %trunc
+}
+
+define <3 x half> @test_trunc_v3f16(<3 x half> %arg) {
+; CHECK-LABEL: define <3 x half> @test_trunc_v3f16
+; CHECK-SAME: (<3 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call <3 x half> @_Z5truncDv3_Dh(<3 x half> [[ARG]])
+; CHECK-NEXT:    ret <3 x half> [[TRUNC]]
+;
+  %trunc = tail call <3 x half> @_Z5truncDv3_Dh(<3 x half> %arg)
+  ret <3 x half> %trunc
+}
+
+define <4 x half> @test_trunc_v4f16(<4 x half> %arg) {
+; CHECK-LABEL: define <4 x half> @test_trunc_v4f16
+; CHECK-SAME: (<4 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call <4 x half> @_Z5truncDv4_Dh(<4 x half> [[ARG]])
+; CHECK-NEXT:    ret <4 x half> [[TRUNC]]
+;
+  %trunc = tail call <4 x half> @_Z5truncDv4_Dh(<4 x half> %arg)
+  ret <4 x half> %trunc
+}
+
+define <8 x half> @test_trunc_v8f16(<8 x half> %arg) {
+; CHECK-LABEL: define <8 x half> @test_trunc_v8f16
+; CHECK-SAME: (<8 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call <8 x half> @_Z5truncDv8_Dh(<8 x half> [[ARG]])
+; CHECK-NEXT:    ret <8 x half> [[TRUNC]]
+;
+  %trunc = tail call <8 x half> @_Z5truncDv8_Dh(<8 x half> %arg)
+  ret <8 x half> %trunc
+}
+
+define <16 x half> @test_trunc_v16f16(<16 x half> %arg) {
+; CHECK-LABEL: define <16 x half> @test_trunc_v16f16
+; CHECK-SAME: (<16 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call <16 x half> @_Z5truncDv16_Dh(<16 x half> [[ARG]])
+; CHECK-NEXT:    ret <16 x half> [[TRUNC]]
+;
+  %trunc = tail call <16 x half> @_Z5truncDv16_Dh(<16 x half> %arg)
+  ret <16 x half> %trunc
+}
+
+; Call sites carrying the nobuiltin attribute (#0).
+define float @test_trunc_f32_nobuiltin_callsite(float %arg) {
+; CHECK-LABEL: define float @test_trunc_f32_nobuiltin_callsite
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call float @_Z5truncf(float [[ARG]]) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT:    ret float [[TRUNC]]
+;
+  %trunc = tail call float @_Z5truncf(float %arg) #0
+  ret float %trunc
+}
+
+define <2 x float> @test_trunc_v2f32_nobuiltin_callsite(<2 x float> %arg) {
+; CHECK-LABEL: define <2 x float> @test_trunc_v2f32_nobuiltin_callsite
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call <2 x float> @_Z5truncDv2_f(<2 x float> [[ARG]]) #[[ATTR3]]
+; CHECK-NEXT:    ret <2 x float> [[TRUNC]]
+;
+  %trunc = tail call <2 x float> @_Z5truncDv2_f(<2 x float> %arg) #0
+  ret <2 x float> %trunc
+}
+
+; The "no-builtins" function attribute (#1) should be ignored.
+define float @test_trunc_f32_nobuiltins(float %arg) #1 {
+; CHECK-LABEL: define float @test_trunc_f32_nobuiltins
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call float @_Z5truncf(float [[ARG]]) #[[ATTR3]]
+; CHECK-NEXT:    ret float [[TRUNC]]
+;
+  %trunc = tail call float @_Z5truncf(float %arg) #0
+  ret float %trunc
+}
+
+define <2 x float> @test_trunc_v2f32_nobuiltins(<2 x float> %arg) #1 {
+; CHECK-LABEL: define <2 x float> @test_trunc_v2f32_nobuiltins
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call <2 x float> @_Z5truncDv2_f(<2 x float> [[ARG]]) #[[ATTR3]]
+; CHECK-NEXT:    ret <2 x float> [[TRUNC]]
+;
+  %trunc = tail call <2 x float> @_Z5truncDv2_f(<2 x float> %arg) #0
+  ret <2 x float> %trunc
+}
+
+; Fast-math flags on the call must be preserved.
+define float @test_trunc_f32_preserve_flags(float %arg) {
+; CHECK-LABEL: define float @test_trunc_f32_preserve_flags
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call nnan ninf float @_Z5truncf(float [[ARG]])
+; CHECK-NEXT:    ret float [[TRUNC]]
+;
+  %trunc = tail call nnan ninf float @_Z5truncf(float %arg)
+  ret float %trunc
+}
+
+define <2 x float> @test_trunc_v2f32_preserve_flags(<2 x float> %arg) {
+; CHECK-LABEL: define <2 x float> @test_trunc_v2f32_preserve_flags
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call nnan nsz contract <2 x float> @_Z5truncDv2_f(<2 x float> [[ARG]])
+; CHECK-NEXT:    ret <2 x float> [[TRUNC]]
+;
+  %trunc = tail call contract nsz nnan <2 x float> @_Z5truncDv2_f(<2 x float> %arg)
+  ret <2 x float> %trunc
+}
+
+; Fast-math flags and unrelated metadata (!foo) on the call must be preserved.
+define float @test_trunc_f32_preserve_flags_md(float %arg) {
+; CHECK-LABEL: define float @test_trunc_f32_preserve_flags_md
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call nnan ninf float @_Z5truncf(float [[ARG]]), !foo !0
+; CHECK-NEXT:    ret float [[TRUNC]]
+;
+  %trunc = tail call nnan ninf float @_Z5truncf(float %arg), !foo !0
+  ret float %trunc
+}
+
+define <2 x float> @test_trunc_v2f32_preserve_flags_md(<2 x float> %arg) {
+; CHECK-LABEL: define <2 x float> @test_trunc_v2f32_preserve_flags_md
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call nnan nsz contract <2 x float> @_Z5truncDv2_f(<2 x float> [[ARG]]), !foo !0
+; CHECK-NEXT:    ret <2 x float> [[TRUNC]]
+;
+  %trunc = tail call contract nsz nnan <2 x float> @_Z5truncDv2_f(<2 x float> %arg), !foo !0
+  ret <2 x float> %trunc
+}
+
+; Test the libm names (truncf/trunc), which are not recognized OpenCL builtins.
+declare float @truncf(float) #2
+declare double @trunc(double) #2
+
+define float @test_libm_trunc_f32(float %arg) {
+; CHECK-LABEL: define float @test_libm_trunc_f32
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call float @truncf(float [[ARG]])
+; CHECK-NEXT:    ret float [[TRUNC]]
+;
+  %trunc = tail call float @truncf(float %arg)
+  ret float %trunc
+}
+
+define double @test_libm_trunc_f64(double %arg) {
+; CHECK-LABEL: define double @test_libm_trunc_f64
+; CHECK-SAME: (double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call double @trunc(double [[ARG]])
+; CHECK-NEXT:    ret double [[TRUNC]]
+;
+  %trunc = tail call double @trunc(double %arg)
+  ret double %trunc
+}
+
+; strictfp function (#3) whose call site is also marked strictfp.
+define float @test_trunc_f32_strictfp(float %arg) #3 {
+; CHECK-LABEL: define float @test_trunc_f32_strictfp
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call nnan float @_Z5truncf(float [[ARG]]) #[[ATTR2]]
+; CHECK-NEXT:    ret float [[TRUNC]]
+;
+  %trunc = tail call nnan float @_Z5truncf(float %arg) #3
+  ret float %trunc
+}
+
+attributes #0 = { nobuiltin }
+attributes #1 = { "no-builtins" }
+attributes #2 = { nounwind memory(none) }
+attributes #3 = { strictfp }
+
+!0 = !{i32 1234}