Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-ceil.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-ceil.ll
@@ -0,0 +1,327 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
+
+declare float @_Z4ceilf(float)
+declare <2 x float> @_Z4ceilDv2_f(<2 x float>)
+declare <3 x float> @_Z4ceilDv3_f(<3 x float>)
+declare <4 x float> @_Z4ceilDv4_f(<4 x float>)
+declare <8 x float> @_Z4ceilDv8_f(<8 x float>)
+declare <16 x float> @_Z4ceilDv16_f(<16 x float>)
+
+declare double @_Z4ceild(double)
+declare <2 x double> @_Z4ceilDv2_d(<2 x double>)
+declare <3 x double> @_Z4ceilDv3_d(<3 x double>)
+declare <4 x double> @_Z4ceilDv4_d(<4 x double>)
+declare <8 x double> @_Z4ceilDv8_d(<8 x double>)
+declare <16 x double> @_Z4ceilDv16_d(<16 x double>)
+
+declare half @_Z4ceilDh(half)
+declare <2 x half> @_Z4ceilDv2_Dh(<2 x half>)
+declare <3 x half> @_Z4ceilDv3_Dh(<3 x half>)
+declare <4 x half> @_Z4ceilDv4_Dh(<4 x half>)
+declare <8 x half> @_Z4ceilDv8_Dh(<8 x half>)
+declare <16 x half> @_Z4ceilDv16_Dh(<16 x half>)
+
+define float @test_ceil_f32(float %arg) {
+; CHECK-LABEL: define float @test_ceil_f32
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CEIL:%.*]] = tail call float @_Z4ceilf(float [[ARG]])
+; CHECK-NEXT:    ret float [[CEIL]]
+; Scalar float case: the call to @_Z4ceilf is expected to pass through amdgpu-simplifylib unchanged.
+  %ceil = tail call float @_Z4ceilf(float %arg)
+  ret float %ceil
+}
+
+define <2 x float> @test_ceil_v2f32(<2 x float> %arg) {
+; CHECK-LABEL: define <2 x float> @test_ceil_v2f32
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CEIL:%.*]] = tail call <2 x float> @_Z4ceilDv2_f(<2 x float> [[ARG]])
+; CHECK-NEXT:    ret <2 x float> [[CEIL]]
+; <2 x float> case: the call to @_Z4ceilDv2_f is expected to pass through amdgpu-simplifylib unchanged.
+  %ceil = tail call <2 x float> @_Z4ceilDv2_f(<2 x float> %arg)
+  ret <2 x float> %ceil
+}
+
+define <3 x float> @test_ceil_v3f32(<3 x float> %arg) {
+; CHECK-LABEL: define <3 x float> @test_ceil_v3f32
+; CHECK-SAME: (<3 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CEIL:%.*]] = tail call <3 x float> @_Z4ceilDv3_f(<3 x float> [[ARG]])
+; CHECK-NEXT:    ret <3 x float> [[CEIL]]
+; <3 x float> case: the call to @_Z4ceilDv3_f is expected to pass through amdgpu-simplifylib unchanged.
+  %ceil = tail call <3 x float> @_Z4ceilDv3_f(<3 x float> %arg)
+  ret <3 x float> %ceil
+}
+
+define <4 x float> @test_ceil_v4f32(<4 x float> %arg) {
+; CHECK-LABEL: define <4 x float> @test_ceil_v4f32
+; CHECK-SAME: (<4 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CEIL:%.*]] = tail call <4 x float> @_Z4ceilDv4_f(<4 x float> [[ARG]])
+; CHECK-NEXT:    ret <4 x float> [[CEIL]]
+; <4 x float> case: the call to @_Z4ceilDv4_f is expected to pass through amdgpu-simplifylib unchanged.
+  %ceil = tail call <4 x float> @_Z4ceilDv4_f(<4 x float> %arg)
+  ret <4 x float> %ceil
+}
+
+define <8 x float> @test_ceil_v8f32(<8 x float> %arg) {
+; CHECK-LABEL: define <8 x float> @test_ceil_v8f32
+; CHECK-SAME: (<8 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CEIL:%.*]] = tail call <8 x float> @_Z4ceilDv8_f(<8 x float> [[ARG]])
+; CHECK-NEXT:    ret <8 x float> [[CEIL]]
+; <8 x float> case: the call to @_Z4ceilDv8_f is expected to pass through amdgpu-simplifylib unchanged.
+  %ceil = tail call <8 x float> @_Z4ceilDv8_f(<8 x float> %arg)
+  ret <8 x float> %ceil
+}
+
+define <16 x float> @test_ceil_v16f32(<16 x float> %arg) {
+; CHECK-LABEL: define <16 x float> @test_ceil_v16f32
+; CHECK-SAME: (<16 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CEIL:%.*]] = tail call <16 x float> @_Z4ceilDv16_f(<16 x float> [[ARG]])
+; CHECK-NEXT:    ret <16 x float> [[CEIL]]
+; <16 x float> case: the call to @_Z4ceilDv16_f is expected to pass through amdgpu-simplifylib unchanged.
+  %ceil = tail call <16 x float> @_Z4ceilDv16_f(<16 x float> %arg)
+  ret <16 x float> %ceil
+}
+
+define double @test_ceil_f64(double %arg) {
+; CHECK-LABEL: define double @test_ceil_f64
+; CHECK-SAME: (double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CEIL:%.*]] = tail call double @_Z4ceild(double [[ARG]])
+; CHECK-NEXT:    ret double [[CEIL]]
+; Scalar double case: the call to @_Z4ceild is expected to pass through amdgpu-simplifylib unchanged.
+  %ceil = tail call double @_Z4ceild(double %arg)
+  ret double %ceil
+}
+
+define <2 x double> @test_ceil_v2f64(<2 x double> %arg) {
+; CHECK-LABEL: define <2 x double> @test_ceil_v2f64
+; CHECK-SAME: (<2 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CEIL:%.*]] = tail call <2 x double> @_Z4ceilDv2_d(<2 x double> [[ARG]])
+; CHECK-NEXT:    ret <2 x double> [[CEIL]]
+; <2 x double> case: the call to @_Z4ceilDv2_d is expected to pass through amdgpu-simplifylib unchanged.
+  %ceil = tail call <2 x double> @_Z4ceilDv2_d(<2 x double> %arg)
+  ret <2 x double> %ceil
+}
+
+define <3 x double> @test_ceil_v3f64(<3 x double> %arg) {
+; CHECK-LABEL: define <3 x double> @test_ceil_v3f64
+; CHECK-SAME: (<3 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CEIL:%.*]] = tail call <3 x double> @_Z4ceilDv3_d(<3 x double> [[ARG]])
+; CHECK-NEXT:    ret <3 x double> [[CEIL]]
+; <3 x double> case: the call to @_Z4ceilDv3_d is expected to pass through amdgpu-simplifylib unchanged.
+  %ceil = tail call <3 x double> @_Z4ceilDv3_d(<3 x double> %arg)
+  ret <3 x double> %ceil
+}
+
+define <4 x double> @test_ceil_v4f64(<4 x double> %arg) {
+; CHECK-LABEL: define <4 x double> @test_ceil_v4f64
+; CHECK-SAME: (<4 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CEIL:%.*]] = tail call <4 x double> @_Z4ceilDv4_d(<4 x double> [[ARG]])
+; CHECK-NEXT:    ret <4 x double> [[CEIL]]
+; <4 x double> case: the call to @_Z4ceilDv4_d is expected to pass through amdgpu-simplifylib unchanged.
+  %ceil = tail call <4 x double> @_Z4ceilDv4_d(<4 x double> %arg)
+  ret <4 x double> %ceil
+}
+
+define <8 x double> @test_ceil_v8f64(<8 x double> %arg) {
+; CHECK-LABEL: define <8 x double> @test_ceil_v8f64
+; CHECK-SAME: (<8 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CEIL:%.*]] = tail call <8 x double> @_Z4ceilDv8_d(<8 x double> [[ARG]])
+; CHECK-NEXT:    ret <8 x double> [[CEIL]]
+; <8 x double> case: the call to @_Z4ceilDv8_d is expected to pass through amdgpu-simplifylib unchanged.
+  %ceil = tail call <8 x double> @_Z4ceilDv8_d(<8 x double> %arg)
+  ret <8 x double> %ceil
+}
+
+define <16 x double> @test_ceil_v16f64(<16 x double> %arg) {
+; CHECK-LABEL: define <16 x double> @test_ceil_v16f64
+; CHECK-SAME: (<16 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CEIL:%.*]] = tail call <16 x double> @_Z4ceilDv16_d(<16 x double> [[ARG]])
+; CHECK-NEXT:    ret <16 x double> [[CEIL]]
+; <16 x double> case: the call to @_Z4ceilDv16_d is expected to pass through amdgpu-simplifylib unchanged.
+  %ceil = tail call <16 x double> @_Z4ceilDv16_d(<16 x double> %arg)
+  ret <16 x double> %ceil
+}
+
+define half @test_ceil_f16(half %arg) {
+; CHECK-LABEL: define half @test_ceil_f16
+; CHECK-SAME: (half [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CEIL:%.*]] = tail call half @_Z4ceilDh(half [[ARG]])
+; CHECK-NEXT:    ret half [[CEIL]]
+; Scalar half case: the call to @_Z4ceilDh is expected to pass through amdgpu-simplifylib unchanged.
+  %ceil = tail call half @_Z4ceilDh(half %arg)
+  ret half %ceil
+}
+
+define <2 x half> @test_ceil_v2f16(<2 x half> %arg) {
+; CHECK-LABEL: define <2 x half> @test_ceil_v2f16
+; CHECK-SAME: (<2 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CEIL:%.*]] = tail call <2 x half> @_Z4ceilDv2_Dh(<2 x half> [[ARG]])
+; CHECK-NEXT:    ret <2 x half> [[CEIL]]
+; <2 x half> case: the call to @_Z4ceilDv2_Dh is expected to pass through amdgpu-simplifylib unchanged.
+  %ceil = tail call <2 x half> @_Z4ceilDv2_Dh(<2 x half> %arg)
+  ret <2 x half> %ceil
+}
+
+define <3 x half> @test_ceil_v3f16(<3 x half> %arg) {
+; CHECK-LABEL: define <3 x half> @test_ceil_v3f16
+; CHECK-SAME: (<3 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CEIL:%.*]] = tail call <3 x half> @_Z4ceilDv3_Dh(<3 x half> [[ARG]])
+; CHECK-NEXT:    ret <3 x half> [[CEIL]]
+; <3 x half> case: the call to @_Z4ceilDv3_Dh is expected to pass through amdgpu-simplifylib unchanged.
+  %ceil = tail call <3 x half> @_Z4ceilDv3_Dh(<3 x half> %arg)
+  ret <3 x half> %ceil
+}
+
+define <4 x half> @test_ceil_v4f16(<4 x half> %arg) {
+; CHECK-LABEL: define <4 x half> @test_ceil_v4f16
+; CHECK-SAME: (<4 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CEIL:%.*]] = tail call <4 x half> @_Z4ceilDv4_Dh(<4 x half> [[ARG]])
+; CHECK-NEXT:    ret <4 x half> [[CEIL]]
+; <4 x half> case: the call to @_Z4ceilDv4_Dh is expected to pass through amdgpu-simplifylib unchanged.
+  %ceil = tail call <4 x half> @_Z4ceilDv4_Dh(<4 x half> %arg)
+  ret <4 x half> %ceil
+}
+
+define <8 x half> @test_ceil_v8f16(<8 x half> %arg) {
+; CHECK-LABEL: define <8 x half> @test_ceil_v8f16
+; CHECK-SAME: (<8 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CEIL:%.*]] = tail call <8 x half> @_Z4ceilDv8_Dh(<8 x half> [[ARG]])
+; CHECK-NEXT:    ret <8 x half> [[CEIL]]
+; <8 x half> case: the call to @_Z4ceilDv8_Dh is expected to pass through amdgpu-simplifylib unchanged.
+  %ceil = tail call <8 x half> @_Z4ceilDv8_Dh(<8 x half> %arg)
+  ret <8 x half> %ceil
+}
+
+define <16 x half> @test_ceil_v16f16(<16 x half> %arg) {
+; CHECK-LABEL: define <16 x half> @test_ceil_v16f16
+; CHECK-SAME: (<16 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CEIL:%.*]] = tail call <16 x half> @_Z4ceilDv16_Dh(<16 x half> [[ARG]])
+; CHECK-NEXT:    ret <16 x half> [[CEIL]]
+; <16 x half> case: the call to @_Z4ceilDv16_Dh is expected to pass through amdgpu-simplifylib unchanged.
+  %ceil = tail call <16 x half> @_Z4ceilDv16_Dh(<16 x half> %arg)
+  ret <16 x half> %ceil
+}
+
+define float @test_ceil_f32_nobuiltin_callsite(float %arg) {
+; CHECK-LABEL: define float @test_ceil_f32_nobuiltin_callsite
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CEIL:%.*]] = tail call float @_Z4ceilf(float [[ARG]]) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT:    ret float [[CEIL]]
+; The call site carries #0 (nobuiltin); the call is expected to be kept together with its call-site attribute group.
+  %ceil = tail call float @_Z4ceilf(float %arg) #0
+  ret float %ceil
+}
+
+define <2 x float> @test_ceil_v2f32_nobuiltin_callsite(<2 x float> %arg) {
+; CHECK-LABEL: define <2 x float> @test_ceil_v2f32_nobuiltin_callsite
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CEIL:%.*]] = tail call <2 x float> @_Z4ceilDv2_f(<2 x float> [[ARG]]) #[[ATTR3]]
+; CHECK-NEXT:    ret <2 x float> [[CEIL]]
+; <2 x float> variant with #0 (nobuiltin) on the call site; the call is expected to be kept together with its attribute group.
+  %ceil = tail call <2 x float> @_Z4ceilDv2_f(<2 x float> %arg) #0
+  ret <2 x float> %ceil
+}
+
+; The "no-builtins" function attribute should be ignored
+define float @test_ceil_f32_nobuiltins(float %arg) #1 {
+; CHECK-LABEL: define float @test_ceil_f32_nobuiltins
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[CEIL:%.*]] = tail call float @_Z4ceilf(float [[ARG]]) #[[ATTR3]]
+; CHECK-NEXT:    ret float [[CEIL]]
+; Function carries #1 ("no-builtins"). NOTE(review): the call also carries #0 (nobuiltin), so the function attribute is not tested in isolation - confirm intended.
+  %ceil = tail call float @_Z4ceilf(float %arg) #0
+  ret float %ceil
+}
+
+define <2 x float> @test_ceil_v2f32_nobuiltins(<2 x float> %arg) #1 {
+; CHECK-LABEL: define <2 x float> @test_ceil_v2f32_nobuiltins
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[CEIL:%.*]] = tail call <2 x float> @_Z4ceilDv2_f(<2 x float> [[ARG]]) #[[ATTR3]]
+; CHECK-NEXT:    ret <2 x float> [[CEIL]]
+; <2 x float> variant with #1 ("no-builtins") on the function. NOTE(review): the call also carries #0 (nobuiltin), so #1 is not tested in isolation - confirm intended.
+  %ceil = tail call <2 x float> @_Z4ceilDv2_f(<2 x float> %arg) #0
+  ret <2 x float> %ceil
+}
+
+define float @test_ceil_f32_preserve_flags(float %arg) {
+; CHECK-LABEL: define float @test_ceil_f32_preserve_flags
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CEIL:%.*]] = tail call nnan ninf float @_Z4ceilf(float [[ARG]])
+; CHECK-NEXT:    ret float [[CEIL]]
+; The nnan and ninf fast-math flags on the call are expected to still be present in the output.
+  %ceil = tail call nnan ninf float @_Z4ceilf(float %arg)
+  ret float %ceil
+}
+
+define <2 x float> @test_ceil_v2f32_preserve_flags(<2 x float> %arg) {
+; CHECK-LABEL: define <2 x float> @test_ceil_v2f32_preserve_flags
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CEIL:%.*]] = tail call nnan nsz contract <2 x float> @_Z4ceilDv2_f(<2 x float> [[ARG]])
+; CHECK-NEXT:    ret <2 x float> [[CEIL]]
+; Flags are written as "contract nsz nnan" below; the expected output lists the same three flags as "nnan nsz contract".
+  %ceil = tail call contract nsz nnan <2 x float> @_Z4ceilDv2_f(<2 x float> %arg)
+  ret <2 x float> %ceil
+}
+
+define float @test_ceil_f32_preserve_flags_md(float %arg) {
+; CHECK-LABEL: define float @test_ceil_f32_preserve_flags_md
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CEIL:%.*]] = tail call nnan ninf float @_Z4ceilf(float [[ARG]]), !foo !0
+; CHECK-NEXT:    ret float [[CEIL]]
+; Both the nnan/ninf flags and the !foo metadata attachment on the call are expected to still be present in the output.
+  %ceil = tail call nnan ninf float @_Z4ceilf(float %arg), !foo !0
+  ret float %ceil
+}
+
+define <2 x float> @test_ceil_v2f32_preserve_flags_md(<2 x float> %arg) {
+; CHECK-LABEL: define <2 x float> @test_ceil_v2f32_preserve_flags_md
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CEIL:%.*]] = tail call nnan nsz contract <2 x float> @_Z4ceilDv2_f(<2 x float> [[ARG]]), !foo !0
+; CHECK-NEXT:    ret <2 x float> [[CEIL]]
+; <2 x float> variant: the three fast-math flags and the !foo metadata attachment are expected to still be present in the output.
+  %ceil = tail call contract nsz nnan <2 x float> @_Z4ceilDv2_f(<2 x float> %arg), !foo !0
+  ret <2 x float> %ceil
+}
+
+; Test the libm names, which are not recognized OpenCL builtins.
+declare float @ceilf(float) #2
+declare double @ceil(double) #2
+
+define float @test_libm_ceil_f32(float %arg) {
+; CHECK-LABEL: define float @test_libm_ceil_f32
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CEIL:%.*]] = tail call float @ceilf(float [[ARG]])
+; CHECK-NEXT:    ret float [[CEIL]]
+; Calls the unmangled @ceilf rather than a _Z4ceil* overload; the call is expected to pass through unchanged.
+  %ceil = tail call float @ceilf(float %arg)
+  ret float %ceil
+}
+
+define double @test_libm_ceil_f64(double %arg) {
+; CHECK-LABEL: define double @test_libm_ceil_f64
+; CHECK-SAME: (double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CEIL:%.*]] = tail call double @ceil(double [[ARG]])
+; CHECK-NEXT:    ret double [[CEIL]]
+; Calls the unmangled @ceil rather than a _Z4ceil* overload; the call is expected to pass through unchanged.
+  %ceil = tail call double @ceil(double %arg)
+  ret double %ceil
+}
+
+define float @test_ceil_f32_strictfp(float %arg) #3 {
+; CHECK-LABEL: define float @test_ceil_f32_strictfp
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT:    [[CEIL:%.*]] = tail call nnan float @_Z4ceilf(float [[ARG]]) #[[ATTR2]]
+; CHECK-NEXT:    ret float [[CEIL]]
+; Function and call site both carry #3 (strictfp); the nnan call is expected to be kept with the same attribute group.
+  %ceil = tail call nnan float @_Z4ceilf(float %arg) #3
+  ret float %ceil
+}
+
+attributes #0 = { nobuiltin }
+attributes #1 = { "no-builtins" }
+attributes #2 = { nounwind memory(none) }
+attributes #3 = { strictfp }
+
+!0 = !{i32 1234}
Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-copysign.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-copysign.ll
@@ -0,0 +1,278 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
+
+declare float @_Z8copysignff(float, float)
+declare <2 x float> @_Z8copysignDv2_fS_(<2 x float>, <2 x float>)
+declare <3 x float> @_Z8copysignDv3_fS_(<3 x float>, <3 x float>)
+declare <4 x float> @_Z8copysignDv4_fS_(<4 x float>, <4 x float>)
+declare <8 x float> @_Z8copysignDv8_fS_(<8 x float>, <8 x float>)
+declare <16 x float> @_Z8copysignDv16_fS_(<16 x float>, <16 x float>)
+declare double @_Z8copysigndd(double, double)
+declare <2 x double> @_Z8copysignDv2_dS_(<2 x double>, <2 x double>)
+declare <3 x double> @_Z8copysignDv3_dS_(<3 x double>, <3 x double>)
+declare <4 x double> @_Z8copysignDv4_dS_(<4 x double>, <4 x double>)
+declare <8 x double> @_Z8copysignDv8_dS_(<8 x double>, <8 x double>)
+declare <16 x double> @_Z8copysignDv16_dS_(<16 x double>, <16 x double>)
+declare half @_Z8copysignDhDh(half, half)
+declare <2 x half> @_Z8copysignDv2_DhS_(<2 x half>, <2 x half>)
+declare <3 x half> @_Z8copysignDv3_DhS_(<3 x half>, <3 x half>)
+declare <4 x half> @_Z8copysignDv4_DhS_(<4 x half>, <4 x half>)
+declare <8 x half> @_Z8copysignDv8_DhS_(<8 x half>, <8 x half>)
+declare <16 x half> @_Z8copysignDv16_DhS_(<16 x half>, <16 x half>)
+
+define float @test_copysign_f32(float %x, float %y) {
+; CHECK-LABEL: define float @test_copysign_f32
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call float @_Z8copysignff(float [[X]], float [[Y]])
+; CHECK-NEXT:    ret float [[COPYSIGN]]
+; Scalar float case: the call to @_Z8copysignff is expected to pass through amdgpu-simplifylib unchanged.
+  %copysign = tail call float @_Z8copysignff(float %x, float %y)
+  ret float %copysign
+}
+
+define float @test_copysign_f32_nnan(float %x, float %y) {
+; CHECK-LABEL: define float @test_copysign_f32_nnan
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call nnan float @_Z8copysignff(float [[X]], float [[Y]])
+; CHECK-NEXT:    ret float [[COPYSIGN]]
+; Scalar float case with the nnan flag: the call and its flag are expected to still be present in the output.
+  %copysign = tail call nnan float @_Z8copysignff(float %x, float %y)
+  ret float %copysign
+}
+
+define <2 x float> @test_copysign_v2f32(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: define <2 x float> @test_copysign_v2f32
+; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) {
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <2 x float> @_Z8copysignDv2_fS_(<2 x float> [[X]], <2 x float> [[Y]])
+; CHECK-NEXT:    ret <2 x float> [[COPYSIGN]]
+; <2 x float> case: the call to @_Z8copysignDv2_fS_ is expected to pass through amdgpu-simplifylib unchanged.
+  %copysign = tail call <2 x float> @_Z8copysignDv2_fS_(<2 x float> %x, <2 x float> %y)
+  ret <2 x float> %copysign
+}
+
+define <3 x float> @test_copysign_v3f32(<3 x float> %x, <3 x float> %y) {
+; CHECK-LABEL: define <3 x float> @test_copysign_v3f32
+; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) {
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <3 x float> @_Z8copysignDv3_fS_(<3 x float> [[X]], <3 x float> [[Y]])
+; CHECK-NEXT:    ret <3 x float> [[COPYSIGN]]
+; <3 x float> case: the call to @_Z8copysignDv3_fS_ is expected to pass through amdgpu-simplifylib unchanged.
+  %copysign = tail call <3 x float> @_Z8copysignDv3_fS_(<3 x float> %x, <3 x float> %y)
+  ret <3 x float> %copysign
+}
+
+define <4 x float> @test_copysign_v4f32(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: define <4 x float> @test_copysign_v4f32
+; CHECK-SAME: (<4 x float> [[X:%.*]], <4 x float> [[Y:%.*]]) {
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <4 x float> @_Z8copysignDv4_fS_(<4 x float> [[X]], <4 x float> [[Y]])
+; CHECK-NEXT:    ret <4 x float> [[COPYSIGN]]
+; <4 x float> case: the call to @_Z8copysignDv4_fS_ is expected to pass through amdgpu-simplifylib unchanged.
+  %copysign = tail call <4 x float> @_Z8copysignDv4_fS_(<4 x float> %x, <4 x float> %y)
+  ret <4 x float> %copysign
+}
+
+define <8 x float> @test_copysign_v8f32(<8 x float> %x, <8 x float> %y) {
+; CHECK-LABEL: define <8 x float> @test_copysign_v8f32
+; CHECK-SAME: (<8 x float> [[X:%.*]], <8 x float> [[Y:%.*]]) {
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <8 x float> @_Z8copysignDv8_fS_(<8 x float> [[X]], <8 x float> [[Y]])
+; CHECK-NEXT:    ret <8 x float> [[COPYSIGN]]
+; <8 x float> case: the call to @_Z8copysignDv8_fS_ is expected to pass through amdgpu-simplifylib unchanged.
+  %copysign = tail call <8 x float> @_Z8copysignDv8_fS_(<8 x float> %x, <8 x float> %y)
+  ret <8 x float> %copysign
+}
+
+define <16 x float> @test_copysign_v16f32(<16 x float> %x, <16 x float> %y) {
+; CHECK-LABEL: define <16 x float> @test_copysign_v16f32
+; CHECK-SAME: (<16 x float> [[X:%.*]], <16 x float> [[Y:%.*]]) {
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <16 x float> @_Z8copysignDv16_fS_(<16 x float> [[X]], <16 x float> [[Y]])
+; CHECK-NEXT:    ret <16 x float> [[COPYSIGN]]
+; <16 x float> case: the call to @_Z8copysignDv16_fS_ is expected to pass through amdgpu-simplifylib unchanged.
+  %copysign = tail call <16 x float> @_Z8copysignDv16_fS_(<16 x float> %x, <16 x float> %y)
+  ret <16 x float> %copysign
+}
+
+define double @test_copysign_f64(double %x, double %y) {
+; CHECK-LABEL: define double @test_copysign_f64
+; CHECK-SAME: (double [[X:%.*]], double [[Y:%.*]]) {
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call double @_Z8copysigndd(double [[X]], double [[Y]])
+; CHECK-NEXT:    ret double [[COPYSIGN]]
+; Scalar double case: the call to @_Z8copysigndd is expected to pass through amdgpu-simplifylib unchanged.
+  %copysign = tail call double @_Z8copysigndd(double %x, double %y)
+  ret double %copysign
+}
+
+define <2 x double> @test_copysign_v2f64(<2 x double> %x, <2 x double> %y) {
+; CHECK-LABEL: define <2 x double> @test_copysign_v2f64
+; CHECK-SAME: (<2 x double> [[X:%.*]], <2 x double> [[Y:%.*]]) {
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <2 x double> @_Z8copysignDv2_dS_(<2 x double> [[X]], <2 x double> [[Y]])
+; CHECK-NEXT:    ret <2 x double> [[COPYSIGN]]
+; <2 x double> case: the call to @_Z8copysignDv2_dS_ is expected to pass through amdgpu-simplifylib unchanged.
+  %copysign = tail call <2 x double> @_Z8copysignDv2_dS_(<2 x double> %x, <2 x double> %y)
+  ret <2 x double> %copysign
+}
+
+define <3 x double> @test_copysign_v3f64(<3 x double> %x, <3 x double> %y) {
+; CHECK-LABEL: define <3 x double> @test_copysign_v3f64
+; CHECK-SAME: (<3 x double> [[X:%.*]], <3 x double> [[Y:%.*]]) {
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <3 x double> @_Z8copysignDv3_dS_(<3 x double> [[X]], <3 x double> [[Y]])
+; CHECK-NEXT:    ret <3 x double> [[COPYSIGN]]
+; <3 x double> case: the call to @_Z8copysignDv3_dS_ is expected to pass through amdgpu-simplifylib unchanged.
+  %copysign = tail call <3 x double> @_Z8copysignDv3_dS_(<3 x double> %x, <3 x double> %y)
+  ret <3 x double> %copysign
+}
+
+define <4 x double> @test_copysign_v4f64(<4 x double> %x, <4 x double> %y) {
+; CHECK-LABEL: define <4 x double> @test_copysign_v4f64
+; CHECK-SAME: (<4 x double> [[X:%.*]], <4 x double> [[Y:%.*]]) {
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <4 x double> @_Z8copysignDv4_dS_(<4 x double> [[X]], <4 x double> [[Y]])
+; CHECK-NEXT:    ret <4 x double> [[COPYSIGN]]
+; <4 x double> case: the call to @_Z8copysignDv4_dS_ is expected to pass through amdgpu-simplifylib unchanged.
+  %copysign = tail call <4 x double> @_Z8copysignDv4_dS_(<4 x double> %x, <4 x double> %y)
+  ret <4 x double> %copysign
+}
+
+define <8 x double> @test_copysign_v8f64(<8 x double> %x, <8 x double> %y) {
+; CHECK-LABEL: define <8 x double> @test_copysign_v8f64
+; CHECK-SAME: (<8 x double> [[X:%.*]], <8 x double> [[Y:%.*]]) {
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <8 x double> @_Z8copysignDv8_dS_(<8 x double> [[X]], <8 x double> [[Y]])
+; CHECK-NEXT:    ret <8 x double> [[COPYSIGN]]
+; <8 x double> case: the call to @_Z8copysignDv8_dS_ is expected to pass through amdgpu-simplifylib unchanged.
+  %copysign = tail call <8 x double> @_Z8copysignDv8_dS_(<8 x double> %x, <8 x double> %y)
+  ret <8 x double> %copysign
+}
+
+define <16 x double> @test_copysign_v16f64(<16 x double> %x, <16 x double> %y) {
+; CHECK-LABEL: define <16 x double> @test_copysign_v16f64
+; CHECK-SAME: (<16 x double> [[X:%.*]], <16 x double> [[Y:%.*]]) {
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <16 x double> @_Z8copysignDv16_dS_(<16 x double> [[X]], <16 x double> [[Y]])
+; CHECK-NEXT:    ret <16 x double> [[COPYSIGN]]
+; <16 x double> case: the call to @_Z8copysignDv16_dS_ is expected to pass through amdgpu-simplifylib unchanged.
+  %copysign = tail call <16 x double> @_Z8copysignDv16_dS_(<16 x double> %x, <16 x double> %y)
+  ret <16 x double> %copysign
+}
+
+define half @test_copysign_f16(half %x, half %y) {
+; CHECK-LABEL: define half @test_copysign_f16
+; CHECK-SAME: (half [[X:%.*]], half [[Y:%.*]]) {
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call half @_Z8copysignDhDh(half [[X]], half [[Y]])
+; CHECK-NEXT:    ret half [[COPYSIGN]]
+; Scalar half case: the call to @_Z8copysignDhDh is expected to pass through amdgpu-simplifylib unchanged.
+  %copysign = tail call half @_Z8copysignDhDh(half %x, half %y)
+  ret half %copysign
+}
+
+define <2 x half> @test_copysign_v2f16(<2 x half> %x, <2 x half> %y) {
+; CHECK-LABEL: define <2 x half> @test_copysign_v2f16
+; CHECK-SAME: (<2 x half> [[X:%.*]], <2 x half> [[Y:%.*]]) {
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <2 x half> @_Z8copysignDv2_DhS_(<2 x half> [[X]], <2 x half> [[Y]])
+; CHECK-NEXT:    ret <2 x half> [[COPYSIGN]]
+; <2 x half> case: the call to @_Z8copysignDv2_DhS_ is expected to pass through amdgpu-simplifylib unchanged.
+  %copysign = tail call <2 x half> @_Z8copysignDv2_DhS_(<2 x half> %x, <2 x half> %y)
+  ret <2 x half> %copysign
+}
+
+define <3 x half> @test_copysign_v3f16(<3 x half> %x, <3 x half> %y) {
+; CHECK-LABEL: define <3 x half> @test_copysign_v3f16
+; CHECK-SAME: (<3 x half> [[X:%.*]], <3 x half> [[Y:%.*]]) {
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <3 x half> @_Z8copysignDv3_DhS_(<3 x half> [[X]], <3 x half> [[Y]])
+; CHECK-NEXT:    ret <3 x half> [[COPYSIGN]]
+; <3 x half> case: the call to @_Z8copysignDv3_DhS_ is expected to pass through amdgpu-simplifylib unchanged.
+  %copysign = tail call <3 x half> @_Z8copysignDv3_DhS_(<3 x half> %x, <3 x half> %y)
+  ret <3 x half> %copysign
+}
+
+define <4 x half> @test_copysign_v4f16(<4 x half> %x, <4 x half> %y) {
+; CHECK-LABEL: define <4 x half> @test_copysign_v4f16
+; CHECK-SAME: (<4 x half> [[X:%.*]], <4 x half> [[Y:%.*]]) {
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <4 x half> @_Z8copysignDv4_DhS_(<4 x half> [[X]], <4 x half> [[Y]])
+; CHECK-NEXT:    ret <4 x half> [[COPYSIGN]]
+; <4 x half> case: the call to @_Z8copysignDv4_DhS_ is expected to pass through amdgpu-simplifylib unchanged.
+  %copysign = tail call <4 x half> @_Z8copysignDv4_DhS_(<4 x half> %x, <4 x half> %y)
+  ret <4 x half> %copysign
+}
+
+define <8 x half> @test_copysign_v8f16(<8 x half> %x, <8 x half> %y) {
+; CHECK-LABEL: define <8 x half> @test_copysign_v8f16
+; CHECK-SAME: (<8 x half> [[X:%.*]], <8 x half> [[Y:%.*]]) {
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <8 x half> @_Z8copysignDv8_DhS_(<8 x half> [[X]], <8 x half> [[Y]])
+; CHECK-NEXT:    ret <8 x half> [[COPYSIGN]]
+; <8 x half> case: the call to @_Z8copysignDv8_DhS_ is expected to pass through amdgpu-simplifylib unchanged.
+  %copysign = tail call <8 x half> @_Z8copysignDv8_DhS_(<8 x half> %x, <8 x half> %y)
+  ret <8 x half> %copysign
+}
+
+define <16 x half> @test_copysign_v16f16(<16 x half> %x, <16 x half> %y) {
+; CHECK-LABEL: define <16 x half> @test_copysign_v16f16
+; CHECK-SAME: (<16 x half> [[X:%.*]], <16 x half> [[Y:%.*]]) {
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <16 x half> @_Z8copysignDv16_DhS_(<16 x half> [[X]], <16 x half> [[Y]])
+; CHECK-NEXT:    ret <16 x half> [[COPYSIGN]]
+; <16 x half> case: the call to @_Z8copysignDv16_DhS_ is expected to pass through amdgpu-simplifylib unchanged.
+  %copysign = tail call <16 x half> @_Z8copysignDv16_DhS_(<16 x half> %x, <16 x half> %y)
+  ret <16 x half> %copysign
+}
+
+define float @test_copysign_f32_minsize(float %x, float %y) #0 {
+; CHECK-LABEL: define float @test_copysign_f32_minsize
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call float @_Z8copysignff(float [[X]], float [[Y]])
+; CHECK-NEXT:    ret float [[COPYSIGN]]
+; The enclosing function carries #0 (minsize); the call to @_Z8copysignff is expected to pass through unchanged.
+  %copysign = tail call float @_Z8copysignff(float %x, float %y)
+  ret float %copysign
+}
+
+define float @test_copysign_f32_nnan_minsize(float %x, float %y) #0 {
+; CHECK-LABEL: define float @test_copysign_f32_nnan_minsize
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call nnan float @_Z8copysignff(float [[X]], float [[Y]])
+; CHECK-NEXT:    ret float [[COPYSIGN]]
+; The enclosing function carries #0 (minsize) and the call has nnan; the call and its flag are expected to be kept.
+  %copysign = tail call nnan float @_Z8copysignff(float %x, float %y)
+  ret float %copysign
+}
+
+define float @test_copysign_f32_noinline(float %x, float %y) {
+; CHECK-LABEL: define float @test_copysign_f32_noinline
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call float @_Z8copysignff(float [[X]], float [[Y]]) #[[ATTR2:[0-9]+]]
+; CHECK-NEXT:    ret float [[COPYSIGN]]
+; The call site carries #1 (noinline); the call is expected to be kept together with its call-site attribute group.
+  %copysign = tail call float @_Z8copysignff(float %x, float %y) #1
+  ret float %copysign
+}
+
+define float @test_copysign_f32_nnan_noinline(float %x, float %y) {
+; CHECK-LABEL: define float @test_copysign_f32_nnan_noinline
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call nnan float @_Z8copysignff(float [[X]], float [[Y]]) #[[ATTR2]]
+; CHECK-NEXT:    ret float [[COPYSIGN]]
+; The call site carries #1 (noinline) plus nnan; the call is expected to be kept with both the flag and the attribute group.
+  %copysign = tail call nnan float @_Z8copysignff(float %x, float %y) #1
+  ret float %copysign
+}
+
+define float @test_copysign_f32_strictfp(float %x, float %y) #2 {
+; CHECK-LABEL: define float @test_copysign_f32_strictfp
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call nnan nsz float @_Z8copysignff(float [[X]], float [[Y]]) #[[ATTR1]]
+; CHECK-NEXT:    ret float [[COPYSIGN]]
+; Function and call site both carry #2 (strictfp); flags written "nsz nnan" below are expected in the output as "nnan nsz".
+  %copysign = tail call nsz nnan float @_Z8copysignff(float %x, float %y) #2
+  ret float %copysign
+}
+
+define float @test_copysign_f32_fast_nobuiltin(float %x, float %y) {
+; CHECK-LABEL: define float @test_copysign_f32_fast_nobuiltin
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call fast float @_Z8copysignff(float [[X]], float [[Y]]) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT:    ret float [[COPYSIGN]]
+; The call is marked fast and carries #3 (nobuiltin); it is expected to be kept with both the flag and the attribute group.
+  %copysign = tail call fast float @_Z8copysignff(float %x, float %y) #3
+  ret float %copysign
+}
+
+attributes #0 = { minsize }
+attributes #1 = { noinline }
+attributes #2 = { strictfp }
+attributes #3 = { nobuiltin }
Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-exp.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-exp.ll
@@ -0,0 +1,561 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
+
+declare float @_Z3expf(float)
+declare <2 x float> @_Z3expDv2_f(<2 x float>)
+declare <3 x float> @_Z3expDv3_f(<3 x float>)
+declare <4 x float> @_Z3expDv4_f(<4 x float>)
+declare <8 x float> @_Z3expDv8_f(<8 x float>)
+declare <16 x float> @_Z3expDv16_f(<16 x float>)
+
+declare double @_Z3expd(double)
+declare <2 x double> @_Z3expDv2_d(<2 x double>)
+declare <3 x double> @_Z3expDv3_d(<3 x double>)
+declare <4 x double> @_Z3expDv4_d(<4 x double>)
+declare <8 x double> @_Z3expDv8_d(<8 x double>)
+declare <16 x double> @_Z3expDv16_d(<16 x double>)
+
+declare half @_Z3expDh(half)
+declare <2 x half> @_Z3expDv2_Dh(<2 x half>)
+declare <3 x half> @_Z3expDv3_Dh(<3 x half>)
+declare <4 x half> @_Z3expDv4_Dh(<4 x half>)
+declare <8 x half> @_Z3expDv8_Dh(<8 x half>)
+declare <16 x half> @_Z3expDv16_Dh(<16 x half>)
+
+define float @test_exp_f32(float %arg) { ; float exp call tagged !fpmath !0 (float 3.0); the assertions expect it unchanged
+; CHECK-LABEL: define float @test_exp_f32
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call float @_Z3expf(float [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret float [[EXP]]
+;
+  %exp = tail call float @_Z3expf(float %arg), !fpmath !0
+  ret float %exp
+}
+
+define <2 x float> @test_exp_v2f32(<2 x float> %arg) { ; <2 x float> exp call tagged !fpmath !0 (float 3.0); the assertions expect it unchanged
+; CHECK-LABEL: define <2 x float> @test_exp_v2f32
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call <2 x float> @_Z3expDv2_f(<2 x float> [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret <2 x float> [[EXP]]
+;
+  %exp = tail call <2 x float> @_Z3expDv2_f(<2 x float> %arg), !fpmath !0
+  ret <2 x float> %exp
+}
+
+define <3 x float> @test_exp_v3f32(<3 x float> %arg) { ; <3 x float> exp call tagged !fpmath !0 (float 3.0); the assertions expect it unchanged
+; CHECK-LABEL: define <3 x float> @test_exp_v3f32
+; CHECK-SAME: (<3 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call <3 x float> @_Z3expDv3_f(<3 x float> [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret <3 x float> [[EXP]]
+;
+  %exp = tail call <3 x float> @_Z3expDv3_f(<3 x float> %arg), !fpmath !0
+  ret <3 x float> %exp
+}
+
+define <4 x float> @test_exp_v4f32(<4 x float> %arg) { ; <4 x float> exp call tagged !fpmath !0 (float 3.0); the assertions expect it unchanged
+; CHECK-LABEL: define <4 x float> @test_exp_v4f32
+; CHECK-SAME: (<4 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call <4 x float> @_Z3expDv4_f(<4 x float> [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret <4 x float> [[EXP]]
+;
+  %exp = tail call <4 x float> @_Z3expDv4_f(<4 x float> %arg), !fpmath !0
+  ret <4 x float> %exp
+}
+
+define <8 x float> @test_exp_v8f32(<8 x float> %arg) { ; <8 x float> exp call tagged !fpmath !0 (float 3.0); the assertions expect it unchanged
+; CHECK-LABEL: define <8 x float> @test_exp_v8f32
+; CHECK-SAME: (<8 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call <8 x float> @_Z3expDv8_f(<8 x float> [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret <8 x float> [[EXP]]
+;
+  %exp = tail call <8 x float> @_Z3expDv8_f(<8 x float> %arg), !fpmath !0
+  ret <8 x float> %exp
+}
+
+define <16 x float> @test_exp_v16f32(<16 x float> %arg) { ; <16 x float> exp call tagged !fpmath !0 (float 3.0); the assertions expect it unchanged
+; CHECK-LABEL: define <16 x float> @test_exp_v16f32
+; CHECK-SAME: (<16 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call <16 x float> @_Z3expDv16_f(<16 x float> [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret <16 x float> [[EXP]]
+;
+  %exp = tail call <16 x float> @_Z3expDv16_f(<16 x float> %arg), !fpmath !0
+  ret <16 x float> %exp
+}
+
+define float @test_exp_cr_f32(float %arg) { ; "cr" variant: float exp call with no !fpmath metadata; the assertions expect it unchanged
+; CHECK-LABEL: define float @test_exp_cr_f32
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call float @_Z3expf(float [[ARG]])
+; CHECK-NEXT:    ret float [[EXP]]
+;
+  %exp = tail call float @_Z3expf(float %arg)
+  ret float %exp
+}
+
+define <2 x float> @test_exp_cr_v2f32(<2 x float> %arg) { ; "cr" variant: <2 x float> exp call with no !fpmath metadata; the assertions expect it unchanged
+; CHECK-LABEL: define <2 x float> @test_exp_cr_v2f32
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call <2 x float> @_Z3expDv2_f(<2 x float> [[ARG]])
+; CHECK-NEXT:    ret <2 x float> [[EXP]]
+;
+  %exp = tail call <2 x float> @_Z3expDv2_f(<2 x float> %arg)
+  ret <2 x float> %exp
+}
+
+define <3 x float> @test_exp_cr_v3f32(<3 x float> %arg) { ; "cr" variant: <3 x float> exp call with no !fpmath metadata; the assertions expect it unchanged
+; CHECK-LABEL: define <3 x float> @test_exp_cr_v3f32
+; CHECK-SAME: (<3 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call <3 x float> @_Z3expDv3_f(<3 x float> [[ARG]])
+; CHECK-NEXT:    ret <3 x float> [[EXP]]
+;
+  %exp = tail call <3 x float> @_Z3expDv3_f(<3 x float> %arg)
+  ret <3 x float> %exp
+}
+
+define <4 x float> @test_exp_cr_v4f32(<4 x float> %arg) { ; "cr" variant: <4 x float> exp call with no !fpmath metadata; the assertions expect it unchanged
+; CHECK-LABEL: define <4 x float> @test_exp_cr_v4f32
+; CHECK-SAME: (<4 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call <4 x float> @_Z3expDv4_f(<4 x float> [[ARG]])
+; CHECK-NEXT:    ret <4 x float> [[EXP]]
+;
+  %exp = tail call <4 x float> @_Z3expDv4_f(<4 x float> %arg)
+  ret <4 x float> %exp
+}
+
+define <8 x float> @test_exp_cr_v8f32(<8 x float> %arg) { ; "cr" variant: <8 x float> exp call with no !fpmath metadata; the assertions expect it unchanged
+; CHECK-LABEL: define <8 x float> @test_exp_cr_v8f32
+; CHECK-SAME: (<8 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call <8 x float> @_Z3expDv8_f(<8 x float> [[ARG]])
+; CHECK-NEXT:    ret <8 x float> [[EXP]]
+;
+  %exp = tail call <8 x float> @_Z3expDv8_f(<8 x float> %arg)
+  ret <8 x float> %exp
+}
+
+define <16 x float> @test_exp_cr_v16f32(<16 x float> %arg) { ; "cr" variant: <16 x float> exp call with no !fpmath metadata; the assertions expect it unchanged
+; CHECK-LABEL: define <16 x float> @test_exp_cr_v16f32
+; CHECK-SAME: (<16 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call <16 x float> @_Z3expDv16_f(<16 x float> [[ARG]])
+; CHECK-NEXT:    ret <16 x float> [[EXP]]
+;
+  %exp = tail call <16 x float> @_Z3expDv16_f(<16 x float> %arg)
+  ret <16 x float> %exp
+}
+
+define double @test_exp_f64(double %arg) { ; double exp call with no fast-math flags or metadata; the assertions expect it unchanged
+; CHECK-LABEL: define double @test_exp_f64
+; CHECK-SAME: (double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call double @_Z3expd(double [[ARG]])
+; CHECK-NEXT:    ret double [[EXP]]
+;
+  %exp = tail call double @_Z3expd(double %arg)
+  ret double %exp
+}
+
+define <2 x double> @test_exp_v2f64(<2 x double> %arg) { ; <2 x double> exp call with no fast-math flags or metadata; the assertions expect it unchanged
+; CHECK-LABEL: define <2 x double> @test_exp_v2f64
+; CHECK-SAME: (<2 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call <2 x double> @_Z3expDv2_d(<2 x double> [[ARG]])
+; CHECK-NEXT:    ret <2 x double> [[EXP]]
+;
+  %exp = tail call <2 x double> @_Z3expDv2_d(<2 x double> %arg)
+  ret <2 x double> %exp
+}
+
+define <3 x double> @test_exp_v3f64(<3 x double> %arg) { ; <3 x double> exp call with no fast-math flags or metadata; the assertions expect it unchanged
+; CHECK-LABEL: define <3 x double> @test_exp_v3f64
+; CHECK-SAME: (<3 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call <3 x double> @_Z3expDv3_d(<3 x double> [[ARG]])
+; CHECK-NEXT:    ret <3 x double> [[EXP]]
+;
+  %exp = tail call <3 x double> @_Z3expDv3_d(<3 x double> %arg)
+  ret <3 x double> %exp
+}
+
+define <4 x double> @test_exp_v4f64(<4 x double> %arg) { ; <4 x double> exp call with no fast-math flags or metadata; the assertions expect it unchanged
+; CHECK-LABEL: define <4 x double> @test_exp_v4f64
+; CHECK-SAME: (<4 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call <4 x double> @_Z3expDv4_d(<4 x double> [[ARG]])
+; CHECK-NEXT:    ret <4 x double> [[EXP]]
+;
+  %exp = tail call <4 x double> @_Z3expDv4_d(<4 x double> %arg)
+  ret <4 x double> %exp
+}
+
+define <8 x double> @test_exp_v8f64(<8 x double> %arg) { ; <8 x double> exp call with no fast-math flags or metadata; the assertions expect it unchanged
+; CHECK-LABEL: define <8 x double> @test_exp_v8f64
+; CHECK-SAME: (<8 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call <8 x double> @_Z3expDv8_d(<8 x double> [[ARG]])
+; CHECK-NEXT:    ret <8 x double> [[EXP]]
+;
+  %exp = tail call <8 x double> @_Z3expDv8_d(<8 x double> %arg)
+  ret <8 x double> %exp
+}
+
+define <16 x double> @test_exp_v16f64(<16 x double> %arg) { ; <16 x double> exp call with no fast-math flags or metadata; the assertions expect it unchanged
+; CHECK-LABEL: define <16 x double> @test_exp_v16f64
+; CHECK-SAME: (<16 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call <16 x double> @_Z3expDv16_d(<16 x double> [[ARG]])
+; CHECK-NEXT:    ret <16 x double> [[EXP]]
+;
+  %exp = tail call <16 x double> @_Z3expDv16_d(<16 x double> %arg)
+  ret <16 x double> %exp
+}
+
+define half @test_exp_f16(half %arg) { ; half exp call with no fast-math flags or metadata; the assertions expect it unchanged
+; CHECK-LABEL: define half @test_exp_f16
+; CHECK-SAME: (half [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call half @_Z3expDh(half [[ARG]])
+; CHECK-NEXT:    ret half [[EXP]]
+;
+  %exp = tail call half @_Z3expDh(half %arg)
+  ret half %exp
+}
+
+define half @test_exp_f16_fast(half %arg) { ; half exp call with the fast flag; the assertions expect it unchanged
+; CHECK-LABEL: define half @test_exp_f16_fast
+; CHECK-SAME: (half [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call fast half @_Z3expDh(half [[ARG]])
+; CHECK-NEXT:    ret half [[EXP]]
+;
+  %exp = tail call fast half @_Z3expDh(half %arg)
+  ret half %exp
+}
+
+define <2 x half> @test_exp_v2f16(<2 x half> %arg) { ; <2 x half> exp call with no fast-math flags or metadata; the assertions expect it unchanged
+; CHECK-LABEL: define <2 x half> @test_exp_v2f16
+; CHECK-SAME: (<2 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call <2 x half> @_Z3expDv2_Dh(<2 x half> [[ARG]])
+; CHECK-NEXT:    ret <2 x half> [[EXP]]
+;
+  %exp = tail call <2 x half> @_Z3expDv2_Dh(<2 x half> %arg)
+  ret <2 x half> %exp
+}
+
+define <3 x half> @test_exp_v3f16(<3 x half> %arg) { ; <3 x half> exp call with no fast-math flags or metadata; the assertions expect it unchanged
+; CHECK-LABEL: define <3 x half> @test_exp_v3f16
+; CHECK-SAME: (<3 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call <3 x half> @_Z3expDv3_Dh(<3 x half> [[ARG]])
+; CHECK-NEXT:    ret <3 x half> [[EXP]]
+;
+  %exp = tail call <3 x half> @_Z3expDv3_Dh(<3 x half> %arg)
+  ret <3 x half> %exp
+}
+
+define <4 x half> @test_exp_v4f16(<4 x half> %arg) { ; <4 x half> exp call with no fast-math flags or metadata; the assertions expect it unchanged
+; CHECK-LABEL: define <4 x half> @test_exp_v4f16
+; CHECK-SAME: (<4 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call <4 x half> @_Z3expDv4_Dh(<4 x half> [[ARG]])
+; CHECK-NEXT:    ret <4 x half> [[EXP]]
+;
+  %exp = tail call <4 x half> @_Z3expDv4_Dh(<4 x half> %arg)
+  ret <4 x half> %exp
+}
+
+define <8 x half> @test_exp_v8f16(<8 x half> %arg) { ; <8 x half> exp call with no fast-math flags or metadata; the assertions expect it unchanged
+; CHECK-LABEL: define <8 x half> @test_exp_v8f16
+; CHECK-SAME: (<8 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call <8 x half> @_Z3expDv8_Dh(<8 x half> [[ARG]])
+; CHECK-NEXT:    ret <8 x half> [[EXP]]
+;
+  %exp = tail call <8 x half> @_Z3expDv8_Dh(<8 x half> %arg)
+  ret <8 x half> %exp
+}
+
+define <16 x half> @test_exp_v16f16(<16 x half> %arg) { ; <16 x half> exp call with no fast-math flags or metadata; the assertions expect it unchanged
+; CHECK-LABEL: define <16 x half> @test_exp_v16f16
+; CHECK-SAME: (<16 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call <16 x half> @_Z3expDv16_Dh(<16 x half> [[ARG]])
+; CHECK-NEXT:    ret <16 x half> [[EXP]]
+;
+  %exp = tail call <16 x half> @_Z3expDv16_Dh(<16 x half> %arg)
+  ret <16 x half> %exp
+}
+
+define float @test_exp_f32_nobuiltin_callsite(float %arg) { ; float exp call with callsite attribute #0 (nobuiltin) and !fpmath !0; the assertions expect it unchanged
+; CHECK-LABEL: define float @test_exp_f32_nobuiltin_callsite
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call float @_Z3expf(float [[ARG]]) #[[ATTR5:[0-9]+]], !fpmath !0
+; CHECK-NEXT:    ret float [[EXP]]
+;
+  %exp = tail call float @_Z3expf(float %arg) #0, !fpmath !0
+  ret float %exp
+}
+
+define <2 x float> @test_exp_v2f32_nobuiltin_callsite(<2 x float> %arg) { ; <2 x float> exp call with callsite attribute #0 (nobuiltin) and !fpmath !0; the assertions expect it unchanged
+; CHECK-LABEL: define <2 x float> @test_exp_v2f32_nobuiltin_callsite
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call <2 x float> @_Z3expDv2_f(<2 x float> [[ARG]]) #[[ATTR5]], !fpmath !0
+; CHECK-NEXT:    ret <2 x float> [[EXP]]
+;
+  %exp = tail call <2 x float> @_Z3expDv2_f(<2 x float> %arg) #0, !fpmath !0
+  ret <2 x float> %exp
+}
+
+define float @test_exp_cr_f32_nobuiltin_callsite(float %arg) { ; "cr" variant: float exp call with callsite attribute #0 (nobuiltin) and no !fpmath; the assertions expect it unchanged
+; CHECK-LABEL: define float @test_exp_cr_f32_nobuiltin_callsite
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call float @_Z3expf(float [[ARG]]) #[[ATTR5]]
+; CHECK-NEXT:    ret float [[EXP]]
+;
+  %exp = tail call float @_Z3expf(float %arg) #0
+  ret float %exp
+}
+
+define <2 x float> @test_exp_cr_v2f32_nobuiltin_callsite(<2 x float> %arg) { ; "cr" variant: <2 x float> exp call with callsite attribute #0 (nobuiltin) and no !fpmath; the assertions expect it unchanged
+; CHECK-LABEL: define <2 x float> @test_exp_cr_v2f32_nobuiltin_callsite
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call <2 x float> @_Z3expDv2_f(<2 x float> [[ARG]]) #[[ATTR5]]
+; CHECK-NEXT:    ret <2 x float> [[EXP]]
+;
+  %exp = tail call <2 x float> @_Z3expDv2_f(<2 x float> %arg) #0
+  ret <2 x float> %exp
+}
+
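+; The "no-builtins" function attribute (#1) should be ignored. Note that the calls in these tests also carry the callsite nobuiltin attribute (#0).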
+define float @test_exp_f32_nobuiltins(float %arg) #1 { ; function has #1 ("no-builtins"); the float exp call also carries callsite #0 (nobuiltin) and !fpmath !0; the assertions expect it unchanged
+; CHECK-LABEL: define float @test_exp_f32_nobuiltins
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call float @_Z3expf(float [[ARG]]) #[[ATTR5]], !fpmath !0
+; CHECK-NEXT:    ret float [[EXP]]
+;
+  %exp = tail call float @_Z3expf(float %arg) #0, !fpmath !0
+  ret float %exp
+}
+
+define <2 x float> @test_exp_v2f32_nobuiltins(<2 x float> %arg) #1 { ; function has #1 ("no-builtins"); the <2 x float> exp call also carries callsite #0 (nobuiltin) and !fpmath !0; the assertions expect it unchanged
+; CHECK-LABEL: define <2 x float> @test_exp_v2f32_nobuiltins
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call <2 x float> @_Z3expDv2_f(<2 x float> [[ARG]]) #[[ATTR5]], !fpmath !0
+; CHECK-NEXT:    ret <2 x float> [[EXP]]
+;
+  %exp = tail call <2 x float> @_Z3expDv2_f(<2 x float> %arg) #0, !fpmath !0
+  ret <2 x float> %exp
+}
+
+define float @test_exp_cr_f32_nobuiltins(float %arg) #1 { ; function has #1 ("no-builtins"); the float exp call also carries callsite #0 (nobuiltin), no !fpmath; the assertions expect it unchanged
+; CHECK-LABEL: define float @test_exp_cr_f32_nobuiltins
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call float @_Z3expf(float [[ARG]]) #[[ATTR5]]
+; CHECK-NEXT:    ret float [[EXP]]
+;
+  %exp = tail call float @_Z3expf(float %arg) #0
+  ret float %exp
+}
+
+define <2 x float> @test_exp_cr_v2f32_nobuiltins(<2 x float> %arg) #1 { ; function has #1 ("no-builtins"); the <2 x float> exp call also carries callsite #0 (nobuiltin), no !fpmath; the assertions expect it unchanged
+; CHECK-LABEL: define <2 x float> @test_exp_cr_v2f32_nobuiltins
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call <2 x float> @_Z3expDv2_f(<2 x float> [[ARG]]) #[[ATTR5]]
+; CHECK-NEXT:    ret <2 x float> [[EXP]]
+;
+  %exp = tail call <2 x float> @_Z3expDv2_f(<2 x float> %arg) #0
+  ret <2 x float> %exp
+}
+
+define float @test_exp_f32_preserve_flags(float %arg) { ; float exp call with nnan ninf and !fpmath !0; the assertions expect the flags and metadata to remain on the call
+; CHECK-LABEL: define float @test_exp_f32_preserve_flags
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call nnan ninf float @_Z3expf(float [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret float [[EXP]]
+;
+  %exp = tail call nnan ninf float @_Z3expf(float %arg), !fpmath !0
+  ret float %exp
+}
+
+define <2 x float> @test_exp_v2f32_preserve_flags(<2 x float> %arg) { ; input flags are written "contract nsz nnan"; the assertions expect them back as "nnan nsz contract" with !fpmath !0 kept
+; CHECK-LABEL: define <2 x float> @test_exp_v2f32_preserve_flags
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call nnan nsz contract <2 x float> @_Z3expDv2_f(<2 x float> [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret <2 x float> [[EXP]]
+;
+  %exp = tail call contract nsz nnan <2 x float> @_Z3expDv2_f(<2 x float> %arg), !fpmath !0
+  ret <2 x float> %exp
+}
+
+define float @test_exp_f32_preserve_flags_md(float %arg) { ; float exp call with nnan ninf, !fpmath !0 and extra metadata !foo !1; the assertions expect flags and both metadata attachments kept
+; CHECK-LABEL: define float @test_exp_f32_preserve_flags_md
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call nnan ninf float @_Z3expf(float [[ARG]]), !fpmath !0, !foo !1
+; CHECK-NEXT:    ret float [[EXP]]
+;
+  %exp = tail call nnan ninf float @_Z3expf(float %arg), !fpmath !0, !foo !1
+  ret float %exp
+}
+
+define <2 x float> @test_exp_v2f32_preserve_flags_md(<2 x float> %arg) { ; input flags "contract nsz nnan" with !fpmath !0 and !foo !1; the assertions expect "nnan nsz contract" and both metadata attachments kept
+; CHECK-LABEL: define <2 x float> @test_exp_v2f32_preserve_flags_md
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call nnan nsz contract <2 x float> @_Z3expDv2_f(<2 x float> [[ARG]]), !fpmath !0, !foo !1
+; CHECK-NEXT:    ret <2 x float> [[EXP]]
+;
+  %exp = tail call contract nsz nnan <2 x float> @_Z3expDv2_f(<2 x float> %arg), !fpmath !0, !foo !1
+  ret <2 x float> %exp
+}
+
+define float @test_exp_cr_f32_preserve_flags(float %arg) { ; "cr" variant: float exp call with ninf contract and no !fpmath; the assertions expect the flags kept
+; CHECK-LABEL: define float @test_exp_cr_f32_preserve_flags
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call ninf contract float @_Z3expf(float [[ARG]])
+; CHECK-NEXT:    ret float [[EXP]]
+;
+  %exp = tail call ninf contract float @_Z3expf(float %arg)
+  ret float %exp
+}
+
+define <2 x float> @test_exp_cr_v2f32_preserve_flags(<2 x float> %arg) { ; "cr" variant: <2 x float> exp call with nnan nsz and no !fpmath; the assertions expect the flags kept
+; CHECK-LABEL: define <2 x float> @test_exp_cr_v2f32_preserve_flags
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call nnan nsz <2 x float> @_Z3expDv2_f(<2 x float> [[ARG]])
+; CHECK-NEXT:    ret <2 x float> [[EXP]]
+;
+  %exp = tail call nnan nsz <2 x float> @_Z3expDv2_f(<2 x float> %arg)
+  ret <2 x float> %exp
+}
+
+; Test the libm names (expf/exp), which are not recognized OpenCL builtins.
+declare float @expf(float) #2
+declare double @exp(double) #2
+
+define float @test_libm_exp_f32(float %arg) { ; calls the libm-named @expf rather than the mangled @_Z3expf; the assertions expect the call unchanged
+; CHECK-LABEL: define float @test_libm_exp_f32
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call float @expf(float [[ARG]])
+; CHECK-NEXT:    ret float [[EXP]]
+;
+  %exp = tail call float @expf(float %arg)
+  ret float %exp
+}
+
+define float @test_libm_exp_f32_fast(float %arg) { ; fast call to the libm-named @expf; the assertions expect the call unchanged
+; CHECK-LABEL: define float @test_libm_exp_f32_fast
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call fast float @expf(float [[ARG]])
+; CHECK-NEXT:    ret float [[EXP]]
+;
+  %exp = tail call fast float @expf(float %arg)
+  ret float %exp
+}
+
+define float @test_libm_exp_f32_fpmath(float %arg) { ; call to the libm-named @expf tagged !fpmath !0; the assertions expect the call unchanged
+; CHECK-LABEL: define float @test_libm_exp_f32_fpmath
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call float @expf(float [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret float [[EXP]]
+;
+  %exp = tail call float @expf(float %arg), !fpmath !0
+  ret float %exp
+}
+
+define double @test_libm_exp_f64(double %arg) { ; calls the libm-named @exp rather than the mangled @_Z3expd; the assertions expect the call unchanged
+; CHECK-LABEL: define double @test_libm_exp_f64
+; CHECK-SAME: (double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call double @exp(double [[ARG]])
+; CHECK-NEXT:    ret double [[EXP]]
+;
+  %exp = tail call double @exp(double %arg)
+  ret double %exp
+}
+
+define double @test_libm_exp_f64_fast(double %arg) { ; fast call to the libm-named @exp; the assertions expect the call unchanged
+; CHECK-LABEL: define double @test_libm_exp_f64_fast
+; CHECK-SAME: (double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call fast double @exp(double [[ARG]])
+; CHECK-NEXT:    ret double [[EXP]]
+;
+  %exp = tail call fast double @exp(double %arg)
+  ret double %exp
+}
+
+define double @test_libm_exp_f64_fpmath(double %arg) { ; call to the libm-named @exp tagged !fpmath !0; the assertions expect the call unchanged
+; CHECK-LABEL: define double @test_libm_exp_f64_fpmath
+; CHECK-SAME: (double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call double @exp(double [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret double [[EXP]]
+;
+  %exp = tail call double @exp(double %arg), !fpmath !0
+  ret double %exp
+}
+
+define float @test_exp_f32_fast_noinline(float %arg) { ; fast float exp call with callsite attribute #3 (noinline) and !fpmath !0; the assertions expect it unchanged
+; CHECK-LABEL: define float @test_exp_f32_fast_noinline
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call fast float @_Z3expf(float [[ARG]]) #[[ATTR6:[0-9]+]], !fpmath !0
+; CHECK-NEXT:    ret float [[EXP]]
+;
+  %exp = tail call fast float @_Z3expf(float %arg) #3, !fpmath !0
+  ret float %exp
+}
+
+define float @test_exp_f32_fast_optsize(float %arg) #4 { ; optsize (#4) function; fast float exp call with !fpmath !0; the assertions expect it unchanged
+; CHECK-LABEL: define float @test_exp_f32_fast_optsize
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call fast float @_Z3expf(float [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret float [[EXP]]
+;
+  %exp = tail call fast float @_Z3expf(float %arg), !fpmath !0
+  ret float %exp
+}
+
+define float @test_exp_f32_fast_minsize(float %arg) #5 { ; minsize (#5) function; fast float exp call with !fpmath !0; the assertions expect it unchanged
+; CHECK-LABEL: define float @test_exp_f32_fast_minsize
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR3:[0-9]+]] {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call fast float @_Z3expf(float [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret float [[EXP]]
+;
+  %exp = tail call fast float @_Z3expf(float %arg), !fpmath !0
+  ret float %exp
+}
+
+define float @test_exp_f32_nsz_contract_optsize(float %arg) #4 { ; optsize (#4) function; nsz contract float exp call with !fpmath !0; the assertions expect it unchanged
+; CHECK-LABEL: define float @test_exp_f32_nsz_contract_optsize
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call nsz contract float @_Z3expf(float [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret float [[EXP]]
+;
+  %exp = tail call nsz contract float @_Z3expf(float %arg), !fpmath !0
+  ret float %exp
+}
+
+define float @test_exp_f32_nsz_contract_minsize(float %arg) #5 { ; minsize (#5) function; nsz contract float exp call with !fpmath !0; the assertions expect it unchanged
+; CHECK-LABEL: define float @test_exp_f32_nsz_contract_minsize
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call nsz contract float @_Z3expf(float [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret float [[EXP]]
+;
+  %exp = tail call nsz contract float @_Z3expf(float %arg), !fpmath !0
+  ret float %exp
+}
+
+define half @test_exp_f16_fast_minsize(half %arg) #5 { ; minsize (#5) function; the half exp call carries the fast flag, as the test name states; the assertions expect it unchanged
+; CHECK-LABEL: define half @test_exp_f16_fast_minsize
+; CHECK-SAME: (half [[ARG:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call fast half @_Z3expDh(half [[ARG]])
+; CHECK-NEXT:    ret half [[EXP]]
+;
+  %exp = tail call fast half @_Z3expDh(half %arg)
+  ret half %exp
+}
+
+define float @test_exp_f32_strictfp(float %arg) #6 { ; strictfp (#6) on both the function and the call; nsz float exp call with no !fpmath; the assertions expect it unchanged
+; CHECK-LABEL: define float @test_exp_f32_strictfp
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR4:[0-9]+]] {
+; CHECK-NEXT:    [[EXP:%.*]] = tail call nsz float @_Z3expf(float [[ARG]]) #[[ATTR4]]
+; CHECK-NEXT:    ret float [[EXP]]
+;
+  %exp = tail call nsz float @_Z3expf(float %arg) #6
+  ret float %exp
+}
+
+attributes #0 = { nobuiltin }
+attributes #1 = { "no-builtins" }
+attributes #2 = { nounwind memory(none) }
+attributes #3 = { noinline }
+attributes #4 = { optsize }
+attributes #5 = { minsize }
+attributes #6 = { strictfp }
+
+!0 = !{float 3.000000e+00}
+!1 = !{i32 1234}
Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-exp2.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-exp2.ll
@@ -0,0 +1,561 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
+
+declare float @_Z4exp2f(float)
+declare <2 x float> @_Z4exp2Dv2_f(<2 x float>)
+declare <3 x float> @_Z4exp2Dv3_f(<3 x float>)
+declare <4 x float> @_Z4exp2Dv4_f(<4 x float>)
+declare <8 x float> @_Z4exp2Dv8_f(<8 x float>)
+declare <16 x float> @_Z4exp2Dv16_f(<16 x float>)
+
+declare double @_Z4exp2d(double)
+declare <2 x double> @_Z4exp2Dv2_d(<2 x double>)
+declare <3 x double> @_Z4exp2Dv3_d(<3 x double>)
+declare <4 x double> @_Z4exp2Dv4_d(<4 x double>)
+declare <8 x double> @_Z4exp2Dv8_d(<8 x double>)
+declare <16 x double> @_Z4exp2Dv16_d(<16 x double>)
+
+declare half @_Z4exp2Dh(half)
+declare <2 x half> @_Z4exp2Dv2_Dh(<2 x half>)
+declare <3 x half> @_Z4exp2Dv3_Dh(<3 x half>)
+declare <4 x half> @_Z4exp2Dv4_Dh(<4 x half>)
+declare <8 x half> @_Z4exp2Dv8_Dh(<8 x half>)
+declare <16 x half> @_Z4exp2Dv16_Dh(<16 x half>)
+
+define float @test_exp2_f32(float %arg) { ; float exp2 call tagged !fpmath !0; the assertions expect it unchanged
+; CHECK-LABEL: define float @test_exp2_f32
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call float @_Z4exp2f(float [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret float [[EXP2]]
+;
+  %exp2 = tail call float @_Z4exp2f(float %arg), !fpmath !0
+  ret float %exp2
+}
+
+define <2 x float> @test_exp2_v2f32(<2 x float> %arg) { ; <2 x float> exp2 call tagged !fpmath !0; the assertions expect it unchanged
+; CHECK-LABEL: define <2 x float> @test_exp2_v2f32
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call <2 x float> @_Z4exp2Dv2_f(<2 x float> [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret <2 x float> [[EXP2]]
+;
+  %exp2 = tail call <2 x float> @_Z4exp2Dv2_f(<2 x float> %arg), !fpmath !0
+  ret <2 x float> %exp2
+}
+
+define <3 x float> @test_exp2_v3f32(<3 x float> %arg) { ; <3 x float> exp2 call tagged !fpmath !0; the assertions expect it unchanged
+; CHECK-LABEL: define <3 x float> @test_exp2_v3f32
+; CHECK-SAME: (<3 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call <3 x float> @_Z4exp2Dv3_f(<3 x float> [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret <3 x float> [[EXP2]]
+;
+  %exp2 = tail call <3 x float> @_Z4exp2Dv3_f(<3 x float> %arg), !fpmath !0
+  ret <3 x float> %exp2
+}
+
+define <4 x float> @test_exp2_v4f32(<4 x float> %arg) { ; <4 x float> exp2 call tagged !fpmath !0; the assertions expect it unchanged
+; CHECK-LABEL: define <4 x float> @test_exp2_v4f32
+; CHECK-SAME: (<4 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call <4 x float> @_Z4exp2Dv4_f(<4 x float> [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret <4 x float> [[EXP2]]
+;
+  %exp2 = tail call <4 x float> @_Z4exp2Dv4_f(<4 x float> %arg), !fpmath !0
+  ret <4 x float> %exp2
+}
+
+define <8 x float> @test_exp2_v8f32(<8 x float> %arg) { ; <8 x float> exp2 call tagged !fpmath !0; the assertions expect it unchanged
+; CHECK-LABEL: define <8 x float> @test_exp2_v8f32
+; CHECK-SAME: (<8 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call <8 x float> @_Z4exp2Dv8_f(<8 x float> [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret <8 x float> [[EXP2]]
+;
+  %exp2 = tail call <8 x float> @_Z4exp2Dv8_f(<8 x float> %arg), !fpmath !0
+  ret <8 x float> %exp2
+}
+
+define <16 x float> @test_exp2_v16f32(<16 x float> %arg) { ; <16 x float> exp2 call tagged !fpmath !0; the assertions expect it unchanged
+; CHECK-LABEL: define <16 x float> @test_exp2_v16f32
+; CHECK-SAME: (<16 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call <16 x float> @_Z4exp2Dv16_f(<16 x float> [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret <16 x float> [[EXP2]]
+;
+  %exp2 = tail call <16 x float> @_Z4exp2Dv16_f(<16 x float> %arg), !fpmath !0
+  ret <16 x float> %exp2
+}
+
+define float @test_exp2_cr_f32(float %arg) { ; "cr" variant: float exp2 call with no !fpmath metadata; the assertions expect it unchanged
+; CHECK-LABEL: define float @test_exp2_cr_f32
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call float @_Z4exp2f(float [[ARG]])
+; CHECK-NEXT:    ret float [[EXP2]]
+;
+  %exp2 = tail call float @_Z4exp2f(float %arg)
+  ret float %exp2
+}
+
+define <2 x float> @test_exp2_cr_v2f32(<2 x float> %arg) { ; "cr" variant: <2 x float> exp2 call with no !fpmath metadata; the assertions expect it unchanged
+; CHECK-LABEL: define <2 x float> @test_exp2_cr_v2f32
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call <2 x float> @_Z4exp2Dv2_f(<2 x float> [[ARG]])
+; CHECK-NEXT:    ret <2 x float> [[EXP2]]
+;
+  %exp2 = tail call <2 x float> @_Z4exp2Dv2_f(<2 x float> %arg)
+  ret <2 x float> %exp2
+}
+
+define <3 x float> @test_exp2_cr_v3f32(<3 x float> %arg) { ; "cr" variant: <3 x float> exp2 call with no !fpmath metadata; the assertions expect it unchanged
+; CHECK-LABEL: define <3 x float> @test_exp2_cr_v3f32
+; CHECK-SAME: (<3 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call <3 x float> @_Z4exp2Dv3_f(<3 x float> [[ARG]])
+; CHECK-NEXT:    ret <3 x float> [[EXP2]]
+;
+  %exp2 = tail call <3 x float> @_Z4exp2Dv3_f(<3 x float> %arg)
+  ret <3 x float> %exp2
+}
+
+define <4 x float> @test_exp2_cr_v4f32(<4 x float> %arg) { ; "cr" variant: <4 x float> exp2 call with no !fpmath metadata; the assertions expect it unchanged
+; CHECK-LABEL: define <4 x float> @test_exp2_cr_v4f32
+; CHECK-SAME: (<4 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call <4 x float> @_Z4exp2Dv4_f(<4 x float> [[ARG]])
+; CHECK-NEXT:    ret <4 x float> [[EXP2]]
+;
+  %exp2 = tail call <4 x float> @_Z4exp2Dv4_f(<4 x float> %arg)
+  ret <4 x float> %exp2
+}
+
+define <8 x float> @test_exp2_cr_v8f32(<8 x float> %arg) { ; "cr" variant: <8 x float> exp2 call with no !fpmath metadata; the assertions expect it unchanged
+; CHECK-LABEL: define <8 x float> @test_exp2_cr_v8f32
+; CHECK-SAME: (<8 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call <8 x float> @_Z4exp2Dv8_f(<8 x float> [[ARG]])
+; CHECK-NEXT:    ret <8 x float> [[EXP2]]
+;
+  %exp2 = tail call <8 x float> @_Z4exp2Dv8_f(<8 x float> %arg)
+  ret <8 x float> %exp2
+}
+
+define <16 x float> @test_exp2_cr_v16f32(<16 x float> %arg) { ; "cr" variant: <16 x float> exp2 call with no !fpmath metadata; the assertions expect it unchanged
+; CHECK-LABEL: define <16 x float> @test_exp2_cr_v16f32
+; CHECK-SAME: (<16 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call <16 x float> @_Z4exp2Dv16_f(<16 x float> [[ARG]])
+; CHECK-NEXT:    ret <16 x float> [[EXP2]]
+;
+  %exp2 = tail call <16 x float> @_Z4exp2Dv16_f(<16 x float> %arg)
+  ret <16 x float> %exp2
+}
+
+define double @test_exp2_f64(double %arg) { ; double exp2 call with no fast-math flags or metadata; the assertions expect it unchanged
+; CHECK-LABEL: define double @test_exp2_f64
+; CHECK-SAME: (double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call double @_Z4exp2d(double [[ARG]])
+; CHECK-NEXT:    ret double [[EXP2]]
+;
+  %exp2 = tail call double @_Z4exp2d(double %arg)
+  ret double %exp2
+}
+
+define <2 x double> @test_exp2_v2f64(<2 x double> %arg) { ; <2 x double> exp2 call with no fast-math flags or metadata; the assertions expect it unchanged
+; CHECK-LABEL: define <2 x double> @test_exp2_v2f64
+; CHECK-SAME: (<2 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call <2 x double> @_Z4exp2Dv2_d(<2 x double> [[ARG]])
+; CHECK-NEXT:    ret <2 x double> [[EXP2]]
+;
+  %exp2 = tail call <2 x double> @_Z4exp2Dv2_d(<2 x double> %arg)
+  ret <2 x double> %exp2
+}
+
+define <3 x double> @test_exp2_v3f64(<3 x double> %arg) { ; <3 x double> exp2 call with no fast-math flags or metadata; the assertions expect it unchanged
+; CHECK-LABEL: define <3 x double> @test_exp2_v3f64
+; CHECK-SAME: (<3 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call <3 x double> @_Z4exp2Dv3_d(<3 x double> [[ARG]])
+; CHECK-NEXT:    ret <3 x double> [[EXP2]]
+;
+  %exp2 = tail call <3 x double> @_Z4exp2Dv3_d(<3 x double> %arg)
+  ret <3 x double> %exp2
+}
+
+define <4 x double> @test_exp2_v4f64(<4 x double> %arg) { ; <4 x double> exp2 call with no fast-math flags or metadata; the assertions expect it unchanged
+; CHECK-LABEL: define <4 x double> @test_exp2_v4f64
+; CHECK-SAME: (<4 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call <4 x double> @_Z4exp2Dv4_d(<4 x double> [[ARG]])
+; CHECK-NEXT:    ret <4 x double> [[EXP2]]
+;
+  %exp2 = tail call <4 x double> @_Z4exp2Dv4_d(<4 x double> %arg)
+  ret <4 x double> %exp2
+}
+
+define <8 x double> @test_exp2_v8f64(<8 x double> %arg) { ; <8 x double> exp2 call with no fast-math flags or metadata; the assertions expect it unchanged
+; CHECK-LABEL: define <8 x double> @test_exp2_v8f64
+; CHECK-SAME: (<8 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call <8 x double> @_Z4exp2Dv8_d(<8 x double> [[ARG]])
+; CHECK-NEXT:    ret <8 x double> [[EXP2]]
+;
+  %exp2 = tail call <8 x double> @_Z4exp2Dv8_d(<8 x double> %arg)
+  ret <8 x double> %exp2
+}
+
+define <16 x double> @test_exp2_v16f64(<16 x double> %arg) { ; <16 x double> exp2 call with no fast-math flags or metadata; the assertions expect it unchanged
+; CHECK-LABEL: define <16 x double> @test_exp2_v16f64
+; CHECK-SAME: (<16 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call <16 x double> @_Z4exp2Dv16_d(<16 x double> [[ARG]])
+; CHECK-NEXT:    ret <16 x double> [[EXP2]]
+;
+  %exp2 = tail call <16 x double> @_Z4exp2Dv16_d(<16 x double> %arg)
+  ret <16 x double> %exp2
+}
+
+define half @test_exp2_f16(half %arg) { ; half exp2 call with no fast-math flags or metadata; the assertions expect it unchanged
+; CHECK-LABEL: define half @test_exp2_f16
+; CHECK-SAME: (half [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call half @_Z4exp2Dh(half [[ARG]])
+; CHECK-NEXT:    ret half [[EXP2]]
+;
+  %exp2 = tail call half @_Z4exp2Dh(half %arg)
+  ret half %exp2
+}
+
+define half @test_exp2_f16_fast(half %arg) { ; half exp2 call with the fast flag; the assertions expect it unchanged
+; CHECK-LABEL: define half @test_exp2_f16_fast
+; CHECK-SAME: (half [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call fast half @_Z4exp2Dh(half [[ARG]])
+; CHECK-NEXT:    ret half [[EXP2]]
+;
+  %exp2 = tail call fast half @_Z4exp2Dh(half %arg)
+  ret half %exp2
+}
+
+define <2 x half> @test_exp2_v2f16(<2 x half> %arg) { ; <2 x half> exp2 call with no fast-math flags or metadata; the assertions expect it unchanged
+; CHECK-LABEL: define <2 x half> @test_exp2_v2f16
+; CHECK-SAME: (<2 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call <2 x half> @_Z4exp2Dv2_Dh(<2 x half> [[ARG]])
+; CHECK-NEXT:    ret <2 x half> [[EXP2]]
+;
+  %exp2 = tail call <2 x half> @_Z4exp2Dv2_Dh(<2 x half> %arg)
+  ret <2 x half> %exp2
+}
+
+define <3 x half> @test_exp2_v3f16(<3 x half> %arg) { ; <3 x half> exp2 call with no fast-math flags or metadata; the assertions expect it unchanged
+; CHECK-LABEL: define <3 x half> @test_exp2_v3f16
+; CHECK-SAME: (<3 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call <3 x half> @_Z4exp2Dv3_Dh(<3 x half> [[ARG]])
+; CHECK-NEXT:    ret <3 x half> [[EXP2]]
+;
+  %exp2 = tail call <3 x half> @_Z4exp2Dv3_Dh(<3 x half> %arg)
+  ret <3 x half> %exp2
+}
+
+define <4 x half> @test_exp2_v4f16(<4 x half> %arg) { ; <4 x half> exp2 call with no fast-math flags or metadata; the assertions expect it unchanged
+; CHECK-LABEL: define <4 x half> @test_exp2_v4f16
+; CHECK-SAME: (<4 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call <4 x half> @_Z4exp2Dv4_Dh(<4 x half> [[ARG]])
+; CHECK-NEXT:    ret <4 x half> [[EXP2]]
+;
+  %exp2 = tail call <4 x half> @_Z4exp2Dv4_Dh(<4 x half> %arg)
+  ret <4 x half> %exp2
+}
+
+define <8 x half> @test_exp2_v8f16(<8 x half> %arg) { ; <8 x half> exp2 call with no fast-math flags or metadata; the assertions expect it unchanged
+; CHECK-LABEL: define <8 x half> @test_exp2_v8f16
+; CHECK-SAME: (<8 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call <8 x half> @_Z4exp2Dv8_Dh(<8 x half> [[ARG]])
+; CHECK-NEXT:    ret <8 x half> [[EXP2]]
+;
+  %exp2 = tail call <8 x half> @_Z4exp2Dv8_Dh(<8 x half> %arg)
+  ret <8 x half> %exp2
+}
+
+define <16 x half> @test_exp2_v16f16(<16 x half> %arg) { ; <16 x half> exp2 call with no fast-math flags or metadata; the assertions expect it unchanged
+; CHECK-LABEL: define <16 x half> @test_exp2_v16f16
+; CHECK-SAME: (<16 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call <16 x half> @_Z4exp2Dv16_Dh(<16 x half> [[ARG]])
+; CHECK-NEXT:    ret <16 x half> [[EXP2]]
+;
+  %exp2 = tail call <16 x half> @_Z4exp2Dv16_Dh(<16 x half> %arg)
+  ret <16 x half> %exp2
+}
+
+define float @test_exp2_f32_nobuiltin_callsite(float %arg) {
+; CHECK-LABEL: define float @test_exp2_f32_nobuiltin_callsite
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call float @_Z4exp2f(float [[ARG]]) #[[ATTR5:[0-9]+]], !fpmath !0
+; CHECK-NEXT:    ret float [[EXP2]]
+;
+  %exp2 = tail call float @_Z4exp2f(float %arg) #0, !fpmath !0
+  ret float %exp2
+}
+
+define <2 x float> @test_exp2_v2f32_nobuiltin_callsite(<2 x float> %arg) {
+; CHECK-LABEL: define <2 x float> @test_exp2_v2f32_nobuiltin_callsite
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call <2 x float> @_Z4exp2Dv2_f(<2 x float> [[ARG]]) #[[ATTR5]], !fpmath !0
+; CHECK-NEXT:    ret <2 x float> [[EXP2]]
+;
+  %exp2 = tail call <2 x float> @_Z4exp2Dv2_f(<2 x float> %arg) #0, !fpmath !0
+  ret <2 x float> %exp2
+}
+
+define float @test_exp2_cr_f32_nobuiltin_callsite(float %arg) {
+; CHECK-LABEL: define float @test_exp2_cr_f32_nobuiltin_callsite
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call float @_Z4exp2f(float [[ARG]]) #[[ATTR5]]
+; CHECK-NEXT:    ret float [[EXP2]]
+;
+  %exp2 = tail call float @_Z4exp2f(float %arg) #0
+  ret float %exp2
+}
+
+define <2 x float> @test_exp2_cr_v2f32_nobuiltin_callsite(<2 x float> %arg) {
+; CHECK-LABEL: define <2 x float> @test_exp2_cr_v2f32_nobuiltin_callsite
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call <2 x float> @_Z4exp2Dv2_f(<2 x float> [[ARG]]) #[[ATTR5]]
+; CHECK-NEXT:    ret <2 x float> [[EXP2]]
+;
+  %exp2 = tail call <2 x float> @_Z4exp2Dv2_f(<2 x float> %arg) #0
+  ret <2 x float> %exp2
+}
+
+; The function-level "no-builtins" attribute (#1) should be ignored; note the calls below also carry call-site nobuiltin (#0).
+define float @test_exp2_f32_nobuiltins(float %arg) #1 {
+; CHECK-LABEL: define float @test_exp2_f32_nobuiltins
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call float @_Z4exp2f(float [[ARG]]) #[[ATTR5]], !fpmath !0
+; CHECK-NEXT:    ret float [[EXP2]]
+;
+  %exp2 = tail call float @_Z4exp2f(float %arg) #0, !fpmath !0
+  ret float %exp2
+}
+
+define <2 x float> @test_exp2_v2f32_nobuiltins(<2 x float> %arg) #1 {
+; CHECK-LABEL: define <2 x float> @test_exp2_v2f32_nobuiltins
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call <2 x float> @_Z4exp2Dv2_f(<2 x float> [[ARG]]) #[[ATTR5]], !fpmath !0
+; CHECK-NEXT:    ret <2 x float> [[EXP2]]
+;
+  %exp2 = tail call <2 x float> @_Z4exp2Dv2_f(<2 x float> %arg) #0, !fpmath !0
+  ret <2 x float> %exp2
+}
+
+define float @test_exp2_cr_f32_nobuiltins(float %arg) #1 {
+; CHECK-LABEL: define float @test_exp2_cr_f32_nobuiltins
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call float @_Z4exp2f(float [[ARG]]) #[[ATTR5]]
+; CHECK-NEXT:    ret float [[EXP2]]
+;
+  %exp2 = tail call float @_Z4exp2f(float %arg) #0
+  ret float %exp2
+}
+
+define <2 x float> @test_exp2_cr_v2f32_nobuiltins(<2 x float> %arg) #1 {
+; CHECK-LABEL: define <2 x float> @test_exp2_cr_v2f32_nobuiltins
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call <2 x float> @_Z4exp2Dv2_f(<2 x float> [[ARG]]) #[[ATTR5]]
+; CHECK-NEXT:    ret <2 x float> [[EXP2]]
+;
+  %exp2 = tail call <2 x float> @_Z4exp2Dv2_f(<2 x float> %arg) #0
+  ret <2 x float> %exp2
+}
+
+define float @test_exp2_f32_preserve_flags(float %arg) {
+; CHECK-LABEL: define float @test_exp2_f32_preserve_flags
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call nnan ninf float @_Z4exp2f(float [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret float [[EXP2]]
+;
+  %exp2 = tail call nnan ninf float @_Z4exp2f(float %arg), !fpmath !0
+  ret float %exp2
+}
+
+define <2 x float> @test_exp2_v2f32_preserve_flags(<2 x float> %arg) {
+; CHECK-LABEL: define <2 x float> @test_exp2_v2f32_preserve_flags
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call nnan nsz contract <2 x float> @_Z4exp2Dv2_f(<2 x float> [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret <2 x float> [[EXP2]]
+;
+  %exp2 = tail call contract nsz nnan <2 x float> @_Z4exp2Dv2_f(<2 x float> %arg), !fpmath !0
+  ret <2 x float> %exp2
+}
+
+define float @test_exp2_f32_preserve_flags_md(float %arg) {
+; CHECK-LABEL: define float @test_exp2_f32_preserve_flags_md
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call nnan ninf float @_Z4exp2f(float [[ARG]]), !fpmath !0, !foo !1
+; CHECK-NEXT:    ret float [[EXP2]]
+;
+  %exp2 = tail call nnan ninf float @_Z4exp2f(float %arg), !fpmath !0, !foo !1
+  ret float %exp2
+}
+
+define <2 x float> @test_exp2_v2f32_preserve_flags_md(<2 x float> %arg) {
+; CHECK-LABEL: define <2 x float> @test_exp2_v2f32_preserve_flags_md
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call nnan nsz contract <2 x float> @_Z4exp2Dv2_f(<2 x float> [[ARG]]), !fpmath !0, !foo !1
+; CHECK-NEXT:    ret <2 x float> [[EXP2]]
+;
+  %exp2 = tail call contract nsz nnan <2 x float> @_Z4exp2Dv2_f(<2 x float> %arg), !fpmath !0, !foo !1
+  ret <2 x float> %exp2
+}
+
+define float @test_exp2_cr_f32_preserve_flags(float %arg) {
+; CHECK-LABEL: define float @test_exp2_cr_f32_preserve_flags
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call ninf contract float @_Z4exp2f(float [[ARG]])
+; CHECK-NEXT:    ret float [[EXP2]]
+;
+  %exp2 = tail call ninf contract float @_Z4exp2f(float %arg)
+  ret float %exp2
+}
+
+define <2 x float> @test_exp2_cr_v2f32_preserve_flags(<2 x float> %arg) {
+; CHECK-LABEL: define <2 x float> @test_exp2_cr_v2f32_preserve_flags
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call nnan nsz <2 x float> @_Z4exp2Dv2_f(<2 x float> [[ARG]])
+; CHECK-NEXT:    ret <2 x float> [[EXP2]]
+;
+  %exp2 = tail call nnan nsz <2 x float> @_Z4exp2Dv2_f(<2 x float> %arg)
+  ret <2 x float> %exp2
+}
+
+; Test the libm name, not a recognized opencl builtin.
+declare float @exp2f(float) #2
+declare double @exp2(double) #2
+
+define float @test_libm_exp2_f32(float %arg) {
+; CHECK-LABEL: define float @test_libm_exp2_f32
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call float @exp2f(float [[ARG]])
+; CHECK-NEXT:    ret float [[EXP2]]
+;
+  %exp2 = tail call float @exp2f(float %arg)
+  ret float %exp2
+}
+
+define float @test_libm_exp2_f32_fast(float %arg) {
+; CHECK-LABEL: define float @test_libm_exp2_f32_fast
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call fast float @exp2f(float [[ARG]])
+; CHECK-NEXT:    ret float [[EXP2]]
+;
+  %exp2 = tail call fast float @exp2f(float %arg)
+  ret float %exp2
+}
+
+define float @test_libm_exp2_f32_fpmath(float %arg) {
+; CHECK-LABEL: define float @test_libm_exp2_f32_fpmath
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call float @exp2f(float [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret float [[EXP2]]
+;
+  %exp2 = tail call float @exp2f(float %arg), !fpmath !0
+  ret float %exp2
+}
+
+define double @test_libm_exp2_f64(double %arg) {
+; CHECK-LABEL: define double @test_libm_exp2_f64
+; CHECK-SAME: (double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call double @exp2(double [[ARG]])
+; CHECK-NEXT:    ret double [[EXP2]]
+;
+  %exp2 = tail call double @exp2(double %arg)
+  ret double %exp2
+}
+
+define double @test_libm_exp2_f64_fast(double %arg) {
+; CHECK-LABEL: define double @test_libm_exp2_f64_fast
+; CHECK-SAME: (double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call fast double @exp2(double [[ARG]])
+; CHECK-NEXT:    ret double [[EXP2]]
+;
+  %exp2 = tail call fast double @exp2(double %arg)
+  ret double %exp2
+}
+
+define double @test_libm_exp2_f64_fpmath(double %arg) {
+; CHECK-LABEL: define double @test_libm_exp2_f64_fpmath
+; CHECK-SAME: (double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call double @exp2(double [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret double [[EXP2]]
+;
+  %exp2 = tail call double @exp2(double %arg), !fpmath !0
+  ret double %exp2
+}
+
+define float @test_exp2_f32_fast_noinline(float %arg) {
+; CHECK-LABEL: define float @test_exp2_f32_fast_noinline
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call fast float @_Z4exp2f(float [[ARG]]) #[[ATTR6:[0-9]+]], !fpmath !0
+; CHECK-NEXT:    ret float [[EXP2]]
+;
+  %exp2 = tail call fast float @_Z4exp2f(float %arg) #3, !fpmath !0
+  ret float %exp2
+}
+
+define float @test_exp2_f32_fast_optsize(float %arg) #4 {
+; CHECK-LABEL: define float @test_exp2_f32_fast_optsize
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call fast float @_Z4exp2f(float [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret float [[EXP2]]
+;
+  %exp2 = tail call fast float @_Z4exp2f(float %arg), !fpmath !0
+  ret float %exp2
+}
+
+define float @test_exp2_f32_fast_minsize(float %arg) #5 {
+; CHECK-LABEL: define float @test_exp2_f32_fast_minsize
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR3:[0-9]+]] {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call fast float @_Z4exp2f(float [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret float [[EXP2]]
+;
+  %exp2 = tail call fast float @_Z4exp2f(float %arg), !fpmath !0
+  ret float %exp2
+}
+
+define float @test_exp2_f32_nsz_contract_optsize(float %arg) #4 {
+; CHECK-LABEL: define float @test_exp2_f32_nsz_contract_optsize
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call nsz contract float @_Z4exp2f(float [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret float [[EXP2]]
+;
+  %exp2 = tail call nsz contract float @_Z4exp2f(float %arg), !fpmath !0
+  ret float %exp2
+}
+
+define float @test_exp2_f32_nsz_contract_minsize(float %arg) #5 {
+; CHECK-LABEL: define float @test_exp2_f32_nsz_contract_minsize
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call nsz contract float @_Z4exp2f(float [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret float [[EXP2]]
+;
+  %exp2 = tail call nsz contract float @_Z4exp2f(float %arg), !fpmath !0
+  ret float %exp2
+}
+
+define half @test_exp2_f16_fast_minsize(half %arg) #5 {
+; CHECK-LABEL: define half @test_exp2_f16_fast_minsize
+; CHECK-SAME: (half [[ARG:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call fast half @_Z4exp2Dh(half [[ARG]])
+; CHECK-NEXT:    ret half [[EXP2]]
+;
+  %exp2 = tail call fast half @_Z4exp2Dh(half %arg)
+  ret half %exp2
+}
+
+define float @test_exp2_f32_strictfp(float %arg) #6 {
+; CHECK-LABEL: define float @test_exp2_f32_strictfp
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR4:[0-9]+]] {
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call nsz float @_Z4exp2f(float [[ARG]]) #[[ATTR4]]
+; CHECK-NEXT:    ret float [[EXP2]]
+; Input: nsz exp2 call inside a strictfp (#6) function; expected to be emitted unchanged.
+  %exp2 = tail call nsz float @_Z4exp2f(float %arg) #6
+  ret float %exp2
+}
+
+attributes #0 = { nobuiltin }
+attributes #1 = { "no-builtins" }
+attributes #2 = { nounwind memory(none) }
+attributes #3 = { noinline }
+attributes #4 = { optsize }
+attributes #5 = { minsize }
+attributes #6 = { strictfp }
+
+!0 = !{float 3.000000e+00}
+!1 = !{i32 1234}
Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fabs.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fabs.ll
@@ -0,0 +1,327 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
+
+declare float @_Z4fabsf(float)
+declare <2 x float> @_Z4fabsDv2_f(<2 x float>)
+declare <3 x float> @_Z4fabsDv3_f(<3 x float>)
+declare <4 x float> @_Z4fabsDv4_f(<4 x float>)
+declare <8 x float> @_Z4fabsDv8_f(<8 x float>)
+declare <16 x float> @_Z4fabsDv16_f(<16 x float>)
+
+declare double @_Z4fabsd(double)
+declare <2 x double> @_Z4fabsDv2_d(<2 x double>)
+declare <3 x double> @_Z4fabsDv3_d(<3 x double>)
+declare <4 x double> @_Z4fabsDv4_d(<4 x double>)
+declare <8 x double> @_Z4fabsDv8_d(<8 x double>)
+declare <16 x double> @_Z4fabsDv16_d(<16 x double>)
+
+declare half @_Z4fabsDh(half)
+declare <2 x half> @_Z4fabsDv2_Dh(<2 x half>)
+declare <3 x half> @_Z4fabsDv3_Dh(<3 x half>)
+declare <4 x half> @_Z4fabsDv4_Dh(<4 x half>)
+declare <8 x half> @_Z4fabsDv8_Dh(<8 x half>)
+declare <16 x half> @_Z4fabsDv16_Dh(<16 x half>)
+
+define float @test_fabs_f32(float %arg) {
+; CHECK-LABEL: define float @test_fabs_f32
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FABS:%.*]] = tail call float @_Z4fabsf(float [[ARG]])
+; CHECK-NEXT:    ret float [[FABS]]
+;
+  %fabs = tail call float @_Z4fabsf(float %arg)
+  ret float %fabs
+}
+
+define <2 x float> @test_fabs_v2f32(<2 x float> %arg) {
+; CHECK-LABEL: define <2 x float> @test_fabs_v2f32
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FABS:%.*]] = tail call <2 x float> @_Z4fabsDv2_f(<2 x float> [[ARG]])
+; CHECK-NEXT:    ret <2 x float> [[FABS]]
+;
+  %fabs = tail call <2 x float> @_Z4fabsDv2_f(<2 x float> %arg)
+  ret <2 x float> %fabs
+}
+
+define <3 x float> @test_fabs_v3f32(<3 x float> %arg) {
+; CHECK-LABEL: define <3 x float> @test_fabs_v3f32
+; CHECK-SAME: (<3 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FABS:%.*]] = tail call <3 x float> @_Z4fabsDv3_f(<3 x float> [[ARG]])
+; CHECK-NEXT:    ret <3 x float> [[FABS]]
+;
+  %fabs = tail call <3 x float> @_Z4fabsDv3_f(<3 x float> %arg)
+  ret <3 x float> %fabs
+}
+
+define <4 x float> @test_fabs_v4f32(<4 x float> %arg) {
+; CHECK-LABEL: define <4 x float> @test_fabs_v4f32
+; CHECK-SAME: (<4 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FABS:%.*]] = tail call <4 x float> @_Z4fabsDv4_f(<4 x float> [[ARG]])
+; CHECK-NEXT:    ret <4 x float> [[FABS]]
+;
+  %fabs = tail call <4 x float> @_Z4fabsDv4_f(<4 x float> %arg)
+  ret <4 x float> %fabs
+}
+
+define <8 x float> @test_fabs_v8f32(<8 x float> %arg) {
+; CHECK-LABEL: define <8 x float> @test_fabs_v8f32
+; CHECK-SAME: (<8 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FABS:%.*]] = tail call <8 x float> @_Z4fabsDv8_f(<8 x float> [[ARG]])
+; CHECK-NEXT:    ret <8 x float> [[FABS]]
+;
+  %fabs = tail call <8 x float> @_Z4fabsDv8_f(<8 x float> %arg)
+  ret <8 x float> %fabs
+}
+
+define <16 x float> @test_fabs_v16f32(<16 x float> %arg) {
+; CHECK-LABEL: define <16 x float> @test_fabs_v16f32
+; CHECK-SAME: (<16 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FABS:%.*]] = tail call <16 x float> @_Z4fabsDv16_f(<16 x float> [[ARG]])
+; CHECK-NEXT:    ret <16 x float> [[FABS]]
+;
+  %fabs = tail call <16 x float> @_Z4fabsDv16_f(<16 x float> %arg)
+  ret <16 x float> %fabs
+}
+
+define double @test_fabs_f64(double %arg) {
+; CHECK-LABEL: define double @test_fabs_f64
+; CHECK-SAME: (double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FABS:%.*]] = tail call double @_Z4fabsd(double [[ARG]])
+; CHECK-NEXT:    ret double [[FABS]]
+;
+  %fabs = tail call double @_Z4fabsd(double %arg)
+  ret double %fabs
+}
+
+define <2 x double> @test_fabs_v2f64(<2 x double> %arg) {
+; CHECK-LABEL: define <2 x double> @test_fabs_v2f64
+; CHECK-SAME: (<2 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FABS:%.*]] = tail call <2 x double> @_Z4fabsDv2_d(<2 x double> [[ARG]])
+; CHECK-NEXT:    ret <2 x double> [[FABS]]
+;
+  %fabs = tail call <2 x double> @_Z4fabsDv2_d(<2 x double> %arg)
+  ret <2 x double> %fabs
+}
+
+define <3 x double> @test_fabs_v3f64(<3 x double> %arg) {
+; CHECK-LABEL: define <3 x double> @test_fabs_v3f64
+; CHECK-SAME: (<3 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FABS:%.*]] = tail call <3 x double> @_Z4fabsDv3_d(<3 x double> [[ARG]])
+; CHECK-NEXT:    ret <3 x double> [[FABS]]
+;
+  %fabs = tail call <3 x double> @_Z4fabsDv3_d(<3 x double> %arg)
+  ret <3 x double> %fabs
+}
+
+define <4 x double> @test_fabs_v4f64(<4 x double> %arg) {
+; CHECK-LABEL: define <4 x double> @test_fabs_v4f64
+; CHECK-SAME: (<4 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FABS:%.*]] = tail call <4 x double> @_Z4fabsDv4_d(<4 x double> [[ARG]])
+; CHECK-NEXT:    ret <4 x double> [[FABS]]
+;
+  %fabs = tail call <4 x double> @_Z4fabsDv4_d(<4 x double> %arg)
+  ret <4 x double> %fabs
+}
+
+define <8 x double> @test_fabs_v8f64(<8 x double> %arg) {
+; CHECK-LABEL: define <8 x double> @test_fabs_v8f64
+; CHECK-SAME: (<8 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FABS:%.*]] = tail call <8 x double> @_Z4fabsDv8_d(<8 x double> [[ARG]])
+; CHECK-NEXT:    ret <8 x double> [[FABS]]
+;
+  %fabs = tail call <8 x double> @_Z4fabsDv8_d(<8 x double> %arg)
+  ret <8 x double> %fabs
+}
+
+define <16 x double> @test_fabs_v16f64(<16 x double> %arg) {
+; CHECK-LABEL: define <16 x double> @test_fabs_v16f64
+; CHECK-SAME: (<16 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FABS:%.*]] = tail call <16 x double> @_Z4fabsDv16_d(<16 x double> [[ARG]])
+; CHECK-NEXT:    ret <16 x double> [[FABS]]
+;
+  %fabs = tail call <16 x double> @_Z4fabsDv16_d(<16 x double> %arg)
+  ret <16 x double> %fabs
+}
+
+define half @test_fabs_f16(half %arg) {
+; CHECK-LABEL: define half @test_fabs_f16
+; CHECK-SAME: (half [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FABS:%.*]] = tail call half @_Z4fabsDh(half [[ARG]])
+; CHECK-NEXT:    ret half [[FABS]]
+;
+  %fabs = tail call half @_Z4fabsDh(half %arg)
+  ret half %fabs
+}
+
+define <2 x half> @test_fabs_v2f16(<2 x half> %arg) {
+; CHECK-LABEL: define <2 x half> @test_fabs_v2f16
+; CHECK-SAME: (<2 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FABS:%.*]] = tail call <2 x half> @_Z4fabsDv2_Dh(<2 x half> [[ARG]])
+; CHECK-NEXT:    ret <2 x half> [[FABS]]
+;
+  %fabs = tail call <2 x half> @_Z4fabsDv2_Dh(<2 x half> %arg)
+  ret <2 x half> %fabs
+}
+
+define <3 x half> @test_fabs_v3f16(<3 x half> %arg) {
+; CHECK-LABEL: define <3 x half> @test_fabs_v3f16
+; CHECK-SAME: (<3 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FABS:%.*]] = tail call <3 x half> @_Z4fabsDv3_Dh(<3 x half> [[ARG]])
+; CHECK-NEXT:    ret <3 x half> [[FABS]]
+;
+  %fabs = tail call <3 x half> @_Z4fabsDv3_Dh(<3 x half> %arg)
+  ret <3 x half> %fabs
+}
+
+define <4 x half> @test_fabs_v4f16(<4 x half> %arg) {
+; CHECK-LABEL: define <4 x half> @test_fabs_v4f16
+; CHECK-SAME: (<4 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FABS:%.*]] = tail call <4 x half> @_Z4fabsDv4_Dh(<4 x half> [[ARG]])
+; CHECK-NEXT:    ret <4 x half> [[FABS]]
+;
+  %fabs = tail call <4 x half> @_Z4fabsDv4_Dh(<4 x half> %arg)
+  ret <4 x half> %fabs
+}
+
+define <8 x half> @test_fabs_v8f16(<8 x half> %arg) {
+; CHECK-LABEL: define <8 x half> @test_fabs_v8f16
+; CHECK-SAME: (<8 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FABS:%.*]] = tail call <8 x half> @_Z4fabsDv8_Dh(<8 x half> [[ARG]])
+; CHECK-NEXT:    ret <8 x half> [[FABS]]
+;
+  %fabs = tail call <8 x half> @_Z4fabsDv8_Dh(<8 x half> %arg)
+  ret <8 x half> %fabs
+}
+
+define <16 x half> @test_fabs_v16f16(<16 x half> %arg) {
+; CHECK-LABEL: define <16 x half> @test_fabs_v16f16
+; CHECK-SAME: (<16 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FABS:%.*]] = tail call <16 x half> @_Z4fabsDv16_Dh(<16 x half> [[ARG]])
+; CHECK-NEXT:    ret <16 x half> [[FABS]]
+;
+  %fabs = tail call <16 x half> @_Z4fabsDv16_Dh(<16 x half> %arg)
+  ret <16 x half> %fabs
+}
+
+define float @test_fabs_f32_nobuiltin_callsite(float %arg) {
+; CHECK-LABEL: define float @test_fabs_f32_nobuiltin_callsite
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FABS:%.*]] = tail call float @_Z4fabsf(float [[ARG]]) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT:    ret float [[FABS]]
+;
+  %fabs = tail call float @_Z4fabsf(float %arg) #0
+  ret float %fabs
+}
+
+define <2 x float> @test_fabs_v2f32_nobuiltin_callsite(<2 x float> %arg) {
+; CHECK-LABEL: define <2 x float> @test_fabs_v2f32_nobuiltin_callsite
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FABS:%.*]] = tail call <2 x float> @_Z4fabsDv2_f(<2 x float> [[ARG]]) #[[ATTR3]]
+; CHECK-NEXT:    ret <2 x float> [[FABS]]
+;
+  %fabs = tail call <2 x float> @_Z4fabsDv2_f(<2 x float> %arg) #0
+  ret <2 x float> %fabs
+}
+
+; The function-level "no-builtins" attribute (#1) should be ignored; note the calls below also carry call-site nobuiltin (#0).
+define float @test_fabs_f32_nobuiltins(float %arg) #1 {
+; CHECK-LABEL: define float @test_fabs_f32_nobuiltins
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[FABS:%.*]] = tail call float @_Z4fabsf(float [[ARG]]) #[[ATTR3]]
+; CHECK-NEXT:    ret float [[FABS]]
+;
+  %fabs = tail call float @_Z4fabsf(float %arg) #0
+  ret float %fabs
+}
+
+define <2 x float> @test_fabs_v2f32_nobuiltins(<2 x float> %arg) #1 {
+; CHECK-LABEL: define <2 x float> @test_fabs_v2f32_nobuiltins
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[FABS:%.*]] = tail call <2 x float> @_Z4fabsDv2_f(<2 x float> [[ARG]]) #[[ATTR3]]
+; CHECK-NEXT:    ret <2 x float> [[FABS]]
+;
+  %fabs = tail call <2 x float> @_Z4fabsDv2_f(<2 x float> %arg) #0
+  ret <2 x float> %fabs
+}
+
+define float @test_fabs_f32_preserve_flags(float %arg) {
+; CHECK-LABEL: define float @test_fabs_f32_preserve_flags
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FABS:%.*]] = tail call nnan ninf float @_Z4fabsf(float [[ARG]])
+; CHECK-NEXT:    ret float [[FABS]]
+;
+  %fabs = tail call nnan ninf float @_Z4fabsf(float %arg)
+  ret float %fabs
+}
+
+define <2 x float> @test_fabs_v2f32_preserve_flags(<2 x float> %arg) {
+; CHECK-LABEL: define <2 x float> @test_fabs_v2f32_preserve_flags
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FABS:%.*]] = tail call nnan nsz contract <2 x float> @_Z4fabsDv2_f(<2 x float> [[ARG]])
+; CHECK-NEXT:    ret <2 x float> [[FABS]]
+;
+  %fabs = tail call contract nsz nnan <2 x float> @_Z4fabsDv2_f(<2 x float> %arg)
+  ret <2 x float> %fabs
+}
+
+define float @test_fabs_f32_preserve_flags_md(float %arg) {
+; CHECK-LABEL: define float @test_fabs_f32_preserve_flags_md
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FABS:%.*]] = tail call nnan ninf float @_Z4fabsf(float [[ARG]]), !foo !0
+; CHECK-NEXT:    ret float [[FABS]]
+;
+  %fabs = tail call nnan ninf float @_Z4fabsf(float %arg), !foo !0
+  ret float %fabs
+}
+
+define <2 x float> @test_fabs_v2f32_preserve_flags_md(<2 x float> %arg) {
+; CHECK-LABEL: define <2 x float> @test_fabs_v2f32_preserve_flags_md
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FABS:%.*]] = tail call nnan nsz contract <2 x float> @_Z4fabsDv2_f(<2 x float> [[ARG]]), !foo !0
+; CHECK-NEXT:    ret <2 x float> [[FABS]]
+;
+  %fabs = tail call contract nsz nnan <2 x float> @_Z4fabsDv2_f(<2 x float> %arg), !foo !0
+  ret <2 x float> %fabs
+}
+
+; Test the libm name, not a recognized opencl builtin.
+declare float @fabsf(float) #2
+declare double @fabs(double) #2
+
+define float @test_libm_fabs_f32(float %arg) {
+; CHECK-LABEL: define float @test_libm_fabs_f32
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FABS:%.*]] = tail call float @fabsf(float [[ARG]])
+; CHECK-NEXT:    ret float [[FABS]]
+;
+  %fabs = tail call float @fabsf(float %arg)
+  ret float %fabs
+}
+
+define double @test_libm_fabs_f64(double %arg) {
+; CHECK-LABEL: define double @test_libm_fabs_f64
+; CHECK-SAME: (double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FABS:%.*]] = tail call double @fabs(double [[ARG]])
+; CHECK-NEXT:    ret double [[FABS]]
+;
+  %fabs = tail call double @fabs(double %arg)
+  ret double %fabs
+}
+
+define float @test_fabs_f32_strictfp(float %arg) #3 {
+; CHECK-LABEL: define float @test_fabs_f32_strictfp
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT:    [[FABS:%.*]] = tail call nnan float @_Z4fabsf(float [[ARG]]) #[[ATTR2]]
+; CHECK-NEXT:    ret float [[FABS]]
+;
+  %fabs = tail call nnan float @_Z4fabsf(float %arg) #3
+  ret float %fabs
+}
+
+attributes #0 = { nobuiltin }
+attributes #1 = { "no-builtins" }
+attributes #2 = { nounwind memory(none) }
+attributes #3 = { strictfp }
+
+!0 = !{i32 1234}
Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-floor.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-floor.ll
@@ -0,0 +1,327 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
+
+declare float @_Z5floorf(float)
+declare <2 x float> @_Z5floorDv2_f(<2 x float>)
+declare <3 x float> @_Z5floorDv3_f(<3 x float>)
+declare <4 x float> @_Z5floorDv4_f(<4 x float>)
+declare <8 x float> @_Z5floorDv8_f(<8 x float>)
+declare <16 x float> @_Z5floorDv16_f(<16 x float>)
+
+declare double @_Z5floord(double)
+declare <2 x double> @_Z5floorDv2_d(<2 x double>)
+declare <3 x double> @_Z5floorDv3_d(<3 x double>)
+declare <4 x double> @_Z5floorDv4_d(<4 x double>)
+declare <8 x double> @_Z5floorDv8_d(<8 x double>)
+declare <16 x double> @_Z5floorDv16_d(<16 x double>)
+
+declare half @_Z5floorDh(half)
+declare <2 x half> @_Z5floorDv2_Dh(<2 x half>)
+declare <3 x half> @_Z5floorDv3_Dh(<3 x half>)
+declare <4 x half> @_Z5floorDv4_Dh(<4 x half>)
+declare <8 x half> @_Z5floorDv8_Dh(<8 x half>)
+declare <16 x half> @_Z5floorDv16_Dh(<16 x half>)
+
+define float @test_rint_f32(float %arg) {
+; CHECK-LABEL: define float @test_rint_f32
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FLOOR:%.*]] = tail call float @_Z5floorf(float [[ARG]])
+; CHECK-NEXT:    ret float [[FLOOR]]
+; Exercises floor despite the test_rint_ name: plain float call, expected to be emitted unchanged.
+  %floor = tail call float @_Z5floorf(float %arg)
+  ret float %floor
+}
+
+define <2 x float> @test_rint_v2f32(<2 x float> %arg) {
+; CHECK-LABEL: define <2 x float> @test_rint_v2f32
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FLOOR:%.*]] = tail call <2 x float> @_Z5floorDv2_f(<2 x float> [[ARG]])
+; CHECK-NEXT:    ret <2 x float> [[FLOOR]]
+; Exercises floor despite the test_rint_ name: plain <2 x float> call, expected to be emitted unchanged.
+  %floor = tail call <2 x float> @_Z5floorDv2_f(<2 x float> %arg)
+  ret <2 x float> %floor
+}
+
+define <3 x float> @test_rint_v3f32(<3 x float> %arg) {
+; CHECK-LABEL: define <3 x float> @test_rint_v3f32
+; CHECK-SAME: (<3 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FLOOR:%.*]] = tail call <3 x float> @_Z5floorDv3_f(<3 x float> [[ARG]])
+; CHECK-NEXT:    ret <3 x float> [[FLOOR]]
+; Exercises floor despite the test_rint_ name: plain <3 x float> call, expected to be emitted unchanged.
+  %floor = tail call <3 x float> @_Z5floorDv3_f(<3 x float> %arg)
+  ret <3 x float> %floor
+}
+
+define <4 x float> @test_rint_v4f32(<4 x float> %arg) {
+; CHECK-LABEL: define <4 x float> @test_rint_v4f32
+; CHECK-SAME: (<4 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FLOOR:%.*]] = tail call <4 x float> @_Z5floorDv4_f(<4 x float> [[ARG]])
+; CHECK-NEXT:    ret <4 x float> [[FLOOR]]
+; Exercises floor despite the test_rint_ name: plain <4 x float> call, expected to be emitted unchanged.
+  %floor = tail call <4 x float> @_Z5floorDv4_f(<4 x float> %arg)
+  ret <4 x float> %floor
+}
+
+define <8 x float> @test_rint_v8f32(<8 x float> %arg) {
+; CHECK-LABEL: define <8 x float> @test_rint_v8f32
+; CHECK-SAME: (<8 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FLOOR:%.*]] = tail call <8 x float> @_Z5floorDv8_f(<8 x float> [[ARG]])
+; CHECK-NEXT:    ret <8 x float> [[FLOOR]]
+; Exercises floor despite the test_rint_ name: plain <8 x float> call, expected to be emitted unchanged.
+  %floor = tail call <8 x float> @_Z5floorDv8_f(<8 x float> %arg)
+  ret <8 x float> %floor
+}
+
+define <16 x float> @test_rint_v16f32(<16 x float> %arg) {
+; CHECK-LABEL: define <16 x float> @test_rint_v16f32
+; CHECK-SAME: (<16 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FLOOR:%.*]] = tail call <16 x float> @_Z5floorDv16_f(<16 x float> [[ARG]])
+; CHECK-NEXT:    ret <16 x float> [[FLOOR]]
+; Exercises floor despite the test_rint_ name: plain <16 x float> call, expected to be emitted unchanged.
+  %floor = tail call <16 x float> @_Z5floorDv16_f(<16 x float> %arg)
+  ret <16 x float> %floor
+}
+
+define double @test_rint_f64(double %arg) {
+; CHECK-LABEL: define double @test_rint_f64
+; CHECK-SAME: (double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FLOOR:%.*]] = tail call double @_Z5floord(double [[ARG]])
+; CHECK-NEXT:    ret double [[FLOOR]]
+; Exercises floor despite the test_rint_ name: plain double call, expected to be emitted unchanged.
+  %floor = tail call double @_Z5floord(double %arg)
+  ret double %floor
+}
+
+define <2 x double> @test_rint_v2f64(<2 x double> %arg) {
+; CHECK-LABEL: define <2 x double> @test_rint_v2f64
+; CHECK-SAME: (<2 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FLOOR:%.*]] = tail call <2 x double> @_Z5floorDv2_d(<2 x double> [[ARG]])
+; CHECK-NEXT:    ret <2 x double> [[FLOOR]]
+; Exercises floor despite the test_rint_ name: plain <2 x double> call, expected to be emitted unchanged.
+  %floor = tail call <2 x double> @_Z5floorDv2_d(<2 x double> %arg)
+  ret <2 x double> %floor
+}
+
+define <3 x double> @test_rint_v3f64(<3 x double> %arg) {
+; CHECK-LABEL: define <3 x double> @test_rint_v3f64
+; CHECK-SAME: (<3 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FLOOR:%.*]] = tail call <3 x double> @_Z5floorDv3_d(<3 x double> [[ARG]])
+; CHECK-NEXT:    ret <3 x double> [[FLOOR]]
+; Exercises floor despite the test_rint_ name: plain <3 x double> call, expected to be emitted unchanged.
+  %floor = tail call <3 x double> @_Z5floorDv3_d(<3 x double> %arg)
+  ret <3 x double> %floor
+}
+
+define <4 x double> @test_rint_v4f64(<4 x double> %arg) {
+; CHECK-LABEL: define <4 x double> @test_rint_v4f64
+; CHECK-SAME: (<4 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FLOOR:%.*]] = tail call <4 x double> @_Z5floorDv4_d(<4 x double> [[ARG]])
+; CHECK-NEXT:    ret <4 x double> [[FLOOR]]
+; Exercises floor despite the test_rint_ name: plain <4 x double> call, expected to be emitted unchanged.
+  %floor = tail call <4 x double> @_Z5floorDv4_d(<4 x double> %arg)
+  ret <4 x double> %floor
+}
+
+define <8 x double> @test_rint_v8f64(<8 x double> %arg) {
+; CHECK-LABEL: define <8 x double> @test_rint_v8f64
+; CHECK-SAME: (<8 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FLOOR:%.*]] = tail call <8 x double> @_Z5floorDv8_d(<8 x double> [[ARG]])
+; CHECK-NEXT:    ret <8 x double> [[FLOOR]]
+; Exercises floor despite the test_rint_ name: plain <8 x double> call, expected to be emitted unchanged.
+  %floor = tail call <8 x double> @_Z5floorDv8_d(<8 x double> %arg)
+  ret <8 x double> %floor
+}
+
+define <16 x double> @test_rint_v16f64(<16 x double> %arg) {
+; CHECK-LABEL: define <16 x double> @test_rint_v16f64
+; CHECK-SAME: (<16 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FLOOR:%.*]] = tail call <16 x double> @_Z5floorDv16_d(<16 x double> [[ARG]])
+; CHECK-NEXT:    ret <16 x double> [[FLOOR]]
+; Exercises floor despite the test_rint_ name: plain <16 x double> call, expected to be emitted unchanged.
+  %floor = tail call <16 x double> @_Z5floorDv16_d(<16 x double> %arg)
+  ret <16 x double> %floor
+}
+
+define half @test_rint_f16(half %arg) {
+; CHECK-LABEL: define half @test_rint_f16
+; CHECK-SAME: (half [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FLOOR:%.*]] = tail call half @_Z5floorDh(half [[ARG]])
+; CHECK-NEXT:    ret half [[FLOOR]]
+; Exercises floor despite the test_rint_ name: plain half call, expected to be emitted unchanged.
+  %floor = tail call half @_Z5floorDh(half %arg)
+  ret half %floor
+}
+
+define <2 x half> @test_rint_v2f16(<2 x half> %arg) {
+; CHECK-LABEL: define <2 x half> @test_rint_v2f16
+; CHECK-SAME: (<2 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FLOOR:%.*]] = tail call <2 x half> @_Z5floorDv2_Dh(<2 x half> [[ARG]])
+; CHECK-NEXT:    ret <2 x half> [[FLOOR]]
+; Exercises floor despite the test_rint_ name: plain <2 x half> call, expected to be emitted unchanged.
+  %floor = tail call <2 x half> @_Z5floorDv2_Dh(<2 x half> %arg)
+  ret <2 x half> %floor
+}
+
+define <3 x half> @test_rint_v3f16(<3 x half> %arg) {
+; CHECK-LABEL: define <3 x half> @test_rint_v3f16
+; CHECK-SAME: (<3 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FLOOR:%.*]] = tail call <3 x half> @_Z5floorDv3_Dh(<3 x half> [[ARG]])
+; CHECK-NEXT:    ret <3 x half> [[FLOOR]]
+; Exercises floor despite the test_rint_ name: plain <3 x half> call, expected to be emitted unchanged.
+  %floor = tail call <3 x half> @_Z5floorDv3_Dh(<3 x half> %arg)
+  ret <3 x half> %floor
+}
+
+define <4 x half> @test_rint_v4f16(<4 x half> %arg) {
+; CHECK-LABEL: define <4 x half> @test_rint_v4f16
+; CHECK-SAME: (<4 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FLOOR:%.*]] = tail call <4 x half> @_Z5floorDv4_Dh(<4 x half> [[ARG]])
+; CHECK-NEXT:    ret <4 x half> [[FLOOR]]
+; Exercises floor despite the test_rint_ name: plain <4 x half> call, expected to be emitted unchanged.
+  %floor = tail call <4 x half> @_Z5floorDv4_Dh(<4 x half> %arg)
+  ret <4 x half> %floor
+}
+
+define <8 x half> @test_rint_v8f16(<8 x half> %arg) {
+; CHECK-LABEL: define <8 x half> @test_rint_v8f16
+; CHECK-SAME: (<8 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FLOOR:%.*]] = tail call <8 x half> @_Z5floorDv8_Dh(<8 x half> [[ARG]])
+; CHECK-NEXT:    ret <8 x half> [[FLOOR]]
+; Exercises floor despite the test_rint_ name: plain <8 x half> call, expected to be emitted unchanged.
+  %floor = tail call <8 x half> @_Z5floorDv8_Dh(<8 x half> %arg)
+  ret <8 x half> %floor
+}
+
+define <16 x half> @test_rint_v16f16(<16 x half> %arg) { ; NOTE(review): test is named rint but calls floor
+; CHECK-LABEL: define <16 x half> @test_rint_v16f16
+; CHECK-SAME: (<16 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FLOOR:%.*]] = tail call <16 x half> @_Z5floorDv16_Dh(<16 x half> [[ARG]])
+; CHECK-NEXT:    ret <16 x half> [[FLOOR]]
+;
+  %floor = tail call <16 x half> @_Z5floorDv16_Dh(<16 x half> %arg)
+  ret <16 x half> %floor
+}
+
+define float @test_rint_f32_nobuiltin_callsite(float %arg) { ; NOTE(review): test is named rint but calls floor
+; CHECK-LABEL: define float @test_rint_f32_nobuiltin_callsite
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FLOOR:%.*]] = tail call float @_Z5floorf(float [[ARG]]) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT:    ret float [[FLOOR]]
+;
+  %floor = tail call float @_Z5floorf(float %arg) #0
+  ret float %floor
+}
+
+define <2 x float> @test_rint_v2f32_nobuiltin_callsite(<2 x float> %arg) { ; NOTE(review): test is named rint but calls floor
+; CHECK-LABEL: define <2 x float> @test_rint_v2f32_nobuiltin_callsite
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FLOOR:%.*]] = tail call <2 x float> @_Z5floorDv2_f(<2 x float> [[ARG]]) #[[ATTR3]]
+; CHECK-NEXT:    ret <2 x float> [[FLOOR]]
+;
+  %floor = tail call <2 x float> @_Z5floorDv2_f(<2 x float> %arg) #0
+  ret <2 x float> %floor
+}
+
+; "no-builtins" should be ignored
+define float @test_rint_f32_nobuiltins(float %arg) #1 { ; NOTE(review): test is named rint but calls floor
+; CHECK-LABEL: define float @test_rint_f32_nobuiltins
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[FLOOR:%.*]] = tail call float @_Z5floorf(float [[ARG]]) #[[ATTR3]]
+; CHECK-NEXT:    ret float [[FLOOR]]
+;
+  %floor = tail call float @_Z5floorf(float %arg) #0
+  ret float %floor
+}
+
+define <2 x float> @test_rint_v2f32_nobuiltins(<2 x float> %arg) #1 { ; NOTE(review): test is named rint but calls floor
+; CHECK-LABEL: define <2 x float> @test_rint_v2f32_nobuiltins
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[FLOOR:%.*]] = tail call <2 x float> @_Z5floorDv2_f(<2 x float> [[ARG]]) #[[ATTR3]]
+; CHECK-NEXT:    ret <2 x float> [[FLOOR]]
+;
+  %floor = tail call <2 x float> @_Z5floorDv2_f(<2 x float> %arg) #0
+  ret <2 x float> %floor
+}
+
+define float @test_rint_f32_preserve_flags(float %arg) { ; NOTE(review): test is named rint but calls floor
+; CHECK-LABEL: define float @test_rint_f32_preserve_flags
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FLOOR:%.*]] = tail call nnan ninf float @_Z5floorf(float [[ARG]])
+; CHECK-NEXT:    ret float [[FLOOR]]
+;
+  %floor = tail call nnan ninf float @_Z5floorf(float %arg)
+  ret float %floor
+}
+
+define <2 x float> @test_rint_v2f32_preserve_flags(<2 x float> %arg) { ; NOTE(review): test is named rint but calls floor
+; CHECK-LABEL: define <2 x float> @test_rint_v2f32_preserve_flags
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FLOOR:%.*]] = tail call nnan nsz contract <2 x float> @_Z5floorDv2_f(<2 x float> [[ARG]])
+; CHECK-NEXT:    ret <2 x float> [[FLOOR]]
+;
+  %floor = tail call contract nsz nnan <2 x float> @_Z5floorDv2_f(<2 x float> %arg)
+  ret <2 x float> %floor
+}
+
+define float @test_rint_f32_preserve_flags_md(float %arg) { ; NOTE(review): test is named rint but calls floor
+; CHECK-LABEL: define float @test_rint_f32_preserve_flags_md
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FLOOR:%.*]] = tail call nnan ninf float @_Z5floorf(float [[ARG]]), !foo !0
+; CHECK-NEXT:    ret float [[FLOOR]]
+;
+  %floor = tail call nnan ninf float @_Z5floorf(float %arg), !foo !0
+  ret float %floor
+}
+
+define <2 x float> @test_rint_v2f32_preserve_flags_md(<2 x float> %arg) { ; NOTE(review): test is named rint but calls floor
+; CHECK-LABEL: define <2 x float> @test_rint_v2f32_preserve_flags_md
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FLOOR:%.*]] = tail call nnan nsz contract <2 x float> @_Z5floorDv2_f(<2 x float> [[ARG]]), !foo !0
+; CHECK-NEXT:    ret <2 x float> [[FLOOR]]
+;
+  %floor = tail call contract nsz nnan <2 x float> @_Z5floorDv2_f(<2 x float> %arg), !foo !0
+  ret <2 x float> %floor
+}
+
+; Test the libm name, not a recognized opencl builtin.
+declare float @floorf(float) #2
+declare double @floor(double) #2
+
+define float @test_libm_rint_f32(float %arg) { ; NOTE(review): name still says rint; the call is libm floorf
+; CHECK-LABEL: define float @test_libm_rint_f32
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FLOOR:%.*]] = tail call float @floorf(float [[ARG]])
+; CHECK-NEXT:    ret float [[FLOOR]]
+;
+  %floor = tail call float @floorf(float %arg)
+  ret float %floor
+}
+
+define double @test_libm_rint_f64(double %arg) { ; NOTE(review): name still says rint; the call is libm floor
+; CHECK-LABEL: define double @test_libm_rint_f64
+; CHECK-SAME: (double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[FLOOR:%.*]] = tail call double @floor(double [[ARG]])
+; CHECK-NEXT:    ret double [[FLOOR]]
+;
+  %floor = tail call double @floor(double %arg)
+  ret double %floor
+}
+
+define float @test_rint_f32_strictfp(float %arg) #3 { ; NOTE(review): test is named rint but calls floor
+; CHECK-LABEL: define float @test_rint_f32_strictfp
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT:    [[FLOOR:%.*]] = tail call nnan float @_Z5floorf(float [[ARG]]) #[[ATTR2]]
+; CHECK-NEXT:    ret float [[FLOOR]]
+;
+  %floor = tail call nnan float @_Z5floorf(float %arg) #3
+  ret float %floor
+}
+
+attributes #0 = { nobuiltin }
+attributes #1 = { "no-builtins" }
+attributes #2 = { nounwind memory(none) }
+attributes #3 = { strictfp }
+
+!0 = !{i32 1234}
Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fma.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fma.ll
@@ -0,0 +1,258 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
+
+declare float @_Z3fmafff(float, float, float)
+declare <2 x float> @_Z3fmaDv2_fS_S_(<2 x float>, <2 x float>, <2 x float>)
+declare <3 x float> @_Z3fmaDv3_fS_S_(<3 x float>, <3 x float>, <3 x float>)
+declare <4 x float> @_Z3fmaDv4_fS_S_(<4 x float>, <4 x float>, <4 x float>)
+declare <8 x float> @_Z3fmaDv8_fS_S_(<8 x float>, <8 x float>, <8 x float>)
+declare <16 x float> @_Z3fmaDv16_fS_S_(<16 x float>, <16 x float>, <16 x float>)
+declare double @_Z3fmaddd(double, double, double)
+declare <2 x double> @_Z3fmaDv2_dS_S_(<2 x double>, <2 x double>, <2 x double>)
+declare <3 x double> @_Z3fmaDv3_dS_S_(<3 x double>, <3 x double>, <3 x double>)
+declare <4 x double> @_Z3fmaDv4_dS_S_(<4 x double>, <4 x double>, <4 x double>)
+declare <8 x double> @_Z3fmaDv8_dS_S_(<8 x double>, <8 x double>, <8 x double>)
+declare <16 x double> @_Z3fmaDv16_dS_S_(<16 x double>, <16 x double>, <16 x double>)
+declare half @_Z3fmaDhDhDh(half, half, half)
+declare <2 x half> @_Z3fmaDv2_DhS_S_(<2 x half>, <2 x half>, <2 x half>)
+declare <3 x half> @_Z3fmaDv3_DhS_S_(<3 x half>, <3 x half>, <3 x half>)
+declare <4 x half> @_Z3fmaDv4_DhS_S_(<4 x half>, <4 x half>, <4 x half>)
+declare <8 x half> @_Z3fmaDv8_DhS_S_(<8 x half>, <8 x half>, <8 x half>)
+declare <16 x half> @_Z3fmaDv16_DhS_S_(<16 x half>, <16 x half>, <16 x half>)
+
+define float @test_fma_f32(float %x, float %y, float %z) { ; scalar f32: call expected to come out unchanged
+; CHECK-LABEL: define float @test_fma_f32
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call float @_Z3fmafff(float [[X]], float [[Y]], float [[Z]])
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = tail call float @_Z3fmafff(float %x, float %y, float %z)
+  ret float %res
+}
+
+define <2 x float> @test_fma_v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z) { ; 2 x f32: call expected to come out unchanged
+; CHECK-LABEL: define <2 x float> @test_fma_v2f32
+; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <2 x float> @_Z3fmaDv2_fS_S_(<2 x float> [[X]], <2 x float> [[Y]], <2 x float> [[Z]])
+; CHECK-NEXT:    ret <2 x float> [[RES]]
+;
+  %res = tail call <2 x float> @_Z3fmaDv2_fS_S_(<2 x float> %x, <2 x float> %y, <2 x float> %z)
+  ret <2 x float> %res
+}
+
+define <3 x float> @test_fma_v3f32(<3 x float> %x, <3 x float> %y, <3 x float> %z) { ; 3 x f32: call expected to come out unchanged
+; CHECK-LABEL: define <3 x float> @test_fma_v3f32
+; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]], <3 x float> [[Z:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <3 x float> @_Z3fmaDv3_fS_S_(<3 x float> [[X]], <3 x float> [[Y]], <3 x float> [[Z]])
+; CHECK-NEXT:    ret <3 x float> [[RES]]
+;
+  %res = tail call <3 x float> @_Z3fmaDv3_fS_S_(<3 x float> %x, <3 x float> %y, <3 x float> %z)
+  ret <3 x float> %res
+}
+
+define <4 x float> @test_fma_v4f32(<4 x float> %x, <4 x float> %y, <4 x float> %z) { ; 4 x f32: call expected to come out unchanged
+; CHECK-LABEL: define <4 x float> @test_fma_v4f32
+; CHECK-SAME: (<4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <4 x float> @_Z3fmaDv4_fS_S_(<4 x float> [[X]], <4 x float> [[Y]], <4 x float> [[Z]])
+; CHECK-NEXT:    ret <4 x float> [[RES]]
+;
+  %res = tail call <4 x float> @_Z3fmaDv4_fS_S_(<4 x float> %x, <4 x float> %y, <4 x float> %z)
+  ret <4 x float> %res
+}
+
+define <8 x float> @test_fma_v8f32(<8 x float> %x, <8 x float> %y, <8 x float> %z) { ; 8 x f32: call expected to come out unchanged
+; CHECK-LABEL: define <8 x float> @test_fma_v8f32
+; CHECK-SAME: (<8 x float> [[X:%.*]], <8 x float> [[Y:%.*]], <8 x float> [[Z:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <8 x float> @_Z3fmaDv8_fS_S_(<8 x float> [[X]], <8 x float> [[Y]], <8 x float> [[Z]])
+; CHECK-NEXT:    ret <8 x float> [[RES]]
+;
+  %res = tail call <8 x float> @_Z3fmaDv8_fS_S_(<8 x float> %x, <8 x float> %y, <8 x float> %z)
+  ret <8 x float> %res
+}
+
+define <16 x float> @test_fma_v16f32(<16 x float> %x, <16 x float> %y, <16 x float> %z) { ; 16 x f32: call expected to come out unchanged
+; CHECK-LABEL: define <16 x float> @test_fma_v16f32
+; CHECK-SAME: (<16 x float> [[X:%.*]], <16 x float> [[Y:%.*]], <16 x float> [[Z:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <16 x float> @_Z3fmaDv16_fS_S_(<16 x float> [[X]], <16 x float> [[Y]], <16 x float> [[Z]])
+; CHECK-NEXT:    ret <16 x float> [[RES]]
+;
+  %res = tail call <16 x float> @_Z3fmaDv16_fS_S_(<16 x float> %x, <16 x float> %y, <16 x float> %z)
+  ret <16 x float> %res
+}
+
+define double @test_fma_f64(double %x, double %y, double %z) { ; scalar f64: call expected to come out unchanged
+; CHECK-LABEL: define double @test_fma_f64
+; CHECK-SAME: (double [[X:%.*]], double [[Y:%.*]], double [[Z:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call double @_Z3fmaddd(double [[X]], double [[Y]], double [[Z]])
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %res = tail call double @_Z3fmaddd(double %x, double %y, double %z)
+  ret double %res
+}
+
+define <2 x double> @test_fma_v2f64(<2 x double> %x, <2 x double> %y, <2 x double> %z) { ; 2 x f64: call expected to come out unchanged
+; CHECK-LABEL: define <2 x double> @test_fma_v2f64
+; CHECK-SAME: (<2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <2 x double> @_Z3fmaDv2_dS_S_(<2 x double> [[X]], <2 x double> [[Y]], <2 x double> [[Z]])
+; CHECK-NEXT:    ret <2 x double> [[RES]]
+;
+  %res = tail call <2 x double> @_Z3fmaDv2_dS_S_(<2 x double> %x, <2 x double> %y, <2 x double> %z)
+  ret <2 x double> %res
+}
+
+define <3 x double> @test_fma_v3f64(<3 x double> %x, <3 x double> %y, <3 x double> %z) { ; 3 x f64: call expected to come out unchanged
+; CHECK-LABEL: define <3 x double> @test_fma_v3f64
+; CHECK-SAME: (<3 x double> [[X:%.*]], <3 x double> [[Y:%.*]], <3 x double> [[Z:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <3 x double> @_Z3fmaDv3_dS_S_(<3 x double> [[X]], <3 x double> [[Y]], <3 x double> [[Z]])
+; CHECK-NEXT:    ret <3 x double> [[RES]]
+;
+  %res = tail call <3 x double> @_Z3fmaDv3_dS_S_(<3 x double> %x, <3 x double> %y, <3 x double> %z)
+  ret <3 x double> %res
+}
+
+define <4 x double> @test_fma_v4f64(<4 x double> %x, <4 x double> %y, <4 x double> %z) { ; 4 x f64: call expected to come out unchanged
+; CHECK-LABEL: define <4 x double> @test_fma_v4f64
+; CHECK-SAME: (<4 x double> [[X:%.*]], <4 x double> [[Y:%.*]], <4 x double> [[Z:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <4 x double> @_Z3fmaDv4_dS_S_(<4 x double> [[X]], <4 x double> [[Y]], <4 x double> [[Z]])
+; CHECK-NEXT:    ret <4 x double> [[RES]]
+;
+  %res = tail call <4 x double> @_Z3fmaDv4_dS_S_(<4 x double> %x, <4 x double> %y, <4 x double> %z)
+  ret <4 x double> %res
+}
+
+define <8 x double> @test_fma_v8f64(<8 x double> %x, <8 x double> %y, <8 x double> %z) { ; 8 x f64: call expected to come out unchanged
+; CHECK-LABEL: define <8 x double> @test_fma_v8f64
+; CHECK-SAME: (<8 x double> [[X:%.*]], <8 x double> [[Y:%.*]], <8 x double> [[Z:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <8 x double> @_Z3fmaDv8_dS_S_(<8 x double> [[X]], <8 x double> [[Y]], <8 x double> [[Z]])
+; CHECK-NEXT:    ret <8 x double> [[RES]]
+;
+  %res = tail call <8 x double> @_Z3fmaDv8_dS_S_(<8 x double> %x, <8 x double> %y, <8 x double> %z)
+  ret <8 x double> %res
+}
+
+define <16 x double> @test_fma_v16f64(<16 x double> %x, <16 x double> %y, <16 x double> %z) { ; 16 x f64: call expected to come out unchanged
+; CHECK-LABEL: define <16 x double> @test_fma_v16f64
+; CHECK-SAME: (<16 x double> [[X:%.*]], <16 x double> [[Y:%.*]], <16 x double> [[Z:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <16 x double> @_Z3fmaDv16_dS_S_(<16 x double> [[X]], <16 x double> [[Y]], <16 x double> [[Z]])
+; CHECK-NEXT:    ret <16 x double> [[RES]]
+;
+  %res = tail call <16 x double> @_Z3fmaDv16_dS_S_(<16 x double> %x, <16 x double> %y, <16 x double> %z)
+  ret <16 x double> %res
+}
+
+define half @test_fma_f16(half %x, half %y, half %z) { ; scalar f16: call expected to come out unchanged
+; CHECK-LABEL: define half @test_fma_f16
+; CHECK-SAME: (half [[X:%.*]], half [[Y:%.*]], half [[Z:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call half @_Z3fmaDhDhDh(half [[X]], half [[Y]], half [[Z]])
+; CHECK-NEXT:    ret half [[RES]]
+;
+  %res = tail call half @_Z3fmaDhDhDh(half %x, half %y, half %z)
+  ret half %res
+}
+
+define <2 x half> @test_fma_v2f16(<2 x half> %x, <2 x half> %y, <2 x half> %z) { ; 2 x f16: call expected to come out unchanged
+; CHECK-LABEL: define <2 x half> @test_fma_v2f16
+; CHECK-SAME: (<2 x half> [[X:%.*]], <2 x half> [[Y:%.*]], <2 x half> [[Z:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <2 x half> @_Z3fmaDv2_DhS_S_(<2 x half> [[X]], <2 x half> [[Y]], <2 x half> [[Z]])
+; CHECK-NEXT:    ret <2 x half> [[RES]]
+;
+  %res = tail call <2 x half> @_Z3fmaDv2_DhS_S_(<2 x half> %x, <2 x half> %y, <2 x half> %z)
+  ret <2 x half> %res
+}
+
+define <3 x half> @test_fma_v3f16(<3 x half> %x, <3 x half> %y, <3 x half> %z) { ; 3 x f16: call expected to come out unchanged
+; CHECK-LABEL: define <3 x half> @test_fma_v3f16
+; CHECK-SAME: (<3 x half> [[X:%.*]], <3 x half> [[Y:%.*]], <3 x half> [[Z:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <3 x half> @_Z3fmaDv3_DhS_S_(<3 x half> [[X]], <3 x half> [[Y]], <3 x half> [[Z]])
+; CHECK-NEXT:    ret <3 x half> [[RES]]
+;
+  %res = tail call <3 x half> @_Z3fmaDv3_DhS_S_(<3 x half> %x, <3 x half> %y, <3 x half> %z)
+  ret <3 x half> %res
+}
+
+define <4 x half> @test_fma_v4f16(<4 x half> %x, <4 x half> %y, <4 x half> %z) { ; 4 x f16: call expected to come out unchanged
+; CHECK-LABEL: define <4 x half> @test_fma_v4f16
+; CHECK-SAME: (<4 x half> [[X:%.*]], <4 x half> [[Y:%.*]], <4 x half> [[Z:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <4 x half> @_Z3fmaDv4_DhS_S_(<4 x half> [[X]], <4 x half> [[Y]], <4 x half> [[Z]])
+; CHECK-NEXT:    ret <4 x half> [[RES]]
+;
+  %res = tail call <4 x half> @_Z3fmaDv4_DhS_S_(<4 x half> %x, <4 x half> %y, <4 x half> %z)
+  ret <4 x half> %res
+}
+
+define <8 x half> @test_fma_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z) { ; 8 x f16: call expected to come out unchanged
+; CHECK-LABEL: define <8 x half> @test_fma_v8f16
+; CHECK-SAME: (<8 x half> [[X:%.*]], <8 x half> [[Y:%.*]], <8 x half> [[Z:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <8 x half> @_Z3fmaDv8_DhS_S_(<8 x half> [[X]], <8 x half> [[Y]], <8 x half> [[Z]])
+; CHECK-NEXT:    ret <8 x half> [[RES]]
+;
+  %res = tail call <8 x half> @_Z3fmaDv8_DhS_S_(<8 x half> %x, <8 x half> %y, <8 x half> %z)
+  ret <8 x half> %res
+}
+
+define <16 x half> @test_fma_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z) { ; 16 x f16: call expected to come out unchanged
+; CHECK-LABEL: define <16 x half> @test_fma_v16f16
+; CHECK-SAME: (<16 x half> [[X:%.*]], <16 x half> [[Y:%.*]], <16 x half> [[Z:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <16 x half> @_Z3fmaDv16_DhS_S_(<16 x half> [[X]], <16 x half> [[Y]], <16 x half> [[Z]])
+; CHECK-NEXT:    ret <16 x half> [[RES]]
+;
+  %res = tail call <16 x half> @_Z3fmaDv16_DhS_S_(<16 x half> %x, <16 x half> %y, <16 x half> %z)
+  ret <16 x half> %res
+}
+
+define float @test_fma_f32_fast(float %x, float %y, float %z) { ; fast call: expected to come out unchanged, flag kept
+; CHECK-LABEL: define float @test_fma_f32_fast
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call fast float @_Z3fmafff(float [[X]], float [[Y]], float [[Z]])
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = tail call fast float @_Z3fmafff(float %x, float %y, float %z)
+  ret float %res
+}
+
+define float @test_fma_f32_noinline(float %x, float %y, float %z) { ; call site has #1 (noinline); NOTE(review): call is also fast, unlike the name
+; CHECK-LABEL: define float @test_fma_f32_noinline
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call fast float @_Z3fmafff(float [[X]], float [[Y]], float [[Z]]) #[[ATTR2:[0-9]+]]
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = tail call fast float @_Z3fmafff(float %x, float %y, float %z) #1
+  ret float %res
+}
+
+define float @test_fma_f32_fast_minsize(float %x, float %y, float %z) #0 { ; function has #0 (minsize); fast call expected unchanged
+; CHECK-LABEL: define float @test_fma_f32_fast_minsize
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[RES:%.*]] = tail call fast float @_Z3fmafff(float [[X]], float [[Y]], float [[Z]])
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = tail call fast float @_Z3fmafff(float %x, float %y, float %z)
+  ret float %res
+}
+
+define float @test_fma_f32_fast_strictfp(float %x, float %y, float %z) #2 { ; #2 is strictfp; NOTE(review): name says fast, call has only nnan nsz
+; CHECK-LABEL: define float @test_fma_f32_fast_strictfp
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:    [[RES:%.*]] = tail call nnan nsz float @_Z3fmafff(float [[X]], float [[Y]], float [[Z]]) #[[ATTR1]]
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = tail call nsz nnan float @_Z3fmafff(float %x, float %y, float %z) #2
+  ret float %res
+}
+
+define float @test_fma_f32_fast_nobuiltin(float %x, float %y, float %z) { ; call site has #3 (nobuiltin); fast call expected unchanged
+; CHECK-LABEL: define float @test_fma_f32_fast_nobuiltin
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call fast float @_Z3fmafff(float [[X]], float [[Y]], float [[Z]]) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = tail call fast float @_Z3fmafff(float %x, float %y, float %z) #3
+  ret float %res
+}
+
+attributes #0 = { minsize }
+attributes #1 = { noinline }
+attributes #2 = { strictfp }
+attributes #3 = { nobuiltin }
Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fmax.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fmax.ll
@@ -0,0 +1,278 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
+
+declare float @_Z4fmaxff(float, float)
+declare <2 x float> @_Z4fmaxDv2_fS_(<2 x float>, <2 x float>)
+declare <3 x float> @_Z4fmaxDv3_fS_(<3 x float>, <3 x float>)
+declare <4 x float> @_Z4fmaxDv4_fS_(<4 x float>, <4 x float>)
+declare <8 x float> @_Z4fmaxDv8_fS_(<8 x float>, <8 x float>)
+declare <16 x float> @_Z4fmaxDv16_fS_(<16 x float>, <16 x float>)
+declare double @_Z4fmaxdd(double, double)
+declare <2 x double> @_Z4fmaxDv2_dS_(<2 x double>, <2 x double>)
+declare <3 x double> @_Z4fmaxDv3_dS_(<3 x double>, <3 x double>)
+declare <4 x double> @_Z4fmaxDv4_dS_(<4 x double>, <4 x double>)
+declare <8 x double> @_Z4fmaxDv8_dS_(<8 x double>, <8 x double>)
+declare <16 x double> @_Z4fmaxDv16_dS_(<16 x double>, <16 x double>)
+declare half @_Z4fmaxDhDh(half, half)
+declare <2 x half> @_Z4fmaxDv2_DhS_(<2 x half>, <2 x half>)
+declare <3 x half> @_Z4fmaxDv3_DhS_(<3 x half>, <3 x half>)
+declare <4 x half> @_Z4fmaxDv4_DhS_(<4 x half>, <4 x half>)
+declare <8 x half> @_Z4fmaxDv8_DhS_(<8 x half>, <8 x half>)
+declare <16 x half> @_Z4fmaxDv16_DhS_(<16 x half>, <16 x half>)
+
+define float @test_fmax_f32(float %x, float %y) { ; scalar f32: call expected to come out unchanged
+; CHECK-LABEL: define float @test_fmax_f32
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call float @_Z4fmaxff(float [[X]], float [[Y]])
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = tail call float @_Z4fmaxff(float %x, float %y)
+  ret float %res
+}
+
+define float @test_fmax_f32_nnan(float %x, float %y) { ; nnan call: expected to come out unchanged, flag kept
+; CHECK-LABEL: define float @test_fmax_f32_nnan
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call nnan float @_Z4fmaxff(float [[X]], float [[Y]])
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = tail call nnan float @_Z4fmaxff(float %x, float %y)
+  ret float %res
+}
+
+define <2 x float> @test_fmax_v2f32(<2 x float> %x, <2 x float> %y) { ; 2 x f32: call expected to come out unchanged
+; CHECK-LABEL: define <2 x float> @test_fmax_v2f32
+; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <2 x float> @_Z4fmaxDv2_fS_(<2 x float> [[X]], <2 x float> [[Y]])
+; CHECK-NEXT:    ret <2 x float> [[RES]]
+;
+  %res = tail call <2 x float> @_Z4fmaxDv2_fS_(<2 x float> %x, <2 x float> %y)
+  ret <2 x float> %res
+}
+
+define <3 x float> @test_fmax_v3f32(<3 x float> %x, <3 x float> %y) { ; 3 x f32: call expected to come out unchanged
+; CHECK-LABEL: define <3 x float> @test_fmax_v3f32
+; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <3 x float> @_Z4fmaxDv3_fS_(<3 x float> [[X]], <3 x float> [[Y]])
+; CHECK-NEXT:    ret <3 x float> [[RES]]
+;
+  %res = tail call <3 x float> @_Z4fmaxDv3_fS_(<3 x float> %x, <3 x float> %y)
+  ret <3 x float> %res
+}
+
+define <4 x float> @test_fmax_v4f32(<4 x float> %x, <4 x float> %y) { ; 4 x f32: call expected to come out unchanged
+; CHECK-LABEL: define <4 x float> @test_fmax_v4f32
+; CHECK-SAME: (<4 x float> [[X:%.*]], <4 x float> [[Y:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <4 x float> @_Z4fmaxDv4_fS_(<4 x float> [[X]], <4 x float> [[Y]])
+; CHECK-NEXT:    ret <4 x float> [[RES]]
+;
+  %res = tail call <4 x float> @_Z4fmaxDv4_fS_(<4 x float> %x, <4 x float> %y)
+  ret <4 x float> %res
+}
+
+define <8 x float> @test_fmax_v8f32(<8 x float> %x, <8 x float> %y) { ; 8 x f32: call expected to come out unchanged
+; CHECK-LABEL: define <8 x float> @test_fmax_v8f32
+; CHECK-SAME: (<8 x float> [[X:%.*]], <8 x float> [[Y:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <8 x float> @_Z4fmaxDv8_fS_(<8 x float> [[X]], <8 x float> [[Y]])
+; CHECK-NEXT:    ret <8 x float> [[RES]]
+;
+  %res = tail call <8 x float> @_Z4fmaxDv8_fS_(<8 x float> %x, <8 x float> %y)
+  ret <8 x float> %res
+}
+
+define <16 x float> @test_fmax_v16f32(<16 x float> %x, <16 x float> %y) { ; 16 x f32: call expected to come out unchanged
+; CHECK-LABEL: define <16 x float> @test_fmax_v16f32
+; CHECK-SAME: (<16 x float> [[X:%.*]], <16 x float> [[Y:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <16 x float> @_Z4fmaxDv16_fS_(<16 x float> [[X]], <16 x float> [[Y]])
+; CHECK-NEXT:    ret <16 x float> [[RES]]
+;
+  %res = tail call <16 x float> @_Z4fmaxDv16_fS_(<16 x float> %x, <16 x float> %y)
+  ret <16 x float> %res
+}
+
+define double @test_fmax_f64(double %x, double %y) { ; scalar f64: call expected to come out unchanged
+; CHECK-LABEL: define double @test_fmax_f64
+; CHECK-SAME: (double [[X:%.*]], double [[Y:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call double @_Z4fmaxdd(double [[X]], double [[Y]])
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %res = tail call double @_Z4fmaxdd(double %x, double %y)
+  ret double %res
+}
+
+define <2 x double> @test_fmax_v2f64(<2 x double> %x, <2 x double> %y) { ; 2 x f64: call expected to come out unchanged
+; CHECK-LABEL: define <2 x double> @test_fmax_v2f64
+; CHECK-SAME: (<2 x double> [[X:%.*]], <2 x double> [[Y:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <2 x double> @_Z4fmaxDv2_dS_(<2 x double> [[X]], <2 x double> [[Y]])
+; CHECK-NEXT:    ret <2 x double> [[RES]]
+;
+  %res = tail call <2 x double> @_Z4fmaxDv2_dS_(<2 x double> %x, <2 x double> %y)
+  ret <2 x double> %res
+}
+
+define <3 x double> @test_fmax_v3f64(<3 x double> %x, <3 x double> %y) { ; 3 x f64: call expected to come out unchanged
+; CHECK-LABEL: define <3 x double> @test_fmax_v3f64
+; CHECK-SAME: (<3 x double> [[X:%.*]], <3 x double> [[Y:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <3 x double> @_Z4fmaxDv3_dS_(<3 x double> [[X]], <3 x double> [[Y]])
+; CHECK-NEXT:    ret <3 x double> [[RES]]
+;
+  %res = tail call <3 x double> @_Z4fmaxDv3_dS_(<3 x double> %x, <3 x double> %y)
+  ret <3 x double> %res
+}
+
+define <4 x double> @test_fmax_v4f64(<4 x double> %x, <4 x double> %y) { ; 4 x f64: call expected to come out unchanged
+; CHECK-LABEL: define <4 x double> @test_fmax_v4f64
+; CHECK-SAME: (<4 x double> [[X:%.*]], <4 x double> [[Y:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <4 x double> @_Z4fmaxDv4_dS_(<4 x double> [[X]], <4 x double> [[Y]])
+; CHECK-NEXT:    ret <4 x double> [[RES]]
+;
+  %res = tail call <4 x double> @_Z4fmaxDv4_dS_(<4 x double> %x, <4 x double> %y)
+  ret <4 x double> %res
+}
+
+define <8 x double> @test_fmax_v8f64(<8 x double> %x, <8 x double> %y) { ; 8 x f64: call expected to come out unchanged
+; CHECK-LABEL: define <8 x double> @test_fmax_v8f64
+; CHECK-SAME: (<8 x double> [[X:%.*]], <8 x double> [[Y:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <8 x double> @_Z4fmaxDv8_dS_(<8 x double> [[X]], <8 x double> [[Y]])
+; CHECK-NEXT:    ret <8 x double> [[RES]]
+;
+  %res = tail call <8 x double> @_Z4fmaxDv8_dS_(<8 x double> %x, <8 x double> %y)
+  ret <8 x double> %res
+}
+
+define <16 x double> @test_fmax_v16f64(<16 x double> %x, <16 x double> %y) { ; 16 x f64: call expected to come out unchanged
+; CHECK-LABEL: define <16 x double> @test_fmax_v16f64
+; CHECK-SAME: (<16 x double> [[X:%.*]], <16 x double> [[Y:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <16 x double> @_Z4fmaxDv16_dS_(<16 x double> [[X]], <16 x double> [[Y]])
+; CHECK-NEXT:    ret <16 x double> [[RES]]
+;
+  %res = tail call <16 x double> @_Z4fmaxDv16_dS_(<16 x double> %x, <16 x double> %y)
+  ret <16 x double> %res
+}
+
+define half @test_fmax_f16(half %x, half %y) { ; scalar f16: call expected to come out unchanged
+; CHECK-LABEL: define half @test_fmax_f16
+; CHECK-SAME: (half [[X:%.*]], half [[Y:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call half @_Z4fmaxDhDh(half [[X]], half [[Y]])
+; CHECK-NEXT:    ret half [[RES]]
+;
+  %res = tail call half @_Z4fmaxDhDh(half %x, half %y)
+  ret half %res
+}
+
+define <2 x half> @test_fmax_v2f16(<2 x half> %x, <2 x half> %y) { ; 2 x f16: call expected to come out unchanged
+; CHECK-LABEL: define <2 x half> @test_fmax_v2f16
+; CHECK-SAME: (<2 x half> [[X:%.*]], <2 x half> [[Y:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <2 x half> @_Z4fmaxDv2_DhS_(<2 x half> [[X]], <2 x half> [[Y]])
+; CHECK-NEXT:    ret <2 x half> [[RES]]
+;
+  %res = tail call <2 x half> @_Z4fmaxDv2_DhS_(<2 x half> %x, <2 x half> %y)
+  ret <2 x half> %res
+}
+
+define <3 x half> @test_fmax_v3f16(<3 x half> %x, <3 x half> %y) { ; 3 x f16: call expected to come out unchanged
+; CHECK-LABEL: define <3 x half> @test_fmax_v3f16
+; CHECK-SAME: (<3 x half> [[X:%.*]], <3 x half> [[Y:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <3 x half> @_Z4fmaxDv3_DhS_(<3 x half> [[X]], <3 x half> [[Y]])
+; CHECK-NEXT:    ret <3 x half> [[RES]]
+;
+  %res = tail call <3 x half> @_Z4fmaxDv3_DhS_(<3 x half> %x, <3 x half> %y)
+  ret <3 x half> %res
+}
+
+define <4 x half> @test_fmax_v4f16(<4 x half> %x, <4 x half> %y) { ; 4 x f16: call expected to come out unchanged
+; CHECK-LABEL: define <4 x half> @test_fmax_v4f16
+; CHECK-SAME: (<4 x half> [[X:%.*]], <4 x half> [[Y:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <4 x half> @_Z4fmaxDv4_DhS_(<4 x half> [[X]], <4 x half> [[Y]])
+; CHECK-NEXT:    ret <4 x half> [[RES]]
+;
+  %res = tail call <4 x half> @_Z4fmaxDv4_DhS_(<4 x half> %x, <4 x half> %y)
+  ret <4 x half> %res
+}
+
+define <8 x half> @test_fmax_v8f16(<8 x half> %x, <8 x half> %y) { ; 8 x f16: call expected to come out unchanged
+; CHECK-LABEL: define <8 x half> @test_fmax_v8f16
+; CHECK-SAME: (<8 x half> [[X:%.*]], <8 x half> [[Y:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <8 x half> @_Z4fmaxDv8_DhS_(<8 x half> [[X]], <8 x half> [[Y]])
+; CHECK-NEXT:    ret <8 x half> [[RES]]
+;
+  %res = tail call <8 x half> @_Z4fmaxDv8_DhS_(<8 x half> %x, <8 x half> %y)
+  ret <8 x half> %res
+}
+
+define <16 x half> @test_fmax_v16f16(<16 x half> %x, <16 x half> %y) { ; 16 x f16: call expected to come out unchanged
+; CHECK-LABEL: define <16 x half> @test_fmax_v16f16
+; CHECK-SAME: (<16 x half> [[X:%.*]], <16 x half> [[Y:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <16 x half> @_Z4fmaxDv16_DhS_(<16 x half> [[X]], <16 x half> [[Y]])
+; CHECK-NEXT:    ret <16 x half> [[RES]]
+;
+  %res = tail call <16 x half> @_Z4fmaxDv16_DhS_(<16 x half> %x, <16 x half> %y)
+  ret <16 x half> %res
+}
+
+define float @test_fmax_f32_minsize(float %x, float %y) #0 { ; function has #0 (minsize); call expected unchanged
+; CHECK-LABEL: define float @test_fmax_f32_minsize
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[RES:%.*]] = tail call float @_Z4fmaxff(float [[X]], float [[Y]])
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = tail call float @_Z4fmaxff(float %x, float %y)
+  ret float %res
+}
+
+define float @test_fmax_f32_nnan_minsize(float %x, float %y) #0 { ; function has #0 (minsize); nnan call expected unchanged
+; CHECK-LABEL: define float @test_fmax_f32_nnan_minsize
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[RES:%.*]] = tail call nnan float @_Z4fmaxff(float [[X]], float [[Y]])
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = tail call nnan float @_Z4fmaxff(float %x, float %y)
+  ret float %res
+}
+
+define float @test_fmax_f32_noinline(float %x, float %y) { ; call site has #1 (noinline); call expected unchanged
+; CHECK-LABEL: define float @test_fmax_f32_noinline
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call float @_Z4fmaxff(float [[X]], float [[Y]]) #[[ATTR2:[0-9]+]]
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = tail call float @_Z4fmaxff(float %x, float %y) #1
+  ret float %res
+}
+
+define float @test_fmax_f32_nnan_noinline(float %x, float %y) { ; call site has #1 (noinline); nnan call expected unchanged
+; CHECK-LABEL: define float @test_fmax_f32_nnan_noinline
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call nnan float @_Z4fmaxff(float [[X]], float [[Y]]) #[[ATTR2]]
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = tail call nnan float @_Z4fmaxff(float %x, float %y) #1
+  ret float %res
+}
+
+define float @test_fmax_f32_strictfp(float %x, float %y) #2 { ; #2 is strictfp on function and call; flags print as nnan nsz
+; CHECK-LABEL: define float @test_fmax_f32_strictfp
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:    [[RES:%.*]] = tail call nnan nsz float @_Z4fmaxff(float [[X]], float [[Y]]) #[[ATTR1]]
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = tail call nsz nnan float @_Z4fmaxff(float %x, float %y) #2
+  ret float %res
+}
+
+define float @test_fmax_f32_fast_nobuiltin(float %x, float %y) { ; call site has #3 (nobuiltin); fast call expected unchanged
+; CHECK-LABEL: define float @test_fmax_f32_fast_nobuiltin
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call fast float @_Z4fmaxff(float [[X]], float [[Y]]) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = tail call fast float @_Z4fmaxff(float %x, float %y) #3
+  ret float %res
+}
+
+attributes #0 = { minsize }
+attributes #1 = { noinline }
+attributes #2 = { strictfp }
+attributes #3 = { nobuiltin }
Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fmin.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fmin.ll
@@ -0,0 +1,278 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
+
+declare float @_Z4fminff(float, float)
+declare <2 x float> @_Z4fminDv2_fS_(<2 x float>, <2 x float>)
+declare <3 x float> @_Z4fminDv3_fS_(<3 x float>, <3 x float>)
+declare <4 x float> @_Z4fminDv4_fS_(<4 x float>, <4 x float>)
+declare <8 x float> @_Z4fminDv8_fS_(<8 x float>, <8 x float>)
+declare <16 x float> @_Z4fminDv16_fS_(<16 x float>, <16 x float>)
+declare double @_Z4fmindd(double, double)
+declare <2 x double> @_Z4fminDv2_dS_(<2 x double>, <2 x double>)
+declare <3 x double> @_Z4fminDv3_dS_(<3 x double>, <3 x double>)
+declare <4 x double> @_Z4fminDv4_dS_(<4 x double>, <4 x double>)
+declare <8 x double> @_Z4fminDv8_dS_(<8 x double>, <8 x double>)
+declare <16 x double> @_Z4fminDv16_dS_(<16 x double>, <16 x double>)
+declare half @_Z4fminDhDh(half, half)
+declare <2 x half> @_Z4fminDv2_DhS_(<2 x half>, <2 x half>)
+declare <3 x half> @_Z4fminDv3_DhS_(<3 x half>, <3 x half>)
+declare <4 x half> @_Z4fminDv4_DhS_(<4 x half>, <4 x half>)
+declare <8 x half> @_Z4fminDv8_DhS_(<8 x half>, <8 x half>)
+declare <16 x half> @_Z4fminDv16_DhS_(<16 x half>, <16 x half>)
+
+define float @test_fmin_f32(float %x, float %y) { ; scalar f32 fmin libcall, no fast-math flags; expected output is the same call, unchanged
+; CHECK-LABEL: define float @test_fmin_f32
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
+; CHECK-NEXT:    [[FMIN:%.*]] = tail call float @_Z4fminff(float [[X]], float [[Y]])
+; CHECK-NEXT:    ret float [[FMIN]]
+;
+  %fmin = tail call float @_Z4fminff(float %x, float %y)
+  ret float %fmin
+}
+
+define float @test_fmin_f32_nnan(float %x, float %y) { ; scalar f32 fmin libcall with the nnan flag; expected output is the same call, unchanged
+; CHECK-LABEL: define float @test_fmin_f32_nnan
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
+; CHECK-NEXT:    [[FMIN:%.*]] = tail call nnan float @_Z4fminff(float [[X]], float [[Y]])
+; CHECK-NEXT:    ret float [[FMIN]]
+;
+  %fmin = tail call nnan float @_Z4fminff(float %x, float %y)
+  ret float %fmin
+}
+
+define <2 x float> @test_fmin_v2f32(<2 x float> %x, <2 x float> %y) { ; <2 x float> fmin libcall, no flags; expected output is the same call, unchanged
+; CHECK-LABEL: define <2 x float> @test_fmin_v2f32
+; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) {
+; CHECK-NEXT:    [[FMIN:%.*]] = tail call <2 x float> @_Z4fminDv2_fS_(<2 x float> [[X]], <2 x float> [[Y]])
+; CHECK-NEXT:    ret <2 x float> [[FMIN]]
+;
+  %fmin = tail call <2 x float> @_Z4fminDv2_fS_(<2 x float> %x, <2 x float> %y)
+  ret <2 x float> %fmin
+}
+
+define <3 x float> @test_fmin_v3f32(<3 x float> %x, <3 x float> %y) { ; <3 x float> fmin libcall, no flags; expected output is the same call, unchanged
+; CHECK-LABEL: define <3 x float> @test_fmin_v3f32
+; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) {
+; CHECK-NEXT:    [[FMIN:%.*]] = tail call <3 x float> @_Z4fminDv3_fS_(<3 x float> [[X]], <3 x float> [[Y]])
+; CHECK-NEXT:    ret <3 x float> [[FMIN]]
+;
+  %fmin = tail call <3 x float> @_Z4fminDv3_fS_(<3 x float> %x, <3 x float> %y)
+  ret <3 x float> %fmin
+}
+
+define <4 x float> @test_fmin_v4f32(<4 x float> %x, <4 x float> %y) { ; <4 x float> fmin libcall, no flags; expected output is the same call, unchanged
+; CHECK-LABEL: define <4 x float> @test_fmin_v4f32
+; CHECK-SAME: (<4 x float> [[X:%.*]], <4 x float> [[Y:%.*]]) {
+; CHECK-NEXT:    [[FMIN:%.*]] = tail call <4 x float> @_Z4fminDv4_fS_(<4 x float> [[X]], <4 x float> [[Y]])
+; CHECK-NEXT:    ret <4 x float> [[FMIN]]
+;
+  %fmin = tail call <4 x float> @_Z4fminDv4_fS_(<4 x float> %x, <4 x float> %y)
+  ret <4 x float> %fmin
+}
+
+define <8 x float> @test_fmin_v8f32(<8 x float> %x, <8 x float> %y) { ; <8 x float> fmin libcall, no flags; expected output is the same call, unchanged
+; CHECK-LABEL: define <8 x float> @test_fmin_v8f32
+; CHECK-SAME: (<8 x float> [[X:%.*]], <8 x float> [[Y:%.*]]) {
+; CHECK-NEXT:    [[FMIN:%.*]] = tail call <8 x float> @_Z4fminDv8_fS_(<8 x float> [[X]], <8 x float> [[Y]])
+; CHECK-NEXT:    ret <8 x float> [[FMIN]]
+;
+  %fmin = tail call <8 x float> @_Z4fminDv8_fS_(<8 x float> %x, <8 x float> %y)
+  ret <8 x float> %fmin
+}
+
+define <16 x float> @test_fmin_v16f32(<16 x float> %x, <16 x float> %y) { ; <16 x float> fmin libcall, no flags; expected output is the same call, unchanged
+; CHECK-LABEL: define <16 x float> @test_fmin_v16f32
+; CHECK-SAME: (<16 x float> [[X:%.*]], <16 x float> [[Y:%.*]]) {
+; CHECK-NEXT:    [[FMIN:%.*]] = tail call <16 x float> @_Z4fminDv16_fS_(<16 x float> [[X]], <16 x float> [[Y]])
+; CHECK-NEXT:    ret <16 x float> [[FMIN]]
+;
+  %fmin = tail call <16 x float> @_Z4fminDv16_fS_(<16 x float> %x, <16 x float> %y)
+  ret <16 x float> %fmin
+}
+
+define double @test_fmin_f64(double %x, double %y) { ; scalar f64 fmin libcall, no flags; expected output is the same call, unchanged
+; CHECK-LABEL: define double @test_fmin_f64
+; CHECK-SAME: (double [[X:%.*]], double [[Y:%.*]]) {
+; CHECK-NEXT:    [[FMIN:%.*]] = tail call double @_Z4fmindd(double [[X]], double [[Y]])
+; CHECK-NEXT:    ret double [[FMIN]]
+;
+  %fmin = tail call double @_Z4fmindd(double %x, double %y)
+  ret double %fmin
+}
+
+define <2 x double> @test_fmin_v2f64(<2 x double> %x, <2 x double> %y) { ; <2 x double> fmin libcall, no flags; expected output is the same call, unchanged
+; CHECK-LABEL: define <2 x double> @test_fmin_v2f64
+; CHECK-SAME: (<2 x double> [[X:%.*]], <2 x double> [[Y:%.*]]) {
+; CHECK-NEXT:    [[FMIN:%.*]] = tail call <2 x double> @_Z4fminDv2_dS_(<2 x double> [[X]], <2 x double> [[Y]])
+; CHECK-NEXT:    ret <2 x double> [[FMIN]]
+;
+  %fmin = tail call <2 x double> @_Z4fminDv2_dS_(<2 x double> %x, <2 x double> %y)
+  ret <2 x double> %fmin
+}
+
+define <3 x double> @test_fmin_v3f64(<3 x double> %x, <3 x double> %y) { ; <3 x double> fmin libcall, no flags; expected output is the same call, unchanged
+; CHECK-LABEL: define <3 x double> @test_fmin_v3f64
+; CHECK-SAME: (<3 x double> [[X:%.*]], <3 x double> [[Y:%.*]]) {
+; CHECK-NEXT:    [[FMIN:%.*]] = tail call <3 x double> @_Z4fminDv3_dS_(<3 x double> [[X]], <3 x double> [[Y]])
+; CHECK-NEXT:    ret <3 x double> [[FMIN]]
+;
+  %fmin = tail call <3 x double> @_Z4fminDv3_dS_(<3 x double> %x, <3 x double> %y)
+  ret <3 x double> %fmin
+}
+
+define <4 x double> @test_fmin_v4f64(<4 x double> %x, <4 x double> %y) { ; <4 x double> fmin libcall, no flags; expected output is the same call, unchanged
+; CHECK-LABEL: define <4 x double> @test_fmin_v4f64
+; CHECK-SAME: (<4 x double> [[X:%.*]], <4 x double> [[Y:%.*]]) {
+; CHECK-NEXT:    [[FMIN:%.*]] = tail call <4 x double> @_Z4fminDv4_dS_(<4 x double> [[X]], <4 x double> [[Y]])
+; CHECK-NEXT:    ret <4 x double> [[FMIN]]
+;
+  %fmin = tail call <4 x double> @_Z4fminDv4_dS_(<4 x double> %x, <4 x double> %y)
+  ret <4 x double> %fmin
+}
+
+define <8 x double> @test_fmin_v8f64(<8 x double> %x, <8 x double> %y) { ; <8 x double> fmin libcall, no flags; expected output is the same call, unchanged
+; CHECK-LABEL: define <8 x double> @test_fmin_v8f64
+; CHECK-SAME: (<8 x double> [[X:%.*]], <8 x double> [[Y:%.*]]) {
+; CHECK-NEXT:    [[FMIN:%.*]] = tail call <8 x double> @_Z4fminDv8_dS_(<8 x double> [[X]], <8 x double> [[Y]])
+; CHECK-NEXT:    ret <8 x double> [[FMIN]]
+;
+  %fmin = tail call <8 x double> @_Z4fminDv8_dS_(<8 x double> %x, <8 x double> %y)
+  ret <8 x double> %fmin
+}
+
+define <16 x double> @test_fmin_v16f64(<16 x double> %x, <16 x double> %y) { ; <16 x double> fmin libcall, no flags; expected output is the same call, unchanged
+; CHECK-LABEL: define <16 x double> @test_fmin_v16f64
+; CHECK-SAME: (<16 x double> [[X:%.*]], <16 x double> [[Y:%.*]]) {
+; CHECK-NEXT:    [[FMIN:%.*]] = tail call <16 x double> @_Z4fminDv16_dS_(<16 x double> [[X]], <16 x double> [[Y]])
+; CHECK-NEXT:    ret <16 x double> [[FMIN]]
+;
+  %fmin = tail call <16 x double> @_Z4fminDv16_dS_(<16 x double> %x, <16 x double> %y)
+  ret <16 x double> %fmin
+}
+
+define half @test_fmin_f16(half %x, half %y) { ; scalar half fmin libcall, no flags; expected output is the same call, unchanged
+; CHECK-LABEL: define half @test_fmin_f16
+; CHECK-SAME: (half [[X:%.*]], half [[Y:%.*]]) {
+; CHECK-NEXT:    [[FMIN:%.*]] = tail call half @_Z4fminDhDh(half [[X]], half [[Y]])
+; CHECK-NEXT:    ret half [[FMIN]]
+;
+  %fmin = tail call half @_Z4fminDhDh(half %x, half %y)
+  ret half %fmin
+}
+
+define <2 x half> @test_fmin_v2f16(<2 x half> %x, <2 x half> %y) { ; <2 x half> fmin libcall, no flags; expected output is the same call, unchanged
+; CHECK-LABEL: define <2 x half> @test_fmin_v2f16
+; CHECK-SAME: (<2 x half> [[X:%.*]], <2 x half> [[Y:%.*]]) {
+; CHECK-NEXT:    [[FMIN:%.*]] = tail call <2 x half> @_Z4fminDv2_DhS_(<2 x half> [[X]], <2 x half> [[Y]])
+; CHECK-NEXT:    ret <2 x half> [[FMIN]]
+;
+  %fmin = tail call <2 x half> @_Z4fminDv2_DhS_(<2 x half> %x, <2 x half> %y)
+  ret <2 x half> %fmin
+}
+
+define <3 x half> @test_fmin_v3f16(<3 x half> %x, <3 x half> %y) { ; <3 x half> fmin libcall, no flags; expected output is the same call, unchanged
+; CHECK-LABEL: define <3 x half> @test_fmin_v3f16
+; CHECK-SAME: (<3 x half> [[X:%.*]], <3 x half> [[Y:%.*]]) {
+; CHECK-NEXT:    [[FMIN:%.*]] = tail call <3 x half> @_Z4fminDv3_DhS_(<3 x half> [[X]], <3 x half> [[Y]])
+; CHECK-NEXT:    ret <3 x half> [[FMIN]]
+;
+  %fmin = tail call <3 x half> @_Z4fminDv3_DhS_(<3 x half> %x, <3 x half> %y)
+  ret <3 x half> %fmin
+}
+
+define <4 x half> @test_fmin_v4f16(<4 x half> %x, <4 x half> %y) { ; <4 x half> fmin libcall, no flags; expected output is the same call, unchanged
+; CHECK-LABEL: define <4 x half> @test_fmin_v4f16
+; CHECK-SAME: (<4 x half> [[X:%.*]], <4 x half> [[Y:%.*]]) {
+; CHECK-NEXT:    [[FMIN:%.*]] = tail call <4 x half> @_Z4fminDv4_DhS_(<4 x half> [[X]], <4 x half> [[Y]])
+; CHECK-NEXT:    ret <4 x half> [[FMIN]]
+;
+  %fmin = tail call <4 x half> @_Z4fminDv4_DhS_(<4 x half> %x, <4 x half> %y)
+  ret <4 x half> %fmin
+}
+
+define <8 x half> @test_fmin_v8f16(<8 x half> %x, <8 x half> %y) { ; <8 x half> fmin libcall, no flags; expected output is the same call, unchanged
+; CHECK-LABEL: define <8 x half> @test_fmin_v8f16
+; CHECK-SAME: (<8 x half> [[X:%.*]], <8 x half> [[Y:%.*]]) {
+; CHECK-NEXT:    [[FMIN:%.*]] = tail call <8 x half> @_Z4fminDv8_DhS_(<8 x half> [[X]], <8 x half> [[Y]])
+; CHECK-NEXT:    ret <8 x half> [[FMIN]]
+;
+  %fmin = tail call <8 x half> @_Z4fminDv8_DhS_(<8 x half> %x, <8 x half> %y)
+  ret <8 x half> %fmin
+}
+
+define <16 x half> @test_fmin_v16f16(<16 x half> %x, <16 x half> %y) { ; <16 x half> fmin libcall, no flags; expected output is the same call, unchanged
+; CHECK-LABEL: define <16 x half> @test_fmin_v16f16
+; CHECK-SAME: (<16 x half> [[X:%.*]], <16 x half> [[Y:%.*]]) {
+; CHECK-NEXT:    [[FMIN:%.*]] = tail call <16 x half> @_Z4fminDv16_DhS_(<16 x half> [[X]], <16 x half> [[Y]])
+; CHECK-NEXT:    ret <16 x half> [[FMIN]]
+;
+  %fmin = tail call <16 x half> @_Z4fminDv16_DhS_(<16 x half> %x, <16 x half> %y)
+  ret <16 x half> %fmin
+}
+
+define float @test_fmin_f32_minsize(float %x, float %y) #0 { ; minsize function (#0) calling f32 fmin with no flags; expected output is the same call, unchanged
+; CHECK-LABEL: define float @test_fmin_f32_minsize
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[FMIN:%.*]] = tail call float @_Z4fminff(float [[X]], float [[Y]])
+; CHECK-NEXT:    ret float [[FMIN]]
+;
+  %fmin = tail call float @_Z4fminff(float %x, float %y)
+  ret float %fmin
+}
+
+define float @test_fmin_f32_nnan_minsize(float %x, float %y) #0 { ; minsize function (#0) calling f32 fmin with nnan; expected output is the same call, unchanged
+; CHECK-LABEL: define float @test_fmin_f32_nnan_minsize
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[FMIN:%.*]] = tail call nnan float @_Z4fminff(float [[X]], float [[Y]])
+; CHECK-NEXT:    ret float [[FMIN]]
+;
+  %fmin = tail call nnan float @_Z4fminff(float %x, float %y)
+  ret float %fmin
+}
+
+define float @test_fmin_f32_noinline(float %x, float %y) { ; f32 fmin libcall whose call site is marked noinline; expected output is the same call, unchanged
+; CHECK-LABEL: define float @test_fmin_f32_noinline
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
+; CHECK-NEXT:    [[FMIN:%.*]] = tail call float @_Z4fminff(float [[X]], float [[Y]]) #[[ATTR2:[0-9]+]]
+; CHECK-NEXT:    ret float [[FMIN]]
+;
+  %fmin = tail call float @_Z4fminff(float %x, float %y) #1 ; #1 = { noinline }, attached to the call rather than the function
+  ret float %fmin
+}
+
+define float @test_fmin_f32_nnan_noinline(float %x, float %y) { ; nnan f32 fmin libcall with a noinline call site; expected output is the same call, unchanged
+; CHECK-LABEL: define float @test_fmin_f32_nnan_noinline
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
+; CHECK-NEXT:    [[FMIN:%.*]] = tail call nnan float @_Z4fminff(float [[X]], float [[Y]]) #[[ATTR2]]
+; CHECK-NEXT:    ret float [[FMIN]]
+;
+  %fmin = tail call nnan float @_Z4fminff(float %x, float %y) #1 ; #1 = { noinline }, attached to the call rather than the function
+  ret float %fmin
+}
+
+define float @test_fmin_f32_strictfp(float %x, float %y) #2 { ; strictfp function (#2) calling f32 fmin with nnan+nsz; expected output keeps the libcall
+; CHECK-LABEL: define float @test_fmin_f32_strictfp
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:    [[FMIN:%.*]] = tail call nnan nsz float @_Z4fminff(float [[X]], float [[Y]]) #[[ATTR1]]
+; CHECK-NEXT:    ret float [[FMIN]]
+;
+  %fmin = tail call nsz nnan float @_Z4fminff(float %x, float %y) #2 ; flags are written nsz nnan here; the expected line above spells them nnan nsz
+  ret float %fmin
+}
+
+define float @test_fmin_f32_fast_nobuiltin(float %x, float %y) { ; fast f32 fmin libcall with a nobuiltin call site; expected output is the same call, unchanged
+; CHECK-LABEL: define float @test_fmin_f32_fast_nobuiltin
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
+; CHECK-NEXT:    [[FMIN:%.*]] = tail call fast float @_Z4fminff(float [[X]], float [[Y]]) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT:    ret float [[FMIN]]
+;
+  %fmin = tail call fast float @_Z4fminff(float %x, float %y) #3 ; #3 = { nobuiltin }, attached to the call rather than the function
+  ret float %fmin
+}
+
+attributes #0 = { minsize } ; applied to the functions test_fmin_f32_minsize and test_fmin_f32_nnan_minsize
+attributes #1 = { noinline } ; applied at the call sites of the two *_noinline tests
+attributes #2 = { strictfp } ; applied to test_fmin_f32_strictfp and to its call
+attributes #3 = { nobuiltin } ; applied at the call site in test_fmin_f32_fast_nobuiltin
Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-ldexp.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-ldexp.ll
@@ -0,0 +1,249 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
+
+declare float @_Z5ldexpfi(float, i32)
+declare <2 x float> @_Z5ldexpDv2_fDv2_i(<2 x float>, <2 x i32>)
+declare <3 x float> @_Z5ldexpDv3_fDv3_i(<3 x float>, <3 x i32>)
+declare <4 x float> @_Z5ldexpDv4_fDv4_i(<4 x float>, <4 x i32>)
+declare <8 x float> @_Z5ldexpDv8_fDv8_i(<8 x float>, <8 x i32>)
+declare <16 x float> @_Z5ldexpDv16_fDv16_i(<16 x float>, <16 x i32>)
+declare double @_Z5ldexpdi(double, i32)
+declare <2 x double> @_Z5ldexpDv2_dDv2_i(<2 x double>, <2 x i32>)
+declare <3 x double> @_Z5ldexpDv3_dDv3_i(<3 x double>, <3 x i32>)
+declare <4 x double> @_Z5ldexpDv4_dDv4_i(<4 x double>, <4 x i32>)
+declare <8 x double> @_Z5ldexpDv8_dDv8_i(<8 x double>, <8 x i32>)
+declare <16 x double> @_Z5ldexpDv16_dDv16_i(<16 x double>, <16 x i32>)
+declare half @_Z5ldexpDhi(half, i32)
+declare <2 x half> @_Z5ldexpDv2_DhDv2_i(<2 x half>, <2 x i32>)
+declare <3 x half> @_Z5ldexpDv3_DhDv3_i(<3 x half>, <3 x i32>)
+declare <4 x half> @_Z5ldexpDv4_DhDv4_i(<4 x half>, <4 x i32>)
+declare <8 x half> @_Z5ldexpDv8_DhDv8_i(<8 x half>, <8 x i32>)
+declare <16 x half> @_Z5ldexpDv16_DhDv16_i(<16 x half>, <16 x i32>)
+
+define float @test_ldexp_f32(float %x, i32 %y) { ; scalar f32 ldexp libcall with an i32 exponent, no flags; expected output is the same call, unchanged
+; CHECK-LABEL: define float @test_ldexp_f32
+; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    [[LDEXP:%.*]] = tail call float @_Z5ldexpfi(float [[X]], i32 [[Y]])
+; CHECK-NEXT:    ret float [[LDEXP]]
+;
+  %ldexp = tail call float @_Z5ldexpfi(float %x, i32 %y)
+  ret float %ldexp
+}
+
+define float @test_ldexp_f32_fast(float %x, i32 %y) { ; scalar f32 ldexp libcall with the fast flag; expected output is the same call, unchanged
+; CHECK-LABEL: define float @test_ldexp_f32_fast
+; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    [[LDEXP:%.*]] = tail call fast float @_Z5ldexpfi(float [[X]], i32 [[Y]])
+; CHECK-NEXT:    ret float [[LDEXP]]
+;
+  %ldexp = tail call fast float @_Z5ldexpfi(float %x, i32 %y)
+  ret float %ldexp
+}
+
+define <2 x float> @test_ldexp_v2f32(<2 x float> %x, <2 x i32> %y) { ; <2 x float> ldexp libcall with a <2 x i32> exponent; expected output is the same call, unchanged
+; CHECK-LABEL: define <2 x float> @test_ldexp_v2f32
+; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x i32> [[Y:%.*]]) {
+; CHECK-NEXT:    [[LDEXP:%.*]] = tail call <2 x float> @_Z5ldexpDv2_fDv2_i(<2 x float> [[X]], <2 x i32> [[Y]])
+; CHECK-NEXT:    ret <2 x float> [[LDEXP]]
+;
+  %ldexp = tail call <2 x float> @_Z5ldexpDv2_fDv2_i(<2 x float> %x, <2 x i32> %y)
+  ret <2 x float> %ldexp
+}
+
+define <3 x float> @test_ldexp_v3f32(<3 x float> %x, <3 x i32> %y) { ; <3 x float> ldexp libcall with a <3 x i32> exponent; expected output is the same call, unchanged
+; CHECK-LABEL: define <3 x float> @test_ldexp_v3f32
+; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x i32> [[Y:%.*]]) {
+; CHECK-NEXT:    [[LDEXP:%.*]] = tail call <3 x float> @_Z5ldexpDv3_fDv3_i(<3 x float> [[X]], <3 x i32> [[Y]])
+; CHECK-NEXT:    ret <3 x float> [[LDEXP]]
+;
+  %ldexp = tail call <3 x float> @_Z5ldexpDv3_fDv3_i(<3 x float> %x, <3 x i32> %y)
+  ret <3 x float> %ldexp
+}
+
+define <4 x float> @test_ldexp_v4f32(<4 x float> %x, <4 x i32> %y) { ; <4 x float> ldexp libcall with a <4 x i32> exponent; expected output is the same call, unchanged
+; CHECK-LABEL: define <4 x float> @test_ldexp_v4f32
+; CHECK-SAME: (<4 x float> [[X:%.*]], <4 x i32> [[Y:%.*]]) {
+; CHECK-NEXT:    [[LDEXP:%.*]] = tail call <4 x float> @_Z5ldexpDv4_fDv4_i(<4 x float> [[X]], <4 x i32> [[Y]])
+; CHECK-NEXT:    ret <4 x float> [[LDEXP]]
+;
+  %ldexp = tail call <4 x float> @_Z5ldexpDv4_fDv4_i(<4 x float> %x, <4 x i32> %y)
+  ret <4 x float> %ldexp
+}
+
+define <8 x float> @test_ldexp_v8f32(<8 x float> %x, <8 x i32> %y) { ; <8 x float> ldexp libcall with an <8 x i32> exponent; expected output is the same call, unchanged
+; CHECK-LABEL: define <8 x float> @test_ldexp_v8f32
+; CHECK-SAME: (<8 x float> [[X:%.*]], <8 x i32> [[Y:%.*]]) {
+; CHECK-NEXT:    [[LDEXP:%.*]] = tail call <8 x float> @_Z5ldexpDv8_fDv8_i(<8 x float> [[X]], <8 x i32> [[Y]])
+; CHECK-NEXT:    ret <8 x float> [[LDEXP]]
+;
+  %ldexp = tail call <8 x float> @_Z5ldexpDv8_fDv8_i(<8 x float> %x, <8 x i32> %y)
+  ret <8 x float> %ldexp
+}
+
+define <16 x float> @test_ldexp_v16f32(<16 x float> %x, <16 x i32> %y) { ; <16 x float> ldexp libcall with a <16 x i32> exponent; expected output is the same call, unchanged
+; CHECK-LABEL: define <16 x float> @test_ldexp_v16f32
+; CHECK-SAME: (<16 x float> [[X:%.*]], <16 x i32> [[Y:%.*]]) {
+; CHECK-NEXT:    [[LDEXP:%.*]] = tail call <16 x float> @_Z5ldexpDv16_fDv16_i(<16 x float> [[X]], <16 x i32> [[Y]])
+; CHECK-NEXT:    ret <16 x float> [[LDEXP]]
+;
+  %ldexp = tail call <16 x float> @_Z5ldexpDv16_fDv16_i(<16 x float> %x, <16 x i32> %y)
+  ret <16 x float> %ldexp
+}
+
+define double @test_ldexp_f64(double %x, i32 %y) { ; scalar f64 ldexp libcall with an i32 exponent, no flags; expected output is the same call, unchanged
+; CHECK-LABEL: define double @test_ldexp_f64
+; CHECK-SAME: (double [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    [[LDEXP:%.*]] = tail call double @_Z5ldexpdi(double [[X]], i32 [[Y]])
+; CHECK-NEXT:    ret double [[LDEXP]]
+;
+  %ldexp = tail call double @_Z5ldexpdi(double %x, i32 %y)
+  ret double %ldexp
+}
+
+define <2 x double> @test_ldexp_v2f64(<2 x double> %x, <2 x i32> %y) { ; <2 x double> ldexp libcall with a <2 x i32> exponent; expected output is the same call, unchanged
+; CHECK-LABEL: define <2 x double> @test_ldexp_v2f64
+; CHECK-SAME: (<2 x double> [[X:%.*]], <2 x i32> [[Y:%.*]]) {
+; CHECK-NEXT:    [[LDEXP:%.*]] = tail call <2 x double> @_Z5ldexpDv2_dDv2_i(<2 x double> [[X]], <2 x i32> [[Y]])
+; CHECK-NEXT:    ret <2 x double> [[LDEXP]]
+;
+  %ldexp = tail call <2 x double> @_Z5ldexpDv2_dDv2_i(<2 x double> %x, <2 x i32> %y)
+  ret <2 x double> %ldexp
+}
+
+define <3 x double> @test_ldexp_v3f64(<3 x double> %x, <3 x i32> %y) { ; <3 x double> ldexp libcall with a <3 x i32> exponent; expected output is the same call, unchanged
+; CHECK-LABEL: define <3 x double> @test_ldexp_v3f64
+; CHECK-SAME: (<3 x double> [[X:%.*]], <3 x i32> [[Y:%.*]]) {
+; CHECK-NEXT:    [[LDEXP:%.*]] = tail call <3 x double> @_Z5ldexpDv3_dDv3_i(<3 x double> [[X]], <3 x i32> [[Y]])
+; CHECK-NEXT:    ret <3 x double> [[LDEXP]]
+;
+  %ldexp = tail call <3 x double> @_Z5ldexpDv3_dDv3_i(<3 x double> %x, <3 x i32> %y)
+  ret <3 x double> %ldexp
+}
+
+define <4 x double> @test_ldexp_v4f64(<4 x double> %x, <4 x i32> %y) { ; <4 x double> ldexp libcall with a <4 x i32> exponent; expected output is the same call, unchanged
+; CHECK-LABEL: define <4 x double> @test_ldexp_v4f64
+; CHECK-SAME: (<4 x double> [[X:%.*]], <4 x i32> [[Y:%.*]]) {
+; CHECK-NEXT:    [[LDEXP:%.*]] = tail call <4 x double> @_Z5ldexpDv4_dDv4_i(<4 x double> [[X]], <4 x i32> [[Y]])
+; CHECK-NEXT:    ret <4 x double> [[LDEXP]]
+;
+  %ldexp = tail call <4 x double> @_Z5ldexpDv4_dDv4_i(<4 x double> %x, <4 x i32> %y)
+  ret <4 x double> %ldexp
+}
+
+define <8 x double> @test_ldexp_v8f64(<8 x double> %x, <8 x i32> %y) { ; <8 x double> ldexp libcall with an <8 x i32> exponent; expected output is the same call, unchanged
+; CHECK-LABEL: define <8 x double> @test_ldexp_v8f64
+; CHECK-SAME: (<8 x double> [[X:%.*]], <8 x i32> [[Y:%.*]]) {
+; CHECK-NEXT:    [[LDEXP:%.*]] = tail call <8 x double> @_Z5ldexpDv8_dDv8_i(<8 x double> [[X]], <8 x i32> [[Y]])
+; CHECK-NEXT:    ret <8 x double> [[LDEXP]]
+;
+  %ldexp = tail call <8 x double> @_Z5ldexpDv8_dDv8_i(<8 x double> %x, <8 x i32> %y)
+  ret <8 x double> %ldexp
+}
+
+define <16 x double> @test_ldexp_v16f64(<16 x double> %x, <16 x i32> %y) { ; <16 x double> ldexp libcall with a <16 x i32> exponent; expected output is the same call, unchanged
+; CHECK-LABEL: define <16 x double> @test_ldexp_v16f64
+; CHECK-SAME: (<16 x double> [[X:%.*]], <16 x i32> [[Y:%.*]]) {
+; CHECK-NEXT:    [[LDEXP:%.*]] = tail call <16 x double> @_Z5ldexpDv16_dDv16_i(<16 x double> [[X]], <16 x i32> [[Y]])
+; CHECK-NEXT:    ret <16 x double> [[LDEXP]]
+;
+  %ldexp = tail call <16 x double> @_Z5ldexpDv16_dDv16_i(<16 x double> %x, <16 x i32> %y)
+  ret <16 x double> %ldexp
+}
+
+define half @test_ldexp_f16(half %x, i32 %y) { ; scalar half ldexp libcall with an i32 exponent, no flags; expected output is the same call, unchanged
+; CHECK-LABEL: define half @test_ldexp_f16
+; CHECK-SAME: (half [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    [[LDEXP:%.*]] = tail call half @_Z5ldexpDhi(half [[X]], i32 [[Y]])
+; CHECK-NEXT:    ret half [[LDEXP]]
+;
+  %ldexp = tail call half @_Z5ldexpDhi(half %x, i32 %y)
+  ret half %ldexp
+}
+
+define <2 x half> @test_ldexp_v2f16(<2 x half> %x, <2 x i32> %y) { ; <2 x half> ldexp libcall with a <2 x i32> exponent; expected output is the same call, unchanged
+; CHECK-LABEL: define <2 x half> @test_ldexp_v2f16
+; CHECK-SAME: (<2 x half> [[X:%.*]], <2 x i32> [[Y:%.*]]) {
+; CHECK-NEXT:    [[LDEXP:%.*]] = tail call <2 x half> @_Z5ldexpDv2_DhDv2_i(<2 x half> [[X]], <2 x i32> [[Y]])
+; CHECK-NEXT:    ret <2 x half> [[LDEXP]]
+;
+  %ldexp = tail call <2 x half> @_Z5ldexpDv2_DhDv2_i(<2 x half> %x, <2 x i32> %y)
+  ret <2 x half> %ldexp
+}
+
+define <3 x half> @test_ldexp_v3f16(<3 x half> %x, <3 x i32> %y) { ; <3 x half> ldexp libcall with a <3 x i32> exponent; expected output is the same call, unchanged
+; CHECK-LABEL: define <3 x half> @test_ldexp_v3f16
+; CHECK-SAME: (<3 x half> [[X:%.*]], <3 x i32> [[Y:%.*]]) {
+; CHECK-NEXT:    [[LDEXP:%.*]] = tail call <3 x half> @_Z5ldexpDv3_DhDv3_i(<3 x half> [[X]], <3 x i32> [[Y]])
+; CHECK-NEXT:    ret <3 x half> [[LDEXP]]
+;
+  %ldexp = tail call <3 x half> @_Z5ldexpDv3_DhDv3_i(<3 x half> %x, <3 x i32> %y)
+  ret <3 x half> %ldexp
+}
+
+define <4 x half> @test_ldexp_v4f16(<4 x half> %x, <4 x i32> %y) { ; <4 x half> ldexp libcall with a <4 x i32> exponent; expected output is the same call, unchanged
+; CHECK-LABEL: define <4 x half> @test_ldexp_v4f16
+; CHECK-SAME: (<4 x half> [[X:%.*]], <4 x i32> [[Y:%.*]]) {
+; CHECK-NEXT:    [[LDEXP:%.*]] = tail call <4 x half> @_Z5ldexpDv4_DhDv4_i(<4 x half> [[X]], <4 x i32> [[Y]])
+; CHECK-NEXT:    ret <4 x half> [[LDEXP]]
+;
+  %ldexp = tail call <4 x half> @_Z5ldexpDv4_DhDv4_i(<4 x half> %x, <4 x i32> %y)
+  ret <4 x half> %ldexp
+}
+
+define <8 x half> @test_ldexp_v8f16(<8 x half> %x, <8 x i32> %y) { ; <8 x half> ldexp libcall with an <8 x i32> exponent; expected output is the same call, unchanged
+; CHECK-LABEL: define <8 x half> @test_ldexp_v8f16
+; CHECK-SAME: (<8 x half> [[X:%.*]], <8 x i32> [[Y:%.*]]) {
+; CHECK-NEXT:    [[LDEXP:%.*]] = tail call <8 x half> @_Z5ldexpDv8_DhDv8_i(<8 x half> [[X]], <8 x i32> [[Y]])
+; CHECK-NEXT:    ret <8 x half> [[LDEXP]]
+;
+  %ldexp = tail call <8 x half> @_Z5ldexpDv8_DhDv8_i(<8 x half> %x, <8 x i32> %y)
+  ret <8 x half> %ldexp
+}
+
+define <16 x half> @test_ldexp_v16f16(<16 x half> %x, <16 x i32> %y) { ; <16 x half> ldexp libcall with a <16 x i32> exponent; expected output is the same call, unchanged
+; CHECK-LABEL: define <16 x half> @test_ldexp_v16f16
+; CHECK-SAME: (<16 x half> [[X:%.*]], <16 x i32> [[Y:%.*]]) {
+; CHECK-NEXT:    [[LDEXP:%.*]] = tail call <16 x half> @_Z5ldexpDv16_DhDv16_i(<16 x half> [[X]], <16 x i32> [[Y]])
+; CHECK-NEXT:    ret <16 x half> [[LDEXP]]
+;
+  %ldexp = tail call <16 x half> @_Z5ldexpDv16_DhDv16_i(<16 x half> %x, <16 x i32> %y)
+  ret <16 x half> %ldexp
+}
+
+define float @test_ldexp_f32_minsize(float %x, i32 %y) #3 { ; minsize function (#3) calling f32 ldexp with no flags; expected output is the same call, unchanged
+; CHECK-LABEL: define float @test_ldexp_f32_minsize
+; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[LDEXP:%.*]] = tail call float @_Z5ldexpfi(float [[X]], i32 [[Y]])
+; CHECK-NEXT:    ret float [[LDEXP]]
+;
+  %ldexp = tail call float @_Z5ldexpfi(float %x, i32 %y)
+  ret float %ldexp
+}
+
+define float @test_ldexp_f32_nobuiltin(float %x, i32 %y) { ; f32 ldexp libcall whose call site is marked nobuiltin; expected output is the same call, unchanged
+; CHECK-LABEL: define float @test_ldexp_f32_nobuiltin
+; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    [[LDEXP:%.*]] = tail call float @_Z5ldexpfi(float [[X]], i32 [[Y]]) #[[ATTR2:[0-9]+]]
+; CHECK-NEXT:    ret float [[LDEXP]]
+;
+  %ldexp = tail call float @_Z5ldexpfi(float %x, i32 %y) #0 ; #0 = { nobuiltin }, attached to the call rather than the function
+  ret float %ldexp
+}
+
+define float @test_ldexp_f32_strictfp(float %x, i32 %y) #4 { ; strictfp function (#4) calling f32 ldexp with nnan; expected output is the same call, unchanged
+; CHECK-LABEL: define float @test_ldexp_f32_strictfp
+; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:    [[LDEXP:%.*]] = tail call nnan float @_Z5ldexpfi(float [[X]], i32 [[Y]]) #[[ATTR1]]
+; CHECK-NEXT:    ret float [[LDEXP]]
+;
+  %ldexp = tail call nnan float @_Z5ldexpfi(float %x, i32 %y) #4 ; the call site carries strictfp (#4) as well as the function
+  ret float %ldexp
+}
+
+attributes #0 = { nobuiltin } ; applied at the call site in test_ldexp_f32_nobuiltin
+attributes #1 = { "no-builtins" } ; NOTE(review): not referenced by any function or call in this file - add a test that uses it or drop it
+attributes #2 = { nounwind memory(none) } ; NOTE(review): not referenced by any function or call in this file - add a test that uses it or drop it
+attributes #3 = { minsize } ; applied to the function test_ldexp_f32_minsize
+attributes #4 = { strictfp } ; applied to test_ldexp_f32_strictfp and to its call
Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-log.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-log.ll
@@ -0,0 +1,561 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
+
+declare float @_Z3logf(float)
+declare <2 x float> @_Z3logDv2_f(<2 x float>)
+declare <3 x float> @_Z3logDv3_f(<3 x float>)
+declare <4 x float> @_Z3logDv4_f(<4 x float>)
+declare <8 x float> @_Z3logDv8_f(<8 x float>)
+declare <16 x float> @_Z3logDv16_f(<16 x float>)
+
+declare double @_Z3logd(double)
+declare <2 x double> @_Z3logDv2_d(<2 x double>)
+declare <3 x double> @_Z3logDv3_d(<3 x double>)
+declare <4 x double> @_Z3logDv4_d(<4 x double>)
+declare <8 x double> @_Z3logDv8_d(<8 x double>)
+declare <16 x double> @_Z3logDv16_d(<16 x double>)
+
+declare half @_Z3logDh(half)
+declare <2 x half> @_Z3logDv2_Dh(<2 x half>)
+declare <3 x half> @_Z3logDv3_Dh(<3 x half>)
+declare <4 x half> @_Z3logDv4_Dh(<4 x half>)
+declare <8 x half> @_Z3logDv8_Dh(<8 x half>)
+declare <16 x half> @_Z3logDv16_Dh(<16 x half>)
+
+define float @test_log_f32(float %arg) { ; scalar f32 log libcall carrying !fpmath !0; expected output is the same call with the metadata kept
+; CHECK-LABEL: define float @test_log_f32
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG:%.*]] = tail call float @_Z3logf(float [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret float [[LOG]]
+;
+  %log = tail call float @_Z3logf(float %arg), !fpmath !0
+  ret float %log
+}
+
+define <2 x float> @test_log_v2f32(<2 x float> %arg) { ; <2 x float> log libcall carrying !fpmath !0; expected output is the same call with the metadata kept
+; CHECK-LABEL: define <2 x float> @test_log_v2f32
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG:%.*]] = tail call <2 x float> @_Z3logDv2_f(<2 x float> [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret <2 x float> [[LOG]]
+;
+  %log = tail call <2 x float> @_Z3logDv2_f(<2 x float> %arg), !fpmath !0
+  ret <2 x float> %log
+}
+
+define <3 x float> @test_log_v3f32(<3 x float> %arg) { ; <3 x float> log libcall carrying !fpmath !0; expected output is the same call with the metadata kept
+; CHECK-LABEL: define <3 x float> @test_log_v3f32
+; CHECK-SAME: (<3 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG:%.*]] = tail call <3 x float> @_Z3logDv3_f(<3 x float> [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret <3 x float> [[LOG]]
+;
+  %log = tail call <3 x float> @_Z3logDv3_f(<3 x float> %arg), !fpmath !0
+  ret <3 x float> %log
+}
+
+define <4 x float> @test_log_v4f32(<4 x float> %arg) { ; <4 x float> log libcall carrying !fpmath !0; expected output is the same call with the metadata kept
+; CHECK-LABEL: define <4 x float> @test_log_v4f32
+; CHECK-SAME: (<4 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG:%.*]] = tail call <4 x float> @_Z3logDv4_f(<4 x float> [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret <4 x float> [[LOG]]
+;
+  %log = tail call <4 x float> @_Z3logDv4_f(<4 x float> %arg), !fpmath !0
+  ret <4 x float> %log
+}
+
+define <8 x float> @test_log_v8f32(<8 x float> %arg) { ; <8 x float> log libcall carrying !fpmath !0; expected output is the same call with the metadata kept
+; CHECK-LABEL: define <8 x float> @test_log_v8f32
+; CHECK-SAME: (<8 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG:%.*]] = tail call <8 x float> @_Z3logDv8_f(<8 x float> [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret <8 x float> [[LOG]]
+;
+  %log = tail call <8 x float> @_Z3logDv8_f(<8 x float> %arg), !fpmath !0
+  ret <8 x float> %log
+}
+
+define <16 x float> @test_log_v16f32(<16 x float> %arg) { ; <16 x float> log libcall carrying !fpmath !0; expected output is the same call with the metadata kept
+; CHECK-LABEL: define <16 x float> @test_log_v16f32
+; CHECK-SAME: (<16 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG:%.*]] = tail call <16 x float> @_Z3logDv16_f(<16 x float> [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret <16 x float> [[LOG]]
+;
+  %log = tail call <16 x float> @_Z3logDv16_f(<16 x float> %arg), !fpmath !0
+  ret <16 x float> %log
+}
+
+define float @test_log_cr_f32(float %arg) { ; scalar f32 log libcall without !fpmath metadata (NOTE(review): "cr" presumably means correctly rounded - confirm); expected output is the same call
+; CHECK-LABEL: define float @test_log_cr_f32
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG:%.*]] = tail call float @_Z3logf(float [[ARG]])
+; CHECK-NEXT:    ret float [[LOG]]
+;
+  %log = tail call float @_Z3logf(float %arg)
+  ret float %log
+}
+
+define <2 x float> @test_log_cr_v2f32(<2 x float> %arg) { ; <2 x float> log libcall without !fpmath metadata; expected output is the same call, unchanged
+; CHECK-LABEL: define <2 x float> @test_log_cr_v2f32
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG:%.*]] = tail call <2 x float> @_Z3logDv2_f(<2 x float> [[ARG]])
+; CHECK-NEXT:    ret <2 x float> [[LOG]]
+;
+  %log = tail call <2 x float> @_Z3logDv2_f(<2 x float> %arg)
+  ret <2 x float> %log
+}
+
+define <3 x float> @test_log_cr_v3f32(<3 x float> %arg) { ; <3 x float> log libcall without !fpmath metadata; expected output is the same call, unchanged
+; CHECK-LABEL: define <3 x float> @test_log_cr_v3f32
+; CHECK-SAME: (<3 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG:%.*]] = tail call <3 x float> @_Z3logDv3_f(<3 x float> [[ARG]])
+; CHECK-NEXT:    ret <3 x float> [[LOG]]
+;
+  %log = tail call <3 x float> @_Z3logDv3_f(<3 x float> %arg)
+  ret <3 x float> %log
+}
+
+define <4 x float> @test_log_cr_v4f32(<4 x float> %arg) { ; <4 x float> log libcall without !fpmath metadata; expected output is the same call, unchanged
+; CHECK-LABEL: define <4 x float> @test_log_cr_v4f32
+; CHECK-SAME: (<4 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG:%.*]] = tail call <4 x float> @_Z3logDv4_f(<4 x float> [[ARG]])
+; CHECK-NEXT:    ret <4 x float> [[LOG]]
+;
+  %log = tail call <4 x float> @_Z3logDv4_f(<4 x float> %arg)
+  ret <4 x float> %log
+}
+
+define <8 x float> @test_log_cr_v8f32(<8 x float> %arg) { ; <8 x float> log libcall without !fpmath metadata; expected output is the same call, unchanged
+; CHECK-LABEL: define <8 x float> @test_log_cr_v8f32
+; CHECK-SAME: (<8 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG:%.*]] = tail call <8 x float> @_Z3logDv8_f(<8 x float> [[ARG]])
+; CHECK-NEXT:    ret <8 x float> [[LOG]]
+;
+  %log = tail call <8 x float> @_Z3logDv8_f(<8 x float> %arg)
+  ret <8 x float> %log
+}
+
+define <16 x float> @test_log_cr_v16f32(<16 x float> %arg) { ; <16 x float> log libcall without !fpmath metadata; expected output is the same call, unchanged
+; CHECK-LABEL: define <16 x float> @test_log_cr_v16f32
+; CHECK-SAME: (<16 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG:%.*]] = tail call <16 x float> @_Z3logDv16_f(<16 x float> [[ARG]])
+; CHECK-NEXT:    ret <16 x float> [[LOG]]
+;
+  %log = tail call <16 x float> @_Z3logDv16_f(<16 x float> %arg)
+  ret <16 x float> %log
+}
+
+define double @test_log_f64(double %arg) { ; scalar f64 log libcall, no flags or metadata; expected output is the same call, unchanged
+; CHECK-LABEL: define double @test_log_f64
+; CHECK-SAME: (double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG:%.*]] = tail call double @_Z3logd(double [[ARG]])
+; CHECK-NEXT:    ret double [[LOG]]
+;
+  %log = tail call double @_Z3logd(double %arg)
+  ret double %log
+}
+
+define <2 x double> @test_log_v2f64(<2 x double> %arg) { ; <2 x double> log libcall, no flags or metadata; expected output is the same call, unchanged
+; CHECK-LABEL: define <2 x double> @test_log_v2f64
+; CHECK-SAME: (<2 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG:%.*]] = tail call <2 x double> @_Z3logDv2_d(<2 x double> [[ARG]])
+; CHECK-NEXT:    ret <2 x double> [[LOG]]
+;
+  %log = tail call <2 x double> @_Z3logDv2_d(<2 x double> %arg)
+  ret <2 x double> %log
+}
+
+define <3 x double> @test_log_v3f64(<3 x double> %arg) { ; <3 x double> log libcall, no flags or metadata; expected output is the same call, unchanged
+; CHECK-LABEL: define <3 x double> @test_log_v3f64
+; CHECK-SAME: (<3 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG:%.*]] = tail call <3 x double> @_Z3logDv3_d(<3 x double> [[ARG]])
+; CHECK-NEXT:    ret <3 x double> [[LOG]]
+;
+  %log = tail call <3 x double> @_Z3logDv3_d(<3 x double> %arg)
+  ret <3 x double> %log
+}
+
+define <4 x double> @test_log_v4f64(<4 x double> %arg) { ; <4 x double> log libcall, no flags or metadata; expected output is the same call, unchanged
+; CHECK-LABEL: define <4 x double> @test_log_v4f64
+; CHECK-SAME: (<4 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG:%.*]] = tail call <4 x double> @_Z3logDv4_d(<4 x double> [[ARG]])
+; CHECK-NEXT:    ret <4 x double> [[LOG]]
+;
+  %log = tail call <4 x double> @_Z3logDv4_d(<4 x double> %arg)
+  ret <4 x double> %log
+}
+
+define <8 x double> @test_log_v8f64(<8 x double> %arg) { ; <8 x double> log libcall, no flags or metadata; expected output is the same call, unchanged
+; CHECK-LABEL: define <8 x double> @test_log_v8f64
+; CHECK-SAME: (<8 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG:%.*]] = tail call <8 x double> @_Z3logDv8_d(<8 x double> [[ARG]])
+; CHECK-NEXT:    ret <8 x double> [[LOG]]
+;
+  %log = tail call <8 x double> @_Z3logDv8_d(<8 x double> %arg)
+  ret <8 x double> %log
+}
+
+define <16 x double> @test_log_v16f64(<16 x double> %arg) { ; <16 x double> log libcall, no flags or metadata; expected output is the same call, unchanged
+; CHECK-LABEL: define <16 x double> @test_log_v16f64
+; CHECK-SAME: (<16 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG:%.*]] = tail call <16 x double> @_Z3logDv16_d(<16 x double> [[ARG]])
+; CHECK-NEXT:    ret <16 x double> [[LOG]]
+;
+  %log = tail call <16 x double> @_Z3logDv16_d(<16 x double> %arg)
+  ret <16 x double> %log
+}
+
+define half @test_log_f16(half %x) {
+; CHECK-LABEL: define half @test_log_f16
+; CHECK-SAME: (half [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call half @_Z3logDh(half [[X]])
+; CHECK-NEXT:    ret half [[RES]]
+; Scalar half log without flags: the call is expected to be left as written.
+  %res = tail call half @_Z3logDh(half %x)
+  ret half %res
+}
+
+define half @test_log_f16_fast(half %x) {
+; CHECK-LABEL: define half @test_log_f16_fast
+; CHECK-SAME: (half [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call fast half @_Z3logDh(half [[X]])
+; CHECK-NEXT:    ret half [[RES]]
+; Scalar half log with 'fast': the flag is expected to stay on the emitted call.
+  %res = tail call fast half @_Z3logDh(half %x)
+  ret half %res
+}
+
+define <2 x half> @test_log_v2f16(<2 x half> %x) {
+; CHECK-LABEL: define <2 x half> @test_log_v2f16
+; CHECK-SAME: (<2 x half> [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <2 x half> @_Z3logDv2_Dh(<2 x half> [[X]])
+; CHECK-NEXT:    ret <2 x half> [[RES]]
+; 2-element half log: the emitted call is expected to equal the input call.
+  %res = tail call <2 x half> @_Z3logDv2_Dh(<2 x half> %x)
+  ret <2 x half> %res
+}
+
+define <3 x half> @test_log_v3f16(<3 x half> %x) {
+; CHECK-LABEL: define <3 x half> @test_log_v3f16
+; CHECK-SAME: (<3 x half> [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <3 x half> @_Z3logDv3_Dh(<3 x half> [[X]])
+; CHECK-NEXT:    ret <3 x half> [[RES]]
+; 3-element half log: the emitted call is expected to equal the input call.
+  %res = tail call <3 x half> @_Z3logDv3_Dh(<3 x half> %x)
+  ret <3 x half> %res
+}
+
+define <4 x half> @test_log_v4f16(<4 x half> %x) {
+; CHECK-LABEL: define <4 x half> @test_log_v4f16
+; CHECK-SAME: (<4 x half> [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <4 x half> @_Z3logDv4_Dh(<4 x half> [[X]])
+; CHECK-NEXT:    ret <4 x half> [[RES]]
+; 4-element half log: the emitted call is expected to equal the input call.
+  %res = tail call <4 x half> @_Z3logDv4_Dh(<4 x half> %x)
+  ret <4 x half> %res
+}
+
+define <8 x half> @test_log_v8f16(<8 x half> %x) {
+; CHECK-LABEL: define <8 x half> @test_log_v8f16
+; CHECK-SAME: (<8 x half> [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <8 x half> @_Z3logDv8_Dh(<8 x half> [[X]])
+; CHECK-NEXT:    ret <8 x half> [[RES]]
+; 8-element half log: the emitted call is expected to equal the input call.
+  %res = tail call <8 x half> @_Z3logDv8_Dh(<8 x half> %x)
+  ret <8 x half> %res
+}
+
+define <16 x half> @test_log_v16f16(<16 x half> %x) {
+; CHECK-LABEL: define <16 x half> @test_log_v16f16
+; CHECK-SAME: (<16 x half> [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <16 x half> @_Z3logDv16_Dh(<16 x half> [[X]])
+; CHECK-NEXT:    ret <16 x half> [[RES]]
+; 16-element half log: the emitted call is expected to equal the input call.
+  %res = tail call <16 x half> @_Z3logDv16_Dh(<16 x half> %x)
+  ret <16 x half> %res
+}
+
+define float @test_log_f32_nobuiltin_callsite(float %x) {
+; CHECK-LABEL: define float @test_log_f32_nobuiltin_callsite
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call float @_Z3logf(float [[X]]) #[[ATTR5:[0-9]+]], !fpmath !0
+; CHECK-NEXT:    ret float [[RES]]
+; nobuiltin call site (#0): the call keeps its attribute group and !fpmath.
+  %res = tail call float @_Z3logf(float %x) #0, !fpmath !0
+  ret float %res
+}
+
+define <2 x float> @test_log_v2f32_nobuiltin_callsite(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_log_v2f32_nobuiltin_callsite
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <2 x float> @_Z3logDv2_f(<2 x float> [[X]]) #[[ATTR5]], !fpmath !0
+; CHECK-NEXT:    ret <2 x float> [[RES]]
+; Vector form of the nobuiltin call-site case; attribute group and !fpmath are kept.
+  %res = tail call <2 x float> @_Z3logDv2_f(<2 x float> %x) #0, !fpmath !0
+  ret <2 x float> %res
+}
+
+define float @test_log_cr_f32_nobuiltin_callsite(float %x) {
+; CHECK-LABEL: define float @test_log_cr_f32_nobuiltin_callsite
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call float @_Z3logf(float [[X]]) #[[ATTR5]]
+; CHECK-NEXT:    ret float [[RES]]
+; nobuiltin call site (#0) with no !fpmath: the call is expected to be unchanged.
+  %res = tail call float @_Z3logf(float %x) #0
+  ret float %res
+}
+
+define <2 x float> @test_log_cr_v2f32_nobuiltin_callsite(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_log_cr_v2f32_nobuiltin_callsite
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <2 x float> @_Z3logDv2_f(<2 x float> [[X]]) #[[ATTR5]]
+; CHECK-NEXT:    ret <2 x float> [[RES]]
+; Vector nobuiltin call site (#0) with no !fpmath: the call is expected to be unchanged.
+  %res = tail call <2 x float> @_Z3logDv2_f(<2 x float> %x) #0
+  ret <2 x float> %res
+}
+
+; The "no-builtins" function attribute should be ignored by the pass.
+define float @test_log_f32_nobuiltins(float %x) #1 {
+; CHECK-LABEL: define float @test_log_f32_nobuiltins
+; CHECK-SAME: (float [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[RES:%.*]] = tail call float @_Z3logf(float [[X]]) #[[ATTR5]], !fpmath !0
+; CHECK-NEXT:    ret float [[RES]]
+; NOTE(review): the call site is also nobuiltin (#0), not just the function (#1) - confirm intended.
+  %res = tail call float @_Z3logf(float %x) #0, !fpmath !0
+  ret float %res
+}
+
+define <2 x float> @test_log_v2f32_nobuiltins(<2 x float> %x) #1 {
+; CHECK-LABEL: define <2 x float> @test_log_v2f32_nobuiltins
+; CHECK-SAME: (<2 x float> [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <2 x float> @_Z3logDv2_f(<2 x float> [[X]]) #[[ATTR5]], !fpmath !0
+; CHECK-NEXT:    ret <2 x float> [[RES]]
+; NOTE(review): the call site is also nobuiltin (#0), not just the function (#1) - confirm intended.
+  %res = tail call <2 x float> @_Z3logDv2_f(<2 x float> %x) #0, !fpmath !0
+  ret <2 x float> %res
+}
+
+define float @test_log_cr_f32_nobuiltins(float %x) #1 {
+; CHECK-LABEL: define float @test_log_cr_f32_nobuiltins
+; CHECK-SAME: (float [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[RES:%.*]] = tail call float @_Z3logf(float [[X]]) #[[ATTR5]]
+; CHECK-NEXT:    ret float [[RES]]
+; NOTE(review): the call site is also nobuiltin (#0), not just the function (#1) - confirm intended.
+  %res = tail call float @_Z3logf(float %x) #0
+  ret float %res
+}
+
+define <2 x float> @test_log_cr_v2f32_nobuiltins(<2 x float> %x) #1 {
+; CHECK-LABEL: define <2 x float> @test_log_cr_v2f32_nobuiltins
+; CHECK-SAME: (<2 x float> [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <2 x float> @_Z3logDv2_f(<2 x float> [[X]]) #[[ATTR5]]
+; CHECK-NEXT:    ret <2 x float> [[RES]]
+; NOTE(review): the call site is also nobuiltin (#0), not just the function (#1) - confirm intended.
+  %res = tail call <2 x float> @_Z3logDv2_f(<2 x float> %x) #0
+  ret <2 x float> %res
+}
+
+define float @test_log_f32_preserve_flags(float %x) {
+; CHECK-LABEL: define float @test_log_f32_preserve_flags
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call nnan ninf float @_Z3logf(float [[X]]), !fpmath !0
+; CHECK-NEXT:    ret float [[RES]]
+; The nnan/ninf flags and !fpmath are expected to survive on the emitted call.
+  %res = tail call nnan ninf float @_Z3logf(float %x), !fpmath !0
+  ret float %res
+}
+
+define <2 x float> @test_log_v2f32_preserve_flags(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_log_v2f32_preserve_flags
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call nnan nsz contract <2 x float> @_Z3logDv2_f(<2 x float> [[X]]), !fpmath !0
+; CHECK-NEXT:    ret <2 x float> [[RES]]
+; Input spells the flags 'contract nsz nnan'; the output lists them as 'nnan nsz contract'.
+  %res = tail call contract nsz nnan <2 x float> @_Z3logDv2_f(<2 x float> %x), !fpmath !0
+  ret <2 x float> %res
+}
+
+define float @test_log_f32_preserve_flags_md(float %x) {
+; CHECK-LABEL: define float @test_log_f32_preserve_flags_md
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call nnan ninf float @_Z3logf(float [[X]]), !fpmath !0, !foo !1
+; CHECK-NEXT:    ret float [[RES]]
+; Flags plus both metadata kinds (!fpmath and the custom !foo) must remain on the call.
+  %res = tail call nnan ninf float @_Z3logf(float %x), !fpmath !0, !foo !1
+  ret float %res
+}
+
+define <2 x float> @test_log_v2f32_preserve_flags_md(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_log_v2f32_preserve_flags_md
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call nnan nsz contract <2 x float> @_Z3logDv2_f(<2 x float> [[X]]), !fpmath !0, !foo !1
+; CHECK-NEXT:    ret <2 x float> [[RES]]
+; Vector form: flags, !fpmath and the custom !foo metadata must remain on the call.
+  %res = tail call contract nsz nnan <2 x float> @_Z3logDv2_f(<2 x float> %x), !fpmath !0, !foo !1
+  ret <2 x float> %res
+}
+
+define float @test_log_cr_f32_preserve_flags(float %x) {
+; CHECK-LABEL: define float @test_log_cr_f32_preserve_flags
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call ninf contract float @_Z3logf(float [[X]])
+; CHECK-NEXT:    ret float [[RES]]
+; No !fpmath here; the ninf/contract flags are expected to stay on the call.
+  %res = tail call ninf contract float @_Z3logf(float %x)
+  ret float %res
+}
+
+define <2 x float> @test_log_cr_v2f32_preserve_flags(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_log_cr_v2f32_preserve_flags
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call nnan nsz <2 x float> @_Z3logDv2_f(<2 x float> [[X]])
+; CHECK-NEXT:    ret <2 x float> [[RES]]
+; No !fpmath here; the nnan/nsz flags are expected to stay on the vector call.
+  %res = tail call nnan nsz <2 x float> @_Z3logDv2_f(<2 x float> %x)
+  ret <2 x float> %res
+}
+
+; Test the libm names, which are not recognized OpenCL builtins.
+declare float @logf(float) #2
+declare double @log(double) #2
+
+define float @test_libm_log_f32(float %x) {
+; CHECK-LABEL: define float @test_libm_log_f32
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call float @logf(float [[X]])
+; CHECK-NEXT:    ret float [[RES]]
+; Plain libm logf: the call is expected to be left as written.
+  %res = tail call float @logf(float %x)
+  ret float %res
+}
+
+define float @test_libm_log_f32_fast(float %x) {
+; CHECK-LABEL: define float @test_libm_log_f32_fast
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call fast float @logf(float [[X]])
+; CHECK-NEXT:    ret float [[RES]]
+; libm logf with 'fast': the call and its flag are expected to be left as written.
+  %res = tail call fast float @logf(float %x)
+  ret float %res
+}
+
+define float @test_libm_log_f32_fpmath(float %x) {
+; CHECK-LABEL: define float @test_libm_log_f32_fpmath
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call float @logf(float [[X]]), !fpmath !0
+; CHECK-NEXT:    ret float [[RES]]
+; libm logf with !fpmath: the call and its metadata are expected to be left as written.
+  %res = tail call float @logf(float %x), !fpmath !0
+  ret float %res
+}
+
+define double @test_libm_log_f64(double %x) {
+; CHECK-LABEL: define double @test_libm_log_f64
+; CHECK-SAME: (double [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call double @log(double [[X]])
+; CHECK-NEXT:    ret double [[RES]]
+; Plain libm log on double: the call is expected to be left as written.
+  %res = tail call double @log(double %x)
+  ret double %res
+}
+
+define double @test_libm_log_f64_fast(double %x) {
+; CHECK-LABEL: define double @test_libm_log_f64_fast
+; CHECK-SAME: (double [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call fast double @log(double [[X]])
+; CHECK-NEXT:    ret double [[RES]]
+; libm log on double with 'fast': the call and its flag are expected to be left as written.
+  %res = tail call fast double @log(double %x)
+  ret double %res
+}
+
+define double @test_libm_log_f64_fpmath(double %x) {
+; CHECK-LABEL: define double @test_libm_log_f64_fpmath
+; CHECK-SAME: (double [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call double @log(double [[X]]), !fpmath !0
+; CHECK-NEXT:    ret double [[RES]]
+; libm log on double with !fpmath: call and metadata are expected to be left as written.
+  %res = tail call double @log(double %x), !fpmath !0
+  ret double %res
+}
+
+define float @test_log_f32_fast_noinline(float %x) {
+; CHECK-LABEL: define float @test_log_f32_fast_noinline
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call fast float @_Z3logf(float [[X]]) #[[ATTR6:[0-9]+]], !fpmath !0
+; CHECK-NEXT:    ret float [[RES]]
+; noinline call site (#3): 'fast', the attribute group and !fpmath are all kept.
+  %res = tail call fast float @_Z3logf(float %x) #3, !fpmath !0
+  ret float %res
+}
+
+define float @test_log_f32_fast_optsize(float %x) #4 {
+; CHECK-LABEL: define float @test_log_f32_fast_optsize
+; CHECK-SAME: (float [[X:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT:    [[RES:%.*]] = tail call fast float @_Z3logf(float [[X]]), !fpmath !0
+; CHECK-NEXT:    ret float [[RES]]
+; optsize function (#4): the fast call with !fpmath is expected to be unchanged.
+  %res = tail call fast float @_Z3logf(float %x), !fpmath !0
+  ret float %res
+}
+
+define float @test_log_f32_fast_minsize(float %x) #5 {
+; CHECK-LABEL: define float @test_log_f32_fast_minsize
+; CHECK-SAME: (float [[X:%.*]]) #[[ATTR3:[0-9]+]] {
+; CHECK-NEXT:    [[RES:%.*]] = tail call fast float @_Z3logf(float [[X]]), !fpmath !0
+; CHECK-NEXT:    ret float [[RES]]
+; minsize function (#5): the fast call with !fpmath is expected to be unchanged.
+  %res = tail call fast float @_Z3logf(float %x), !fpmath !0
+  ret float %res
+}
+
+define float @test_log_f32_nsz_contract_optsize(float %x) #4 {
+; CHECK-LABEL: define float @test_log_f32_nsz_contract_optsize
+; CHECK-SAME: (float [[X:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT:    [[RES:%.*]] = tail call nsz contract float @_Z3logf(float [[X]]), !fpmath !0
+; CHECK-NEXT:    ret float [[RES]]
+; optsize function (#4): the nsz/contract call with !fpmath is expected to be unchanged.
+  %res = tail call nsz contract float @_Z3logf(float %x), !fpmath !0
+  ret float %res
+}
+
+define float @test_log_f32_nsz_contract_minsize(float %x) #5 {
+; CHECK-LABEL: define float @test_log_f32_nsz_contract_minsize
+; CHECK-SAME: (float [[X:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:    [[RES:%.*]] = tail call nsz contract float @_Z3logf(float [[X]]), !fpmath !0
+; CHECK-NEXT:    ret float [[RES]]
+; minsize function (#5): the nsz/contract call with !fpmath is expected to be unchanged.
+  %res = tail call nsz contract float @_Z3logf(float %x), !fpmath !0
+  ret float %res
+}
+
+define half @test_log_f16_fast_minsize(half %arg) #5 {
+; CHECK-LABEL: define half @test_log_f16_fast_minsize
+; CHECK-SAME: (half [[ARG:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:    [[LOG:%.*]] = tail call fast half @_Z3logDh(half [[ARG]])
+; CHECK-NEXT:    ret half [[LOG]]
+; half variant of the fast + minsize (#5) case; the 'fast' call is expected to be unchanged.
+  %log = tail call fast half @_Z3logDh(half %arg)
+  ret half %log
+}
+
+define float @test_log_f32_strictfp(float %x) #6 {
+; CHECK-LABEL: define float @test_log_f32_strictfp
+; CHECK-SAME: (float [[X:%.*]]) #[[ATTR4:[0-9]+]] {
+; CHECK-NEXT:    [[RES:%.*]] = tail call nsz float @_Z3logf(float [[X]]) #[[ATTR4]]
+; CHECK-NEXT:    ret float [[RES]]
+; strictfp (#6) on both the function and the call site; the nsz call is expected to be unchanged.
+  %res = tail call nsz float @_Z3logf(float %x) #6
+  ret float %res
+}
+
+attributes #0 = { nobuiltin }
+attributes #1 = { "no-builtins" }
+attributes #2 = { nounwind memory(none) }
+attributes #3 = { noinline }
+attributes #4 = { optsize }
+attributes #5 = { minsize }
+attributes #6 = { strictfp }
+
+!0 = !{float 3.000000e+00}
+!1 = !{i32 1234}
Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-log10.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-log10.ll
@@ -0,0 +1,561 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
+
+declare float @_Z5log10f(float)
+declare <2 x float> @_Z5log10Dv2_f(<2 x float>)
+declare <3 x float> @_Z5log10Dv3_f(<3 x float>)
+declare <4 x float> @_Z5log10Dv4_f(<4 x float>)
+declare <8 x float> @_Z5log10Dv8_f(<8 x float>)
+declare <16 x float> @_Z5log10Dv16_f(<16 x float>)
+
+declare double @_Z5log10d(double)
+declare <2 x double> @_Z5log10Dv2_d(<2 x double>)
+declare <3 x double> @_Z5log10Dv3_d(<3 x double>)
+declare <4 x double> @_Z5log10Dv4_d(<4 x double>)
+declare <8 x double> @_Z5log10Dv8_d(<8 x double>)
+declare <16 x double> @_Z5log10Dv16_d(<16 x double>)
+
+declare half @_Z5log10Dh(half)
+declare <2 x half> @_Z5log10Dv2_Dh(<2 x half>)
+declare <3 x half> @_Z5log10Dv3_Dh(<3 x half>)
+declare <4 x half> @_Z5log10Dv4_Dh(<4 x half>)
+declare <8 x half> @_Z5log10Dv8_Dh(<8 x half>)
+declare <16 x half> @_Z5log10Dv16_Dh(<16 x half>)
+
+define float @test_log10_f32(float %x) {
+; CHECK-LABEL: define float @test_log10_f32
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call float @_Z5log10f(float [[X]]), !fpmath !0
+; CHECK-NEXT:    ret float [[RES]]
+; Scalar float log10 with !fpmath: the emitted call is expected to equal the input call.
+  %res = tail call float @_Z5log10f(float %x), !fpmath !0
+  ret float %res
+}
+
+define <2 x float> @test_log10_v2f32(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_log10_v2f32
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <2 x float> @_Z5log10Dv2_f(<2 x float> [[X]]), !fpmath !0
+; CHECK-NEXT:    ret <2 x float> [[RES]]
+; 2-element float log10 with !fpmath: the emitted call is expected to equal the input call.
+  %res = tail call <2 x float> @_Z5log10Dv2_f(<2 x float> %x), !fpmath !0
+  ret <2 x float> %res
+}
+
+define <3 x float> @test_log10_v3f32(<3 x float> %x) {
+; CHECK-LABEL: define <3 x float> @test_log10_v3f32
+; CHECK-SAME: (<3 x float> [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <3 x float> @_Z5log10Dv3_f(<3 x float> [[X]]), !fpmath !0
+; CHECK-NEXT:    ret <3 x float> [[RES]]
+; 3-element float log10 with !fpmath: the emitted call is expected to equal the input call.
+  %res = tail call <3 x float> @_Z5log10Dv3_f(<3 x float> %x), !fpmath !0
+  ret <3 x float> %res
+}
+
+define <4 x float> @test_log10_v4f32(<4 x float> %x) {
+; CHECK-LABEL: define <4 x float> @test_log10_v4f32
+; CHECK-SAME: (<4 x float> [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <4 x float> @_Z5log10Dv4_f(<4 x float> [[X]]), !fpmath !0
+; CHECK-NEXT:    ret <4 x float> [[RES]]
+; 4-element float log10 with !fpmath: the emitted call is expected to equal the input call.
+  %res = tail call <4 x float> @_Z5log10Dv4_f(<4 x float> %x), !fpmath !0
+  ret <4 x float> %res
+}
+
+define <8 x float> @test_log10_v8f32(<8 x float> %x) {
+; CHECK-LABEL: define <8 x float> @test_log10_v8f32
+; CHECK-SAME: (<8 x float> [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <8 x float> @_Z5log10Dv8_f(<8 x float> [[X]]), !fpmath !0
+; CHECK-NEXT:    ret <8 x float> [[RES]]
+; 8-element float log10 with !fpmath: the emitted call is expected to equal the input call.
+  %res = tail call <8 x float> @_Z5log10Dv8_f(<8 x float> %x), !fpmath !0
+  ret <8 x float> %res
+}
+
+define <16 x float> @test_log10_v16f32(<16 x float> %x) {
+; CHECK-LABEL: define <16 x float> @test_log10_v16f32
+; CHECK-SAME: (<16 x float> [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <16 x float> @_Z5log10Dv16_f(<16 x float> [[X]]), !fpmath !0
+; CHECK-NEXT:    ret <16 x float> [[RES]]
+; 16-element float log10 with !fpmath: the emitted call is expected to equal the input call.
+  %res = tail call <16 x float> @_Z5log10Dv16_f(<16 x float> %x), !fpmath !0
+  ret <16 x float> %res
+}
+
+define float @test_log10_cr_f32(float %x) {
+; CHECK-LABEL: define float @test_log10_cr_f32
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call float @_Z5log10f(float [[X]])
+; CHECK-NEXT:    ret float [[RES]]
+; Scalar float log10 without !fpmath: the call is expected to be left as written.
+  %res = tail call float @_Z5log10f(float %x)
+  ret float %res
+}
+
+define <2 x float> @test_log10_cr_v2f32(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_log10_cr_v2f32
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <2 x float> @_Z5log10Dv2_f(<2 x float> [[X]])
+; CHECK-NEXT:    ret <2 x float> [[RES]]
+; 2-element float log10 without !fpmath: the call is expected to be left as written.
+  %res = tail call <2 x float> @_Z5log10Dv2_f(<2 x float> %x)
+  ret <2 x float> %res
+}
+
+define <3 x float> @test_log10_cr_v3f32(<3 x float> %x) {
+; CHECK-LABEL: define <3 x float> @test_log10_cr_v3f32
+; CHECK-SAME: (<3 x float> [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <3 x float> @_Z5log10Dv3_f(<3 x float> [[X]])
+; CHECK-NEXT:    ret <3 x float> [[RES]]
+; 3-element float log10 without !fpmath: the call is expected to be left as written.
+  %res = tail call <3 x float> @_Z5log10Dv3_f(<3 x float> %x)
+  ret <3 x float> %res
+}
+
+define <4 x float> @test_log10_cr_v4f32(<4 x float> %x) {
+; CHECK-LABEL: define <4 x float> @test_log10_cr_v4f32
+; CHECK-SAME: (<4 x float> [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <4 x float> @_Z5log10Dv4_f(<4 x float> [[X]])
+; CHECK-NEXT:    ret <4 x float> [[RES]]
+; 4-element float log10 without !fpmath: the call is expected to be left as written.
+  %res = tail call <4 x float> @_Z5log10Dv4_f(<4 x float> %x)
+  ret <4 x float> %res
+}
+
+define <8 x float> @test_log10_cr_v8f32(<8 x float> %x) {
+; CHECK-LABEL: define <8 x float> @test_log10_cr_v8f32
+; CHECK-SAME: (<8 x float> [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <8 x float> @_Z5log10Dv8_f(<8 x float> [[X]])
+; CHECK-NEXT:    ret <8 x float> [[RES]]
+; 8-element float log10 without !fpmath: the call is expected to be left as written.
+  %res = tail call <8 x float> @_Z5log10Dv8_f(<8 x float> %x)
+  ret <8 x float> %res
+}
+
+define <16 x float> @test_log10_cr_v16f32(<16 x float> %x) {
+; CHECK-LABEL: define <16 x float> @test_log10_cr_v16f32
+; CHECK-SAME: (<16 x float> [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <16 x float> @_Z5log10Dv16_f(<16 x float> [[X]])
+; CHECK-NEXT:    ret <16 x float> [[RES]]
+; 16-element float log10 without !fpmath: the call is expected to be left as written.
+  %res = tail call <16 x float> @_Z5log10Dv16_f(<16 x float> %x)
+  ret <16 x float> %res
+}
+
+define double @test_log10_f64(double %x) {
+; CHECK-LABEL: define double @test_log10_f64
+; CHECK-SAME: (double [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call double @_Z5log10d(double [[X]])
+; CHECK-NEXT:    ret double [[RES]]
+; Scalar double log10: the emitted call is expected to equal the input call.
+  %res = tail call double @_Z5log10d(double %x)
+  ret double %res
+}
+
+define <2 x double> @test_log10_v2f64(<2 x double> %x) {
+; CHECK-LABEL: define <2 x double> @test_log10_v2f64
+; CHECK-SAME: (<2 x double> [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <2 x double> @_Z5log10Dv2_d(<2 x double> [[X]])
+; CHECK-NEXT:    ret <2 x double> [[RES]]
+; 2-element double log10: the emitted call is expected to equal the input call.
+  %res = tail call <2 x double> @_Z5log10Dv2_d(<2 x double> %x)
+  ret <2 x double> %res
+}
+
+define <3 x double> @test_log10_v3f64(<3 x double> %x) {
+; CHECK-LABEL: define <3 x double> @test_log10_v3f64
+; CHECK-SAME: (<3 x double> [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <3 x double> @_Z5log10Dv3_d(<3 x double> [[X]])
+; CHECK-NEXT:    ret <3 x double> [[RES]]
+; 3-element double log10: the emitted call is expected to equal the input call.
+  %res = tail call <3 x double> @_Z5log10Dv3_d(<3 x double> %x)
+  ret <3 x double> %res
+}
+
+define <4 x double> @test_log10_v4f64(<4 x double> %x) {
+; CHECK-LABEL: define <4 x double> @test_log10_v4f64
+; CHECK-SAME: (<4 x double> [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <4 x double> @_Z5log10Dv4_d(<4 x double> [[X]])
+; CHECK-NEXT:    ret <4 x double> [[RES]]
+; 4-element double log10: the emitted call is expected to equal the input call.
+  %res = tail call <4 x double> @_Z5log10Dv4_d(<4 x double> %x)
+  ret <4 x double> %res
+}
+
+define <8 x double> @test_log10_v8f64(<8 x double> %x) {
+; CHECK-LABEL: define <8 x double> @test_log10_v8f64
+; CHECK-SAME: (<8 x double> [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <8 x double> @_Z5log10Dv8_d(<8 x double> [[X]])
+; CHECK-NEXT:    ret <8 x double> [[RES]]
+; 8-element double log10: the emitted call is expected to equal the input call.
+  %res = tail call <8 x double> @_Z5log10Dv8_d(<8 x double> %x)
+  ret <8 x double> %res
+}
+
+define <16 x double> @test_log10_v16f64(<16 x double> %x) {
+; CHECK-LABEL: define <16 x double> @test_log10_v16f64
+; CHECK-SAME: (<16 x double> [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <16 x double> @_Z5log10Dv16_d(<16 x double> [[X]])
+; CHECK-NEXT:    ret <16 x double> [[RES]]
+; 16-element double log10: the emitted call is expected to equal the input call.
+  %res = tail call <16 x double> @_Z5log10Dv16_d(<16 x double> %x)
+  ret <16 x double> %res
+}
+
+define half @test_log10_f16(half %x) {
+; CHECK-LABEL: define half @test_log10_f16
+; CHECK-SAME: (half [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call half @_Z5log10Dh(half [[X]])
+; CHECK-NEXT:    ret half [[RES]]
+; Scalar half log10 without flags: the call is expected to be left as written.
+  %res = tail call half @_Z5log10Dh(half %x)
+  ret half %res
+}
+
+define half @test_log10_f16_fast(half %x) {
+; CHECK-LABEL: define half @test_log10_f16_fast
+; CHECK-SAME: (half [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call fast half @_Z5log10Dh(half [[X]])
+; CHECK-NEXT:    ret half [[RES]]
+; Scalar half log10 with 'fast': the flag is expected to stay on the emitted call.
+  %res = tail call fast half @_Z5log10Dh(half %x)
+  ret half %res
+}
+
+define <2 x half> @test_log10_v2f16(<2 x half> %x) {
+; CHECK-LABEL: define <2 x half> @test_log10_v2f16
+; CHECK-SAME: (<2 x half> [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <2 x half> @_Z5log10Dv2_Dh(<2 x half> [[X]])
+; CHECK-NEXT:    ret <2 x half> [[RES]]
+; 2-element half log10: the emitted call is expected to equal the input call.
+  %res = tail call <2 x half> @_Z5log10Dv2_Dh(<2 x half> %x)
+  ret <2 x half> %res
+}
+
+define <3 x half> @test_log10_v3f16(<3 x half> %x) {
+; CHECK-LABEL: define <3 x half> @test_log10_v3f16
+; CHECK-SAME: (<3 x half> [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <3 x half> @_Z5log10Dv3_Dh(<3 x half> [[X]])
+; CHECK-NEXT:    ret <3 x half> [[RES]]
+; 3-element half log10: the emitted call is expected to equal the input call.
+  %res = tail call <3 x half> @_Z5log10Dv3_Dh(<3 x half> %x)
+  ret <3 x half> %res
+}
+
+define <4 x half> @test_log10_v4f16(<4 x half> %x) {
+; CHECK-LABEL: define <4 x half> @test_log10_v4f16
+; CHECK-SAME: (<4 x half> [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <4 x half> @_Z5log10Dv4_Dh(<4 x half> [[X]])
+; CHECK-NEXT:    ret <4 x half> [[RES]]
+; 4-element half log10: the emitted call is expected to equal the input call.
+  %res = tail call <4 x half> @_Z5log10Dv4_Dh(<4 x half> %x)
+  ret <4 x half> %res
+}
+
+define <8 x half> @test_log10_v8f16(<8 x half> %x) {
+; CHECK-LABEL: define <8 x half> @test_log10_v8f16
+; CHECK-SAME: (<8 x half> [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <8 x half> @_Z5log10Dv8_Dh(<8 x half> [[X]])
+; CHECK-NEXT:    ret <8 x half> [[RES]]
+; 8-element half log10: the emitted call is expected to equal the input call.
+  %res = tail call <8 x half> @_Z5log10Dv8_Dh(<8 x half> %x)
+  ret <8 x half> %res
+}
+
+define <16 x half> @test_log10_v16f16(<16 x half> %x) {
+; CHECK-LABEL: define <16 x half> @test_log10_v16f16
+; CHECK-SAME: (<16 x half> [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <16 x half> @_Z5log10Dv16_Dh(<16 x half> [[X]])
+; CHECK-NEXT:    ret <16 x half> [[RES]]
+; 16-element half log10: the emitted call is expected to equal the input call.
+  %res = tail call <16 x half> @_Z5log10Dv16_Dh(<16 x half> %x)
+  ret <16 x half> %res
+}
+
+define float @test_log10_f32_nobuiltin_callsite(float %x) {
+; CHECK-LABEL: define float @test_log10_f32_nobuiltin_callsite
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call float @_Z5log10f(float [[X]]) #[[ATTR5:[0-9]+]], !fpmath !0
+; CHECK-NEXT:    ret float [[RES]]
+; Call site uses attribute group #0; the call keeps it together with !fpmath.
+  %res = tail call float @_Z5log10f(float %x) #0, !fpmath !0
+  ret float %res
+}
+
+define <2 x float> @test_log10_v2f32_nobuiltin_callsite(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_log10_v2f32_nobuiltin_callsite
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <2 x float> @_Z5log10Dv2_f(<2 x float> [[X]]) #[[ATTR5]], !fpmath !0
+; CHECK-NEXT:    ret <2 x float> [[RES]]
+; Vector call site uses attribute group #0; the call keeps it together with !fpmath.
+  %res = tail call <2 x float> @_Z5log10Dv2_f(<2 x float> %x) #0, !fpmath !0
+  ret <2 x float> %res
+}
+
+define float @test_log10_cr_f32_nobuiltin_callsite(float %x) {
+; CHECK-LABEL: define float @test_log10_cr_f32_nobuiltin_callsite
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call float @_Z5log10f(float [[X]]) #[[ATTR5]]
+; CHECK-NEXT:    ret float [[RES]]
+; Call site uses attribute group #0 and no !fpmath; the call is expected to be unchanged.
+  %res = tail call float @_Z5log10f(float %x) #0
+  ret float %res
+}
+
+define <2 x float> @test_log10_cr_v2f32_nobuiltin_callsite(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_log10_cr_v2f32_nobuiltin_callsite
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <2 x float> @_Z5log10Dv2_f(<2 x float> [[X]]) #[[ATTR5]]
+; CHECK-NEXT:    ret <2 x float> [[RES]]
+; Vector call site uses attribute group #0 and no !fpmath; the call is expected to be unchanged.
+  %res = tail call <2 x float> @_Z5log10Dv2_f(<2 x float> %x) #0
+  ret <2 x float> %res
+}
+
+; The "no-builtins" function attribute should be ignored by the pass.
+define float @test_log10_f32_nobuiltins(float %x) #1 {
+; CHECK-LABEL: define float @test_log10_f32_nobuiltins
+; CHECK-SAME: (float [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[RES:%.*]] = tail call float @_Z5log10f(float [[X]]) #[[ATTR5]], !fpmath !0
+; CHECK-NEXT:    ret float [[RES]]
+; NOTE(review): the call site also carries group #0 besides the function's #1 - confirm intended.
+  %res = tail call float @_Z5log10f(float %x) #0, !fpmath !0
+  ret float %res
+}
+
+define <2 x float> @test_log10_v2f32_nobuiltins(<2 x float> %x) #1 {
+; CHECK-LABEL: define <2 x float> @test_log10_v2f32_nobuiltins
+; CHECK-SAME: (<2 x float> [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <2 x float> @_Z5log10Dv2_f(<2 x float> [[X]]) #[[ATTR5]], !fpmath !0
+; CHECK-NEXT:    ret <2 x float> [[RES]]
+; NOTE(review): the call site also carries group #0 besides the function's #1 - confirm intended.
+  %res = tail call <2 x float> @_Z5log10Dv2_f(<2 x float> %x) #0, !fpmath !0
+  ret <2 x float> %res
+}
+
+define float @test_log10_cr_f32_nobuiltins(float %x) #1 {
+; CHECK-LABEL: define float @test_log10_cr_f32_nobuiltins
+; CHECK-SAME: (float [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[RES:%.*]] = tail call float @_Z5log10f(float [[X]]) #[[ATTR5]]
+; CHECK-NEXT:    ret float [[RES]]
+; NOTE(review): the call site also carries group #0 besides the function's #1 - confirm intended.
+  %res = tail call float @_Z5log10f(float %x) #0
+  ret float %res
+}
+
+define <2 x float> @test_log10_cr_v2f32_nobuiltins(<2 x float> %x) #1 {
+; CHECK-LABEL: define <2 x float> @test_log10_cr_v2f32_nobuiltins
+; CHECK-SAME: (<2 x float> [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[RES:%.*]] = tail call <2 x float> @_Z5log10Dv2_f(<2 x float> [[X]]) #[[ATTR5]]
+; CHECK-NEXT:    ret <2 x float> [[RES]]
+; NOTE(review): the call site also carries group #0 besides the function's #1 - confirm intended.
+  %res = tail call <2 x float> @_Z5log10Dv2_f(<2 x float> %x) #0
+  ret <2 x float> %res
+}
+
+define float @test_log10_f32_preserve_flags(float %x) {
+; CHECK-LABEL: define float @test_log10_f32_preserve_flags
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call nnan ninf float @_Z5log10f(float [[X]]), !fpmath !0
+; CHECK-NEXT:    ret float [[RES]]
+; The nnan/ninf flags and !fpmath are expected to survive on the emitted call.
+  %res = tail call nnan ninf float @_Z5log10f(float %x), !fpmath !0
+  ret float %res
+}
+
+define <2 x float> @test_log10_v2f32_preserve_flags(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_log10_v2f32_preserve_flags
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call nnan nsz contract <2 x float> @_Z5log10Dv2_f(<2 x float> [[X]]), !fpmath !0
+; CHECK-NEXT:    ret <2 x float> [[RES]]
+; Input spells the flags 'contract nsz nnan'; the output lists them as 'nnan nsz contract'.
+  %res = tail call contract nsz nnan <2 x float> @_Z5log10Dv2_f(<2 x float> %x), !fpmath !0
+  ret <2 x float> %res
+}
+
+define float @test_log10_f32_preserve_flags_md(float %x) {
+; CHECK-LABEL: define float @test_log10_f32_preserve_flags_md
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call nnan ninf float @_Z5log10f(float [[X]]), !fpmath !0, !foo !1
+; CHECK-NEXT:    ret float [[RES]]
+; Flags plus both metadata kinds (!fpmath and the custom !foo) must remain on the call.
+  %res = tail call nnan ninf float @_Z5log10f(float %x), !fpmath !0, !foo !1
+  ret float %res
+}
+
+define <2 x float> @test_log10_v2f32_preserve_flags_md(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_log10_v2f32_preserve_flags_md
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call nnan nsz contract <2 x float> @_Z5log10Dv2_f(<2 x float> [[X]]), !fpmath !0, !foo !1
+; CHECK-NEXT:    ret <2 x float> [[RES]]
+; Vector form: flags, !fpmath and the custom !foo metadata must remain on the call.
+  %res = tail call contract nsz nnan <2 x float> @_Z5log10Dv2_f(<2 x float> %x), !fpmath !0, !foo !1
+  ret <2 x float> %res
+}
+
+define float @test_log10_cr_f32_preserve_flags(float %x) {
+; CHECK-LABEL: define float @test_log10_cr_f32_preserve_flags
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call ninf contract float @_Z5log10f(float [[X]])
+; CHECK-NEXT:    ret float [[RES]]
+; No !fpmath here; the ninf/contract flags are expected to stay on the call.
+  %res = tail call ninf contract float @_Z5log10f(float %x)
+  ret float %res
+}
+
+define <2 x float> @test_log10_cr_v2f32_preserve_flags(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_log10_cr_v2f32_preserve_flags
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call nnan nsz <2 x float> @_Z5log10Dv2_f(<2 x float> [[X]])
+; CHECK-NEXT:    ret <2 x float> [[RES]]
+; No !fpmath here; the nnan/nsz flags are expected to stay on the vector call.
+  %res = tail call nnan nsz <2 x float> @_Z5log10Dv2_f(<2 x float> %x)
+  ret <2 x float> %res
+}
+
+; Test the libm names, which are not recognized OpenCL builtins.
+declare float @log10f(float) #2
+declare double @log10(double) #2
+
+define float @test_libm_log10_f32(float %x) {
+; CHECK-LABEL: define float @test_libm_log10_f32
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call float @log10f(float [[X]])
+; CHECK-NEXT:    ret float [[RES]]
+; Plain libm log10f: the call is expected to be left as written.
+  %res = tail call float @log10f(float %x)
+  ret float %res
+}
+
+define float @test_libm_log10_f32_fast(float %x) {
+; CHECK-LABEL: define float @test_libm_log10_f32_fast
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call fast float @log10f(float [[X]])
+; CHECK-NEXT:    ret float [[RES]]
+; libm log10f with 'fast': the call and its flag are expected to be left as written.
+  %res = tail call fast float @log10f(float %x)
+  ret float %res
+}
+
+define float @test_libm_log10_f32_fpmath(float %x) {
+; CHECK-LABEL: define float @test_libm_log10_f32_fpmath
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call float @log10f(float [[X]]), !fpmath !0
+; CHECK-NEXT:    ret float [[RES]]
+; libm log10f with !fpmath: call and metadata are expected to be left as written.
+  %res = tail call float @log10f(float %x), !fpmath !0
+  ret float %res
+}
+
+define double @test_libm_log10_f64(double %x) {
+; CHECK-LABEL: define double @test_libm_log10_f64
+; CHECK-SAME: (double [[X:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = tail call double @log10(double [[X]])
+; CHECK-NEXT:    ret double [[RES]]
+; Plain libm log10 on double: the call is expected to be left as written.
+  %res = tail call double @log10(double %x)
+  ret double %res
+}
+
+define double @test_libm_log10_f64_fast(double %arg) { ; libm log10 (double) call with the fast flag; the checks expect it to be left unchanged
+; CHECK-LABEL: define double @test_libm_log10_f64_fast
+; CHECK-SAME: (double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG10:%.*]] = tail call fast double @log10(double [[ARG]])
+; CHECK-NEXT:    ret double [[LOG10]]
+;
+  %log10 = tail call fast double @log10(double %arg)
+  ret double %log10
+}
+
+define double @test_libm_log10_f64_fpmath(double %arg) { ; libm log10 (double) call carrying !fpmath; the checks expect it to be left unchanged
+; CHECK-LABEL: define double @test_libm_log10_f64_fpmath
+; CHECK-SAME: (double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG10:%.*]] = tail call double @log10(double [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret double [[LOG10]]
+;
+  %log10 = tail call double @log10(double %arg), !fpmath !0
+  ret double %log10
+}
+
+define float @test_log10_f32_fast_noinline(float %arg) { ; fast + !fpmath call whose call site carries noinline (#3); the checks expect it to be left unchanged
+; CHECK-LABEL: define float @test_log10_f32_fast_noinline
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG10:%.*]] = tail call fast float @_Z5log10f(float [[ARG]]) #[[ATTR6:[0-9]+]], !fpmath !0
+; CHECK-NEXT:    ret float [[LOG10]]
+;
+  %log10 = tail call fast float @_Z5log10f(float %arg) #3, !fpmath !0
+  ret float %log10
+}
+
+define float @test_log10_f32_fast_optsize(float %arg) #4 { ; fast + !fpmath call inside an optsize (#4) function; the checks expect it to be left unchanged
+; CHECK-LABEL: define float @test_log10_f32_fast_optsize
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT:    [[LOG10:%.*]] = tail call fast float @_Z5log10f(float [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret float [[LOG10]]
+;
+  %log10 = tail call fast float @_Z5log10f(float %arg), !fpmath !0
+  ret float %log10
+}
+
+define float @test_log10_f32_fast_minsize(float %arg) #5 { ; fast + !fpmath call inside a minsize (#5) function; the checks expect it to be left unchanged
+; CHECK-LABEL: define float @test_log10_f32_fast_minsize
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR3:[0-9]+]] {
+; CHECK-NEXT:    [[LOG10:%.*]] = tail call fast float @_Z5log10f(float [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret float [[LOG10]]
+;
+  %log10 = tail call fast float @_Z5log10f(float %arg), !fpmath !0
+  ret float %log10
+}
+
+define float @test_log10_f32_nsz_contract_optsize(float %arg) #4 { ; nsz contract + !fpmath call inside an optsize (#4) function; the checks expect it to be left unchanged
+; CHECK-LABEL: define float @test_log10_f32_nsz_contract_optsize
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT:    [[LOG10:%.*]] = tail call nsz contract float @_Z5log10f(float [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret float [[LOG10]]
+;
+  %log10 = tail call nsz contract float @_Z5log10f(float %arg), !fpmath !0
+  ret float %log10
+}
+
+define float @test_log10_f32_nsz_contract_minsize(float %arg) #5 { ; nsz contract + !fpmath call inside a minsize (#5) function; the checks expect it to be left unchanged
+; CHECK-LABEL: define float @test_log10_f32_nsz_contract_minsize
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:    [[LOG10:%.*]] = tail call nsz contract float @_Z5log10f(float [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret float [[LOG10]]
+;
+  %log10 = tail call nsz contract float @_Z5log10f(float %arg), !fpmath !0
+  ret float %log10
+}
+
+define half @test_log10_f16_fast_minsize(half %arg) #5 { ; fast half call (no !fpmath) inside a minsize (#5) function; the checks expect it to be left unchanged
+; CHECK-LABEL: define half @test_log10_f16_fast_minsize
+; CHECK-SAME: (half [[ARG:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:    [[LOG10:%.*]] = tail call fast half @_Z5log10Dh(half [[ARG]])
+; CHECK-NEXT:    ret half [[LOG10]]
+;
+  %log10 = tail call fast half @_Z5log10Dh(half %arg)
+  ret half %log10
+}
+
+define float @test_log10_f32_strictfp(float %arg) #6 { ; nsz call with strictfp (#6) on both the function and the call site; the checks expect it to be left unchanged
+; CHECK-LABEL: define float @test_log10_f32_strictfp
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR4:[0-9]+]] {
+; CHECK-NEXT:    [[LOG:%.*]] = tail call nsz float @_Z5log10f(float [[ARG]]) #[[ATTR4]]
+; CHECK-NEXT:    ret float [[LOG]]
+;
+  %log = tail call nsz float @_Z5log10f(float %arg) #6
+  ret float %log
+}
+
+attributes #0 = { nobuiltin }
+attributes #1 = { "no-builtins" }
+attributes #2 = { nounwind memory(none) }
+attributes #3 = { noinline }
+attributes #4 = { optsize }
+attributes #5 = { minsize }
+attributes #6 = { strictfp }
+
+!0 = !{float 3.000000e+00}
+!1 = !{i32 1234}
Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-log2.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-log2.ll
@@ -0,0 +1,561 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
+
+declare float @_Z4log2f(float)
+declare <2 x float> @_Z4log2Dv2_f(<2 x float>)
+declare <3 x float> @_Z4log2Dv3_f(<3 x float>)
+declare <4 x float> @_Z4log2Dv4_f(<4 x float>)
+declare <8 x float> @_Z4log2Dv8_f(<8 x float>)
+declare <16 x float> @_Z4log2Dv16_f(<16 x float>)
+
+declare double @_Z4log2d(double)
+declare <2 x double> @_Z4log2Dv2_d(<2 x double>)
+declare <3 x double> @_Z4log2Dv3_d(<3 x double>)
+declare <4 x double> @_Z4log2Dv4_d(<4 x double>)
+declare <8 x double> @_Z4log2Dv8_d(<8 x double>)
+declare <16 x double> @_Z4log2Dv16_d(<16 x double>)
+
+declare half @_Z4log2Dh(half)
+declare <2 x half> @_Z4log2Dv2_Dh(<2 x half>)
+declare <3 x half> @_Z4log2Dv3_Dh(<3 x half>)
+declare <4 x half> @_Z4log2Dv4_Dh(<4 x half>)
+declare <8 x half> @_Z4log2Dv8_Dh(<8 x half>)
+declare <16 x half> @_Z4log2Dv16_Dh(<16 x half>)
+
+define float @test_log2_f32(float %arg) { ; scalar float log2 with !fpmath; the checks expect the call to be left unchanged
+; CHECK-LABEL: define float @test_log2_f32
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call float @_Z4log2f(float [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret float [[LOG2]]
+;
+  %log2 = tail call float @_Z4log2f(float %arg), !fpmath !0
+  ret float %log2
+}
+
+define <2 x float> @test_log2_v2f32(<2 x float> %arg) { ; <2 x float> log2 with !fpmath; the checks expect the call to be left unchanged
+; CHECK-LABEL: define <2 x float> @test_log2_v2f32
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call <2 x float> @_Z4log2Dv2_f(<2 x float> [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret <2 x float> [[LOG2]]
+;
+  %log2 = tail call <2 x float> @_Z4log2Dv2_f(<2 x float> %arg), !fpmath !0
+  ret <2 x float> %log2
+}
+
+define <3 x float> @test_log2_v3f32(<3 x float> %arg) { ; <3 x float> log2 with !fpmath; the checks expect the call to be left unchanged
+; CHECK-LABEL: define <3 x float> @test_log2_v3f32
+; CHECK-SAME: (<3 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call <3 x float> @_Z4log2Dv3_f(<3 x float> [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret <3 x float> [[LOG2]]
+;
+  %log2 = tail call <3 x float> @_Z4log2Dv3_f(<3 x float> %arg), !fpmath !0
+  ret <3 x float> %log2
+}
+
+define <4 x float> @test_log2_v4f32(<4 x float> %arg) { ; <4 x float> log2 with !fpmath; the checks expect the call to be left unchanged
+; CHECK-LABEL: define <4 x float> @test_log2_v4f32
+; CHECK-SAME: (<4 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call <4 x float> @_Z4log2Dv4_f(<4 x float> [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret <4 x float> [[LOG2]]
+;
+  %log2 = tail call <4 x float> @_Z4log2Dv4_f(<4 x float> %arg), !fpmath !0
+  ret <4 x float> %log2
+}
+
+define <8 x float> @test_log2_v8f32(<8 x float> %arg) { ; <8 x float> log2 with !fpmath; the checks expect the call to be left unchanged
+; CHECK-LABEL: define <8 x float> @test_log2_v8f32
+; CHECK-SAME: (<8 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call <8 x float> @_Z4log2Dv8_f(<8 x float> [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret <8 x float> [[LOG2]]
+;
+  %log2 = tail call <8 x float> @_Z4log2Dv8_f(<8 x float> %arg), !fpmath !0
+  ret <8 x float> %log2
+}
+
+define <16 x float> @test_log2_v16f32(<16 x float> %arg) { ; <16 x float> log2 with !fpmath; the checks expect the call to be left unchanged
+; CHECK-LABEL: define <16 x float> @test_log2_v16f32
+; CHECK-SAME: (<16 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call <16 x float> @_Z4log2Dv16_f(<16 x float> [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret <16 x float> [[LOG2]]
+;
+  %log2 = tail call <16 x float> @_Z4log2Dv16_f(<16 x float> %arg), !fpmath !0
+  ret <16 x float> %log2
+}
+
+define float @test_log2_cr_f32(float %arg) { ; scalar float log2 without !fpmath; the checks expect the call to be left unchanged
+; CHECK-LABEL: define float @test_log2_cr_f32
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call float @_Z4log2f(float [[ARG]])
+; CHECK-NEXT:    ret float [[LOG2]]
+;
+  %log2 = tail call float @_Z4log2f(float %arg)
+  ret float %log2
+}
+
+define <2 x float> @test_log2_cr_v2f32(<2 x float> %arg) { ; <2 x float> log2 without !fpmath; the checks expect the call to be left unchanged
+; CHECK-LABEL: define <2 x float> @test_log2_cr_v2f32
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call <2 x float> @_Z4log2Dv2_f(<2 x float> [[ARG]])
+; CHECK-NEXT:    ret <2 x float> [[LOG2]]
+;
+  %log2 = tail call <2 x float> @_Z4log2Dv2_f(<2 x float> %arg)
+  ret <2 x float> %log2
+}
+
+define <3 x float> @test_log2_cr_v3f32(<3 x float> %arg) { ; <3 x float> log2 without !fpmath; the checks expect the call to be left unchanged
+; CHECK-LABEL: define <3 x float> @test_log2_cr_v3f32
+; CHECK-SAME: (<3 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call <3 x float> @_Z4log2Dv3_f(<3 x float> [[ARG]])
+; CHECK-NEXT:    ret <3 x float> [[LOG2]]
+;
+  %log2 = tail call <3 x float> @_Z4log2Dv3_f(<3 x float> %arg)
+  ret <3 x float> %log2
+}
+
+define <4 x float> @test_log2_cr_v4f32(<4 x float> %arg) { ; <4 x float> log2 without !fpmath; the checks expect the call to be left unchanged
+; CHECK-LABEL: define <4 x float> @test_log2_cr_v4f32
+; CHECK-SAME: (<4 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call <4 x float> @_Z4log2Dv4_f(<4 x float> [[ARG]])
+; CHECK-NEXT:    ret <4 x float> [[LOG2]]
+;
+  %log2 = tail call <4 x float> @_Z4log2Dv4_f(<4 x float> %arg)
+  ret <4 x float> %log2
+}
+
+define <8 x float> @test_log2_cr_v8f32(<8 x float> %arg) { ; <8 x float> log2 without !fpmath; the checks expect the call to be left unchanged
+; CHECK-LABEL: define <8 x float> @test_log2_cr_v8f32
+; CHECK-SAME: (<8 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call <8 x float> @_Z4log2Dv8_f(<8 x float> [[ARG]])
+; CHECK-NEXT:    ret <8 x float> [[LOG2]]
+;
+  %log2 = tail call <8 x float> @_Z4log2Dv8_f(<8 x float> %arg)
+  ret <8 x float> %log2
+}
+
+define <16 x float> @test_log2_cr_v16f32(<16 x float> %arg) { ; <16 x float> log2 without !fpmath; the checks expect the call to be left unchanged
+; CHECK-LABEL: define <16 x float> @test_log2_cr_v16f32
+; CHECK-SAME: (<16 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call <16 x float> @_Z4log2Dv16_f(<16 x float> [[ARG]])
+; CHECK-NEXT:    ret <16 x float> [[LOG2]]
+;
+  %log2 = tail call <16 x float> @_Z4log2Dv16_f(<16 x float> %arg)
+  ret <16 x float> %log2
+}
+
+define double @test_log2_f64(double %arg) { ; scalar double log2, no flags or metadata; the checks expect the call to be left unchanged
+; CHECK-LABEL: define double @test_log2_f64
+; CHECK-SAME: (double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call double @_Z4log2d(double [[ARG]])
+; CHECK-NEXT:    ret double [[LOG2]]
+;
+  %log2 = tail call double @_Z4log2d(double %arg)
+  ret double %log2
+}
+
+define <2 x double> @test_log2_v2f64(<2 x double> %arg) { ; <2 x double> log2, no flags or metadata; the checks expect the call to be left unchanged
+; CHECK-LABEL: define <2 x double> @test_log2_v2f64
+; CHECK-SAME: (<2 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call <2 x double> @_Z4log2Dv2_d(<2 x double> [[ARG]])
+; CHECK-NEXT:    ret <2 x double> [[LOG2]]
+;
+  %log2 = tail call <2 x double> @_Z4log2Dv2_d(<2 x double> %arg)
+  ret <2 x double> %log2
+}
+
+define <3 x double> @test_log2_v3f64(<3 x double> %arg) { ; <3 x double> log2, no flags or metadata; the checks expect the call to be left unchanged
+; CHECK-LABEL: define <3 x double> @test_log2_v3f64
+; CHECK-SAME: (<3 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call <3 x double> @_Z4log2Dv3_d(<3 x double> [[ARG]])
+; CHECK-NEXT:    ret <3 x double> [[LOG2]]
+;
+  %log2 = tail call <3 x double> @_Z4log2Dv3_d(<3 x double> %arg)
+  ret <3 x double> %log2
+}
+
+define <4 x double> @test_log2_v4f64(<4 x double> %arg) { ; <4 x double> log2, no flags or metadata; the checks expect the call to be left unchanged
+; CHECK-LABEL: define <4 x double> @test_log2_v4f64
+; CHECK-SAME: (<4 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call <4 x double> @_Z4log2Dv4_d(<4 x double> [[ARG]])
+; CHECK-NEXT:    ret <4 x double> [[LOG2]]
+;
+  %log2 = tail call <4 x double> @_Z4log2Dv4_d(<4 x double> %arg)
+  ret <4 x double> %log2
+}
+
+define <8 x double> @test_log2_v8f64(<8 x double> %arg) { ; <8 x double> log2, no flags or metadata; the checks expect the call to be left unchanged
+; CHECK-LABEL: define <8 x double> @test_log2_v8f64
+; CHECK-SAME: (<8 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call <8 x double> @_Z4log2Dv8_d(<8 x double> [[ARG]])
+; CHECK-NEXT:    ret <8 x double> [[LOG2]]
+;
+  %log2 = tail call <8 x double> @_Z4log2Dv8_d(<8 x double> %arg)
+  ret <8 x double> %log2
+}
+
+define <16 x double> @test_log2_v16f64(<16 x double> %arg) { ; <16 x double> log2, no flags or metadata; the checks expect the call to be left unchanged
+; CHECK-LABEL: define <16 x double> @test_log2_v16f64
+; CHECK-SAME: (<16 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call <16 x double> @_Z4log2Dv16_d(<16 x double> [[ARG]])
+; CHECK-NEXT:    ret <16 x double> [[LOG2]]
+;
+  %log2 = tail call <16 x double> @_Z4log2Dv16_d(<16 x double> %arg)
+  ret <16 x double> %log2
+}
+
+define half @test_log2_f16(half %arg) { ; scalar half log2, no flags or metadata; the checks expect the call to be left unchanged
+; CHECK-LABEL: define half @test_log2_f16
+; CHECK-SAME: (half [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call half @_Z4log2Dh(half [[ARG]])
+; CHECK-NEXT:    ret half [[LOG2]]
+;
+  %log2 = tail call half @_Z4log2Dh(half %arg)
+  ret half %log2
+}
+
+define half @test_log2_f16_fast(half %arg) { ; scalar half log2 with the fast flag; the checks expect the call to be left unchanged
+; CHECK-LABEL: define half @test_log2_f16_fast
+; CHECK-SAME: (half [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call fast half @_Z4log2Dh(half [[ARG]])
+; CHECK-NEXT:    ret half [[LOG2]]
+;
+  %log2 = tail call fast half @_Z4log2Dh(half %arg)
+  ret half %log2
+}
+
+define <2 x half> @test_log2_v2f16(<2 x half> %arg) { ; <2 x half> log2, no flags or metadata; the checks expect the call to be left unchanged
+; CHECK-LABEL: define <2 x half> @test_log2_v2f16
+; CHECK-SAME: (<2 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call <2 x half> @_Z4log2Dv2_Dh(<2 x half> [[ARG]])
+; CHECK-NEXT:    ret <2 x half> [[LOG2]]
+;
+  %log2 = tail call <2 x half> @_Z4log2Dv2_Dh(<2 x half> %arg)
+  ret <2 x half> %log2
+}
+
+define <3 x half> @test_log2_v3f16(<3 x half> %arg) { ; <3 x half> log2, no flags or metadata; the checks expect the call to be left unchanged
+; CHECK-LABEL: define <3 x half> @test_log2_v3f16
+; CHECK-SAME: (<3 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call <3 x half> @_Z4log2Dv3_Dh(<3 x half> [[ARG]])
+; CHECK-NEXT:    ret <3 x half> [[LOG2]]
+;
+  %log2 = tail call <3 x half> @_Z4log2Dv3_Dh(<3 x half> %arg)
+  ret <3 x half> %log2
+}
+
+define <4 x half> @test_log2_v4f16(<4 x half> %arg) { ; <4 x half> log2, no flags or metadata; the checks expect the call to be left unchanged
+; CHECK-LABEL: define <4 x half> @test_log2_v4f16
+; CHECK-SAME: (<4 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call <4 x half> @_Z4log2Dv4_Dh(<4 x half> [[ARG]])
+; CHECK-NEXT:    ret <4 x half> [[LOG2]]
+;
+  %log2 = tail call <4 x half> @_Z4log2Dv4_Dh(<4 x half> %arg)
+  ret <4 x half> %log2
+}
+
+define <8 x half> @test_log2_v8f16(<8 x half> %arg) { ; <8 x half> log2, no flags or metadata; the checks expect the call to be left unchanged
+; CHECK-LABEL: define <8 x half> @test_log2_v8f16
+; CHECK-SAME: (<8 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call <8 x half> @_Z4log2Dv8_Dh(<8 x half> [[ARG]])
+; CHECK-NEXT:    ret <8 x half> [[LOG2]]
+;
+  %log2 = tail call <8 x half> @_Z4log2Dv8_Dh(<8 x half> %arg)
+  ret <8 x half> %log2
+}
+
+define <16 x half> @test_log2_v16f16(<16 x half> %arg) { ; <16 x half> log2, no flags or metadata; the checks expect the call to be left unchanged
+; CHECK-LABEL: define <16 x half> @test_log2_v16f16
+; CHECK-SAME: (<16 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call <16 x half> @_Z4log2Dv16_Dh(<16 x half> [[ARG]])
+; CHECK-NEXT:    ret <16 x half> [[LOG2]]
+;
+  %log2 = tail call <16 x half> @_Z4log2Dv16_Dh(<16 x half> %arg)
+  ret <16 x half> %log2
+}
+
+define float @test_log2_f32_nobuiltin_callsite(float %arg) { ; float log2 with !fpmath whose call site is marked nobuiltin (#0); the checks expect it to be left unchanged
+; CHECK-LABEL: define float @test_log2_f32_nobuiltin_callsite
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call float @_Z4log2f(float [[ARG]]) #[[ATTR5:[0-9]+]], !fpmath !0
+; CHECK-NEXT:    ret float [[LOG2]]
+;
+  %log2 = tail call float @_Z4log2f(float %arg) #0, !fpmath !0
+  ret float %log2
+}
+
+define <2 x float> @test_log2_v2f32_nobuiltin_callsite(<2 x float> %arg) { ; <2 x float> log2 with !fpmath whose call site is marked nobuiltin (#0); the checks expect it to be left unchanged
+; CHECK-LABEL: define <2 x float> @test_log2_v2f32_nobuiltin_callsite
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call <2 x float> @_Z4log2Dv2_f(<2 x float> [[ARG]]) #[[ATTR5]], !fpmath !0
+; CHECK-NEXT:    ret <2 x float> [[LOG2]]
+;
+  %log2 = tail call <2 x float> @_Z4log2Dv2_f(<2 x float> %arg) #0, !fpmath !0
+  ret <2 x float> %log2
+}
+
+define float @test_log2_cr_f32_nobuiltin_callsite(float %arg) { ; float log2 without !fpmath whose call site is marked nobuiltin (#0); the checks expect it to be left unchanged
+; CHECK-LABEL: define float @test_log2_cr_f32_nobuiltin_callsite
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call float @_Z4log2f(float [[ARG]]) #[[ATTR5]]
+; CHECK-NEXT:    ret float [[LOG2]]
+;
+  %log2 = tail call float @_Z4log2f(float %arg) #0
+  ret float %log2
+}
+
+define <2 x float> @test_log2_cr_v2f32_nobuiltin_callsite(<2 x float> %arg) { ; <2 x float> log2 without !fpmath whose call site is marked nobuiltin (#0); the checks expect it to be left unchanged
+; CHECK-LABEL: define <2 x float> @test_log2_cr_v2f32_nobuiltin_callsite
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call <2 x float> @_Z4log2Dv2_f(<2 x float> [[ARG]]) #[[ATTR5]]
+; CHECK-NEXT:    ret <2 x float> [[LOG2]]
+;
+  %log2 = tail call <2 x float> @_Z4log2Dv2_f(<2 x float> %arg) #0
+  ret <2 x float> %log2
+}
+
+; The function-level "no-builtins" attribute should be ignored.
+define float @test_log2_f32_nobuiltins(float %arg) #1 { ; caller carries "no-builtins" (#1); the call site has no nobuiltin so only the caller attribute is exercised (float, with !fpmath)
+; CHECK-LABEL: define float @test_log2_f32_nobuiltins
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call float @_Z4log2f(float [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret float [[LOG2]]
+;
+  %log2 = tail call float @_Z4log2f(float %arg), !fpmath !0
+  ret float %log2
+}
+
+define <2 x float> @test_log2_v2f32_nobuiltins(<2 x float> %arg) #1 { ; caller carries "no-builtins" (#1); the call site has no nobuiltin so only the caller attribute is exercised (<2 x float>, with !fpmath)
+; CHECK-LABEL: define <2 x float> @test_log2_v2f32_nobuiltins
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call <2 x float> @_Z4log2Dv2_f(<2 x float> [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret <2 x float> [[LOG2]]
+;
+  %log2 = tail call <2 x float> @_Z4log2Dv2_f(<2 x float> %arg), !fpmath !0
+  ret <2 x float> %log2
+}
+
+define float @test_log2_cr_f32_nobuiltins(float %arg) #1 { ; caller carries "no-builtins" (#1); the call site has no nobuiltin so only the caller attribute is exercised (float, no !fpmath)
+; CHECK-LABEL: define float @test_log2_cr_f32_nobuiltins
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call float @_Z4log2f(float [[ARG]])
+; CHECK-NEXT:    ret float [[LOG2]]
+;
+  %log2 = tail call float @_Z4log2f(float %arg)
+  ret float %log2
+}
+
+define <2 x float> @test_log2_cr_v2f32_nobuiltins(<2 x float> %arg) #1 { ; caller carries "no-builtins" (#1); the call site has no nobuiltin so only the caller attribute is exercised (<2 x float>, no !fpmath)
+; CHECK-LABEL: define <2 x float> @test_log2_cr_v2f32_nobuiltins
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call <2 x float> @_Z4log2Dv2_f(<2 x float> [[ARG]])
+; CHECK-NEXT:    ret <2 x float> [[LOG2]]
+;
+  %log2 = tail call <2 x float> @_Z4log2Dv2_f(<2 x float> %arg)
+  ret <2 x float> %log2
+}
+
+define float @test_log2_f32_preserve_flags(float %arg) { ; float log2 with nnan ninf and !fpmath; the checks expect both flags to still be on the call
+; CHECK-LABEL: define float @test_log2_f32_preserve_flags
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call nnan ninf float @_Z4log2f(float [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret float [[LOG2]]
+;
+  %log2 = tail call nnan ninf float @_Z4log2f(float %arg), !fpmath !0
+  ret float %log2
+}
+
+define <2 x float> @test_log2_v2f32_preserve_flags(<2 x float> %arg) { ; input spells the flags "contract nsz nnan"; the checks expect the same three flags printed as "nnan nsz contract"
+; CHECK-LABEL: define <2 x float> @test_log2_v2f32_preserve_flags
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call nnan nsz contract <2 x float> @_Z4log2Dv2_f(<2 x float> [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret <2 x float> [[LOG2]]
+;
+  %log2 = tail call contract nsz nnan <2 x float> @_Z4log2Dv2_f(<2 x float> %arg), !fpmath !0
+  ret <2 x float> %log2
+}
+
+define float @test_log2_f32_preserve_flags_md(float %arg) { ; nnan ninf call with !fpmath plus an extra !foo attachment; the checks expect the flags and both attachments to be kept
+; CHECK-LABEL: define float @test_log2_f32_preserve_flags_md
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call nnan ninf float @_Z4log2f(float [[ARG]]), !fpmath !0, !foo !1
+; CHECK-NEXT:    ret float [[LOG2]]
+;
+  %log2 = tail call nnan ninf float @_Z4log2f(float %arg), !fpmath !0, !foo !1
+  ret float %log2
+}
+
+define <2 x float> @test_log2_v2f32_preserve_flags_md(<2 x float> %arg) { ; <2 x float> call with contract nsz nnan, !fpmath and an extra !foo attachment; the checks expect the flags and both attachments to be kept
+; CHECK-LABEL: define <2 x float> @test_log2_v2f32_preserve_flags_md
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call nnan nsz contract <2 x float> @_Z4log2Dv2_f(<2 x float> [[ARG]]), !fpmath !0, !foo !1
+; CHECK-NEXT:    ret <2 x float> [[LOG2]]
+;
+  %log2 = tail call contract nsz nnan <2 x float> @_Z4log2Dv2_f(<2 x float> %arg), !fpmath !0, !foo !1
+  ret <2 x float> %log2
+}
+
+define float @test_log2_cr_f32_preserve_flags(float %arg) { ; float log2 with ninf contract and no !fpmath; the checks expect the call and its flags to be kept as-is
+; CHECK-LABEL: define float @test_log2_cr_f32_preserve_flags
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call ninf contract float @_Z4log2f(float [[ARG]])
+; CHECK-NEXT:    ret float [[LOG2]]
+;
+  %log2 = tail call ninf contract float @_Z4log2f(float %arg)
+  ret float %log2
+}
+
+define <2 x float> @test_log2_cr_v2f32_preserve_flags(<2 x float> %arg) { ; <2 x float> log2 with nnan nsz and no !fpmath; the checks expect the call and its flags to be kept as-is
+; CHECK-LABEL: define <2 x float> @test_log2_cr_v2f32_preserve_flags
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call nnan nsz <2 x float> @_Z4log2Dv2_f(<2 x float> [[ARG]])
+; CHECK-NEXT:    ret <2 x float> [[LOG2]]
+;
+  %log2 = tail call nnan nsz <2 x float> @_Z4log2Dv2_f(<2 x float> %arg)
+  ret <2 x float> %log2
+}
+
+; Test the libm names, which are not recognized OpenCL builtins.
+declare float @log2f(float) #2
+declare double @log2(double) #2
+
+define float @test_libm_log2_f32(float %arg) { ; plain libm log2f call, no flags or metadata; the checks expect it to be left unchanged
+; CHECK-LABEL: define float @test_libm_log2_f32
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call float @log2f(float [[ARG]])
+; CHECK-NEXT:    ret float [[LOG2]]
+;
+  %log2 = tail call float @log2f(float %arg)
+  ret float %log2
+}
+
+define float @test_libm_log2_f32_fast(float %arg) { ; libm log2f call with the fast flag; the checks expect it to be left unchanged
+; CHECK-LABEL: define float @test_libm_log2_f32_fast
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call fast float @log2f(float [[ARG]])
+; CHECK-NEXT:    ret float [[LOG2]]
+;
+  %log2 = tail call fast float @log2f(float %arg)
+  ret float %log2
+}
+
+define float @test_libm_log2_f32_fpmath(float %arg) { ; libm log2f call carrying !fpmath; the checks expect it to be left unchanged
+; CHECK-LABEL: define float @test_libm_log2_f32_fpmath
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call float @log2f(float [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret float [[LOG2]]
+;
+  %log2 = tail call float @log2f(float %arg), !fpmath !0
+  ret float %log2
+}
+
+define double @test_libm_log2_f64(double %arg) { ; plain libm log2 (double) call, no flags or metadata; the checks expect it to be left unchanged
+; CHECK-LABEL: define double @test_libm_log2_f64
+; CHECK-SAME: (double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call double @log2(double [[ARG]])
+; CHECK-NEXT:    ret double [[LOG2]]
+;
+  %log2 = tail call double @log2(double %arg)
+  ret double %log2
+}
+
+define double @test_libm_log2_f64_fast(double %arg) { ; libm log2 (double) call with the fast flag; the checks expect it to be left unchanged
+; CHECK-LABEL: define double @test_libm_log2_f64_fast
+; CHECK-SAME: (double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call fast double @log2(double [[ARG]])
+; CHECK-NEXT:    ret double [[LOG2]]
+;
+  %log2 = tail call fast double @log2(double %arg)
+  ret double %log2
+}
+
+define double @test_libm_log2_f64_fpmath(double %arg) { ; libm log2 (double) call carrying !fpmath; the checks expect it to be left unchanged
+; CHECK-LABEL: define double @test_libm_log2_f64_fpmath
+; CHECK-SAME: (double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call double @log2(double [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret double [[LOG2]]
+;
+  %log2 = tail call double @log2(double %arg), !fpmath !0
+  ret double %log2
+}
+
+define float @test_log2_f32_fast_noinline(float %arg) { ; fast + !fpmath call whose call site carries noinline (#3); the checks expect it to be left unchanged
+; CHECK-LABEL: define float @test_log2_f32_fast_noinline
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call fast float @_Z4log2f(float [[ARG]]) #[[ATTR6:[0-9]+]], !fpmath !0
+; CHECK-NEXT:    ret float [[LOG2]]
+;
+  %log2 = tail call fast float @_Z4log2f(float %arg) #3, !fpmath !0
+  ret float %log2
+}
+
+define float @test_log2_f32_fast_optsize(float %arg) #4 { ; fast + !fpmath call inside an optsize (#4) function; the checks expect it to be left unchanged
+; CHECK-LABEL: define float @test_log2_f32_fast_optsize
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call fast float @_Z4log2f(float [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret float [[LOG2]]
+;
+  %log2 = tail call fast float @_Z4log2f(float %arg), !fpmath !0
+  ret float %log2
+}
+
+define float @test_log2_f32_fast_minsize(float %arg) #5 { ; fast + !fpmath call inside a minsize (#5) function; the checks expect it to be left unchanged
+; CHECK-LABEL: define float @test_log2_f32_fast_minsize
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR3:[0-9]+]] {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call fast float @_Z4log2f(float [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret float [[LOG2]]
+;
+  %log2 = tail call fast float @_Z4log2f(float %arg), !fpmath !0
+  ret float %log2
+}
+
+define float @test_log2_f32_nsz_contract_optsize(float %arg) #4 { ; nsz contract + !fpmath call inside an optsize (#4) function; the checks expect it to be left unchanged
+; CHECK-LABEL: define float @test_log2_f32_nsz_contract_optsize
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call nsz contract float @_Z4log2f(float [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret float [[LOG2]]
+;
+  %log2 = tail call nsz contract float @_Z4log2f(float %arg), !fpmath !0
+  ret float %log2
+}
+
+define float @test_log2_f32_nsz_contract_minsize(float %arg) #5 { ; nsz contract + !fpmath call inside a minsize (#5) function; the checks expect it to be left unchanged
+; CHECK-LABEL: define float @test_log2_f32_nsz_contract_minsize
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call nsz contract float @_Z4log2f(float [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret float [[LOG2]]
+;
+  %log2 = tail call nsz contract float @_Z4log2f(float %arg), !fpmath !0
+  ret float %log2
+}
+
+define half @test_log2_f16_fast_minsize(half %arg) #5 { ; fast half call (no !fpmath) inside a minsize (#5) function; the checks expect it to be left unchanged
+; CHECK-LABEL: define half @test_log2_f16_fast_minsize
+; CHECK-SAME: (half [[ARG:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:    [[LOG2:%.*]] = tail call fast half @_Z4log2Dh(half [[ARG]])
+; CHECK-NEXT:    ret half [[LOG2]]
+;
+  %log2 = tail call fast half @_Z4log2Dh(half %arg)
+  ret half %log2
+}
+
+define float @test_log2_f32_strictfp(float %arg) #6 { ; nsz call with strictfp (#6) on both the function and the call site; the checks expect it to be left unchanged
+; CHECK-LABEL: define float @test_log2_f32_strictfp
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR4:[0-9]+]] {
+; CHECK-NEXT:    [[LOG:%.*]] = tail call nsz float @_Z4log2f(float [[ARG]]) #[[ATTR4]]
+; CHECK-NEXT:    ret float [[LOG]]
+;
+  %log = tail call nsz float @_Z4log2f(float %arg) #6
+  ret float %log
+}
+
+attributes #0 = { nobuiltin }
+attributes #1 = { "no-builtins" }
+attributes #2 = { nounwind memory(none) }
+attributes #3 = { noinline }
+attributes #4 = { optsize }
+attributes #5 = { minsize }
+attributes #6 = { strictfp }
+
+!0 = !{float 3.000000e+00}
+!1 = !{i32 1234}
Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-mad.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-mad.ll
@@ -0,0 +1,258 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
+
+declare float @_Z3madfff(float, float, float)
+declare <2 x float> @_Z3madDv2_fS_S_(<2 x float>, <2 x float>, <2 x float>)
+declare <3 x float> @_Z3madDv3_fS_S_(<3 x float>, <3 x float>, <3 x float>)
+declare <4 x float> @_Z3madDv4_fS_S_(<4 x float>, <4 x float>, <4 x float>)
+declare <8 x float> @_Z3madDv8_fS_S_(<8 x float>, <8 x float>, <8 x float>)
+declare <16 x float> @_Z3madDv16_fS_S_(<16 x float>, <16 x float>, <16 x float>)
+declare double @_Z3madddd(double, double, double)
+declare <2 x double> @_Z3madDv2_dS_S_(<2 x double>, <2 x double>, <2 x double>)
+declare <3 x double> @_Z3madDv3_dS_S_(<3 x double>, <3 x double>, <3 x double>)
+declare <4 x double> @_Z3madDv4_dS_S_(<4 x double>, <4 x double>, <4 x double>)
+declare <8 x double> @_Z3madDv8_dS_S_(<8 x double>, <8 x double>, <8 x double>)
+declare <16 x double> @_Z3madDv16_dS_S_(<16 x double>, <16 x double>, <16 x double>)
+declare half @_Z3madDhDhDh(half, half, half)
+declare <2 x half> @_Z3madDv2_DhS_S_(<2 x half>, <2 x half>, <2 x half>)
+declare <3 x half> @_Z3madDv3_DhS_S_(<3 x half>, <3 x half>, <3 x half>)
+declare <4 x half> @_Z3madDv4_DhS_S_(<4 x half>, <4 x half>, <4 x half>)
+declare <8 x half> @_Z3madDv8_DhS_S_(<8 x half>, <8 x half>, <8 x half>)
+declare <16 x half> @_Z3madDv16_DhS_S_(<16 x half>, <16 x half>, <16 x half>)
+
+define float @test_mad_f32(float %x, float %y, float %z) {
+; CHECK-LABEL: define float @test_mad_f32
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) {
+; CHECK-NEXT:    [[MAD:%.*]] = tail call float @_Z3madfff(float [[X]], float [[Y]], float [[Z]])
+; CHECK-NEXT:    ret float [[MAD]]
+; Scalar float mad, no fast-math flags: the assertions above expect the call to come out unchanged.
+  %mad = tail call float @_Z3madfff(float %x, float %y, float %z)
+  ret float %mad
+}
+
+define <2 x float> @test_mad_v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
+; CHECK-LABEL: define <2 x float> @test_mad_v2f32
+; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) {
+; CHECK-NEXT:    [[MAD:%.*]] = tail call <2 x float> @_Z3madDv2_fS_S_(<2 x float> [[X]], <2 x float> [[Y]], <2 x float> [[Z]])
+; CHECK-NEXT:    ret <2 x float> [[MAD]]
+; 2-element float vector mad, no fast-math flags: the assertions above expect the call to come out unchanged.
+  %mad = tail call <2 x float> @_Z3madDv2_fS_S_(<2 x float> %x, <2 x float> %y, <2 x float> %z)
+  ret <2 x float> %mad
+}
+
+define <3 x float> @test_mad_v3f32(<3 x float> %x, <3 x float> %y, <3 x float> %z) {
+; CHECK-LABEL: define <3 x float> @test_mad_v3f32
+; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]], <3 x float> [[Z:%.*]]) {
+; CHECK-NEXT:    [[MAD:%.*]] = tail call <3 x float> @_Z3madDv3_fS_S_(<3 x float> [[X]], <3 x float> [[Y]], <3 x float> [[Z]])
+; CHECK-NEXT:    ret <3 x float> [[MAD]]
+; 3-element float vector mad, no fast-math flags: the assertions above expect the call to come out unchanged.
+  %mad = tail call <3 x float> @_Z3madDv3_fS_S_(<3 x float> %x, <3 x float> %y, <3 x float> %z)
+  ret <3 x float> %mad
+}
+
+define <4 x float> @test_mad_v4f32(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
+; CHECK-LABEL: define <4 x float> @test_mad_v4f32
+; CHECK-SAME: (<4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) {
+; CHECK-NEXT:    [[MAD:%.*]] = tail call <4 x float> @_Z3madDv4_fS_S_(<4 x float> [[X]], <4 x float> [[Y]], <4 x float> [[Z]])
+; CHECK-NEXT:    ret <4 x float> [[MAD]]
+; 4-element float vector mad, no fast-math flags: the assertions above expect the call to come out unchanged.
+  %mad = tail call <4 x float> @_Z3madDv4_fS_S_(<4 x float> %x, <4 x float> %y, <4 x float> %z)
+  ret <4 x float> %mad
+}
+
+define <8 x float> @test_mad_v8f32(<8 x float> %x, <8 x float> %y, <8 x float> %z) {
+; CHECK-LABEL: define <8 x float> @test_mad_v8f32
+; CHECK-SAME: (<8 x float> [[X:%.*]], <8 x float> [[Y:%.*]], <8 x float> [[Z:%.*]]) {
+; CHECK-NEXT:    [[MAD:%.*]] = tail call <8 x float> @_Z3madDv8_fS_S_(<8 x float> [[X]], <8 x float> [[Y]], <8 x float> [[Z]])
+; CHECK-NEXT:    ret <8 x float> [[MAD]]
+; 8-element float vector mad, no fast-math flags: the assertions above expect the call to come out unchanged.
+  %mad = tail call <8 x float> @_Z3madDv8_fS_S_(<8 x float> %x, <8 x float> %y, <8 x float> %z)
+  ret <8 x float> %mad
+}
+
+define <16 x float> @test_mad_v16f32(<16 x float> %x, <16 x float> %y, <16 x float> %z) {
+; CHECK-LABEL: define <16 x float> @test_mad_v16f32
+; CHECK-SAME: (<16 x float> [[X:%.*]], <16 x float> [[Y:%.*]], <16 x float> [[Z:%.*]]) {
+; CHECK-NEXT:    [[MAD:%.*]] = tail call <16 x float> @_Z3madDv16_fS_S_(<16 x float> [[X]], <16 x float> [[Y]], <16 x float> [[Z]])
+; CHECK-NEXT:    ret <16 x float> [[MAD]]
+; 16-element float vector mad, no fast-math flags: the assertions above expect the call to come out unchanged.
+  %mad = tail call <16 x float> @_Z3madDv16_fS_S_(<16 x float> %x, <16 x float> %y, <16 x float> %z)
+  ret <16 x float> %mad
+}
+
+define double @test_mad_f64(double %x, double %y, double %z) {
+; CHECK-LABEL: define double @test_mad_f64
+; CHECK-SAME: (double [[X:%.*]], double [[Y:%.*]], double [[Z:%.*]]) {
+; CHECK-NEXT:    [[MAD:%.*]] = tail call double @_Z3madddd(double [[X]], double [[Y]], double [[Z]])
+; CHECK-NEXT:    ret double [[MAD]]
+; Scalar double mad, no fast-math flags: the assertions above expect the call to come out unchanged.
+  %mad = tail call double @_Z3madddd(double %x, double %y, double %z)
+  ret double %mad
+}
+
+define <2 x double> @test_mad_v2f64(<2 x double> %x, <2 x double> %y, <2 x double> %z) {
+; CHECK-LABEL: define <2 x double> @test_mad_v2f64
+; CHECK-SAME: (<2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) {
+; CHECK-NEXT:    [[MAD:%.*]] = tail call <2 x double> @_Z3madDv2_dS_S_(<2 x double> [[X]], <2 x double> [[Y]], <2 x double> [[Z]])
+; CHECK-NEXT:    ret <2 x double> [[MAD]]
+; 2-element double vector mad, no fast-math flags: the assertions above expect the call to come out unchanged.
+  %mad = tail call <2 x double> @_Z3madDv2_dS_S_(<2 x double> %x, <2 x double> %y, <2 x double> %z)
+  ret <2 x double> %mad
+}
+
+define <3 x double> @test_mad_v3f64(<3 x double> %x, <3 x double> %y, <3 x double> %z) {
+; CHECK-LABEL: define <3 x double> @test_mad_v3f64
+; CHECK-SAME: (<3 x double> [[X:%.*]], <3 x double> [[Y:%.*]], <3 x double> [[Z:%.*]]) {
+; CHECK-NEXT:    [[MAD:%.*]] = tail call <3 x double> @_Z3madDv3_dS_S_(<3 x double> [[X]], <3 x double> [[Y]], <3 x double> [[Z]])
+; CHECK-NEXT:    ret <3 x double> [[MAD]]
+; 3-element double vector mad, no fast-math flags: the assertions above expect the call to come out unchanged.
+  %mad = tail call <3 x double> @_Z3madDv3_dS_S_(<3 x double> %x, <3 x double> %y, <3 x double> %z)
+  ret <3 x double> %mad
+}
+
+define <4 x double> @test_mad_v4f64(<4 x double> %x, <4 x double> %y, <4 x double> %z) {
+; CHECK-LABEL: define <4 x double> @test_mad_v4f64
+; CHECK-SAME: (<4 x double> [[X:%.*]], <4 x double> [[Y:%.*]], <4 x double> [[Z:%.*]]) {
+; CHECK-NEXT:    [[MAD:%.*]] = tail call <4 x double> @_Z3madDv4_dS_S_(<4 x double> [[X]], <4 x double> [[Y]], <4 x double> [[Z]])
+; CHECK-NEXT:    ret <4 x double> [[MAD]]
+; 4-element double vector mad, no fast-math flags: the assertions above expect the call to come out unchanged.
+  %mad = tail call <4 x double> @_Z3madDv4_dS_S_(<4 x double> %x, <4 x double> %y, <4 x double> %z)
+  ret <4 x double> %mad
+}
+
+define <8 x double> @test_mad_v8f64(<8 x double> %x, <8 x double> %y, <8 x double> %z) {
+; CHECK-LABEL: define <8 x double> @test_mad_v8f64
+; CHECK-SAME: (<8 x double> [[X:%.*]], <8 x double> [[Y:%.*]], <8 x double> [[Z:%.*]]) {
+; CHECK-NEXT:    [[MAD:%.*]] = tail call <8 x double> @_Z3madDv8_dS_S_(<8 x double> [[X]], <8 x double> [[Y]], <8 x double> [[Z]])
+; CHECK-NEXT:    ret <8 x double> [[MAD]]
+; 8-element double vector mad, no fast-math flags: the assertions above expect the call to come out unchanged.
+  %mad = tail call <8 x double> @_Z3madDv8_dS_S_(<8 x double> %x, <8 x double> %y, <8 x double> %z)
+  ret <8 x double> %mad
+}
+
+define <16 x double> @test_mad_v16f64(<16 x double> %x, <16 x double> %y, <16 x double> %z) {
+; CHECK-LABEL: define <16 x double> @test_mad_v16f64
+; CHECK-SAME: (<16 x double> [[X:%.*]], <16 x double> [[Y:%.*]], <16 x double> [[Z:%.*]]) {
+; CHECK-NEXT:    [[MAD:%.*]] = tail call <16 x double> @_Z3madDv16_dS_S_(<16 x double> [[X]], <16 x double> [[Y]], <16 x double> [[Z]])
+; CHECK-NEXT:    ret <16 x double> [[MAD]]
+; 16-element double vector mad, no fast-math flags: the assertions above expect the call to come out unchanged.
+  %mad = tail call <16 x double> @_Z3madDv16_dS_S_(<16 x double> %x, <16 x double> %y, <16 x double> %z)
+  ret <16 x double> %mad
+}
+
+define half @test_mad_f16(half %x, half %y, half %z) {
+; CHECK-LABEL: define half @test_mad_f16
+; CHECK-SAME: (half [[X:%.*]], half [[Y:%.*]], half [[Z:%.*]]) {
+; CHECK-NEXT:    [[MAD:%.*]] = tail call half @_Z3madDhDhDh(half [[X]], half [[Y]], half [[Z]])
+; CHECK-NEXT:    ret half [[MAD]]
+; Scalar half mad, no fast-math flags: the assertions above expect the call to come out unchanged.
+  %mad = tail call half @_Z3madDhDhDh(half %x, half %y, half %z)
+  ret half %mad
+}
+
+define <2 x half> @test_mad_v2f16(<2 x half> %x, <2 x half> %y, <2 x half> %z) {
+; CHECK-LABEL: define <2 x half> @test_mad_v2f16
+; CHECK-SAME: (<2 x half> [[X:%.*]], <2 x half> [[Y:%.*]], <2 x half> [[Z:%.*]]) {
+; CHECK-NEXT:    [[MAD:%.*]] = tail call <2 x half> @_Z3madDv2_DhS_S_(<2 x half> [[X]], <2 x half> [[Y]], <2 x half> [[Z]])
+; CHECK-NEXT:    ret <2 x half> [[MAD]]
+; 2-element half vector mad, no fast-math flags: the assertions above expect the call to come out unchanged.
+  %mad = tail call <2 x half> @_Z3madDv2_DhS_S_(<2 x half> %x, <2 x half> %y, <2 x half> %z)
+  ret <2 x half> %mad
+}
+
+define <3 x half> @test_mad_v3f16(<3 x half> %x, <3 x half> %y, <3 x half> %z) {
+; CHECK-LABEL: define <3 x half> @test_mad_v3f16
+; CHECK-SAME: (<3 x half> [[X:%.*]], <3 x half> [[Y:%.*]], <3 x half> [[Z:%.*]]) {
+; CHECK-NEXT:    [[MAD:%.*]] = tail call <3 x half> @_Z3madDv3_DhS_S_(<3 x half> [[X]], <3 x half> [[Y]], <3 x half> [[Z]])
+; CHECK-NEXT:    ret <3 x half> [[MAD]]
+; 3-element half vector mad, no fast-math flags: the assertions above expect the call to come out unchanged.
+  %mad = tail call <3 x half> @_Z3madDv3_DhS_S_(<3 x half> %x, <3 x half> %y, <3 x half> %z)
+  ret <3 x half> %mad
+}
+
+define <4 x half> @test_mad_v4f16(<4 x half> %x, <4 x half> %y, <4 x half> %z) {
+; CHECK-LABEL: define <4 x half> @test_mad_v4f16
+; CHECK-SAME: (<4 x half> [[X:%.*]], <4 x half> [[Y:%.*]], <4 x half> [[Z:%.*]]) {
+; CHECK-NEXT:    [[MAD:%.*]] = tail call <4 x half> @_Z3madDv4_DhS_S_(<4 x half> [[X]], <4 x half> [[Y]], <4 x half> [[Z]])
+; CHECK-NEXT:    ret <4 x half> [[MAD]]
+; 4-element half vector mad, no fast-math flags: the assertions above expect the call to come out unchanged.
+  %mad = tail call <4 x half> @_Z3madDv4_DhS_S_(<4 x half> %x, <4 x half> %y, <4 x half> %z)
+  ret <4 x half> %mad
+}
+
+define <8 x half> @test_mad_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z) {
+; CHECK-LABEL: define <8 x half> @test_mad_v8f16
+; CHECK-SAME: (<8 x half> [[X:%.*]], <8 x half> [[Y:%.*]], <8 x half> [[Z:%.*]]) {
+; CHECK-NEXT:    [[MAD:%.*]] = tail call <8 x half> @_Z3madDv8_DhS_S_(<8 x half> [[X]], <8 x half> [[Y]], <8 x half> [[Z]])
+; CHECK-NEXT:    ret <8 x half> [[MAD]]
+; 8-element half vector mad, no fast-math flags: the assertions above expect the call to come out unchanged.
+  %mad = tail call <8 x half> @_Z3madDv8_DhS_S_(<8 x half> %x, <8 x half> %y, <8 x half> %z)
+  ret <8 x half> %mad
+}
+
+define <16 x half> @test_mad_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z) {
+; CHECK-LABEL: define <16 x half> @test_mad_v16f16
+; CHECK-SAME: (<16 x half> [[X:%.*]], <16 x half> [[Y:%.*]], <16 x half> [[Z:%.*]]) {
+; CHECK-NEXT:    [[MAD:%.*]] = tail call <16 x half> @_Z3madDv16_DhS_S_(<16 x half> [[X]], <16 x half> [[Y]], <16 x half> [[Z]])
+; CHECK-NEXT:    ret <16 x half> [[MAD]]
+; 16-element half vector mad, no fast-math flags: the assertions above expect the call to come out unchanged.
+  %mad = tail call <16 x half> @_Z3madDv16_DhS_S_(<16 x half> %x, <16 x half> %y, <16 x half> %z)
+  ret <16 x half> %mad
+}
+
+define float @test_mad_f32_fast(float %x, float %y, float %z) {
+; CHECK-LABEL: define float @test_mad_f32_fast
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) {
+; CHECK-NEXT:    [[MAD:%.*]] = tail call fast float @_Z3madfff(float [[X]], float [[Y]], float [[Z]])
+; CHECK-NEXT:    ret float [[MAD]]
+; Scalar float mad with the fast flag on the call: the assertions above expect the call, flag included, to come out unchanged.
+  %mad = tail call fast float @_Z3madfff(float %x, float %y, float %z)
+  ret float %mad
+}
+
+define float @test_mad_f32_noinline(float %x, float %y, float %z) {
+; CHECK-LABEL: define float @test_mad_f32_noinline
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) {
+; CHECK-NEXT:    [[MAD:%.*]] = tail call fast float @_Z3madfff(float [[X]], float [[Y]], float [[Z]]) #[[ATTR2:[0-9]+]]
+; CHECK-NEXT:    ret float [[MAD]]
+; noinline (#1) is attached to the call site only, not to this function; the fast mad call is expected to come out unchanged.
+  %mad = tail call fast float @_Z3madfff(float %x, float %y, float %z) #1
+  ret float %mad
+}
+
+define float @test_mad_f32_fast_minsize(float %x, float %y, float %z) #0 {
+; CHECK-LABEL: define float @test_mad_f32_fast_minsize
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[MAD:%.*]] = tail call fast float @_Z3madfff(float [[X]], float [[Y]], float [[Z]])
+; CHECK-NEXT:    ret float [[MAD]]
+; minsize caller (#0): the fast float mad call is expected to come out unchanged.
+  %mad = tail call fast float @_Z3madfff(float %x, float %y, float %z)
+  ret float %mad
+}
+
+define float @test_mad_f32_fast_strictfp(float %x, float %y, float %z) #2 {
+; CHECK-LABEL: define float @test_mad_f32_fast_strictfp
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:    [[MAD:%.*]] = tail call nnan nsz float @_Z3madfff(float [[X]], float [[Y]], float [[Z]]) #[[ATTR1]]
+; CHECK-NEXT:    ret float [[MAD]]
+; strictfp (#2) on function and call site; input flags "nsz nnan" are expected back as "nnan nsz". NOTE(review): name says "fast" but the call is only nnan nsz - confirm intended.
+  %mad = tail call nsz nnan float @_Z3madfff(float %x, float %y, float %z) #2
+  ret float %mad
+}
+
+define float @test_mad_f32_fast_nobuiltin(float %x, float %y, float %z) {
+; CHECK-LABEL: define float @test_mad_f32_fast_nobuiltin
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) {
+; CHECK-NEXT:    [[MAD:%.*]] = tail call fast float @_Z3madfff(float [[X]], float [[Y]], float [[Z]]) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT:    ret float [[MAD]]
+; nobuiltin (#3) is attached to the call site; the fast float mad call is expected to come out unchanged.
+  %mad = tail call fast float @_Z3madfff(float %x, float %y, float %z) #3
+  ret float %mad
+}
+
+attributes #0 = { minsize } ; function attribute of test_mad_f32_fast_minsize
+attributes #1 = { noinline } ; call-site attribute in test_mad_f32_noinline
+attributes #2 = { strictfp } ; function and call-site attribute in test_mad_f32_fast_strictfp
+attributes #3 = { nobuiltin } ; call-site attribute in test_mad_f32_fast_nobuiltin
Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rint.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rint.ll
@@ -0,0 +1,327 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
+
+declare float @_Z4rintf(float)
+declare <2 x float> @_Z4rintDv2_f(<2 x float>)
+declare <3 x float> @_Z4rintDv3_f(<3 x float>)
+declare <4 x float> @_Z4rintDv4_f(<4 x float>)
+declare <8 x float> @_Z4rintDv8_f(<8 x float>)
+declare <16 x float> @_Z4rintDv16_f(<16 x float>)
+
+declare double @_Z4rintd(double)
+declare <2 x double> @_Z4rintDv2_d(<2 x double>)
+declare <3 x double> @_Z4rintDv3_d(<3 x double>)
+declare <4 x double> @_Z4rintDv4_d(<4 x double>)
+declare <8 x double> @_Z4rintDv8_d(<8 x double>)
+declare <16 x double> @_Z4rintDv16_d(<16 x double>)
+
+declare half @_Z4rintDh(half)
+declare <2 x half> @_Z4rintDv2_Dh(<2 x half>)
+declare <3 x half> @_Z4rintDv3_Dh(<3 x half>)
+declare <4 x half> @_Z4rintDv4_Dh(<4 x half>)
+declare <8 x half> @_Z4rintDv8_Dh(<8 x half>)
+declare <16 x half> @_Z4rintDv16_Dh(<16 x half>)
+
+define float @test_rint_f32(float %arg) {
+; CHECK-LABEL: define float @test_rint_f32
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[RINT:%.*]] = tail call float @_Z4rintf(float [[ARG]])
+; CHECK-NEXT:    ret float [[RINT]]
+; Scalar float rint, no fast-math flags: the assertions above expect the call to come out unchanged.
+  %rint = tail call float @_Z4rintf(float %arg)
+  ret float %rint
+}
+
+define <2 x float> @test_rint_v2f32(<2 x float> %arg) {
+; CHECK-LABEL: define <2 x float> @test_rint_v2f32
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[RINT:%.*]] = tail call <2 x float> @_Z4rintDv2_f(<2 x float> [[ARG]])
+; CHECK-NEXT:    ret <2 x float> [[RINT]]
+; 2-element float vector rint, no fast-math flags: the assertions above expect the call to come out unchanged.
+  %rint = tail call <2 x float> @_Z4rintDv2_f(<2 x float> %arg)
+  ret <2 x float> %rint
+}
+
+define <3 x float> @test_rint_v3f32(<3 x float> %arg) {
+; CHECK-LABEL: define <3 x float> @test_rint_v3f32
+; CHECK-SAME: (<3 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[RINT:%.*]] = tail call <3 x float> @_Z4rintDv3_f(<3 x float> [[ARG]])
+; CHECK-NEXT:    ret <3 x float> [[RINT]]
+; 3-element float vector rint, no fast-math flags: the assertions above expect the call to come out unchanged.
+  %rint = tail call <3 x float> @_Z4rintDv3_f(<3 x float> %arg)
+  ret <3 x float> %rint
+}
+
+define <4 x float> @test_rint_v4f32(<4 x float> %arg) {
+; CHECK-LABEL: define <4 x float> @test_rint_v4f32
+; CHECK-SAME: (<4 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[RINT:%.*]] = tail call <4 x float> @_Z4rintDv4_f(<4 x float> [[ARG]])
+; CHECK-NEXT:    ret <4 x float> [[RINT]]
+; 4-element float vector rint, no fast-math flags: the assertions above expect the call to come out unchanged.
+  %rint = tail call <4 x float> @_Z4rintDv4_f(<4 x float> %arg)
+  ret <4 x float> %rint
+}
+
+define <8 x float> @test_rint_v8f32(<8 x float> %arg) {
+; CHECK-LABEL: define <8 x float> @test_rint_v8f32
+; CHECK-SAME: (<8 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[RINT:%.*]] = tail call <8 x float> @_Z4rintDv8_f(<8 x float> [[ARG]])
+; CHECK-NEXT:    ret <8 x float> [[RINT]]
+; 8-element float vector rint, no fast-math flags: the assertions above expect the call to come out unchanged.
+  %rint = tail call <8 x float> @_Z4rintDv8_f(<8 x float> %arg)
+  ret <8 x float> %rint
+}
+
+define <16 x float> @test_rint_v16f32(<16 x float> %arg) {
+; CHECK-LABEL: define <16 x float> @test_rint_v16f32
+; CHECK-SAME: (<16 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[RINT:%.*]] = tail call <16 x float> @_Z4rintDv16_f(<16 x float> [[ARG]])
+; CHECK-NEXT:    ret <16 x float> [[RINT]]
+; 16-element float vector rint, no fast-math flags: the assertions above expect the call to come out unchanged.
+  %rint = tail call <16 x float> @_Z4rintDv16_f(<16 x float> %arg)
+  ret <16 x float> %rint
+}
+
+define double @test_rint_f64(double %arg) {
+; CHECK-LABEL: define double @test_rint_f64
+; CHECK-SAME: (double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[RINT:%.*]] = tail call double @_Z4rintd(double [[ARG]])
+; CHECK-NEXT:    ret double [[RINT]]
+; Scalar double rint, no fast-math flags: the assertions above expect the call to come out unchanged.
+  %rint = tail call double @_Z4rintd(double %arg)
+  ret double %rint
+}
+
+define <2 x double> @test_rint_v2f64(<2 x double> %arg) {
+; CHECK-LABEL: define <2 x double> @test_rint_v2f64
+; CHECK-SAME: (<2 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[RINT:%.*]] = tail call <2 x double> @_Z4rintDv2_d(<2 x double> [[ARG]])
+; CHECK-NEXT:    ret <2 x double> [[RINT]]
+; 2-element double vector rint, no fast-math flags: the assertions above expect the call to come out unchanged.
+  %rint = tail call <2 x double> @_Z4rintDv2_d(<2 x double> %arg)
+  ret <2 x double> %rint
+}
+
+define <3 x double> @test_rint_v3f64(<3 x double> %arg) {
+; CHECK-LABEL: define <3 x double> @test_rint_v3f64
+; CHECK-SAME: (<3 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[RINT:%.*]] = tail call <3 x double> @_Z4rintDv3_d(<3 x double> [[ARG]])
+; CHECK-NEXT:    ret <3 x double> [[RINT]]
+; 3-element double vector rint, no fast-math flags: the assertions above expect the call to come out unchanged.
+  %rint = tail call <3 x double> @_Z4rintDv3_d(<3 x double> %arg)
+  ret <3 x double> %rint
+}
+
+define <4 x double> @test_rint_v4f64(<4 x double> %arg) {
+; CHECK-LABEL: define <4 x double> @test_rint_v4f64
+; CHECK-SAME: (<4 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[RINT:%.*]] = tail call <4 x double> @_Z4rintDv4_d(<4 x double> [[ARG]])
+; CHECK-NEXT:    ret <4 x double> [[RINT]]
+; 4-element double vector rint, no fast-math flags: the assertions above expect the call to come out unchanged.
+  %rint = tail call <4 x double> @_Z4rintDv4_d(<4 x double> %arg)
+  ret <4 x double> %rint
+}
+
+define <8 x double> @test_rint_v8f64(<8 x double> %arg) {
+; CHECK-LABEL: define <8 x double> @test_rint_v8f64
+; CHECK-SAME: (<8 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[RINT:%.*]] = tail call <8 x double> @_Z4rintDv8_d(<8 x double> [[ARG]])
+; CHECK-NEXT:    ret <8 x double> [[RINT]]
+; 8-element double vector rint, no fast-math flags: the assertions above expect the call to come out unchanged.
+  %rint = tail call <8 x double> @_Z4rintDv8_d(<8 x double> %arg)
+  ret <8 x double> %rint
+}
+
+define <16 x double> @test_rint_v16f64(<16 x double> %arg) {
+; CHECK-LABEL: define <16 x double> @test_rint_v16f64
+; CHECK-SAME: (<16 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[RINT:%.*]] = tail call <16 x double> @_Z4rintDv16_d(<16 x double> [[ARG]])
+; CHECK-NEXT:    ret <16 x double> [[RINT]]
+; 16-element double vector rint, no fast-math flags: the assertions above expect the call to come out unchanged.
+  %rint = tail call <16 x double> @_Z4rintDv16_d(<16 x double> %arg)
+  ret <16 x double> %rint
+}
+
+define half @test_rint_f16(half %arg) {
+; CHECK-LABEL: define half @test_rint_f16
+; CHECK-SAME: (half [[ARG:%.*]]) {
+; CHECK-NEXT:    [[RINT:%.*]] = tail call half @_Z4rintDh(half [[ARG]])
+; CHECK-NEXT:    ret half [[RINT]]
+; Scalar half rint, no fast-math flags: the assertions above expect the call to come out unchanged.
+  %rint = tail call half @_Z4rintDh(half %arg)
+  ret half %rint
+}
+
+define <2 x half> @test_rint_v2f16(<2 x half> %arg) {
+; CHECK-LABEL: define <2 x half> @test_rint_v2f16
+; CHECK-SAME: (<2 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[RINT:%.*]] = tail call <2 x half> @_Z4rintDv2_Dh(<2 x half> [[ARG]])
+; CHECK-NEXT:    ret <2 x half> [[RINT]]
+; 2-element half vector rint, no fast-math flags: the assertions above expect the call to come out unchanged.
+  %rint = tail call <2 x half> @_Z4rintDv2_Dh(<2 x half> %arg)
+  ret <2 x half> %rint
+}
+
+define <3 x half> @test_rint_v3f16(<3 x half> %arg) {
+; CHECK-LABEL: define <3 x half> @test_rint_v3f16
+; CHECK-SAME: (<3 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[RINT:%.*]] = tail call <3 x half> @_Z4rintDv3_Dh(<3 x half> [[ARG]])
+; CHECK-NEXT:    ret <3 x half> [[RINT]]
+; 3-element half vector rint, no fast-math flags: the assertions above expect the call to come out unchanged.
+  %rint = tail call <3 x half> @_Z4rintDv3_Dh(<3 x half> %arg)
+  ret <3 x half> %rint
+}
+
+define <4 x half> @test_rint_v4f16(<4 x half> %arg) {
+; CHECK-LABEL: define <4 x half> @test_rint_v4f16
+; CHECK-SAME: (<4 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[RINT:%.*]] = tail call <4 x half> @_Z4rintDv4_Dh(<4 x half> [[ARG]])
+; CHECK-NEXT:    ret <4 x half> [[RINT]]
+; 4-element half vector rint, no fast-math flags: the assertions above expect the call to come out unchanged.
+  %rint = tail call <4 x half> @_Z4rintDv4_Dh(<4 x half> %arg)
+  ret <4 x half> %rint
+}
+
+define <8 x half> @test_rint_v8f16(<8 x half> %arg) {
+; CHECK-LABEL: define <8 x half> @test_rint_v8f16
+; CHECK-SAME: (<8 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[RINT:%.*]] = tail call <8 x half> @_Z4rintDv8_Dh(<8 x half> [[ARG]])
+; CHECK-NEXT:    ret <8 x half> [[RINT]]
+; 8-element half vector rint, no fast-math flags: the assertions above expect the call to come out unchanged.
+  %rint = tail call <8 x half> @_Z4rintDv8_Dh(<8 x half> %arg)
+  ret <8 x half> %rint
+}
+
+define <16 x half> @test_rint_v16f16(<16 x half> %arg) {
+; CHECK-LABEL: define <16 x half> @test_rint_v16f16
+; CHECK-SAME: (<16 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[RINT:%.*]] = tail call <16 x half> @_Z4rintDv16_Dh(<16 x half> [[ARG]])
+; CHECK-NEXT:    ret <16 x half> [[RINT]]
+; 16-element half vector rint, no fast-math flags: the assertions above expect the call to come out unchanged.
+  %rint = tail call <16 x half> @_Z4rintDv16_Dh(<16 x half> %arg)
+  ret <16 x half> %rint
+}
+
+define float @test_rint_f32_nobuiltin_callsite(float %arg) {
+; CHECK-LABEL: define float @test_rint_f32_nobuiltin_callsite
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[RINT:%.*]] = tail call float @_Z4rintf(float [[ARG]]) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT:    ret float [[RINT]]
+; nobuiltin (#0) on the call site: the scalar float rint call is expected to come out unchanged, attribute included.
+  %rint = tail call float @_Z4rintf(float %arg) #0
+  ret float %rint
+}
+
+define <2 x float> @test_rint_v2f32_nobuiltin_callsite(<2 x float> %arg) {
+; CHECK-LABEL: define <2 x float> @test_rint_v2f32_nobuiltin_callsite
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[RINT:%.*]] = tail call <2 x float> @_Z4rintDv2_f(<2 x float> [[ARG]]) #[[ATTR3]]
+; CHECK-NEXT:    ret <2 x float> [[RINT]]
+; nobuiltin (#0) on the call site: the 2-element float vector rint call is expected to come out unchanged, attribute included.
+  %rint = tail call <2 x float> @_Z4rintDv2_f(<2 x float> %arg) #0
+  ret <2 x float> %rint
+}
+
+; The "no-builtins" function attribute should be ignored.
+define float @test_rint_f32_nobuiltins(float %arg) #1 {
+; CHECK-LABEL: define float @test_rint_f32_nobuiltins
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[RINT:%.*]] = tail call float @_Z4rintf(float [[ARG]]) #[[ATTR3]]
+; CHECK-NEXT:    ret float [[RINT]]
+; Function carries "no-builtins" (#1); call expected unchanged. NOTE(review): the call also has call-site nobuiltin (#0) - confirm this isolates the function attribute.
+  %rint = tail call float @_Z4rintf(float %arg) #0
+  ret float %rint
+}
+
+define <2 x float> @test_rint_v2f32_nobuiltins(<2 x float> %arg) #1 {
+; CHECK-LABEL: define <2 x float> @test_rint_v2f32_nobuiltins
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[RINT:%.*]] = tail call <2 x float> @_Z4rintDv2_f(<2 x float> [[ARG]]) #[[ATTR3]]
+; CHECK-NEXT:    ret <2 x float> [[RINT]]
+; Function carries "no-builtins" (#1); call expected unchanged. NOTE(review): the call also has call-site nobuiltin (#0) - confirm this isolates the function attribute.
+  %rint = tail call <2 x float> @_Z4rintDv2_f(<2 x float> %arg) #0
+  ret <2 x float> %rint
+}
+
+define float @test_rint_f32_preserve_flags(float %arg) {
+; CHECK-LABEL: define float @test_rint_f32_preserve_flags
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[RINT:%.*]] = tail call nnan ninf float @_Z4rintf(float [[ARG]])
+; CHECK-NEXT:    ret float [[RINT]]
+; Call carries nnan ninf: the assertions above expect both flags to still be present on the output call.
+  %rint = tail call nnan ninf float @_Z4rintf(float %arg)
+  ret float %rint
+}
+
+define <2 x float> @test_rint_v2f32_preserve_flags(<2 x float> %arg) {
+; CHECK-LABEL: define <2 x float> @test_rint_v2f32_preserve_flags
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[RINT:%.*]] = tail call nnan nsz contract <2 x float> @_Z4rintDv2_f(<2 x float> [[ARG]])
+; CHECK-NEXT:    ret <2 x float> [[RINT]]
+; Input flags are written "contract nsz nnan"; the assertions above expect the same three flags back, printed as "nnan nsz contract".
+  %rint = tail call contract nsz nnan <2 x float> @_Z4rintDv2_f(<2 x float> %arg)
+  ret <2 x float> %rint
+}
+
+define float @test_rint_f32_preserve_flags_md(float %arg) {
+; CHECK-LABEL: define float @test_rint_f32_preserve_flags_md
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[RINT:%.*]] = tail call nnan ninf float @_Z4rintf(float [[ARG]]), !foo !0
+; CHECK-NEXT:    ret float [[RINT]]
+; Call carries nnan ninf plus custom !foo !0 metadata: the assertions above expect flags and metadata to still be present on the output call.
+  %rint = tail call nnan ninf float @_Z4rintf(float %arg), !foo !0
+  ret float %rint
+}
+
+define <2 x float> @test_rint_v2f32_preserve_flags_md(<2 x float> %arg) {
+; CHECK-LABEL: define <2 x float> @test_rint_v2f32_preserve_flags_md
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[RINT:%.*]] = tail call nnan nsz contract <2 x float> @_Z4rintDv2_f(<2 x float> [[ARG]]), !foo !0
+; CHECK-NEXT:    ret <2 x float> [[RINT]]
+; Vector call with "contract nsz nnan" plus custom !foo !0 metadata: flags (printed "nnan nsz contract") and metadata are expected on the output call.
+  %rint = tail call contract nsz nnan <2 x float> @_Z4rintDv2_f(<2 x float> %arg), !foo !0
+  ret <2 x float> %rint
+}
+
+; Test the libm name, not a recognized OpenCL builtin.
+declare float @rintf(float) #2
+declare double @rint(double) #2
+
+define float @test_libm_rint_f32(float %arg) {
+; CHECK-LABEL: define float @test_libm_rint_f32
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[RINT:%.*]] = tail call float @rintf(float [[ARG]])
+; CHECK-NEXT:    ret float [[RINT]]
+; Calls the unmangled libm-style rintf (declared with #2): the assertions above expect the call to come out unchanged.
+  %rint = tail call float @rintf(float %arg)
+  ret float %rint
+}
+
+define double @test_libm_rint_f64(double %arg) {
+; CHECK-LABEL: define double @test_libm_rint_f64
+; CHECK-SAME: (double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[RINT:%.*]] = tail call double @rint(double [[ARG]])
+; CHECK-NEXT:    ret double [[RINT]]
+; Calls the unmangled libm-style rint (declared with #2): the assertions above expect the call to come out unchanged.
+  %rint = tail call double @rint(double %arg)
+  ret double %rint
+}
+
+define float @test_rint_f32_strictfp(float %arg) #3 {
+; CHECK-LABEL: define float @test_rint_f32_strictfp
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT:    [[RINT:%.*]] = tail call nnan float @_Z4rintf(float [[ARG]]) #[[ATTR2]]
+; CHECK-NEXT:    ret float [[RINT]]
+; strictfp (#3) on both the function and the call site: the nnan float rint call is expected to come out unchanged.
+  %rint = tail call nnan float @_Z4rintf(float %arg) #3
+  ret float %rint
+}
+
+attributes #0 = { nobuiltin } ; call-site attribute in the *_nobuiltin_callsite and *_nobuiltins tests
+attributes #1 = { "no-builtins" } ; function attribute of the *_nobuiltins tests
+attributes #2 = { nounwind memory(none) } ; on the libm rintf/rint declarations
+attributes #3 = { strictfp } ; function and call-site attribute in test_rint_f32_strictfp
+
+!0 = !{i32 1234} ; operand of the !foo metadata in the *_preserve_flags_md tests
Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-round.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-round.ll
@@ -0,0 +1,327 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
+
+declare float @_Z5roundf(float)
+declare <2 x float> @_Z5roundDv2_f(<2 x float>)
+declare <3 x float> @_Z5roundDv3_f(<3 x float>)
+declare <4 x float> @_Z5roundDv4_f(<4 x float>)
+declare <8 x float> @_Z5roundDv8_f(<8 x float>)
+declare <16 x float> @_Z5roundDv16_f(<16 x float>)
+
+declare double @_Z5roundd(double)
+declare <2 x double> @_Z5roundDv2_d(<2 x double>)
+declare <3 x double> @_Z5roundDv3_d(<3 x double>)
+declare <4 x double> @_Z5roundDv4_d(<4 x double>)
+declare <8 x double> @_Z5roundDv8_d(<8 x double>)
+declare <16 x double> @_Z5roundDv16_d(<16 x double>)
+
+declare half @_Z5roundDh(half)
+declare <2 x half> @_Z5roundDv2_Dh(<2 x half>)
+declare <3 x half> @_Z5roundDv3_Dh(<3 x half>)
+declare <4 x half> @_Z5roundDv4_Dh(<4 x half>)
+declare <8 x half> @_Z5roundDv8_Dh(<8 x half>)
+declare <16 x half> @_Z5roundDv16_Dh(<16 x half>)
+
+define float @test_rint_f32(float %arg) {
+; CHECK-LABEL: define float @test_rint_f32
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ROUND:%.*]] = tail call float @_Z5roundf(float [[ARG]])
+; CHECK-NEXT:    ret float [[ROUND]]
+;
+  %round = tail call float @_Z5roundf(float %arg)
+  ret float %round
+}
+
+define <2 x float> @test_rint_v2f32(<2 x float> %arg) {
+; CHECK-LABEL: define <2 x float> @test_rint_v2f32
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ROUND:%.*]] = tail call <2 x float> @_Z5roundDv2_f(<2 x float> [[ARG]])
+; CHECK-NEXT:    ret <2 x float> [[ROUND]]
+;
+  %round = tail call <2 x float> @_Z5roundDv2_f(<2 x float> %arg)
+  ret <2 x float> %round
+}
+
+define <3 x float> @test_rint_v3f32(<3 x float> %arg) {
+; CHECK-LABEL: define <3 x float> @test_rint_v3f32
+; CHECK-SAME: (<3 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ROUND:%.*]] = tail call <3 x float> @_Z5roundDv3_f(<3 x float> [[ARG]])
+; CHECK-NEXT:    ret <3 x float> [[ROUND]]
+;
+  %round = tail call <3 x float> @_Z5roundDv3_f(<3 x float> %arg)
+  ret <3 x float> %round
+}
+
+define <4 x float> @test_rint_v4f32(<4 x float> %arg) {
+; CHECK-LABEL: define <4 x float> @test_rint_v4f32
+; CHECK-SAME: (<4 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ROUND:%.*]] = tail call <4 x float> @_Z5roundDv4_f(<4 x float> [[ARG]])
+; CHECK-NEXT:    ret <4 x float> [[ROUND]]
+;
+  %round = tail call <4 x float> @_Z5roundDv4_f(<4 x float> %arg)
+  ret <4 x float> %round
+}
+
+define <8 x float> @test_rint_v8f32(<8 x float> %arg) {
+; CHECK-LABEL: define <8 x float> @test_rint_v8f32
+; CHECK-SAME: (<8 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ROUND:%.*]] = tail call <8 x float> @_Z5roundDv8_f(<8 x float> [[ARG]])
+; CHECK-NEXT:    ret <8 x float> [[ROUND]]
+;
+  %round = tail call <8 x float> @_Z5roundDv8_f(<8 x float> %arg)
+  ret <8 x float> %round
+}
+
+define <16 x float> @test_rint_v16f32(<16 x float> %arg) {
+; CHECK-LABEL: define <16 x float> @test_rint_v16f32
+; CHECK-SAME: (<16 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ROUND:%.*]] = tail call <16 x float> @_Z5roundDv16_f(<16 x float> [[ARG]])
+; CHECK-NEXT:    ret <16 x float> [[ROUND]]
+;
+  %round = tail call <16 x float> @_Z5roundDv16_f(<16 x float> %arg)
+  ret <16 x float> %round
+}
+
+define double @test_rint_f64(double %arg) {
+; CHECK-LABEL: define double @test_rint_f64
+; CHECK-SAME: (double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ROUND:%.*]] = tail call double @_Z5roundd(double [[ARG]])
+; CHECK-NEXT:    ret double [[ROUND]]
+;
+  %round = tail call double @_Z5roundd(double %arg)
+  ret double %round
+}
+
+define <2 x double> @test_rint_v2f64(<2 x double> %arg) {
+; CHECK-LABEL: define <2 x double> @test_rint_v2f64
+; CHECK-SAME: (<2 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ROUND:%.*]] = tail call <2 x double> @_Z5roundDv2_d(<2 x double> [[ARG]])
+; CHECK-NEXT:    ret <2 x double> [[ROUND]]
+;
+  %round = tail call <2 x double> @_Z5roundDv2_d(<2 x double> %arg)
+  ret <2 x double> %round
+}
+
+define <3 x double> @test_rint_v3f64(<3 x double> %arg) {
+; CHECK-LABEL: define <3 x double> @test_rint_v3f64
+; CHECK-SAME: (<3 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ROUND:%.*]] = tail call <3 x double> @_Z5roundDv3_d(<3 x double> [[ARG]])
+; CHECK-NEXT:    ret <3 x double> [[ROUND]]
+;
+  %round = tail call <3 x double> @_Z5roundDv3_d(<3 x double> %arg)
+  ret <3 x double> %round
+}
+
+define <4 x double> @test_rint_v4f64(<4 x double> %arg) {
+; CHECK-LABEL: define <4 x double> @test_rint_v4f64
+; CHECK-SAME: (<4 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ROUND:%.*]] = tail call <4 x double> @_Z5roundDv4_d(<4 x double> [[ARG]])
+; CHECK-NEXT:    ret <4 x double> [[ROUND]]
+;
+  %round = tail call <4 x double> @_Z5roundDv4_d(<4 x double> %arg)
+  ret <4 x double> %round
+}
+
+define <8 x double> @test_rint_v8f64(<8 x double> %arg) {
+; CHECK-LABEL: define <8 x double> @test_rint_v8f64
+; CHECK-SAME: (<8 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ROUND:%.*]] = tail call <8 x double> @_Z5roundDv8_d(<8 x double> [[ARG]])
+; CHECK-NEXT:    ret <8 x double> [[ROUND]]
+;
+  %round = tail call <8 x double> @_Z5roundDv8_d(<8 x double> %arg)
+  ret <8 x double> %round
+}
+
+define <16 x double> @test_rint_v16f64(<16 x double> %arg) {
+; CHECK-LABEL: define <16 x double> @test_rint_v16f64
+; CHECK-SAME: (<16 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ROUND:%.*]] = tail call <16 x double> @_Z5roundDv16_d(<16 x double> [[ARG]])
+; CHECK-NEXT:    ret <16 x double> [[ROUND]]
+;
+  %round = tail call <16 x double> @_Z5roundDv16_d(<16 x double> %arg)
+  ret <16 x double> %round
+}
+
+define half @test_rint_f16(half %arg) {
+; CHECK-LABEL: define half @test_rint_f16
+; CHECK-SAME: (half [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ROUND:%.*]] = tail call half @_Z5roundDh(half [[ARG]])
+; CHECK-NEXT:    ret half [[ROUND]]
+;
+  %round = tail call half @_Z5roundDh(half %arg)
+  ret half %round
+}
+
+define <2 x half> @test_rint_v2f16(<2 x half> %arg) {
+; CHECK-LABEL: define <2 x half> @test_rint_v2f16
+; CHECK-SAME: (<2 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ROUND:%.*]] = tail call <2 x half> @_Z5roundDv2_Dh(<2 x half> [[ARG]])
+; CHECK-NEXT:    ret <2 x half> [[ROUND]]
+;
+  %round = tail call <2 x half> @_Z5roundDv2_Dh(<2 x half> %arg)
+  ret <2 x half> %round
+}
+
+define <3 x half> @test_rint_v3f16(<3 x half> %arg) {
+; CHECK-LABEL: define <3 x half> @test_rint_v3f16
+; CHECK-SAME: (<3 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ROUND:%.*]] = tail call <3 x half> @_Z5roundDv3_Dh(<3 x half> [[ARG]])
+; CHECK-NEXT:    ret <3 x half> [[ROUND]]
+;
+  %round = tail call <3 x half> @_Z5roundDv3_Dh(<3 x half> %arg)
+  ret <3 x half> %round
+}
+
+define <4 x half> @test_rint_v4f16(<4 x half> %arg) {
+; CHECK-LABEL: define <4 x half> @test_rint_v4f16
+; CHECK-SAME: (<4 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ROUND:%.*]] = tail call <4 x half> @_Z5roundDv4_Dh(<4 x half> [[ARG]])
+; CHECK-NEXT:    ret <4 x half> [[ROUND]]
+;
+  %round = tail call <4 x half> @_Z5roundDv4_Dh(<4 x half> %arg)
+  ret <4 x half> %round
+}
+
+define <8 x half> @test_rint_v8f16(<8 x half> %arg) {
+; CHECK-LABEL: define <8 x half> @test_rint_v8f16
+; CHECK-SAME: (<8 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ROUND:%.*]] = tail call <8 x half> @_Z5roundDv8_Dh(<8 x half> [[ARG]])
+; CHECK-NEXT:    ret <8 x half> [[ROUND]]
+;
+  %round = tail call <8 x half> @_Z5roundDv8_Dh(<8 x half> %arg)
+  ret <8 x half> %round
+}
+
+define <16 x half> @test_rint_v16f16(<16 x half> %arg) {
+; CHECK-LABEL: define <16 x half> @test_rint_v16f16
+; CHECK-SAME: (<16 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ROUND:%.*]] = tail call <16 x half> @_Z5roundDv16_Dh(<16 x half> [[ARG]])
+; CHECK-NEXT:    ret <16 x half> [[ROUND]]
+;
+  %round = tail call <16 x half> @_Z5roundDv16_Dh(<16 x half> %arg)
+  ret <16 x half> %round
+}
+
+define float @test_rint_f32_nobuiltin_callsite(float %arg) {
+; CHECK-LABEL: define float @test_rint_f32_nobuiltin_callsite
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ROUND:%.*]] = tail call float @_Z5roundf(float [[ARG]]) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT:    ret float [[ROUND]]
+;
+  %round = tail call float @_Z5roundf(float %arg) #0
+  ret float %round
+}
+
+define <2 x float> @test_rint_v2f32_nobuiltin_callsite(<2 x float> %arg) {
+; CHECK-LABEL: define <2 x float> @test_rint_v2f32_nobuiltin_callsite
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ROUND:%.*]] = tail call <2 x float> @_Z5roundDv2_f(<2 x float> [[ARG]]) #[[ATTR3]]
+; CHECK-NEXT:    ret <2 x float> [[ROUND]]
+;
+  %round = tail call <2 x float> @_Z5roundDv2_f(<2 x float> %arg) #0
+  ret <2 x float> %round
+}
+
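+; The "no-builtins" function attribute should be ignored. NOTE(review): these calls also carry call-site nobuiltin (#0), which may mask that -- confirm intent.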
+define float @test_rint_f32_nobuiltins(float %arg) #1 {
+; CHECK-LABEL: define float @test_rint_f32_nobuiltins
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[ROUND:%.*]] = tail call float @_Z5roundf(float [[ARG]]) #[[ATTR3]]
+; CHECK-NEXT:    ret float [[ROUND]]
+;
+  %round = tail call float @_Z5roundf(float %arg) #0
+  ret float %round
+}
+
+define <2 x float> @test_rint_v2f32_nobuiltins(<2 x float> %arg) #1 {
+; CHECK-LABEL: define <2 x float> @test_rint_v2f32_nobuiltins
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[ROUND:%.*]] = tail call <2 x float> @_Z5roundDv2_f(<2 x float> [[ARG]]) #[[ATTR3]]
+; CHECK-NEXT:    ret <2 x float> [[ROUND]]
+;
+  %round = tail call <2 x float> @_Z5roundDv2_f(<2 x float> %arg) #0
+  ret <2 x float> %round
+}
+
+define float @test_rint_f32_preserve_flags(float %arg) {
+; CHECK-LABEL: define float @test_rint_f32_preserve_flags
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ROUND:%.*]] = tail call nnan ninf float @_Z5roundf(float [[ARG]])
+; CHECK-NEXT:    ret float [[ROUND]]
+;
+  %round = tail call nnan ninf float @_Z5roundf(float %arg)
+  ret float %round
+}
+
+define <2 x float> @test_rint_v2f32_preserve_flags(<2 x float> %arg) {
+; CHECK-LABEL: define <2 x float> @test_rint_v2f32_preserve_flags
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ROUND:%.*]] = tail call nnan nsz contract <2 x float> @_Z5roundDv2_f(<2 x float> [[ARG]])
+; CHECK-NEXT:    ret <2 x float> [[ROUND]]
+;
+  %round = tail call contract nsz nnan <2 x float> @_Z5roundDv2_f(<2 x float> %arg)
+  ret <2 x float> %round
+}
+
+define float @test_rint_f32_preserve_flags_md(float %arg) {
+; CHECK-LABEL: define float @test_rint_f32_preserve_flags_md
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ROUND:%.*]] = tail call nnan ninf float @_Z5roundf(float [[ARG]]), !foo !0
+; CHECK-NEXT:    ret float [[ROUND]]
+;
+  %round = tail call nnan ninf float @_Z5roundf(float %arg), !foo !0
+  ret float %round
+}
+
+define <2 x float> @test_rint_v2f32_preserve_flags_md(<2 x float> %arg) {
+; CHECK-LABEL: define <2 x float> @test_rint_v2f32_preserve_flags_md
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ROUND:%.*]] = tail call nnan nsz contract <2 x float> @_Z5roundDv2_f(<2 x float> [[ARG]]), !foo !0
+; CHECK-NEXT:    ret <2 x float> [[ROUND]]
+;
+  %round = tail call contract nsz nnan <2 x float> @_Z5roundDv2_f(<2 x float> %arg), !foo !0
+  ret <2 x float> %round
+}
+
+; Test the libm names (roundf/round), which are not recognized OpenCL builtins.
+declare float @roundf(float) #2
+declare double @round(double) #2
+
+define float @test_libm_rint_f32(float %arg) {
+; CHECK-LABEL: define float @test_libm_rint_f32
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ROUND:%.*]] = tail call float @roundf(float [[ARG]])
+; CHECK-NEXT:    ret float [[ROUND]]
+;
+  %round = tail call float @roundf(float %arg)
+  ret float %round
+}
+
+define double @test_libm_rint_f64(double %arg) {
+; CHECK-LABEL: define double @test_libm_rint_f64
+; CHECK-SAME: (double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ROUND:%.*]] = tail call double @round(double [[ARG]])
+; CHECK-NEXT:    ret double [[ROUND]]
+;
+  %round = tail call double @round(double %arg)
+  ret double %round
+}
+
+define float @test_rint_f32_strictfp(float %arg) #3 {
+; CHECK-LABEL: define float @test_rint_f32_strictfp
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT:    [[ROUND:%.*]] = tail call nnan float @_Z5roundf(float [[ARG]]) #[[ATTR2]]
+; CHECK-NEXT:    ret float [[ROUND]]
+;
+  %round = tail call nnan float @_Z5roundf(float %arg) #3
+  ret float %round
+}
+
+attributes #0 = { nobuiltin }
+attributes #1 = { "no-builtins" }
+attributes #2 = { nounwind memory(none) }
+attributes #3 = { strictfp }
+
+!0 = !{i32 1234}
Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sqrt.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sqrt.ll
@@ -0,0 +1,457 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
+
+declare float @_Z4sqrtf(float)
+declare <2 x float> @_Z4sqrtDv2_f(<2 x float>)
+declare <3 x float> @_Z4sqrtDv3_f(<3 x float>)
+declare <4 x float> @_Z4sqrtDv4_f(<4 x float>)
+declare <8 x float> @_Z4sqrtDv8_f(<8 x float>)
+declare <16 x float> @_Z4sqrtDv16_f(<16 x float>)
+
+declare double @_Z4sqrtd(double)
+declare <2 x double> @_Z4sqrtDv2_d(<2 x double>)
+declare <3 x double> @_Z4sqrtDv3_d(<3 x double>)
+declare <4 x double> @_Z4sqrtDv4_d(<4 x double>)
+declare <8 x double> @_Z4sqrtDv8_d(<8 x double>)
+declare <16 x double> @_Z4sqrtDv16_d(<16 x double>)
+
+declare half @_Z4sqrtDh(half)
+declare <2 x half> @_Z4sqrtDv2_Dh(<2 x half>)
+declare <3 x half> @_Z4sqrtDv3_Dh(<3 x half>)
+declare <4 x half> @_Z4sqrtDv4_Dh(<4 x half>)
+declare <8 x half> @_Z4sqrtDv8_Dh(<8 x half>)
+declare <16 x half> @_Z4sqrtDv16_Dh(<16 x half>)
+
+define float @test_sqrt_f32(float %arg) {
+; CHECK-LABEL: define float @test_sqrt_f32
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret float [[CALL]]
+;
+  %call = tail call float @_Z4sqrtf(float %arg), !fpmath !0
+  ret float %call
+}
+
+define <2 x float> @test_sqrt_v2f32(<2 x float> %arg) {
+; CHECK-LABEL: define <2 x float> @test_sqrt_v2f32
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret <2 x float> [[CALL]]
+;
+  %call = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg), !fpmath !0
+  ret <2 x float> %call
+}
+
+define <3 x float> @test_sqrt_v3f32(<3 x float> %arg) {
+; CHECK-LABEL: define <3 x float> @test_sqrt_v3f32
+; CHECK-SAME: (<3 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call <3 x float> @_Z4sqrtDv3_f(<3 x float> [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret <3 x float> [[CALL]]
+;
+  %call = tail call <3 x float> @_Z4sqrtDv3_f(<3 x float> %arg), !fpmath !0
+  ret <3 x float> %call
+}
+
+define <4 x float> @test_sqrt_v4f32(<4 x float> %arg) {
+; CHECK-LABEL: define <4 x float> @test_sqrt_v4f32
+; CHECK-SAME: (<4 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call <4 x float> @_Z4sqrtDv4_f(<4 x float> [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret <4 x float> [[CALL]]
+;
+  %call = tail call <4 x float> @_Z4sqrtDv4_f(<4 x float> %arg), !fpmath !0
+  ret <4 x float> %call
+}
+
+define <8 x float> @test_sqrt_v8f32(<8 x float> %arg) {
+; CHECK-LABEL: define <8 x float> @test_sqrt_v8f32
+; CHECK-SAME: (<8 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call <8 x float> @_Z4sqrtDv8_f(<8 x float> [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret <8 x float> [[CALL]]
+;
+  %call = tail call <8 x float> @_Z4sqrtDv8_f(<8 x float> %arg), !fpmath !0
+  ret <8 x float> %call
+}
+
+define <16 x float> @test_sqrt_v16f32(<16 x float> %arg) {
+; CHECK-LABEL: define <16 x float> @test_sqrt_v16f32
+; CHECK-SAME: (<16 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call <16 x float> @_Z4sqrtDv16_f(<16 x float> [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret <16 x float> [[CALL]]
+;
+  %call = tail call <16 x float> @_Z4sqrtDv16_f(<16 x float> %arg), !fpmath !0
+  ret <16 x float> %call
+}
+
+define float @test_sqrt_cr_f32(float %arg) {
+; CHECK-LABEL: define float @test_sqrt_cr_f32
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call float @_Z4sqrtf(float [[ARG]])
+; CHECK-NEXT:    ret float [[CALL]]
+;
+  %call = tail call float @_Z4sqrtf(float %arg)
+  ret float %call
+}
+
+define <2 x float> @test_sqrt_cr_v2f32(<2 x float> %arg) {
+; CHECK-LABEL: define <2 x float> @test_sqrt_cr_v2f32
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]])
+; CHECK-NEXT:    ret <2 x float> [[CALL]]
+;
+  %call = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg)
+  ret <2 x float> %call
+}
+
+define <3 x float> @test_sqrt_cr_v3f32(<3 x float> %arg) {
+; CHECK-LABEL: define <3 x float> @test_sqrt_cr_v3f32
+; CHECK-SAME: (<3 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call <3 x float> @_Z4sqrtDv3_f(<3 x float> [[ARG]])
+; CHECK-NEXT:    ret <3 x float> [[CALL]]
+;
+  %call = tail call <3 x float> @_Z4sqrtDv3_f(<3 x float> %arg)
+  ret <3 x float> %call
+}
+
+define <4 x float> @test_sqrt_cr_v4f32(<4 x float> %arg) {
+; CHECK-LABEL: define <4 x float> @test_sqrt_cr_v4f32
+; CHECK-SAME: (<4 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call <4 x float> @_Z4sqrtDv4_f(<4 x float> [[ARG]])
+; CHECK-NEXT:    ret <4 x float> [[CALL]]
+;
+  %call = tail call <4 x float> @_Z4sqrtDv4_f(<4 x float> %arg)
+  ret <4 x float> %call
+}
+
+define <8 x float> @test_sqrt_cr_v8f32(<8 x float> %arg) {
+; CHECK-LABEL: define <8 x float> @test_sqrt_cr_v8f32
+; CHECK-SAME: (<8 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call <8 x float> @_Z4sqrtDv8_f(<8 x float> [[ARG]])
+; CHECK-NEXT:    ret <8 x float> [[CALL]]
+;
+  %call = tail call <8 x float> @_Z4sqrtDv8_f(<8 x float> %arg)
+  ret <8 x float> %call
+}
+
+define <16 x float> @test_sqrt_cr_v16f32(<16 x float> %arg) {
+; CHECK-LABEL: define <16 x float> @test_sqrt_cr_v16f32
+; CHECK-SAME: (<16 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call <16 x float> @_Z4sqrtDv16_f(<16 x float> [[ARG]])
+; CHECK-NEXT:    ret <16 x float> [[CALL]]
+;
+  %call = tail call <16 x float> @_Z4sqrtDv16_f(<16 x float> %arg)
+  ret <16 x float> %call
+}
+
+define double @test_sqrt_f64(double %arg) {
+; CHECK-LABEL: define double @test_sqrt_f64
+; CHECK-SAME: (double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call double @_Z4sqrtd(double [[ARG]])
+; CHECK-NEXT:    ret double [[CALL]]
+;
+  %call = tail call double @_Z4sqrtd(double %arg)
+  ret double %call
+}
+
+define <2 x double> @test_sqrt_v2f64(<2 x double> %arg) {
+; CHECK-LABEL: define <2 x double> @test_sqrt_v2f64
+; CHECK-SAME: (<2 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x double> @_Z4sqrtDv2_d(<2 x double> [[ARG]])
+; CHECK-NEXT:    ret <2 x double> [[CALL]]
+;
+  %call = tail call <2 x double> @_Z4sqrtDv2_d(<2 x double> %arg)
+  ret <2 x double> %call
+}
+
+define <3 x double> @test_sqrt_v3f64(<3 x double> %arg) {
+; CHECK-LABEL: define <3 x double> @test_sqrt_v3f64
+; CHECK-SAME: (<3 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call <3 x double> @_Z4sqrtDv3_d(<3 x double> [[ARG]])
+; CHECK-NEXT:    ret <3 x double> [[CALL]]
+;
+  %call = tail call <3 x double> @_Z4sqrtDv3_d(<3 x double> %arg)
+  ret <3 x double> %call
+}
+
+define <4 x double> @test_sqrt_v4f64(<4 x double> %arg) {
+; CHECK-LABEL: define <4 x double> @test_sqrt_v4f64
+; CHECK-SAME: (<4 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call <4 x double> @_Z4sqrtDv4_d(<4 x double> [[ARG]])
+; CHECK-NEXT:    ret <4 x double> [[CALL]]
+;
+  %call = tail call <4 x double> @_Z4sqrtDv4_d(<4 x double> %arg)
+  ret <4 x double> %call
+}
+
+define <8 x double> @test_sqrt_v8f64(<8 x double> %arg) {
+; CHECK-LABEL: define <8 x double> @test_sqrt_v8f64
+; CHECK-SAME: (<8 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call <8 x double> @_Z4sqrtDv8_d(<8 x double> [[ARG]])
+; CHECK-NEXT:    ret <8 x double> [[CALL]]
+;
+  %call = tail call <8 x double> @_Z4sqrtDv8_d(<8 x double> %arg)
+  ret <8 x double> %call
+}
+
+define <16 x double> @test_sqrt_v16f64(<16 x double> %arg) {
+; CHECK-LABEL: define <16 x double> @test_sqrt_v16f64
+; CHECK-SAME: (<16 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call <16 x double> @_Z4sqrtDv16_d(<16 x double> [[ARG]])
+; CHECK-NEXT:    ret <16 x double> [[CALL]]
+;
+  %call = tail call <16 x double> @_Z4sqrtDv16_d(<16 x double> %arg)
+  ret <16 x double> %call
+}
+
+define half @test_sqrt_f16(half %arg) {
+; CHECK-LABEL: define half @test_sqrt_f16
+; CHECK-SAME: (half [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call half @_Z4sqrtDh(half [[ARG]])
+; CHECK-NEXT:    ret half [[CALL]]
+;
+  %call = tail call half @_Z4sqrtDh(half %arg)
+  ret half %call
+}
+
+define <2 x half> @test_sqrt_v2f16(<2 x half> %arg) {
+; CHECK-LABEL: define <2 x half> @test_sqrt_v2f16
+; CHECK-SAME: (<2 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x half> @_Z4sqrtDv2_Dh(<2 x half> [[ARG]])
+; CHECK-NEXT:    ret <2 x half> [[CALL]]
+;
+  %call = tail call <2 x half> @_Z4sqrtDv2_Dh(<2 x half> %arg)
+  ret <2 x half> %call
+}
+
+define <3 x half> @test_sqrt_v3f16(<3 x half> %arg) {
+; CHECK-LABEL: define <3 x half> @test_sqrt_v3f16
+; CHECK-SAME: (<3 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call <3 x half> @_Z4sqrtDv3_Dh(<3 x half> [[ARG]])
+; CHECK-NEXT:    ret <3 x half> [[CALL]]
+;
+  %call = tail call <3 x half> @_Z4sqrtDv3_Dh(<3 x half> %arg)
+  ret <3 x half> %call
+}
+
+define <4 x half> @test_sqrt_v4f16(<4 x half> %arg) {
+; CHECK-LABEL: define <4 x half> @test_sqrt_v4f16
+; CHECK-SAME: (<4 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call <4 x half> @_Z4sqrtDv4_Dh(<4 x half> [[ARG]])
+; CHECK-NEXT:    ret <4 x half> [[CALL]]
+;
+  %call = tail call <4 x half> @_Z4sqrtDv4_Dh(<4 x half> %arg)
+  ret <4 x half> %call
+}
+
+define <8 x half> @test_sqrt_v8f16(<8 x half> %arg) {
+; CHECK-LABEL: define <8 x half> @test_sqrt_v8f16
+; CHECK-SAME: (<8 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call <8 x half> @_Z4sqrtDv8_Dh(<8 x half> [[ARG]])
+; CHECK-NEXT:    ret <8 x half> [[CALL]]
+;
+  %call = tail call <8 x half> @_Z4sqrtDv8_Dh(<8 x half> %arg)
+  ret <8 x half> %call
+}
+
+define <16 x half> @test_sqrt_v16f16(<16 x half> %arg) {
+; CHECK-LABEL: define <16 x half> @test_sqrt_v16f16
+; CHECK-SAME: (<16 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call <16 x half> @_Z4sqrtDv16_Dh(<16 x half> [[ARG]])
+; CHECK-NEXT:    ret <16 x half> [[CALL]]
+;
+  %call = tail call <16 x half> @_Z4sqrtDv16_Dh(<16 x half> %arg)
+  ret <16 x half> %call
+}
+
+define float @test_sqrt_f32_nobuiltin_callsite(float %arg) {
+; CHECK-LABEL: define float @test_sqrt_f32_nobuiltin_callsite
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR2:[0-9]+]], !fpmath !0
+; CHECK-NEXT:    ret float [[CALL]]
+;
+  %call = tail call float @_Z4sqrtf(float %arg) #0, !fpmath !0
+  ret float %call
+}
+
+define <2 x float> @test_sqrt_v2f32_nobuiltin_callsite(<2 x float> %arg) {
+; CHECK-LABEL: define <2 x float> @test_sqrt_v2f32_nobuiltin_callsite
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR2]], !fpmath !0
+; CHECK-NEXT:    ret <2 x float> [[CALL]]
+;
+  %call = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) #0, !fpmath !0
+  ret <2 x float> %call
+}
+
+define float @test_sqrt_cr_f32_nobuiltin_callsite(float %arg) {
+; CHECK-LABEL: define float @test_sqrt_cr_f32_nobuiltin_callsite
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR2]]
+; CHECK-NEXT:    ret float [[CALL]]
+;
+  %call = tail call float @_Z4sqrtf(float %arg) #0
+  ret float %call
+}
+
+define <2 x float> @test_sqrt_cr_v2f32_nobuiltin_callsite(<2 x float> %arg) {
+; CHECK-LABEL: define <2 x float> @test_sqrt_cr_v2f32_nobuiltin_callsite
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR2]]
+; CHECK-NEXT:    ret <2 x float> [[CALL]]
+;
+  %call = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) #0
+  ret <2 x float> %call
+}
+
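+; The "no-builtins" function attribute should be ignored. NOTE(review): these calls also carry call-site nobuiltin (#0), which may mask that -- confirm intent.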
+define float @test_sqrt_f32_nobuiltins(float %arg) #1 {
+; CHECK-LABEL: define float @test_sqrt_f32_nobuiltins
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR2]], !fpmath !0
+; CHECK-NEXT:    ret float [[CALL]]
+;
+  %call = tail call float @_Z4sqrtf(float %arg) #0, !fpmath !0
+  ret float %call
+}
+
+define <2 x float> @test_sqrt_v2f32_nobuiltins(<2 x float> %arg) #1 {
+; CHECK-LABEL: define <2 x float> @test_sqrt_v2f32_nobuiltins
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR2]], !fpmath !0
+; CHECK-NEXT:    ret <2 x float> [[CALL]]
+;
+  %call = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) #0, !fpmath !0
+  ret <2 x float> %call
+}
+
+define float @test_sqrt_cr_f32_nobuiltins(float %arg) #1 {
+; CHECK-LABEL: define float @test_sqrt_cr_f32_nobuiltins
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR2]]
+; CHECK-NEXT:    ret float [[CALL]]
+;
+  %call = tail call float @_Z4sqrtf(float %arg) #0
+  ret float %call
+}
+
+define <2 x float> @test_sqrt_cr_v2f32_nobuiltins(<2 x float> %arg) #1 {
+; CHECK-LABEL: define <2 x float> @test_sqrt_cr_v2f32_nobuiltins
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR2]]
+; CHECK-NEXT:    ret <2 x float> [[CALL]]
+;
+  %call = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) #0
+  ret <2 x float> %call
+}
+
+define float @test_sqrt_f32_preserve_flags(float %arg) {
+; CHECK-LABEL: define float @test_sqrt_f32_preserve_flags
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call nnan ninf float @_Z4sqrtf(float [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret float [[CALL]]
+;
+  %call = tail call nnan ninf float @_Z4sqrtf(float %arg), !fpmath !0
+  ret float %call
+}
+
+define <2 x float> @test_sqrt_v2f32_preserve_flags(<2 x float> %arg) {
+; CHECK-LABEL: define <2 x float> @test_sqrt_v2f32_preserve_flags
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call nnan nsz contract <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret <2 x float> [[CALL]]
+;
+  %call = tail call nnan nsz contract <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg), !fpmath !0
+  ret <2 x float> %call
+}
+
+define float @test_sqrt_f32_preserve_flags_md(float %arg) {
+; CHECK-LABEL: define float @test_sqrt_f32_preserve_flags_md
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call nnan ninf float @_Z4sqrtf(float [[ARG]]), !fpmath !0, !foo !1
+; CHECK-NEXT:    ret float [[CALL]]
+;
+  %call = tail call nnan ninf float @_Z4sqrtf(float %arg), !fpmath !0, !foo !1
+  ret float %call
+}
+
+define <2 x float> @test_sqrt_v2f32_preserve_flags_md(<2 x float> %arg) {
+; CHECK-LABEL: define <2 x float> @test_sqrt_v2f32_preserve_flags_md
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call nnan nsz contract <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]), !fpmath !0, !foo !1
+; CHECK-NEXT:    ret <2 x float> [[CALL]]
+;
+  %call = tail call nnan nsz contract <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg), !fpmath !0, !foo !1
+  ret <2 x float> %call
+}
+
+define float @test_sqrt_cr_f32_preserve_flags(float %arg) {
+; CHECK-LABEL: define float @test_sqrt_cr_f32_preserve_flags
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call ninf contract float @_Z4sqrtf(float [[ARG]])
+; CHECK-NEXT:    ret float [[CALL]]
+;
+  %call = tail call ninf contract float @_Z4sqrtf(float %arg)
+  ret float %call
+}
+
+define <2 x float> @test_sqrt_cr_v2f32_preserve_flags(<2 x float> %arg) {
+; CHECK-LABEL: define <2 x float> @test_sqrt_cr_v2f32_preserve_flags
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call nnan nsz <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]])
+; CHECK-NEXT:    ret <2 x float> [[CALL]]
+;
+  %call = tail call nnan nsz <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg)
+  ret <2 x float> %call
+}
+
+; Test the libm names (sqrtf/sqrt), which are not recognized OpenCL builtins.
+declare float @sqrtf(float) #2
+declare double @sqrt(double) #2
+
+define float @test_libm_sqrt_f32(float %arg) {
+; CHECK-LABEL: define float @test_libm_sqrt_f32
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call float @sqrtf(float [[ARG]])
+; CHECK-NEXT:    ret float [[CALL]]
+;
+  %call = tail call float @sqrtf(float %arg)
+  ret float %call
+}
+
+define float @test_libm_sqrt_f32_fpmath(float %arg) {
+; CHECK-LABEL: define float @test_libm_sqrt_f32_fpmath
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call float @sqrtf(float [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret float [[CALL]]
+;
+  %call = tail call float @sqrtf(float %arg), !fpmath !0
+  ret float %call
+}
+
+define double @test_libm_sqrt_f64(double %arg) {
+; CHECK-LABEL: define double @test_libm_sqrt_f64
+; CHECK-SAME: (double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call double @sqrt(double [[ARG]])
+; CHECK-NEXT:    ret double [[CALL]]
+;
+  %call = tail call double @sqrt(double %arg)
+  ret double %call
+}
+
+define double @test_libm_sqrt_f64_fpmath(double %arg) {
+; CHECK-LABEL: define double @test_libm_sqrt_f64_fpmath
+; CHECK-SAME: (double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call double @sqrt(double [[ARG]]), !fpmath !0
+; CHECK-NEXT:    ret double [[CALL]]
+;
+  %call = tail call double @sqrt(double %arg), !fpmath !0
+  ret double %call
+}
+
+attributes #0 = { nobuiltin }
+attributes #1 = { "no-builtins" }
+attributes #2 = { nounwind memory(none) }
+
+!0 = !{float 3.000000e+00}
+!1 = !{i32 1234}
Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-trunc.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-trunc.ll
@@ -0,0 +1,327 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
+
+declare float @_Z5truncf(float)
+declare <2 x float> @_Z5truncDv2_f(<2 x float>)
+declare <3 x float> @_Z5truncDv3_f(<3 x float>)
+declare <4 x float> @_Z5truncDv4_f(<4 x float>)
+declare <8 x float> @_Z5truncDv8_f(<8 x float>)
+declare <16 x float> @_Z5truncDv16_f(<16 x float>)
+
+declare double @_Z5truncd(double)
+declare <2 x double> @_Z5truncDv2_d(<2 x double>)
+declare <3 x double> @_Z5truncDv3_d(<3 x double>)
+declare <4 x double> @_Z5truncDv4_d(<4 x double>)
+declare <8 x double> @_Z5truncDv8_d(<8 x double>)
+declare <16 x double> @_Z5truncDv16_d(<16 x double>)
+
+declare half @_Z5truncDh(half)
+declare <2 x half> @_Z5truncDv2_Dh(<2 x half>)
+declare <3 x half> @_Z5truncDv3_Dh(<3 x half>)
+declare <4 x half> @_Z5truncDv4_Dh(<4 x half>)
+declare <8 x half> @_Z5truncDv8_Dh(<8 x half>)
+declare <16 x half> @_Z5truncDv16_Dh(<16 x half>)
+
+define float @test_rint_f32(float %arg) {
+; CHECK-LABEL: define float @test_rint_f32
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call float @_Z5truncf(float [[ARG]])
+; CHECK-NEXT:    ret float [[TRUNC]]
+;
+  %trunc = tail call float @_Z5truncf(float %arg)
+  ret float %trunc
+}
+
+define <2 x float> @test_rint_v2f32(<2 x float> %arg) {
+; CHECK-LABEL: define <2 x float> @test_rint_v2f32
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call <2 x float> @_Z5truncDv2_f(<2 x float> [[ARG]])
+; CHECK-NEXT:    ret <2 x float> [[TRUNC]]
+;
+  %trunc = tail call <2 x float> @_Z5truncDv2_f(<2 x float> %arg)
+  ret <2 x float> %trunc
+}
+
+define <3 x float> @test_rint_v3f32(<3 x float> %arg) {
+; CHECK-LABEL: define <3 x float> @test_rint_v3f32
+; CHECK-SAME: (<3 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call <3 x float> @_Z5truncDv3_f(<3 x float> [[ARG]])
+; CHECK-NEXT:    ret <3 x float> [[TRUNC]]
+;
+  %trunc = tail call <3 x float> @_Z5truncDv3_f(<3 x float> %arg)
+  ret <3 x float> %trunc
+}
+
+define <4 x float> @test_rint_v4f32(<4 x float> %arg) {
+; CHECK-LABEL: define <4 x float> @test_rint_v4f32
+; CHECK-SAME: (<4 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call <4 x float> @_Z5truncDv4_f(<4 x float> [[ARG]])
+; CHECK-NEXT:    ret <4 x float> [[TRUNC]]
+;
+  %trunc = tail call <4 x float> @_Z5truncDv4_f(<4 x float> %arg)
+  ret <4 x float> %trunc
+}
+
+define <8 x float> @test_rint_v8f32(<8 x float> %arg) {
+; CHECK-LABEL: define <8 x float> @test_rint_v8f32
+; CHECK-SAME: (<8 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call <8 x float> @_Z5truncDv8_f(<8 x float> [[ARG]])
+; CHECK-NEXT:    ret <8 x float> [[TRUNC]]
+;
+  %trunc = tail call <8 x float> @_Z5truncDv8_f(<8 x float> %arg)
+  ret <8 x float> %trunc
+}
+
+define <16 x float> @test_rint_v16f32(<16 x float> %arg) {
+; CHECK-LABEL: define <16 x float> @test_rint_v16f32
+; CHECK-SAME: (<16 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call <16 x float> @_Z5truncDv16_f(<16 x float> [[ARG]])
+; CHECK-NEXT:    ret <16 x float> [[TRUNC]]
+;
+  %trunc = tail call <16 x float> @_Z5truncDv16_f(<16 x float> %arg)
+  ret <16 x float> %trunc
+}
+
+define double @test_rint_f64(double %arg) {
+; CHECK-LABEL: define double @test_rint_f64
+; CHECK-SAME: (double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call double @_Z5truncd(double [[ARG]])
+; CHECK-NEXT:    ret double [[TRUNC]]
+;
+  %trunc = tail call double @_Z5truncd(double %arg)
+  ret double %trunc
+}
+
+define <2 x double> @test_rint_v2f64(<2 x double> %arg) {
+; CHECK-LABEL: define <2 x double> @test_rint_v2f64
+; CHECK-SAME: (<2 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call <2 x double> @_Z5truncDv2_d(<2 x double> [[ARG]])
+; CHECK-NEXT:    ret <2 x double> [[TRUNC]]
+;
+  %trunc = tail call <2 x double> @_Z5truncDv2_d(<2 x double> %arg)
+  ret <2 x double> %trunc
+}
+
+define <3 x double> @test_rint_v3f64(<3 x double> %arg) {
+; CHECK-LABEL: define <3 x double> @test_rint_v3f64
+; CHECK-SAME: (<3 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call <3 x double> @_Z5truncDv3_d(<3 x double> [[ARG]])
+; CHECK-NEXT:    ret <3 x double> [[TRUNC]]
+;
+  %trunc = tail call <3 x double> @_Z5truncDv3_d(<3 x double> %arg)
+  ret <3 x double> %trunc
+}
+
+define <4 x double> @test_rint_v4f64(<4 x double> %arg) {
+; CHECK-LABEL: define <4 x double> @test_rint_v4f64
+; CHECK-SAME: (<4 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call <4 x double> @_Z5truncDv4_d(<4 x double> [[ARG]])
+; CHECK-NEXT:    ret <4 x double> [[TRUNC]]
+;
+  %trunc = tail call <4 x double> @_Z5truncDv4_d(<4 x double> %arg)
+  ret <4 x double> %trunc
+}
+
+define <8 x double> @test_rint_v8f64(<8 x double> %arg) {
+; CHECK-LABEL: define <8 x double> @test_rint_v8f64
+; CHECK-SAME: (<8 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call <8 x double> @_Z5truncDv8_d(<8 x double> [[ARG]])
+; CHECK-NEXT:    ret <8 x double> [[TRUNC]]
+;
+  %trunc = tail call <8 x double> @_Z5truncDv8_d(<8 x double> %arg)
+  ret <8 x double> %trunc
+}
+
+define <16 x double> @test_rint_v16f64(<16 x double> %arg) {
+; CHECK-LABEL: define <16 x double> @test_rint_v16f64
+; CHECK-SAME: (<16 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call <16 x double> @_Z5truncDv16_d(<16 x double> [[ARG]])
+; CHECK-NEXT:    ret <16 x double> [[TRUNC]]
+;
+  %trunc = tail call <16 x double> @_Z5truncDv16_d(<16 x double> %arg)
+  ret <16 x double> %trunc
+}
+
+define half @test_rint_f16(half %arg) {
+; CHECK-LABEL: define half @test_rint_f16
+; CHECK-SAME: (half [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call half @_Z5truncDh(half [[ARG]])
+; CHECK-NEXT:    ret half [[TRUNC]]
+;
+  %trunc = tail call half @_Z5truncDh(half %arg)
+  ret half %trunc
+}
+
+define <2 x half> @test_rint_v2f16(<2 x half> %arg) {
+; CHECK-LABEL: define <2 x half> @test_rint_v2f16
+; CHECK-SAME: (<2 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call <2 x half> @_Z5truncDv2_Dh(<2 x half> [[ARG]])
+; CHECK-NEXT:    ret <2 x half> [[TRUNC]]
+;
+  %trunc = tail call <2 x half> @_Z5truncDv2_Dh(<2 x half> %arg)
+  ret <2 x half> %trunc
+}
+
+define <3 x half> @test_rint_v3f16(<3 x half> %arg) {
+; CHECK-LABEL: define <3 x half> @test_rint_v3f16
+; CHECK-SAME: (<3 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call <3 x half> @_Z5truncDv3_Dh(<3 x half> [[ARG]])
+; CHECK-NEXT:    ret <3 x half> [[TRUNC]]
+;
+  %trunc = tail call <3 x half> @_Z5truncDv3_Dh(<3 x half> %arg)
+  ret <3 x half> %trunc
+}
+
+define <4 x half> @test_rint_v4f16(<4 x half> %arg) {
+; CHECK-LABEL: define <4 x half> @test_rint_v4f16
+; CHECK-SAME: (<4 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call <4 x half> @_Z5truncDv4_Dh(<4 x half> [[ARG]])
+; CHECK-NEXT:    ret <4 x half> [[TRUNC]]
+;
+  %trunc = tail call <4 x half> @_Z5truncDv4_Dh(<4 x half> %arg)
+  ret <4 x half> %trunc
+}
+
+define <8 x half> @test_rint_v8f16(<8 x half> %arg) {
+; CHECK-LABEL: define <8 x half> @test_rint_v8f16
+; CHECK-SAME: (<8 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call <8 x half> @_Z5truncDv8_Dh(<8 x half> [[ARG]])
+; CHECK-NEXT:    ret <8 x half> [[TRUNC]]
+;
+  %trunc = tail call <8 x half> @_Z5truncDv8_Dh(<8 x half> %arg)
+  ret <8 x half> %trunc
+}
+
+define <16 x half> @test_rint_v16f16(<16 x half> %arg) {
+; CHECK-LABEL: define <16 x half> @test_rint_v16f16
+; CHECK-SAME: (<16 x half> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call <16 x half> @_Z5truncDv16_Dh(<16 x half> [[ARG]])
+; CHECK-NEXT:    ret <16 x half> [[TRUNC]]
+;
+  %trunc = tail call <16 x half> @_Z5truncDv16_Dh(<16 x half> %arg)
+  ret <16 x half> %trunc
+}
+
+define float @test_rint_f32_nobuiltin_callsite(float %arg) {
+; CHECK-LABEL: define float @test_rint_f32_nobuiltin_callsite
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call float @_Z5truncf(float [[ARG]]) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT:    ret float [[TRUNC]]
+;
+  %trunc = tail call float @_Z5truncf(float %arg) #0
+  ret float %trunc
+}
+
+define <2 x float> @test_rint_v2f32_nobuiltin_callsite(<2 x float> %arg) {
+; CHECK-LABEL: define <2 x float> @test_rint_v2f32_nobuiltin_callsite
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call <2 x float> @_Z5truncDv2_f(<2 x float> [[ARG]]) #[[ATTR3]]
+; CHECK-NEXT:    ret <2 x float> [[TRUNC]]
+;
+  %trunc = tail call <2 x float> @_Z5truncDv2_f(<2 x float> %arg) #0
+  ret <2 x float> %trunc
+}
+
+; The "no-builtins" function attribute should be ignored.
+define float @test_rint_f32_nobuiltins(float %arg) #1 {
+; CHECK-LABEL: define float @test_rint_f32_nobuiltins
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call float @_Z5truncf(float [[ARG]]) #[[ATTR3]]
+; CHECK-NEXT:    ret float [[TRUNC]]
+;
+  %trunc = tail call float @_Z5truncf(float %arg) #0
+  ret float %trunc
+}
+
+define <2 x float> @test_rint_v2f32_nobuiltins(<2 x float> %arg) #1 {
+; CHECK-LABEL: define <2 x float> @test_rint_v2f32_nobuiltins
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call <2 x float> @_Z5truncDv2_f(<2 x float> [[ARG]]) #[[ATTR3]]
+; CHECK-NEXT:    ret <2 x float> [[TRUNC]]
+;
+  %trunc = tail call <2 x float> @_Z5truncDv2_f(<2 x float> %arg) #0
+  ret <2 x float> %trunc
+}
+
+define float @test_rint_f32_preserve_flags(float %arg) {
+; CHECK-LABEL: define float @test_rint_f32_preserve_flags
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call nnan ninf float @_Z5truncf(float [[ARG]])
+; CHECK-NEXT:    ret float [[TRUNC]]
+;
+  %trunc = tail call nnan ninf float @_Z5truncf(float %arg)
+  ret float %trunc
+}
+
+define <2 x float> @test_rint_v2f32_preserve_flags(<2 x float> %arg) {
+; CHECK-LABEL: define <2 x float> @test_rint_v2f32_preserve_flags
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call nnan nsz contract <2 x float> @_Z5truncDv2_f(<2 x float> [[ARG]])
+; CHECK-NEXT:    ret <2 x float> [[TRUNC]]
+;
+  %trunc = tail call contract nsz nnan <2 x float> @_Z5truncDv2_f(<2 x float> %arg)
+  ret <2 x float> %trunc
+}
+
+define float @test_rint_f32_preserve_flags_md(float %arg) {
+; CHECK-LABEL: define float @test_rint_f32_preserve_flags_md
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call nnan ninf float @_Z5truncf(float [[ARG]]), !foo !0
+; CHECK-NEXT:    ret float [[TRUNC]]
+;
+  %trunc = tail call nnan ninf float @_Z5truncf(float %arg), !foo !0
+  ret float %trunc
+}
+
+define <2 x float> @test_rint_v2f32_preserve_flags_md(<2 x float> %arg) {
+; CHECK-LABEL: define <2 x float> @test_rint_v2f32_preserve_flags_md
+; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call nnan nsz contract <2 x float> @_Z5truncDv2_f(<2 x float> [[ARG]]), !foo !0
+; CHECK-NEXT:    ret <2 x float> [[TRUNC]]
+;
+  %trunc = tail call contract nsz nnan <2 x float> @_Z5truncDv2_f(<2 x float> %arg), !foo !0
+  ret <2 x float> %trunc
+}
+
+; Test the unmangled libm names (truncf/trunc), which are not recognized OpenCL builtins.
+declare float @truncf(float) #2
+declare double @trunc(double) #2
+
+define float @test_libm_rint_f32(float %arg) {
+; CHECK-LABEL: define float @test_libm_rint_f32
+; CHECK-SAME: (float [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call float @truncf(float [[ARG]])
+; CHECK-NEXT:    ret float [[TRUNC]]
+;
+  %trunc = tail call float @truncf(float %arg)
+  ret float %trunc
+}
+
+define double @test_libm_rint_f64(double %arg) {
+; CHECK-LABEL: define double @test_libm_rint_f64
+; CHECK-SAME: (double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call double @trunc(double [[ARG]])
+; CHECK-NEXT:    ret double [[TRUNC]]
+;
+  %trunc = tail call double @trunc(double %arg)
+  ret double %trunc
+}
+
+define float @test_rint_f32_strictfp(float %arg) #3 {
+; CHECK-LABEL: define float @test_rint_f32_strictfp
+; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT:    [[TRUNC:%.*]] = tail call nnan float @_Z5truncf(float [[ARG]]) #[[ATTR2]]
+; CHECK-NEXT:    ret float [[TRUNC]]
+;
+  %trunc = tail call nnan float @_Z5truncf(float %arg) #3
+  ret float %trunc
+}
+
+attributes #0 = { nobuiltin } ; call-site attribute in the *_nobuiltin_callsite and *_nobuiltins tests
+attributes #1 = { "no-builtins" } ; function attribute on the *_nobuiltins tests
+attributes #2 = { nounwind memory(none) } ; attached to the unmangled libm declarations
+attributes #3 = { strictfp } ; used on both the function and the call in the strictfp test
+
+!0 = !{i32 1234} ; operand of the !foo attachments in the *_preserve_flags_md tests