Index: llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -103,12 +103,14 @@
   /// AllowMinSize is true, allow the replacement in a minsize function.
   bool shouldReplaceLibcallWithIntrinsic(const CallInst *CI,
                                          bool AllowMinSizeF32 = false,
-                                         bool AllowF64 = false);
+                                         bool AllowF64 = false,
+                                         bool AllowStrictFP = false);
   void replaceLibCallWithSimpleIntrinsic(CallInst *CI, Intrinsic::ID IntrID);
 
   bool tryReplaceLibcallWithSimpleIntrinsic(CallInst *CI, Intrinsic::ID IntrID,
                                             bool AllowMinSizeF32 = false,
-                                            bool AllowF64 = false);
+                                            bool AllowF64 = false,
+                                            bool AllowStrictFP = false);
 
 protected:
   bool isUnsafeMath(const FPMathOperator *FPOp) const;
@@ -583,8 +585,8 @@
 
     // Specialized optimizations for each function call.
     //
-    // TODO: Handle other simple intrinsic wrappers. Sqrt, copysign, fabs,
-    // ldexp, rounding intrinsics.
+    // TODO: Handle other simple intrinsic wrappers. Sqrt, ldexp, rounding
+    // intrinsics.
     //
     // TODO: Handle native functions
     switch (FInfo.getId()) {
@@ -625,6 +627,12 @@
     case AMDGPULibFunc::EI_MAD:
       return tryReplaceLibcallWithSimpleIntrinsic(CI, Intrinsic::fmuladd, true,
                                                   true);
+    case AMDGPULibFunc::EI_FABS:
+      return tryReplaceLibcallWithSimpleIntrinsic(CI, Intrinsic::fabs, true,
+                                                  true, true);
+    case AMDGPULibFunc::EI_COPYSIGN:
+      return tryReplaceLibcallWithSimpleIntrinsic(CI, Intrinsic::copysign, true,
+                                                  true, true);
     case AMDGPULibFunc::EI_POW:
     case AMDGPULibFunc::EI_POWR:
     case AMDGPULibFunc::EI_POWN:
@@ -1081,7 +1089,8 @@
 // substituting them with direct calls with all the flags.
 bool AMDGPULibCalls::shouldReplaceLibcallWithIntrinsic(const CallInst *CI,
                                                        bool AllowMinSizeF32,
-                                                       bool AllowF64) {
+                                                       bool AllowF64,
+                                                       bool AllowStrictFP) {
   Type *FltTy = CI->getType()->getScalarType();
   const bool IsF32 = FltTy->isFloatTy();
 
@@ -1096,7 +1105,7 @@
 
   const Function *ParentF = CI->getFunction();
-  // TODO: Handle strictfp
-  if (ParentF->hasFnAttribute(Attribute::StrictFP))
+  // TODO: Handle strictfp for callers that do not opt in via AllowStrictFP.
+  if (!AllowStrictFP && ParentF->hasFnAttribute(Attribute::StrictFP))
     return false;
 
   if (IsF32 && !AllowMinSizeF32 && ParentF->hasMinSize())
@@ -1113,8 +1122,10 @@
 bool AMDGPULibCalls::tryReplaceLibcallWithSimpleIntrinsic(CallInst *CI,
                                                           Intrinsic::ID IntrID,
                                                           bool AllowMinSizeF32,
-                                                          bool AllowF64) {
-  if (!shouldReplaceLibcallWithIntrinsic(CI, AllowMinSizeF32, AllowF64))
+                                                          bool AllowF64,
+                                                          bool AllowStrictFP) {
+  if (!shouldReplaceLibcallWithIntrinsic(CI, AllowMinSizeF32, AllowF64,
+                                         AllowStrictFP))
     return false;
   replaceLibCallWithSimpleIntrinsic(CI, IntrID);
   return true;
Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-copysign.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-copysign.ll
+++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-copysign.ll
@@ -25,7 +25,7 @@
 define float @test_copysign_f32(float %x, float %y) {
 ; CHECK-LABEL: define float @test_copysign_f32
 ; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
-; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call float @_Z8copysignff(float [[X]], float [[Y]])
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call float @llvm.copysign.f32(float [[X]], float [[Y]])
 ; CHECK-NEXT:    ret float [[COPYSIGN]]
 ;
   %copysign = tail call float @_Z8copysignff(float %x, float %y)
@@ -35,7 +35,7 @@
 define float @test_copysign_f32_nnan(float %x, float %y) {
 ; CHECK-LABEL: define float @test_copysign_f32_nnan
 ; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
-; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call nnan float @_Z8copysignff(float [[X]], float [[Y]])
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call nnan float @llvm.copysign.f32(float [[X]], float [[Y]])
 ; CHECK-NEXT:    ret float [[COPYSIGN]]
 ;
   %copysign = tail call nnan float @_Z8copysignff(float %x, float %y)
@@ -45,7 +45,7 @@
 define <2 x float> @test_copysign_v2f32(<2 x float> %x, <2 x float> %y) {
 ; CHECK-LABEL: define <2 x float> @test_copysign_v2f32
 ; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) {
-; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <2 x float> @_Z8copysignDv2_fS_(<2 x float> [[X]], <2 x float> [[Y]])
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <2 x float> @llvm.copysign.v2f32(<2 x float> [[X]], <2 x float> [[Y]])
 ; CHECK-NEXT:    ret <2 x float> [[COPYSIGN]]
 ;
   %copysign = tail call <2 x float> @_Z8copysignDv2_fS_(<2 x float> %x, <2 x float> %y)
@@ -55,7 +55,7 @@
 define <3 x float> @test_copysign_v3f32(<3 x float> %x, <3 x float> %y) {
 ; CHECK-LABEL: define <3 x float> @test_copysign_v3f32
 ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) {
-; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <3 x float> @_Z8copysignDv3_fS_(<3 x float> [[X]], <3 x float> [[Y]])
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <3 x float> @llvm.copysign.v3f32(<3 x float> [[X]], <3 x float> [[Y]])
 ; CHECK-NEXT:    ret <3 x float> [[COPYSIGN]]
 ;
   %copysign = tail call <3 x float> @_Z8copysignDv3_fS_(<3 x float> %x, <3 x float> %y)
@@ -65,7 +65,7 @@
 define <4 x float> @test_copysign_v4f32(<4 x float> %x, <4 x float> %y) {
 ; CHECK-LABEL: define <4 x float> @test_copysign_v4f32
 ; CHECK-SAME: (<4 x float> [[X:%.*]], <4 x float> [[Y:%.*]]) {
-; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <4 x float> @_Z8copysignDv4_fS_(<4 x float> [[X]], <4 x float> [[Y]])
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <4 x float> @llvm.copysign.v4f32(<4 x float> [[X]], <4 x float> [[Y]])
 ; CHECK-NEXT:    ret <4 x float> [[COPYSIGN]]
 ;
   %copysign = tail call <4 x float> @_Z8copysignDv4_fS_(<4 x float> %x, <4 x float> %y)
@@ -75,7 +75,7 @@
 define <8 x float> @test_copysign_v8f32(<8 x float> %x, <8 x float> %y) {
 ; CHECK-LABEL: define <8 x float> @test_copysign_v8f32
 ; CHECK-SAME: (<8 x float> [[X:%.*]], <8 x float> [[Y:%.*]]) {
-; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <8 x float> @_Z8copysignDv8_fS_(<8 x float> [[X]], <8 x float> [[Y]])
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <8 x float> @llvm.copysign.v8f32(<8 x float> [[X]], <8 x float> [[Y]])
 ; CHECK-NEXT:    ret <8 x float> [[COPYSIGN]]
 ;
   %copysign = tail call <8 x float> @_Z8copysignDv8_fS_(<8 x float> %x, <8 x float> %y)
@@ -85,7 +85,7 @@
 define <16 x float> @test_copysign_v16f32(<16 x float> %x, <16 x float> %y) {
 ; CHECK-LABEL: define <16 x float> @test_copysign_v16f32
 ; CHECK-SAME: (<16 x float> [[X:%.*]], <16 x float> [[Y:%.*]]) {
-; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <16 x float> @_Z8copysignDv16_fS_(<16 x float> [[X]], <16 x float> [[Y]])
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <16 x float> @llvm.copysign.v16f32(<16 x float> [[X]], <16 x float> [[Y]])
 ; CHECK-NEXT:    ret <16 x float> [[COPYSIGN]]
 ;
   %copysign = tail call <16 x float> @_Z8copysignDv16_fS_(<16 x float> %x, <16 x float> %y)
@@ -95,7 +95,7 @@
 define double @test_copysign_f64(double %x, double %y) {
 ; CHECK-LABEL: define double @test_copysign_f64
 ; CHECK-SAME: (double [[X:%.*]], double [[Y:%.*]]) {
-; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call double @_Z8copysigndd(double [[X]], double [[Y]])
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call double @llvm.copysign.f64(double [[X]], double [[Y]])
 ; CHECK-NEXT:    ret double [[COPYSIGN]]
 ;
   %copysign = tail call double @_Z8copysigndd(double %x, double %y)
@@ -105,7 +105,7 @@
 define <2 x double> @test_copysign_v2f64(<2 x double> %x, <2 x double> %y) {
 ; CHECK-LABEL: define <2 x double> @test_copysign_v2f64
 ; CHECK-SAME: (<2 x double> [[X:%.*]], <2 x double> [[Y:%.*]]) {
-; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <2 x double> @_Z8copysignDv2_dS_(<2 x double> [[X]], <2 x double> [[Y]])
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <2 x double> @llvm.copysign.v2f64(<2 x double> [[X]], <2 x double> [[Y]])
 ; CHECK-NEXT:    ret <2 x double> [[COPYSIGN]]
 ;
   %copysign = tail call <2 x double> @_Z8copysignDv2_dS_(<2 x double> %x, <2 x double> %y)
@@ -115,7 +115,7 @@
 define <3 x double> @test_copysign_v3f64(<3 x double> %x, <3 x double> %y) {
 ; CHECK-LABEL: define <3 x double> @test_copysign_v3f64
 ; CHECK-SAME: (<3 x double> [[X:%.*]], <3 x double> [[Y:%.*]]) {
-; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <3 x double> @_Z8copysignDv3_dS_(<3 x double> [[X]], <3 x double> [[Y]])
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <3 x double> @llvm.copysign.v3f64(<3 x double> [[X]], <3 x double> [[Y]])
 ; CHECK-NEXT:    ret <3 x double> [[COPYSIGN]]
 ;
   %copysign = tail call <3 x double> @_Z8copysignDv3_dS_(<3 x double> %x, <3 x double> %y)
@@ -125,7 +125,7 @@
 define <4 x double> @test_copysign_v4f64(<4 x double> %x, <4 x double> %y) {
 ; CHECK-LABEL: define <4 x double> @test_copysign_v4f64
 ; CHECK-SAME: (<4 x double> [[X:%.*]], <4 x double> [[Y:%.*]]) {
-; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <4 x double> @_Z8copysignDv4_dS_(<4 x double> [[X]], <4 x double> [[Y]])
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <4 x double> @llvm.copysign.v4f64(<4 x double> [[X]], <4 x double> [[Y]])
 ; CHECK-NEXT:    ret <4 x double> [[COPYSIGN]]
 ;
   %copysign = tail call <4 x double> @_Z8copysignDv4_dS_(<4 x double> %x, <4 x double> %y)
@@ -135,7 +135,7 @@
 define <8 x double> @test_copysign_v8f64(<8 x double> %x, <8 x double> %y) {
 ; CHECK-LABEL: define <8 x double> @test_copysign_v8f64
 ; CHECK-SAME: (<8 x double> [[X:%.*]], <8 x double> [[Y:%.*]]) {
-; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <8 x double> @_Z8copysignDv8_dS_(<8 x double> [[X]], <8 x double> [[Y]])
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <8 x double> @llvm.copysign.v8f64(<8 x double> [[X]], <8 x double> [[Y]])
 ; CHECK-NEXT:    ret <8 x double> [[COPYSIGN]]
 ;
   %copysign = tail call <8 x double> @_Z8copysignDv8_dS_(<8 x double> %x, <8 x double> %y)
@@ -145,7 +145,7 @@
 define <16 x double> @test_copysign_v16f64(<16 x double> %x, <16 x double> %y) {
 ; CHECK-LABEL: define <16 x double> @test_copysign_v16f64
 ; CHECK-SAME: (<16 x double> [[X:%.*]], <16 x double> [[Y:%.*]]) {
-; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <16 x double> @_Z8copysignDv16_dS_(<16 x double> [[X]], <16 x double> [[Y]])
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <16 x double> @llvm.copysign.v16f64(<16 x double> [[X]], <16 x double> [[Y]])
 ; CHECK-NEXT:    ret <16 x double> [[COPYSIGN]]
 ;
   %copysign = tail call <16 x double> @_Z8copysignDv16_dS_(<16 x double> %x, <16 x double> %y)
@@ -155,7 +155,7 @@
 define half @test_copysign_f16(half %x, half %y) {
 ; CHECK-LABEL: define half @test_copysign_f16
 ; CHECK-SAME: (half [[X:%.*]], half [[Y:%.*]]) {
-; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call half @_Z8copysignDhDh(half [[X]], half [[Y]])
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call half @llvm.copysign.f16(half [[X]], half [[Y]])
 ; CHECK-NEXT:    ret half [[COPYSIGN]]
 ;
   %copysign = tail call half @_Z8copysignDhDh(half %x, half %y)
@@ -165,7 +165,7 @@
 define <2 x half> @test_copysign_v2f16(<2 x half> %x, <2 x half> %y) {
 ; CHECK-LABEL: define <2 x half> @test_copysign_v2f16
 ; CHECK-SAME: (<2 x half> [[X:%.*]], <2 x half> [[Y:%.*]]) {
-; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <2 x half> @_Z8copysignDv2_DhS_(<2 x half> [[X]], <2 x half> [[Y]])
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <2 x half> @llvm.copysign.v2f16(<2 x half> [[X]], <2 x half> [[Y]])
 ; CHECK-NEXT:    ret <2 x half> [[COPYSIGN]]
 ;
   %copysign = tail call <2 x half> @_Z8copysignDv2_DhS_(<2 x half> %x, <2 x half> %y)
@@ -175,7 +175,7 @@
 define <3 x half> @test_copysign_v3f16(<3 x half> %x, <3 x half> %y) {
 ; CHECK-LABEL: define <3 x half> @test_copysign_v3f16
 ; CHECK-SAME: (<3 x half> [[X:%.*]], <3 x half> [[Y:%.*]]) {
-; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <3 x half> @_Z8copysignDv3_DhS_(<3 x half> [[X]], <3 x half> [[Y]])
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <3 x half> @llvm.copysign.v3f16(<3 x half> [[X]], <3 x half> [[Y]])
 ; CHECK-NEXT:    ret <3 x half> [[COPYSIGN]]
 ;
   %copysign = tail call <3 x half> @_Z8copysignDv3_DhS_(<3 x half> %x, <3 x half> %y)
@@ -185,7 +185,7 @@
 define <4 x half> @test_copysign_v4f16(<4 x half> %x, <4 x half> %y) {
 ; CHECK-LABEL: define <4 x half> @test_copysign_v4f16
 ; CHECK-SAME: (<4 x half> [[X:%.*]], <4 x half> [[Y:%.*]]) {
-; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <4 x half> @_Z8copysignDv4_DhS_(<4 x half> [[X]], <4 x half> [[Y]])
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <4 x half> @llvm.copysign.v4f16(<4 x half> [[X]], <4 x half> [[Y]])
 ; CHECK-NEXT:    ret <4 x half> [[COPYSIGN]]
 ;
   %copysign = tail call <4 x half> @_Z8copysignDv4_DhS_(<4 x half> %x, <4 x half> %y)
@@ -195,7 +195,7 @@
 define <8 x half> @test_copysign_v8f16(<8 x half> %x, <8 x half> %y) {
 ; CHECK-LABEL: define <8 x half> @test_copysign_v8f16
 ; CHECK-SAME: (<8 x half> [[X:%.*]], <8 x half> [[Y:%.*]]) {
-; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <8 x half> @_Z8copysignDv8_DhS_(<8 x half> [[X]], <8 x half> [[Y]])
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <8 x half> @llvm.copysign.v8f16(<8 x half> [[X]], <8 x half> [[Y]])
 ; CHECK-NEXT:    ret <8 x half> [[COPYSIGN]]
 ;
   %copysign = tail call <8 x half> @_Z8copysignDv8_DhS_(<8 x half> %x, <8 x half> %y)
@@ -205,7 +205,7 @@
 define <16 x half> @test_copysign_v16f16(<16 x half> %x, <16 x half> %y) {
 ; CHECK-LABEL: define <16 x half> @test_copysign_v16f16
 ; CHECK-SAME: (<16 x half> [[X:%.*]], <16 x half> [[Y:%.*]]) {
-; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <16 x half> @_Z8copysignDv16_DhS_(<16 x half> [[X]], <16 x half> [[Y]])
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call <16 x half> @llvm.copysign.v16f16(<16 x half> [[X]], <16 x half> [[Y]])
 ; CHECK-NEXT:    ret <16 x half> [[COPYSIGN]]
 ;
   %copysign = tail call <16 x half> @_Z8copysignDv16_DhS_(<16 x half> %x, <16 x half> %y)
@@ -215,7 +215,7 @@
 define float @test_copysign_f32_minsize(float %x, float %y) #0 {
 ; CHECK-LABEL: define float @test_copysign_f32_minsize
 ; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call float @_Z8copysignff(float [[X]], float [[Y]])
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call float @llvm.copysign.f32(float [[X]], float [[Y]])
 ; CHECK-NEXT:    ret float [[COPYSIGN]]
 ;
   %copysign = tail call float @_Z8copysignff(float %x, float %y)
@@ -225,7 +225,7 @@
 define float @test_copysign_f32_nnan_minsize(float %x, float %y) #0 {
 ; CHECK-LABEL: define float @test_copysign_f32_nnan_minsize
 ; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call nnan float @_Z8copysignff(float [[X]], float [[Y]])
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call nnan float @llvm.copysign.f32(float [[X]], float [[Y]])
 ; CHECK-NEXT:    ret float [[COPYSIGN]]
 ;
   %copysign = tail call nnan float @_Z8copysignff(float %x, float %y)
@@ -235,7 +235,7 @@
 define float @test_copysign_f32_noinline(float %x, float %y) {
 ; CHECK-LABEL: define float @test_copysign_f32_noinline
 ; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
-; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call float @_Z8copysignff(float [[X]], float [[Y]]) #[[ATTR2:[0-9]+]]
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call float @_Z8copysignff(float [[X]], float [[Y]]) #[[ATTR3:[0-9]+]]
 ; CHECK-NEXT:    ret float [[COPYSIGN]]
 ;
   %copysign = tail call float @_Z8copysignff(float %x, float %y) #1
@@ -245,7 +245,7 @@
 define float @test_copysign_f32_nnan_noinline(float %x, float %y) {
 ; CHECK-LABEL: define float @test_copysign_f32_nnan_noinline
 ; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
-; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call nnan float @_Z8copysignff(float [[X]], float [[Y]]) #[[ATTR2]]
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call nnan float @_Z8copysignff(float [[X]], float [[Y]]) #[[ATTR3]]
 ; CHECK-NEXT:    ret float [[COPYSIGN]]
 ;
   %copysign = tail call nnan float @_Z8copysignff(float %x, float %y) #1
@@ -255,7 +255,7 @@
 define float @test_copysign_f32_strictfp(float %x, float %y) #2 {
 ; CHECK-LABEL: define float @test_copysign_f32_strictfp
 ; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
-; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call nnan nsz float @_Z8copysignff(float [[X]], float [[Y]]) #[[ATTR1]]
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call nnan nsz float @llvm.copysign.f32(float [[X]], float [[Y]]) #[[ATTR1]]
 ; CHECK-NEXT:    ret float [[COPYSIGN]]
 ;
   %copysign = tail call nsz nnan float @_Z8copysignff(float %x, float %y) #2
@@ -265,7 +265,7 @@
 define float @test_copysign_f32_fast_nobuiltin(float %x, float %y) {
 ; CHECK-LABEL: define float @test_copysign_f32_fast_nobuiltin
 ; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
-; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call fast float @_Z8copysignff(float [[X]], float [[Y]]) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT:    [[COPYSIGN:%.*]] = tail call fast float @_Z8copysignff(float [[X]], float [[Y]]) #[[ATTR4:[0-9]+]]
 ; CHECK-NEXT:    ret float [[COPYSIGN]]
 ;
   %copysign = tail call fast float @_Z8copysignff(float %x, float %y) #3
Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fabs.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fabs.ll
+++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fabs.ll
@@ -27,7 +27,7 @@
 define float @test_fabs_f32(float %arg) {
 ; CHECK-LABEL: define float @test_fabs_f32
 ; CHECK-SAME: (float [[ARG:%.*]]) {
-; CHECK-NEXT:    [[FABS:%.*]] = tail call float @_Z4fabsf(float [[ARG]])
+; CHECK-NEXT:    [[FABS:%.*]] = tail call float @llvm.fabs.f32(float [[ARG]])
 ; CHECK-NEXT:    ret float [[FABS]]
 ;
   %fabs = tail call float @_Z4fabsf(float %arg)
@@ -37,7 +37,7 @@
 define <2 x float> @test_fabs_v2f32(<2 x float> %arg) {
 ; CHECK-LABEL: define <2 x float> @test_fabs_v2f32
 ; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[FABS:%.*]] = tail call <2 x float> @_Z4fabsDv2_f(<2 x float> [[ARG]])
+; CHECK-NEXT:    [[FABS:%.*]] = tail call <2 x float> @llvm.fabs.v2f32(<2 x float> [[ARG]])
 ; CHECK-NEXT:    ret <2 x float> [[FABS]]
 ;
   %fabs = tail call <2 x float> @_Z4fabsDv2_f(<2 x float> %arg)
@@ -47,7 +47,7 @@
 define <3 x float> @test_fabs_v3f32(<3 x float> %arg) {
 ; CHECK-LABEL: define <3 x float> @test_fabs_v3f32
 ; CHECK-SAME: (<3 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[FABS:%.*]] = tail call <3 x float> @_Z4fabsDv3_f(<3 x float> [[ARG]])
+; CHECK-NEXT:    [[FABS:%.*]] = tail call <3 x float> @llvm.fabs.v3f32(<3 x float> [[ARG]])
 ; CHECK-NEXT:    ret <3 x float> [[FABS]]
 ;
   %fabs = tail call <3 x float> @_Z4fabsDv3_f(<3 x float> %arg)
@@ -57,7 +57,7 @@
 define <4 x float> @test_fabs_v4f32(<4 x float> %arg) {
 ; CHECK-LABEL: define <4 x float> @test_fabs_v4f32
 ; CHECK-SAME: (<4 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[FABS:%.*]] = tail call <4 x float> @_Z4fabsDv4_f(<4 x float> [[ARG]])
+; CHECK-NEXT:    [[FABS:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[ARG]])
 ; CHECK-NEXT:    ret <4 x float> [[FABS]]
 ;
   %fabs = tail call <4 x float> @_Z4fabsDv4_f(<4 x float> %arg)
@@ -67,7 +67,7 @@
 define <8 x float> @test_fabs_v8f32(<8 x float> %arg) {
 ; CHECK-LABEL: define <8 x float> @test_fabs_v8f32
 ; CHECK-SAME: (<8 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[FABS:%.*]] = tail call <8 x float> @_Z4fabsDv8_f(<8 x float> [[ARG]])
+; CHECK-NEXT:    [[FABS:%.*]] = tail call <8 x float> @llvm.fabs.v8f32(<8 x float> [[ARG]])
 ; CHECK-NEXT:    ret <8 x float> [[FABS]]
 ;
   %fabs = tail call <8 x float> @_Z4fabsDv8_f(<8 x float> %arg)
@@ -77,7 +77,7 @@
 define <16 x float> @test_fabs_v16f32(<16 x float> %arg) {
 ; CHECK-LABEL: define <16 x float> @test_fabs_v16f32
 ; CHECK-SAME: (<16 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[FABS:%.*]] = tail call <16 x float> @_Z4fabsDv16_f(<16 x float> [[ARG]])
+; CHECK-NEXT:    [[FABS:%.*]] = tail call <16 x float> @llvm.fabs.v16f32(<16 x float> [[ARG]])
 ; CHECK-NEXT:    ret <16 x float> [[FABS]]
 ;
   %fabs = tail call <16 x float> @_Z4fabsDv16_f(<16 x float> %arg)
@@ -87,7 +87,7 @@
 define double @test_fabs_f64(double %arg) {
 ; CHECK-LABEL: define double @test_fabs_f64
 ; CHECK-SAME: (double [[ARG:%.*]]) {
-; CHECK-NEXT:    [[FABS:%.*]] = tail call double @_Z4fabsd(double [[ARG]])
+; CHECK-NEXT:    [[FABS:%.*]] = tail call double @llvm.fabs.f64(double [[ARG]])
 ; CHECK-NEXT:    ret double [[FABS]]
 ;
   %fabs = tail call double @_Z4fabsd(double %arg)
@@ -97,7 +97,7 @@
 define <2 x double> @test_fabs_v2f64(<2 x double> %arg) {
 ; CHECK-LABEL: define <2 x double> @test_fabs_v2f64
 ; CHECK-SAME: (<2 x double> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[FABS:%.*]] = tail call <2 x double> @_Z4fabsDv2_d(<2 x double> [[ARG]])
+; CHECK-NEXT:    [[FABS:%.*]] = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> [[ARG]])
 ; CHECK-NEXT:    ret <2 x double> [[FABS]]
 ;
   %fabs = tail call <2 x double> @_Z4fabsDv2_d(<2 x double> %arg)
@@ -107,7 +107,7 @@
 define <3 x double> @test_fabs_v3f64(<3 x double> %arg) {
 ; CHECK-LABEL: define <3 x double> @test_fabs_v3f64
 ; CHECK-SAME: (<3 x double> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[FABS:%.*]] = tail call <3 x double> @_Z4fabsDv3_d(<3 x double> [[ARG]])
+; CHECK-NEXT:    [[FABS:%.*]] = tail call <3 x double> @llvm.fabs.v3f64(<3 x double> [[ARG]])
 ; CHECK-NEXT:    ret <3 x double> [[FABS]]
 ;
   %fabs = tail call <3 x double> @_Z4fabsDv3_d(<3 x double> %arg)
@@ -117,7 +117,7 @@
 define <4 x double> @test_fabs_v4f64(<4 x double> %arg) {
 ; CHECK-LABEL: define <4 x double> @test_fabs_v4f64
 ; CHECK-SAME: (<4 x double> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[FABS:%.*]] = tail call <4 x double> @_Z4fabsDv4_d(<4 x double> [[ARG]])
+; CHECK-NEXT:    [[FABS:%.*]] = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> [[ARG]])
 ; CHECK-NEXT:    ret <4 x double> [[FABS]]
 ;
   %fabs = tail call <4 x double> @_Z4fabsDv4_d(<4 x double> %arg)
@@ -127,7 +127,7 @@
 define <8 x double> @test_fabs_v8f64(<8 x double> %arg) {
 ; CHECK-LABEL: define <8 x double> @test_fabs_v8f64
 ; CHECK-SAME: (<8 x double> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[FABS:%.*]] = tail call <8 x double> @_Z4fabsDv8_d(<8 x double> [[ARG]])
+; CHECK-NEXT:    [[FABS:%.*]] = tail call <8 x double> @llvm.fabs.v8f64(<8 x double> [[ARG]])
 ; CHECK-NEXT:    ret <8 x double> [[FABS]]
 ;
   %fabs = tail call <8 x double> @_Z4fabsDv8_d(<8 x double> %arg)
@@ -137,7 +137,7 @@
 define <16 x double> @test_fabs_v16f64(<16 x double> %arg) {
 ; CHECK-LABEL: define <16 x double> @test_fabs_v16f64
 ; CHECK-SAME: (<16 x double> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[FABS:%.*]] = tail call <16 x double> @_Z4fabsDv16_d(<16 x double> [[ARG]])
+; CHECK-NEXT:    [[FABS:%.*]] = tail call <16 x double> @llvm.fabs.v16f64(<16 x double> [[ARG]])
 ; CHECK-NEXT:    ret <16 x double> [[FABS]]
 ;
   %fabs = tail call <16 x double> @_Z4fabsDv16_d(<16 x double> %arg)
@@ -147,7 +147,7 @@
 define half @test_fabs_f16(half %arg) {
 ; CHECK-LABEL: define half @test_fabs_f16
 ; CHECK-SAME: (half [[ARG:%.*]]) {
-; CHECK-NEXT:    [[FABS:%.*]] = tail call half @_Z4fabsDh(half [[ARG]])
+; CHECK-NEXT:    [[FABS:%.*]] = tail call half @llvm.fabs.f16(half [[ARG]])
 ; CHECK-NEXT:    ret half [[FABS]]
 ;
   %fabs = tail call half @_Z4fabsDh(half %arg)
@@ -157,7 +157,7 @@
 define <2 x half> @test_fabs_v2f16(<2 x half> %arg) {
 ; CHECK-LABEL: define <2 x half> @test_fabs_v2f16
 ; CHECK-SAME: (<2 x half> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[FABS:%.*]] = tail call <2 x half> @_Z4fabsDv2_Dh(<2 x half> [[ARG]])
+; CHECK-NEXT:    [[FABS:%.*]] = tail call <2 x half> @llvm.fabs.v2f16(<2 x half> [[ARG]])
 ; CHECK-NEXT:    ret <2 x half> [[FABS]]
 ;
   %fabs = tail call <2 x half> @_Z4fabsDv2_Dh(<2 x half> %arg)
@@ -167,7 +167,7 @@
 define <3 x half> @test_fabs_v3f16(<3 x half> %arg) {
 ; CHECK-LABEL: define <3 x half> @test_fabs_v3f16
 ; CHECK-SAME: (<3 x half> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[FABS:%.*]] = tail call <3 x half> @_Z4fabsDv3_Dh(<3 x half> [[ARG]])
+; CHECK-NEXT:    [[FABS:%.*]] = tail call <3 x half> @llvm.fabs.v3f16(<3 x half> [[ARG]])
 ; CHECK-NEXT:    ret <3 x half> [[FABS]]
 ;
   %fabs = tail call <3 x half> @_Z4fabsDv3_Dh(<3 x half> %arg)
@@ -177,7 +177,7 @@
 define <4 x half> @test_fabs_v4f16(<4 x half> %arg) {
 ; CHECK-LABEL: define <4 x half> @test_fabs_v4f16
 ; CHECK-SAME: (<4 x half> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[FABS:%.*]] = tail call <4 x half> @_Z4fabsDv4_Dh(<4 x half> [[ARG]])
+; CHECK-NEXT:    [[FABS:%.*]] = tail call <4 x half> @llvm.fabs.v4f16(<4 x half> [[ARG]])
 ; CHECK-NEXT:    ret <4 x half> [[FABS]]
 ;
   %fabs = tail call <4 x half> @_Z4fabsDv4_Dh(<4 x half> %arg)
@@ -187,7 +187,7 @@
 define <8 x half> @test_fabs_v8f16(<8 x half> %arg) {
 ; CHECK-LABEL: define <8 x half> @test_fabs_v8f16
 ; CHECK-SAME: (<8 x half> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[FABS:%.*]] = tail call <8 x half> @_Z4fabsDv8_Dh(<8 x half> [[ARG]])
+; CHECK-NEXT:    [[FABS:%.*]] = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> [[ARG]])
 ; CHECK-NEXT:    ret <8 x half> [[FABS]]
 ;
   %fabs = tail call <8 x half> @_Z4fabsDv8_Dh(<8 x half> %arg)
@@ -197,7 +197,7 @@
 define <16 x half> @test_fabs_v16f16(<16 x half> %arg) {
 ; CHECK-LABEL: define <16 x half> @test_fabs_v16f16
 ; CHECK-SAME: (<16 x half> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[FABS:%.*]] = tail call <16 x half> @_Z4fabsDv16_Dh(<16 x half> [[ARG]])
+; CHECK-NEXT:    [[FABS:%.*]] = tail call <16 x half> @llvm.fabs.v16f16(<16 x half> [[ARG]])
 ; CHECK-NEXT:    ret <16 x half> [[FABS]]
 ;
   %fabs = tail call <16 x half> @_Z4fabsDv16_Dh(<16 x half> %arg)
@@ -207,7 +207,7 @@
 define float @test_fabs_f32_nobuiltin_callsite(float %arg) {
 ; CHECK-LABEL: define float @test_fabs_f32_nobuiltin_callsite
 ; CHECK-SAME: (float [[ARG:%.*]]) {
-; CHECK-NEXT:    [[FABS:%.*]] = tail call float @_Z4fabsf(float [[ARG]]) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT:    [[FABS:%.*]] = tail call float @_Z4fabsf(float [[ARG]]) #[[ATTR4:[0-9]+]]
 ; CHECK-NEXT:    ret float [[FABS]]
 ;
   %fabs = tail call float @_Z4fabsf(float %arg) #0
@@ -217,7 +217,7 @@
 define <2 x float> @test_fabs_v2f32_nobuiltin_callsite(<2 x float> %arg) {
 ; CHECK-LABEL: define <2 x float> @test_fabs_v2f32_nobuiltin_callsite
 ; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[FABS:%.*]] = tail call <2 x float> @_Z4fabsDv2_f(<2 x float> [[ARG]]) #[[ATTR3]]
+; CHECK-NEXT:    [[FABS:%.*]] = tail call <2 x float> @_Z4fabsDv2_f(<2 x float> [[ARG]]) #[[ATTR4]]
 ; CHECK-NEXT:    ret <2 x float> [[FABS]]
 ;
   %fabs = tail call <2 x float> @_Z4fabsDv2_f(<2 x float> %arg) #0
@@ -228,7 +228,7 @@
 define float @test_fabs_f32_nobuiltins(float %arg) #1 {
 ; CHECK-LABEL: define float @test_fabs_f32_nobuiltins
 ; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:    [[FABS:%.*]] = tail call float @_Z4fabsf(float [[ARG]]) #[[ATTR3]]
+; CHECK-NEXT:    [[FABS:%.*]] = tail call float @_Z4fabsf(float [[ARG]]) #[[ATTR4]]
 ; CHECK-NEXT:    ret float [[FABS]]
 ;
   %fabs = tail call float @_Z4fabsf(float %arg) #0
@@ -238,7 +238,7 @@
 define <2 x float> @test_fabs_v2f32_nobuiltins(<2 x float> %arg) #1 {
 ; CHECK-LABEL: define <2 x float> @test_fabs_v2f32_nobuiltins
 ; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[FABS:%.*]] = tail call <2 x float> @_Z4fabsDv2_f(<2 x float> [[ARG]]) #[[ATTR3]]
+; CHECK-NEXT:    [[FABS:%.*]] = tail call <2 x float> @_Z4fabsDv2_f(<2 x float> [[ARG]]) #[[ATTR4]]
 ; CHECK-NEXT:    ret <2 x float> [[FABS]]
 ;
   %fabs = tail call <2 x float> @_Z4fabsDv2_f(<2 x float> %arg) #0
@@ -248,7 +248,7 @@
 define float @test_fabs_f32_preserve_flags(float %arg) {
 ; CHECK-LABEL: define float @test_fabs_f32_preserve_flags
 ; CHECK-SAME: (float [[ARG:%.*]]) {
-; CHECK-NEXT:    [[FABS:%.*]] = tail call nnan ninf float @_Z4fabsf(float [[ARG]])
+; CHECK-NEXT:    [[FABS:%.*]] = tail call nnan ninf float @llvm.fabs.f32(float [[ARG]])
 ; CHECK-NEXT:    ret float [[FABS]]
 ;
   %fabs = tail call nnan ninf float @_Z4fabsf(float %arg)
@@ -258,7 +258,7 @@
 define <2 x float> @test_fabs_v2f32_preserve_flags(<2 x float> %arg) {
 ; CHECK-LABEL: define <2 x float> @test_fabs_v2f32_preserve_flags
 ; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[FABS:%.*]] = tail call nnan nsz contract <2 x float> @_Z4fabsDv2_f(<2 x float> [[ARG]])
+; CHECK-NEXT:    [[FABS:%.*]] = tail call nnan nsz contract <2 x float> @llvm.fabs.v2f32(<2 x float> [[ARG]])
 ; CHECK-NEXT:    ret <2 x float> [[FABS]]
 ;
   %fabs = tail call contract nsz nnan <2 x float> @_Z4fabsDv2_f(<2 x float> %arg)
@@ -268,7 +268,7 @@
 define float @test_fabs_f32_preserve_flags_md(float %arg) {
 ; CHECK-LABEL: define float @test_fabs_f32_preserve_flags_md
 ; CHECK-SAME: (float [[ARG:%.*]]) {
-; CHECK-NEXT:    [[FABS:%.*]] = tail call nnan ninf float @_Z4fabsf(float [[ARG]]), !foo !0
+; CHECK-NEXT:    [[FABS:%.*]] = tail call nnan ninf float @llvm.fabs.f32(float [[ARG]]), !foo !0
 ; CHECK-NEXT:    ret float [[FABS]]
 ;
   %fabs = tail call nnan ninf float @_Z4fabsf(float %arg), !foo !0
@@ -278,7 +278,7 @@
 define <2 x float> @test_fabs_v2f32_preserve_flags_md(<2 x float> %arg) {
 ; CHECK-LABEL: define <2 x float> @test_fabs_v2f32_preserve_flags_md
 ; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[FABS:%.*]] = tail call nnan nsz contract <2 x float> @_Z4fabsDv2_f(<2 x float> [[ARG]]), !foo !0
+; CHECK-NEXT:    [[FABS:%.*]] = tail call nnan nsz contract <2 x float> @llvm.fabs.v2f32(<2 x float> [[ARG]]), !foo !0
 ; CHECK-NEXT:    ret <2 x float> [[FABS]]
 ;
   %fabs = tail call contract nsz nnan <2 x float> @_Z4fabsDv2_f(<2 x float> %arg), !foo !0
@@ -312,7 +312,7 @@
 define float @test_fabs_f32_strictfp(float %arg) #3 {
 ; CHECK-LABEL: define float @test_fabs_f32_strictfp
 ; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2:[0-9]+]] {
-; CHECK-NEXT:    [[FABS:%.*]] = tail call nnan float @_Z4fabsf(float [[ARG]]) #[[ATTR2]]
+; CHECK-NEXT:    [[FABS:%.*]] = tail call nnan float @llvm.fabs.f32(float [[ARG]]) #[[ATTR2]]
 ; CHECK-NEXT:    ret float [[FABS]]
 ;
   %fabs = tail call nnan float @_Z4fabsf(float %arg) #3