Index: test/CodeGen/AMDGPU/default-fp-mode.ll
===================================================================
--- test/CodeGen/AMDGPU/default-fp-mode.ll
+++ test/CodeGen/AMDGPU/default-fp-mode.ll
@@ -1,36 +1,62 @@
-; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=FP64-DENORMAL -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=SI -mattr=+fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=FP32-DENORMAL -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=SI -mattr=+fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=BOTH-DENORMAL -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=NO-DENORMAL -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=SI -mattr=+fp64-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=FP64-DENORMAL -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=FP32-DENORMAL -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=BOTH-DENORMAL -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=NO-DENORMAL -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp64-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-; FUNC-LABEL: {{^}}test_kernel:
+; GCN-LABEL: {{^}}test_default_si:
+; GCN: FloatMode: 192
+; GCN: IeeeMode: 0
+define void @test_default_si(float addrspace(1)* %out0, double addrspace(1)* %out1) #0 {
+  store float 0.0, float addrspace(1)* %out0
+  store double 0.0, double addrspace(1)* %out1
+  ret void
+}
 
-; DEFAULT: FloatMode: 192
-; DEFAULT: IeeeMode: 0
+; GCN-LABEL: {{^}}test_default_vi:
+; GCN: FloatMode: 192
+; GCN: IeeeMode: 0
+define void @test_default_vi(float addrspace(1)* %out0, double addrspace(1)* %out1) #1 {
+  store float 0.0, float addrspace(1)* %out0
+  store double 0.0, double addrspace(1)* %out1
+  ret void
+}
 
-; FP64-DENORMAL: FloatMode: 192
-; FP64-DENORMAL: IeeeMode: 0
+; GCN-LABEL: {{^}}test_f64_denormals:
+; GCN: FloatMode: 192
+; GCN: IeeeMode: 0
+define void @test_f64_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #2 {
+  store float 0.0, float addrspace(1)* %out0
+  store double 0.0, double addrspace(1)* %out1
+  ret void
+}
 
-; FP32-DENORMAL: FloatMode: 48
-; FP32-DENORMAL: IeeeMode: 0
+; GCN-LABEL: {{^}}test_f32_denormals:
+; GCN: FloatMode: 48
+; GCN: IeeeMode: 0
+define void @test_f32_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #3 {
+  store float 0.0, float addrspace(1)* %out0
+  store double 0.0, double addrspace(1)* %out1
+  ret void
+}
 
-; BOTH-DENORMAL: FloatMode: 240
-; BOTH-DENORMAL: IeeeMode: 0
+; GCN-LABEL: {{^}}test_f32_f64_denormals:
+; GCN: FloatMode: 240
+; GCN: IeeeMode: 0
+define void @test_f32_f64_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #4 {
+  store float 0.0, float addrspace(1)* %out0
+  store double 0.0, double addrspace(1)* %out1
+  ret void
+}
 
-; NO-DENORMAL: FloatMode: 0
-; NO-DENORMAL: IeeeMode: 0
-define void @test_kernel(float addrspace(1)* %out0, double addrspace(1)* %out1) nounwind {
+; GCN-LABEL: {{^}}test_no_denormals:
+; GCN: FloatMode: 0
+; GCN: IeeeMode: 0
+define void @test_no_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #5 {
   store float 0.0, float addrspace(1)* %out0
   store double 0.0, double addrspace(1)* %out1
   ret void
 }
+
+attributes #0 = { nounwind "target-cpu"="tahiti" }
+attributes #1 = { nounwind "target-cpu"="fiji" }
+attributes #2 = { nounwind "target-features"="+fp64-denormals" }
+attributes #3 = { nounwind "target-features"="+fp32-denormals,-fp64-denormals" } ; fp64 denormals turned off explicitly: the default mode checked above is 192, and this test expects 48
+attributes #4 = { nounwind "target-features"="+fp32-denormals,+fp64-denormals" }
+attributes #5 = { nounwind "target-features"="-fp32-denormals,-fp64-denormals" }
Index: test/CodeGen/AMDGPU/fcanonicalize.ll
===================================================================
--- test/CodeGen/AMDGPU/fcanonicalize.ll
+++ test/CodeGen/AMDGPU/fcanonicalize.ll
@@ -1,5 +1,4 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs -mattr=-fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=NODENORM %s
-; RUN: llc -march=amdgcn -verify-machineinstrs -mattr=+fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=DENORM %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
 declare float @llvm.canonicalize.f32(float) #0
 declare double @llvm.canonicalize.f64(double) #0
@@ -68,21 +67,37 @@
   ret void
 }
 
-; GCN-LABEL: {{^}}test_fold_canonicalize_denormal0_f32:
-; NODENORM: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
-; DENORM: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fffff{{$}}
+; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_f32:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
+; GCN: buffer_store_dword [[REG]]
+define void @test_no_denormals_fold_canonicalize_denormal0_f32(float addrspace(1)* %out) #2 {
+  %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float))
+  store float %canonicalized, float addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_f32:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fffff{{$}}
 ; GCN: buffer_store_dword [[REG]]
-define void @test_fold_canonicalize_denormal0_f32(float addrspace(1)* %out) #1 {
+define void @test_denormals_fold_canonicalize_denormal0_f32(float addrspace(1)* %out) #3 {
   %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float))
   store float %canonicalized, float addrspace(1)* %out
   ret void
 }
 
-; GCN-LABEL: {{^}}test_fold_canonicalize_denormal1_f32:
-; NODENORM: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
-; DENORM: v_mov_b32_e32 [[REG:v[0-9]+]], 0x807fffff{{$}}
+; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_f32:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
+; GCN: buffer_store_dword [[REG]]
+define void @test_no_denormals_fold_canonicalize_denormal1_f32(float addrspace(1)* %out) #2 {
+  %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float))
+  store float %canonicalized, float addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_f32:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x807fffff{{$}}
 ; GCN: buffer_store_dword [[REG]]
-define void @test_fold_canonicalize_denormal1_f32(float addrspace(1)* %out) #1 {
+define void @test_denormals_fold_canonicalize_denormal1_f32(float addrspace(1)* %out) #3 {
   %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float))
   store float %canonicalized, float addrspace(1)* %out
   ret void
@@ -220,27 +235,41 @@
   ret void
 }
 
-; GCN-LABEL: {{^}}test_fold_canonicalize_denormal0_f64:
-; DENORM-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], -1{{$}}
-; DENORM-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0xfffff{{$}}
+; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_f64:
+; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
+; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
+; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @test_no_denormals_fold_canonicalize_denormal0_f64(double addrspace(1)* %out) #2 {
+  %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 4503599627370495 to double))
+  store double %canonicalized, double addrspace(1)* %out
+  ret void
+}
 
-; NODENORM: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
-; NODENORM: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
+; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_f64:
+; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], -1{{$}}
+; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0xfffff{{$}}
 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
-define void @test_fold_canonicalize_denormal0_f64(double addrspace(1)* %out) #1 {
+define void @test_denormals_fold_canonicalize_denormal0_f64(double addrspace(1)* %out) #3 {
   %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 4503599627370495 to double))
   store double %canonicalized, double addrspace(1)* %out
   ret void
 }
 
-; GCN-LABEL: {{^}}test_fold_canonicalize_denormal1_f64:
-; DENORM-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], -1{{$}}
-; DENORM-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x800fffff{{$}}
+; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_f64:
+; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
+; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
+; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @test_no_denormals_fold_canonicalize_denormal1_f64(double addrspace(1)* %out) #2 {
+  %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
+  store double %canonicalized, double addrspace(1)* %out
+  ret void
+}
 
-; NODENORM: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
-; NODENORM: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
+; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_f64:
+; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], -1{{$}}
+; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x800fffff{{$}}
 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
-define void @test_fold_canonicalize_denormal1_f64(double addrspace(1)* %out) #1 {
+define void @test_denormals_fold_canonicalize_denormal1_f64(double addrspace(1)* %out) #3 {
   %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
   store double %canonicalized, double addrspace(1)* %out
   ret void
@@ -318,3 +347,5 @@
 
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
+attributes #2 = { nounwind "target-features"="-fp32-denormals,-fp64-denormals" }
+attributes #3 = { nounwind "target-features"="+fp32-denormals,+fp64-denormals" }
Index: test/CodeGen/AMDGPU/ffloor.f64.ll
===================================================================
--- test/CodeGen/AMDGPU/ffloor.f64.ll
+++ test/CodeGen/AMDGPU/ffloor.f64.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
 
@@ -67,15 +67,16 @@
   ret void
 }
 
-; FIXME-FUNC-LABEL: {{^}}ffloor_v3f64:
-; FIXME-CI: v_floor_f64_e32
-; FIXME-CI: v_floor_f64_e32
-; FIXME-CI: v_floor_f64_e32
-; define void @ffloor_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
-;   %y = call <3 x double> @llvm.floor.v3f64(<3 x double> %x) nounwind readnone
-;   store <3 x double> %y, <3 x double> addrspace(1)* %out
-;   ret void
-; }
+; FUNC-LABEL: {{^}}ffloor_v3f64:
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI-NOT: v_floor_f64_e32
+define void @ffloor_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
+  %y = call <3 x double> @llvm.floor.v3f64(<3 x double> %x) nounwind readnone
+  store <3 x double> %y, <3 x double> addrspace(1)* %out
+  ret void
+}
 
 ; FUNC-LABEL: {{^}}ffloor_v4f64:
 ; CI: v_floor_f64_e32
Index: test/CodeGen/AMDGPU/hsa-fp-mode.ll
===================================================================
--- test/CodeGen/AMDGPU/hsa-fp-mode.ll
+++ test/CodeGen/AMDGPU/hsa-fp-mode.ll
@@ -1,10 +1,68 @@
-; RUN: llc -march=amdgcn -mcpu=kaveri -mtriple=amdgcn-unknown-amdhsa -mattr=-fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=FP64-DENORMAL -check-prefix=COMMON %s
+; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-; COMMON-LABEL: {{^}}test_kernel:
-; COMMON-DENORMAL: compute_pgm_rsrc1_float_mode = compute_pgm_rsrc1_float_mode = 192
-; COMMON-DENORMAL: compute_pgm_rsrc1_dx10_clamp = 1
-define void @test_kernel(float addrspace(1)* %out0, double addrspace(1)* %out1) nounwind {
+; GCN-LABEL: {{^}}test_default_ci:
+; GCN: compute_pgm_rsrc1_float_mode = 192
+; GCN: compute_pgm_rsrc1_dx10_clamp = 1
+; GCN: compute_pgm_rsrc1_ieee_mode = 0
+define void @test_default_ci(float addrspace(1)* %out0, double addrspace(1)* %out1) #0 {
   store float 0.0, float addrspace(1)* %out0
   store double 0.0, double addrspace(1)* %out1
   ret void
 }
+
+; GCN-LABEL: {{^}}test_default_vi:
+; GCN: compute_pgm_rsrc1_float_mode = 192
+; GCN: compute_pgm_rsrc1_dx10_clamp = 1
+; GCN: compute_pgm_rsrc1_ieee_mode = 0
+define void @test_default_vi(float addrspace(1)* %out0, double addrspace(1)* %out1) #1 {
+  store float 0.0, float addrspace(1)* %out0
+  store double 0.0, double addrspace(1)* %out1
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_f64_denormals:
+; GCN: compute_pgm_rsrc1_float_mode = 192
+; GCN: compute_pgm_rsrc1_dx10_clamp = 1
+; GCN: compute_pgm_rsrc1_ieee_mode = 0
+define void @test_f64_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #2 {
+  store float 0.0, float addrspace(1)* %out0
+  store double 0.0, double addrspace(1)* %out1
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_f32_denormals:
+; GCN: compute_pgm_rsrc1_float_mode = 48
+; GCN: compute_pgm_rsrc1_dx10_clamp = 1
+; GCN: compute_pgm_rsrc1_ieee_mode = 0
+define void @test_f32_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #3 {
+  store float 0.0, float addrspace(1)* %out0
+  store double 0.0, double addrspace(1)* %out1
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_f32_f64_denormals:
+; GCN: compute_pgm_rsrc1_float_mode = 240
+; GCN: compute_pgm_rsrc1_dx10_clamp = 1
+; GCN: compute_pgm_rsrc1_ieee_mode = 0
+define void @test_f32_f64_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #4 {
+  store float 0.0, float addrspace(1)* %out0
+  store double 0.0, double addrspace(1)* %out1
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_no_denormals:
+; GCN: compute_pgm_rsrc1_float_mode = 0
+; GCN: compute_pgm_rsrc1_dx10_clamp = 1
+; GCN: compute_pgm_rsrc1_ieee_mode = 0
+define void @test_no_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #5 {
+  store float 0.0, float addrspace(1)* %out0
+  store double 0.0, double addrspace(1)* %out1
+  ret void
+}
+
+attributes #0 = { nounwind "target-cpu"="kaveri" }
+attributes #1 = { nounwind "target-cpu"="fiji" }
+attributes #2 = { nounwind "target-features"="-fp32-denormals,+fp64-denormals" }
+attributes #3 = { nounwind "target-features"="+fp32-denormals,-fp64-denormals" }
+attributes #4 = { nounwind "target-features"="+fp32-denormals,+fp64-denormals" }
+attributes #5 = { nounwind "target-features"="-fp32-denormals,-fp64-denormals" }
Index: test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
@@ -1,9 +1,4 @@
-; RUN: llc -march=amdgcn -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
-; XUN: llc -march=amdgcn -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE-SPDENORM -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
-; XUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE-SPDENORM -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
 declare float @llvm.amdgcn.rcp.f32(float) #0
 declare double @llvm.amdgcn.rcp.f64(double) #0
@@ -11,71 +6,123 @@
 declare double @llvm.sqrt.f64(double) #0
 declare float @llvm.sqrt.f32(float) #0
 
+; FUNC-LABEL: {{^}}rcp_undef_f32:
+; SI-NOT: v_rcp_f32
+define void @rcp_undef_f32(float addrspace(1)* %out) #1 {
+  %rcp = call float @llvm.amdgcn.rcp.f32(float undef)
+  store float %rcp, float addrspace(1)* %out, align 4
+  ret void
+}
 
-; FUNC-LABEL: {{^}}rcp_f32:
-; SI: v_rcp_f32_e32
-define void @rcp_f32(float addrspace(1)* %out, float %src) #1 {
-  %rcp = call float @llvm.amdgcn.rcp.f32(float %src) #0
+; FUNC-LABEL: {{^}}safe_no_fp32_denormals_rcp_f32:
+; SI: v_rcp_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}
+; SI-NOT: [[RESULT]]
+; SI: buffer_store_dword [[RESULT]]
+define void @safe_no_fp32_denormals_rcp_f32(float addrspace(1)* %out, float %src) #1 {
+  %rcp = fdiv float 1.0, %src
   store float %rcp, float addrspace(1)* %out, align 4
   ret void
 }
 
-; FUNC-LABEL: {{^}}rcp_pat_f32:
+; FUNC-LABEL: {{^}}unsafe_f32_denormals_rcp_pat_f32:
+; SI: v_rcp_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}
+; SI-NOT: [[RESULT]]
+; SI: buffer_store_dword [[RESULT]]
+define void @unsafe_f32_denormals_rcp_pat_f32(float addrspace(1)* %out, float %src) #4 {
+  %rcp = fdiv float 1.0, %src
+  store float %rcp, float addrspace(1)* %out, align 4
+  ret void
+}
 
-; SI-SAFE: v_rcp_f32_e32
-; XSI-SAFE-SPDENORM-NOT: v_rcp_f32_e32
-define void @rcp_pat_f32(float addrspace(1)* %out, float %src) #1 {
+; FUNC-LABEL: {{^}}safe_f32_denormals_rcp_pat_f32:
+; SI: v_div_scale_f32
+define void @safe_f32_denormals_rcp_pat_f32(float addrspace(1)* %out, float %src) #3 {
   %rcp = fdiv float 1.0, %src
   store float %rcp, float addrspace(1)* %out, align 4
   ret void
 }
 
-; FUNC-LABEL: {{^}}rsq_rcp_pat_f32:
-; SI-UNSAFE: v_rsq_f32_e32
-; SI-SAFE: v_sqrt_f32_e32
-; SI-SAFE: v_rcp_f32_e32
-define void @rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) #1 {
-  %sqrt = call float @llvm.sqrt.f32(float %src) #0
-  %rcp = call float @llvm.amdgcn.rcp.f32(float %sqrt) #0
+; FUNC-LABEL: {{^}}safe_rsq_rcp_pat_f32:
+; SI: v_sqrt_f32_e32
+; SI: v_rcp_f32_e32
+define void @safe_rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) #1 {
+  %sqrt = call float @llvm.sqrt.f32(float %src)
+  %rcp = call float @llvm.amdgcn.rcp.f32(float %sqrt)
+  store float %rcp, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}unsafe_rsq_rcp_pat_f32:
+; SI: v_rsq_f32_e32
+define void @unsafe_rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) #2 {
+  %sqrt = call float @llvm.sqrt.f32(float %src)
+  %rcp = call float @llvm.amdgcn.rcp.f32(float %sqrt)
   store float %rcp, float addrspace(1)* %out, align 4
   ret void
 }
 
 ; FUNC-LABEL: {{^}}rcp_f64:
-; SI: v_rcp_f64_e32
+; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
+; SI-NOT: [[RESULT]]
+; SI: buffer_store_dwordx2 [[RESULT]]
 define void @rcp_f64(double addrspace(1)* %out, double %src) #1 {
-  %rcp = call double @llvm.amdgcn.rcp.f64(double %src) #0
+  %rcp = call double @llvm.amdgcn.rcp.f64(double %src)
+  store double %rcp, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}unsafe_rcp_f64:
+; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
+; SI-NOT: [[RESULT]]
+; SI: buffer_store_dwordx2 [[RESULT]]
+define void @unsafe_rcp_f64(double addrspace(1)* %out, double %src) #2 {
+  %rcp = call double @llvm.amdgcn.rcp.f64(double %src)
   store double %rcp, double addrspace(1)* %out, align 8
   ret void
 }
 
 ; FUNC-LABEL: {{^}}rcp_pat_f64:
-; SI: v_rcp_f64_e32
+; SI: v_div_scale_f64
 define void @rcp_pat_f64(double addrspace(1)* %out, double %src) #1 {
   %rcp = fdiv double 1.0, %src
   store double %rcp, double addrspace(1)* %out, align 8
   ret void
 }
 
-; FUNC-LABEL: {{^}}rsq_rcp_pat_f64:
-; SI-UNSAFE: v_rsq_f64_e32
-; SI-SAFE-NOT: v_rsq_f64_e32
-; SI-SAFE: v_sqrt_f64
-; SI-SAFE: v_rcp_f64
-define void @rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) #1 {
-  %sqrt = call double @llvm.sqrt.f64(double %src) #0
-  %rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt) #0
+; FUNC-LABEL: {{^}}unsafe_rcp_pat_f64:
+; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
+; SI-NOT: [[RESULT]]
+; SI: buffer_store_dwordx2 [[RESULT]]
+define void @unsafe_rcp_pat_f64(double addrspace(1)* %out, double %src) #2 {
+  %rcp = fdiv double 1.0, %src
   store double %rcp, double addrspace(1)* %out, align 8
   ret void
 }
 
-; FUNC-LABEL: {{^}}rcp_undef_f32:
-; SI-NOT: v_rcp_f32
-define void @rcp_undef_f32(float addrspace(1)* %out) #1 {
-  %rcp = call float @llvm.amdgcn.rcp.f32(float undef) #0
-  store float %rcp, float addrspace(1)* %out, align 4
+; FUNC-LABEL: {{^}}safe_rsq_rcp_pat_f64:
+; SI-NOT: v_rsq_f64_e32
+; SI: v_sqrt_f64
+; SI: v_rcp_f64
+define void @safe_rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) #1 {
+  %sqrt = call double @llvm.sqrt.f64(double %src)
+  %rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt)
+  store double %rcp, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}unsafe_rsq_rcp_pat_f64:
+; SI: v_rsq_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
+; SI-NOT: [[RESULT]]
+; SI: buffer_store_dwordx2 [[RESULT]]
+define void @unsafe_rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) #2 {
+  %sqrt = call double @llvm.sqrt.f64(double %src)
+  %rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt)
+  store double %rcp, double addrspace(1)* %out, align 8
   ret void
 }
 
 attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
+attributes #1 = { nounwind "unsafe-fp-math"="false" "target-features"="-fp32-denormals" }
+attributes #2 = { nounwind "unsafe-fp-math"="true" "target-features"="-fp32-denormals" }
+attributes #3 = { nounwind "unsafe-fp-math"="false" "target-features"="+fp32-denormals" }
+attributes #4 = { nounwind "unsafe-fp-math"="true" "target-features"="+fp32-denormals" }
Index: test/CodeGen/AMDGPU/llvm.sin.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.sin.ll
+++ test/CodeGen/AMDGPU/llvm.sin.ll
@@ -1,8 +1,5 @@
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=SI-SAFE -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=SI -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=SI-UNSAFE -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=SI-SAFE -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=SI-UNSAFE -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
 ; FUNC-LABEL: sin_f32
 ; EG: MULADD_IEEE *
@@ -10,58 +7,91 @@
 ; EG: ADD *
 ; EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
 ; EG-NOT: SIN
+
 ; SI: v_mul_f32
 ; SI: v_fract_f32
 ; SI: v_sin_f32
 ; SI-NOT: v_sin_f32
-
 define void @sin_f32(float addrspace(1)* %out, float %x) #1 {
    %sin = call float @llvm.sin.f32(float %x)
    store float %sin, float addrspace(1)* %out
    ret void
 }
 
-; FUNC-LABEL: {{^}}sin_3x_f32:
-; SI-UNSAFE-NOT: v_add_f32
-; SI-UNSAFE: 0x3ef47644
-; SI-UNSAFE: v_mul_f32
-; SI-SAFE: v_mul_f32
-; SI-SAFE: v_mul_f32
+; FUNC-LABEL: {{^}}safe_sin_3x_f32:
+; SI: v_mul_f32
+; SI: v_mul_f32
+; SI: v_fract_f32
+; SI: v_sin_f32
+; SI-NOT: v_sin_f32
+define void @safe_sin_3x_f32(float addrspace(1)* %out, float %x) #1 {
+  %y = fmul float 3.0, %x
+  %sin = call float @llvm.sin.f32(float %y)
+  store float %sin, float addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}unsafe_sin_3x_f32:
+; SI-NOT: v_add_f32
+; SI: 0x3ef47644
+; SI: v_mul_f32
 ; SI: v_fract_f32
 ; SI: v_sin_f32
 ; SI-NOT: v_sin_f32
-define void @sin_3x_f32(float addrspace(1)* %out, float %x) #1 {
+define void @unsafe_sin_3x_f32(float addrspace(1)* %out, float %x) #2 {
   %y = fmul float 3.0, %x
   %sin = call float @llvm.sin.f32(float %y)
   store float %sin, float addrspace(1)* %out
   ret void
 }
 
-; FUNC-LABEL: {{^}}sin_2x_f32:
-; SI-UNSAFE-NOT: v_add_f32
-; SI-UNSAFE: 0x3ea2f983
-; SI-UNSAFE: v_mul_f32
-; SI-SAFE: v_add_f32
-; SI-SAFE: v_mul_f32
+; FUNC-LABEL: {{^}}safe_sin_2x_f32:
+; SI: v_add_f32
+; SI: v_mul_f32
 ; SI: v_fract_f32
 ; SI: v_sin_f32
 ; SI-NOT: v_sin_f32
-define void @sin_2x_f32(float addrspace(1)* %out, float %x) #1 {
+define void @safe_sin_2x_f32(float addrspace(1)* %out, float %x) #1 {
   %y = fmul float 2.0, %x
   %sin = call float @llvm.sin.f32(float %y)
   store float %sin, float addrspace(1)* %out
   ret void
 }
 
-; FUNC-LABEL: {{^}}test_2sin_f32:
-; SI-UNSAFE: 0x3ea2f983
-; SI-UNSAFE: v_mul_f32
-; SI-SAFE: v_add_f32
-; SI-SAFE: v_mul_f32
+; FUNC-LABEL: {{^}}unsafe_sin_2x_f32:
+; SI-NOT: v_add_f32
+; SI: 0x3ea2f983
+; SI: v_mul_f32
 ; SI: v_fract_f32
 ; SI: v_sin_f32
 ; SI-NOT: v_sin_f32
-define void @test_2sin_f32(float addrspace(1)* %out, float %x) #1 {
+define void @unsafe_sin_2x_f32(float addrspace(1)* %out, float %x) #2 {
+  %y = fmul float 2.0, %x
+  %sin = call float @llvm.sin.f32(float %y)
+  store float %sin, float addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}test_safe_2sin_f32:
+; SI: v_add_f32
+; SI: v_mul_f32
+; SI: v_fract_f32
+; SI: v_sin_f32
+; SI-NOT: v_sin_f32
+define void @test_safe_2sin_f32(float addrspace(1)* %out, float %x) #1 {
+   %y = fmul float 2.0, %x
+   %sin = call float @llvm.sin.f32(float %y)
+   store float %sin, float addrspace(1)* %out
+   ret void
+}
+
+; FUNC-LABEL: {{^}}test_unsafe_2sin_f32:
+; SI: 0x3ea2f983
+; SI: v_mul_f32
+; SI: v_fract_f32
+; SI: v_sin_f32
+; SI-NOT: v_sin_f32
+define void @test_unsafe_2sin_f32(float addrspace(1)* %out, float %x) #2 {
    %y = fmul float 2.0, %x
    %sin = call float @llvm.sin.f32(float %y)
    store float %sin, float addrspace(1)* %out
@@ -74,17 +104,21 @@
 ; EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
 ; EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
 ; EG-NOT: SIN
+
 ; SI: v_sin_f32
 ; SI: v_sin_f32
 ; SI: v_sin_f32
 ; SI: v_sin_f32
 ; SI-NOT: v_sin_f32
-
 define void @sin_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %vx) #1 {
    %sin = call <4 x float> @llvm.sin.v4f32( <4 x float> %vx)
    store <4 x float> %sin, <4 x float> addrspace(1)* %out
    ret void
 }
 
-declare float @llvm.sin.f32(float) readnone
-declare <4 x float> @llvm.sin.v4f32(<4 x float>) readnone
+declare float @llvm.sin.f32(float) #0
+declare <4 x float> @llvm.sin.v4f32(<4 x float>) #0
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind "unsafe-fp-math"="false" }
+attributes #2 = { nounwind "unsafe-fp-math"="true" }