Index: test/CodeGen/AMDGPU/default-fp-mode.ll =================================================================== --- test/CodeGen/AMDGPU/default-fp-mode.ll +++ test/CodeGen/AMDGPU/default-fp-mode.ll @@ -1,36 +1,62 @@ -; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=FP64-DENORMAL -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=SI -mattr=+fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=FP32-DENORMAL -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=SI -mattr=+fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=BOTH-DENORMAL -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=NO-DENORMAL -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=SI -mattr=+fp64-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=FP64-DENORMAL -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=FP32-DENORMAL -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=BOTH-DENORMAL -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=NO-DENORMAL -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp64-denormals < %s | FileCheck 
-check-prefix=DEFAULT -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; FUNC-LABEL: {{^}}test_kernel: +; GCN-LABEL: {{^}}test_default_si: +; GCN: FloatMode: 192 +; GCN: IeeeMode: 0 +define void @test_default_si(float addrspace(1)* %out0, double addrspace(1)* %out1) #0 { + store float 0.0, float addrspace(1)* %out0 + store double 0.0, double addrspace(1)* %out1 + ret void +} -; DEFAULT: FloatMode: 192 -; DEFAULT: IeeeMode: 0 +; GCN-LABEL: {{^}}test_default_vi: +; GCN: FloatMode: 192 +; GCN: IeeeMode: 0 +define void @test_default_vi(float addrspace(1)* %out0, double addrspace(1)* %out1) #1 { + store float 0.0, float addrspace(1)* %out0 + store double 0.0, double addrspace(1)* %out1 + ret void +} -; FP64-DENORMAL: FloatMode: 192 -; FP64-DENORMAL: IeeeMode: 0 +; GCN-LABEL: {{^}}test_f64_denormals: +; GCN: FloatMode: 192 +; GCN: IeeeMode: 0 +define void @test_f64_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #2 { + store float 0.0, float addrspace(1)* %out0 + store double 0.0, double addrspace(1)* %out1 + ret void +} -; FP32-DENORMAL: FloatMode: 48 -; FP32-DENORMAL: IeeeMode: 0 +; GCN-LABEL: {{^}}test_f32_denormals: +; GCN: FloatMode: 48 +; GCN: IeeeMode: 0 +define void @test_f32_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #3 { + store float 0.0, float addrspace(1)* %out0 + store double 0.0, double addrspace(1)* %out1 + ret void +} -; BOTH-DENORMAL: FloatMode: 240 -; BOTH-DENORMAL: IeeeMode: 0 +; GCN-LABEL: {{^}}test_f32_f64_denormals: +; GCN: FloatMode: 240 +; GCN: IeeeMode: 0 +define void @test_f32_f64_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #4 { + store float 0.0, float addrspace(1)* %out0 + store double 0.0, double addrspace(1)* %out1 + ret void +} -; NO-DENORMAL: FloatMode: 0 -; NO-DENORMAL: IeeeMode: 0 -define void @test_kernel(float addrspace(1)* %out0, double addrspace(1)* %out1) nounwind { +; GCN-LABEL: {{^}}test_no_denormals: +; GCN:
FloatMode: 0 +; GCN: IeeeMode: 0 +define void @test_no_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #5 { store float 0.0, float addrspace(1)* %out0 store double 0.0, double addrspace(1)* %out1 ret void } + +attributes #0 = { nounwind "target-cpu"="tahiti" } +attributes #1 = { nounwind "target-cpu"="fiji" } +attributes #2 = { nounwind "target-features"="-fp32-denormals,+fp64-denormals" } +attributes #3 = { nounwind "target-features"="+fp32-denormals,-fp64-denormals" } +attributes #4 = { nounwind "target-features"="+fp32-denormals,+fp64-denormals" } +attributes #5 = { nounwind "target-features"="-fp32-denormals,-fp64-denormals" } Index: test/CodeGen/AMDGPU/fcanonicalize.ll =================================================================== --- test/CodeGen/AMDGPU/fcanonicalize.ll +++ test/CodeGen/AMDGPU/fcanonicalize.ll @@ -1,5 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs -mattr=-fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=NODENORM %s -; RUN: llc -march=amdgcn -verify-machineinstrs -mattr=+fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=DENORM %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare float @llvm.canonicalize.f32(float) #0 declare double @llvm.canonicalize.f64(double) #0 @@ -68,21 +67,37 @@ ret void } -; GCN-LABEL: {{^}}test_fold_canonicalize_denormal0_f32: -; NODENORM: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}} -; DENORM: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fffff{{$}} +; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_f32: +; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}} +; GCN: buffer_store_dword [[REG]] +define void @test_no_denormals_fold_canonicalize_denormal0_f32(float addrspace(1)* %out) #1 { + %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float)) + store float %canonicalized, float addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_f32: +; GCN:
v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fffff{{$}} ; GCN: buffer_store_dword [[REG]] -define void @test_fold_canonicalize_denormal0_f32(float addrspace(1)* %out) #1 { +define void @test_denormals_fold_canonicalize_denormal0_f32(float addrspace(1)* %out) #3 { %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float)) store float %canonicalized, float addrspace(1)* %out ret void } -; GCN-LABEL: {{^}}test_fold_canonicalize_denormal1_f32: -; NODENORM: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}} -; DENORM: v_mov_b32_e32 [[REG:v[0-9]+]], 0x807fffff{{$}} +; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_f32: +; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}} +; GCN: buffer_store_dword [[REG]] +define void @test_no_denormals_fold_canonicalize_denormal1_f32(float addrspace(1)* %out) #1 { + %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float)) + store float %canonicalized, float addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_f32: +; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x807fffff{{$}} ; GCN: buffer_store_dword [[REG]] -define void @test_fold_canonicalize_denormal1_f32(float addrspace(1)* %out) #1 { +define void @test_denormals_fold_canonicalize_denormal1_f32(float addrspace(1)* %out) #3 { %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float)) store float %canonicalized, float addrspace(1)* %out ret void @@ -220,27 +235,41 @@ ret void } -; GCN-LABEL: {{^}}test_fold_canonicalize_denormal0_f64: -; DENORM-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], -1{{$}} -; DENORM-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0xfffff{{$}} +; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_f64: +; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}} +; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}} +; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} +define void @test_no_denormals_fold_canonicalize_denormal0_f64(double addrspace(1)* %out) #2 { + 
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 4503599627370495 to double)) + store double %canonicalized, double addrspace(1)* %out + ret void +} -; NODENORM: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}} -; NODENORM: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}} +; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_f64: +; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], -1{{$}} +; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0xfffff{{$}} ; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} -define void @test_fold_canonicalize_denormal0_f64(double addrspace(1)* %out) #1 { +define void @test_denormals_fold_canonicalize_denormal0_f64(double addrspace(1)* %out) #3 { %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 4503599627370495 to double)) store double %canonicalized, double addrspace(1)* %out ret void } -; GCN-LABEL: {{^}}test_fold_canonicalize_denormal1_f64: -; DENORM-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], -1{{$}} -; DENORM-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x800fffff{{$}} +; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_f64: +; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}} +; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}} +; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} +define void @test_no_denormals_fold_canonicalize_denormal1_f64(double addrspace(1)* %out) #2 { + %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double)) + store double %canonicalized, double addrspace(1)* %out + ret void +} -; NODENORM: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}} -; NODENORM: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}} +; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_f64: +; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], -1{{$}} +; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x800fffff{{$}} ; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} -define void @test_fold_canonicalize_denormal1_f64(double addrspace(1)* %out) #1 { +define void 
@test_denormals_fold_canonicalize_denormal1_f64(double addrspace(1)* %out) #3 { %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double)) store double %canonicalized, double addrspace(1)* %out ret void @@ -318,3 +347,5 @@ attributes #0 = { nounwind readnone } attributes #1 = { nounwind } +attributes #2 = { nounwind "target-features"="-fp32-denormals,-fp64-denormals" } +attributes #3 = { nounwind "target-features"="+fp32-denormals,+fp64-denormals" } Index: test/CodeGen/AMDGPU/ffloor.f64.ll =================================================================== --- test/CodeGen/AMDGPU/ffloor.f64.ll +++ test/CodeGen/AMDGPU/ffloor.f64.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s @@ -67,15 +67,16 @@ ret void } -; FIXME-FUNC-LABEL: {{^}}ffloor_v3f64: -; FIXME-CI: v_floor_f64_e32 -; FIXME-CI: v_floor_f64_e32 -; FIXME-CI: v_floor_f64_e32 -; define void @ffloor_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) { -; %y = call <3 x double> @llvm.floor.v3f64(<3 x double> %x) nounwind readnone -; store <3 x double> %y, <3 x double> addrspace(1)* %out -; ret void -; } +; FUNC-LABEL: {{^}}ffloor_v3f64: +; CI: v_floor_f64_e32 +; CI: v_floor_f64_e32 +; CI: v_floor_f64_e32 +; CI-NOT: v_floor_f64_e32 +define void @ffloor_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) { + %y = call <3 x double> @llvm.floor.v3f64(<3 x double> %x) nounwind readnone + store <3 x double> %y, <3 x double> addrspace(1)* %out + ret void +} ; 
FUNC-LABEL: {{^}}ffloor_v4f64: ; CI: v_floor_f64_e32 Index: test/CodeGen/AMDGPU/hsa-fp-mode.ll =================================================================== --- test/CodeGen/AMDGPU/hsa-fp-mode.ll +++ test/CodeGen/AMDGPU/hsa-fp-mode.ll @@ -1,10 +1,68 @@ -; RUN: llc -march=amdgcn -mcpu=kaveri -mtriple=amdgcn-unknown-amdhsa -mattr=-fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=FP64-DENORMAL -check-prefix=COMMON %s +; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; COMMON-LABEL: {{^}}test_kernel: -; COMMON-DENORMAL: compute_pgm_rsrc1_float_mode = compute_pgm_rsrc1_float_mode = 192 -; COMMON-DENORMAL: compute_pgm_rsrc1_dx10_clamp = 1 -define void @test_kernel(float addrspace(1)* %out0, double addrspace(1)* %out1) nounwind { +; GCN-LABEL: {{^}}test_default_ci: +; GCN: compute_pgm_rsrc1_float_mode = 192 +; GCN: compute_pgm_rsrc1_dx10_clamp = 1 +; GCN: compute_pgm_rsrc1_ieee_mode = 0 +define void @test_default_ci(float addrspace(1)* %out0, double addrspace(1)* %out1) #0 { store float 0.0, float addrspace(1)* %out0 store double 0.0, double addrspace(1)* %out1 ret void } + +; GCN-LABEL: {{^}}test_default_vi: +; GCN: compute_pgm_rsrc1_float_mode = 192 +; GCN: compute_pgm_rsrc1_dx10_clamp = 1 +; GCN: compute_pgm_rsrc1_ieee_mode = 0 +define void @test_default_vi(float addrspace(1)* %out0, double addrspace(1)* %out1) #1 { + store float 0.0, float addrspace(1)* %out0 + store double 0.0, double addrspace(1)* %out1 + ret void +} + +; GCN-LABEL: {{^}}test_f64_denormals: +; GCN: compute_pgm_rsrc1_float_mode = 192 +; GCN: compute_pgm_rsrc1_dx10_clamp = 1 +; GCN: compute_pgm_rsrc1_ieee_mode = 0 +define void @test_f64_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #2 { + store float 0.0, float addrspace(1)* %out0 + store double 0.0, double addrspace(1)* %out1 + ret void +} + +; GCN-LABEL: {{^}}test_f32_denormals: +; GCN: compute_pgm_rsrc1_float_mode = 48 +; GCN: compute_pgm_rsrc1_dx10_clamp = 1 +; 
GCN: compute_pgm_rsrc1_ieee_mode = 0 +define void @test_f32_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #3 { + store float 0.0, float addrspace(1)* %out0 + store double 0.0, double addrspace(1)* %out1 + ret void +} + +; GCN-LABEL: {{^}}test_f32_f64_denormals: +; GCN: compute_pgm_rsrc1_float_mode = 240 +; GCN: compute_pgm_rsrc1_dx10_clamp = 1 +; GCN: compute_pgm_rsrc1_ieee_mode = 0 +define void @test_f32_f64_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #4 { + store float 0.0, float addrspace(1)* %out0 + store double 0.0, double addrspace(1)* %out1 + ret void +} + +; GCN-LABEL: {{^}}test_no_denormals: +; GCN: compute_pgm_rsrc1_float_mode = 0 +; GCN: compute_pgm_rsrc1_dx10_clamp = 1 +; GCN: compute_pgm_rsrc1_ieee_mode = 0 +define void @test_no_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #5 { + store float 0.0, float addrspace(1)* %out0 + store double 0.0, double addrspace(1)* %out1 + ret void +} + +attributes #0 = { nounwind "target-cpu"="kaveri" } +attributes #1 = { nounwind "target-cpu"="fiji" } +attributes #2 = { nounwind "target-features"="-fp32-denormals,+fp64-denormals" } +attributes #3 = { nounwind "target-features"="+fp32-denormals,-fp64-denormals" } +attributes #4 = { nounwind "target-features"="+fp32-denormals,+fp64-denormals" } +attributes #5 = { nounwind "target-features"="-fp32-denormals,-fp64-denormals" } Index: test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll @@ -1,9 +1,4 @@ -; RUN: llc -march=amdgcn -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s -; XUN: llc -march=amdgcn -mattr=+fp32-denormals 
-verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE-SPDENORM -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s -; XUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE-SPDENORM -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare float @llvm.amdgcn.rcp.f32(float) #0 declare double @llvm.amdgcn.rcp.f64(double) #0 @@ -11,71 +6,123 @@ declare double @llvm.sqrt.f64(double) #0 declare float @llvm.sqrt.f32(float) #0 +; FUNC-LABEL: {{^}}rcp_undef_f32: +; SI-NOT: v_rcp_f32 +define void @rcp_undef_f32(float addrspace(1)* %out) #1 { + %rcp = call float @llvm.amdgcn.rcp.f32(float undef) + store float %rcp, float addrspace(1)* %out, align 4 + ret void +} -; FUNC-LABEL: {{^}}rcp_f32: -; SI: v_rcp_f32_e32 -define void @rcp_f32(float addrspace(1)* %out, float %src) #1 { - %rcp = call float @llvm.amdgcn.rcp.f32(float %src) #0 +; FUNC-LABEL: {{^}}safe_no_fp32_denormals_rcp_f32: +; SI: v_rcp_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}} +; SI-NOT: [[RESULT]] +; SI: buffer_store_dword [[RESULT]] +define void @safe_no_fp32_denormals_rcp_f32(float addrspace(1)* %out, float %src) #1 { + %rcp = fdiv float 1.0, %src store float %rcp, float addrspace(1)* %out, align 4 ret void } -; FUNC-LABEL: {{^}}rcp_pat_f32: +; FUNC-LABEL: {{^}}unsafe_f32_denormals_rcp_pat_f32: +; SI: v_rcp_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}} +; SI-NOT: [[RESULT]] +; SI: buffer_store_dword [[RESULT]] +define void @unsafe_f32_denormals_rcp_pat_f32(float addrspace(1)* %out, float %src) #4 { + %rcp = fdiv float 1.0, %src + store
float %rcp, float addrspace(1)* %out, align 4 + ret void +} -; SI-SAFE: v_rcp_f32_e32 -; XSI-SAFE-SPDENORM-NOT: v_rcp_f32_e32 -define void @rcp_pat_f32(float addrspace(1)* %out, float %src) #1 { +; FUNC-LABEL: {{^}}safe_f32_denormals_rcp_pat_f32: +; SI: v_div_scale_f32 +define void @safe_f32_denormals_rcp_pat_f32(float addrspace(1)* %out, float %src) #3 { %rcp = fdiv float 1.0, %src store float %rcp, float addrspace(1)* %out, align 4 ret void } -; FUNC-LABEL: {{^}}rsq_rcp_pat_f32: -; SI-UNSAFE: v_rsq_f32_e32 -; SI-SAFE: v_sqrt_f32_e32 -; SI-SAFE: v_rcp_f32_e32 -define void @rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) #1 { - %sqrt = call float @llvm.sqrt.f32(float %src) #0 - %rcp = call float @llvm.amdgcn.rcp.f32(float %sqrt) #0 +; FUNC-LABEL: {{^}}safe_rsq_rcp_pat_f32: +; SI: v_sqrt_f32_e32 +; SI: v_rcp_f32_e32 +define void @safe_rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) #1 { + %sqrt = call float @llvm.sqrt.f32(float %src) + %rcp = call float @llvm.amdgcn.rcp.f32(float %sqrt) + store float %rcp, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}unsafe_rsq_rcp_pat_f32: +; SI: v_rsq_f32_e32 +define void @unsafe_rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) #2 { + %sqrt = call float @llvm.sqrt.f32(float %src) + %rcp = call float @llvm.amdgcn.rcp.f32(float %sqrt) store float %rcp, float addrspace(1)* %out, align 4 ret void } ; FUNC-LABEL: {{^}}rcp_f64: -; SI: v_rcp_f64_e32 +; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}} +; SI-NOT: [[RESULT]] +; SI: buffer_store_dwordx2 [[RESULT]] define void @rcp_f64(double addrspace(1)* %out, double %src) #1 { - %rcp = call double @llvm.amdgcn.rcp.f64(double %src) #0 + %rcp = call double @llvm.amdgcn.rcp.f64(double %src) + store double %rcp, double addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}unsafe_rcp_f64: +; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}} +; SI-NOT: [[RESULT]] +; SI: buffer_store_dwordx2
[[RESULT]] +define void @unsafe_rcp_f64(double addrspace(1)* %out, double %src) #2 { + %rcp = call double @llvm.amdgcn.rcp.f64(double %src) store double %rcp, double addrspace(1)* %out, align 8 ret void } ; FUNC-LABEL: {{^}}rcp_pat_f64: -; SI: v_rcp_f64_e32 +; SI: v_div_scale_f64 define void @rcp_pat_f64(double addrspace(1)* %out, double %src) #1 { %rcp = fdiv double 1.0, %src store double %rcp, double addrspace(1)* %out, align 8 ret void } -; FUNC-LABEL: {{^}}rsq_rcp_pat_f64: -; SI-UNSAFE: v_rsq_f64_e32 -; SI-SAFE-NOT: v_rsq_f64_e32 -; SI-SAFE: v_sqrt_f64 -; SI-SAFE: v_rcp_f64 -define void @rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) #1 { - %sqrt = call double @llvm.sqrt.f64(double %src) #0 - %rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt) #0 +; FUNC-LABEL: {{^}}unsafe_rcp_pat_f64: +; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}} +; SI-NOT: [[RESULT]] +; SI: buffer_store_dwordx2 [[RESULT]] +define void @unsafe_rcp_pat_f64(double addrspace(1)* %out, double %src) #2 { + %rcp = fdiv double 1.0, %src store double %rcp, double addrspace(1)* %out, align 8 ret void } -; FUNC-LABEL: {{^}}rcp_undef_f32: -; SI-NOT: v_rcp_f32 -define void @rcp_undef_f32(float addrspace(1)* %out) #1 { - %rcp = call float @llvm.amdgcn.rcp.f32(float undef) #0 - store float %rcp, float addrspace(1)* %out, align 4 +; FUNC-LABEL: {{^}}safe_rsq_rcp_pat_f64: +; SI-NOT: v_rsq_f64_e32 +; SI: v_sqrt_f64 +; SI: v_rcp_f64 +define void @safe_rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) #1 { + %sqrt = call double @llvm.sqrt.f64(double %src) + %rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt) + store double %rcp, double addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}unsafe_rsq_rcp_pat_f64: +; SI: v_rsq_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}} +; SI-NOT: [[RESULT]] +; SI: buffer_store_dwordx2 [[RESULT]] +define void @unsafe_rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) #2 { + %sqrt = call double 
@llvm.sqrt.f64(double %src) + %rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt) + store double %rcp, double addrspace(1)* %out, align 8 ret void } attributes #0 = { nounwind readnone } -attributes #1 = { nounwind } +attributes #1 = { nounwind "unsafe-fp-math"="false" "target-features"="-fp32-denormals" } +attributes #2 = { nounwind "unsafe-fp-math"="true" "target-features"="-fp32-denormals" } +attributes #3 = { nounwind "unsafe-fp-math"="false" "target-features"="+fp32-denormals" } +attributes #4 = { nounwind "unsafe-fp-math"="true" "target-features"="+fp32-denormals" } Index: test/CodeGen/AMDGPU/llvm.sin.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.sin.ll +++ test/CodeGen/AMDGPU/llvm.sin.ll @@ -1,8 +1,5 @@ ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=SI-SAFE -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=SI -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=SI-UNSAFE -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=SI-SAFE -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=SI-UNSAFE -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: sin_f32 ; EG: MULADD_IEEE * @@ -10,58 +7,91 @@ ; EG: ADD * ; EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} ; EG-NOT: SIN + ; SI: v_mul_f32 ; SI: v_fract_f32 ; SI: v_sin_f32 ; SI-NOT: v_sin_f32 - define void @sin_f32(float addrspace(1)* %out, float %x) #1 { %sin = call float @llvm.sin.f32(float %x) store float %sin, float addrspace(1)* %out ret void } -; FUNC-LABEL: {{^}}sin_3x_f32: -; SI-UNSAFE-NOT: v_add_f32 -; SI-UNSAFE: 0x3ef47644 -; SI-UNSAFE: v_mul_f32 -; SI-SAFE: v_mul_f32 -; SI-SAFE: v_mul_f32 +; 
FUNC-LABEL: {{^}}safe_sin_3x_f32: +; SI: v_mul_f32 +; SI: v_mul_f32 +; SI: v_fract_f32 +; SI: v_sin_f32 +; SI-NOT: v_sin_f32 +define void @safe_sin_3x_f32(float addrspace(1)* %out, float %x) #1 { + %y = fmul float 3.0, %x + %sin = call float @llvm.sin.f32(float %y) + store float %sin, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}unsafe_sin_3x_f32: +; SI-NOT: v_add_f32 +; SI: 0x3ef47644 +; SI: v_mul_f32 ; SI: v_fract_f32 ; SI: v_sin_f32 ; SI-NOT: v_sin_f32 -define void @sin_3x_f32(float addrspace(1)* %out, float %x) #1 { +define void @unsafe_sin_3x_f32(float addrspace(1)* %out, float %x) #2 { %y = fmul float 3.0, %x %sin = call float @llvm.sin.f32(float %y) store float %sin, float addrspace(1)* %out ret void } -; FUNC-LABEL: {{^}}sin_2x_f32: -; SI-UNSAFE-NOT: v_add_f32 -; SI-UNSAFE: 0x3ea2f983 -; SI-UNSAFE: v_mul_f32 -; SI-SAFE: v_add_f32 -; SI-SAFE: v_mul_f32 +; FUNC-LABEL: {{^}}safe_sin_2x_f32: +; SI: v_add_f32 +; SI: v_mul_f32 ; SI: v_fract_f32 ; SI: v_sin_f32 ; SI-NOT: v_sin_f32 -define void @sin_2x_f32(float addrspace(1)* %out, float %x) #1 { +define void @safe_sin_2x_f32(float addrspace(1)* %out, float %x) #1 { %y = fmul float 2.0, %x %sin = call float @llvm.sin.f32(float %y) store float %sin, float addrspace(1)* %out ret void } -; FUNC-LABEL: {{^}}test_2sin_f32: -; SI-UNSAFE: 0x3ea2f983 -; SI-UNSAFE: v_mul_f32 -; SI-SAFE: v_add_f32 -; SI-SAFE: v_mul_f32 +; FUNC-LABEL: {{^}}unsafe_sin_2x_f32: +; SI-NOT: v_add_f32 +; SI: 0x3ea2f983 +; SI: v_mul_f32 ; SI: v_fract_f32 ; SI: v_sin_f32 ; SI-NOT: v_sin_f32 -define void @test_2sin_f32(float addrspace(1)* %out, float %x) #1 { +define void @unsafe_sin_2x_f32(float addrspace(1)* %out, float %x) #2 { + %y = fmul float 2.0, %x + %sin = call float @llvm.sin.f32(float %y) + store float %sin, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_safe_2sin_f32: +; SI: v_add_f32 +; SI: v_mul_f32 +; SI: v_fract_f32 +; SI: v_sin_f32 +; SI-NOT: v_sin_f32 +define void @test_safe_2sin_f32(float 
addrspace(1)* %out, float %x) #1 { + %y = fmul float 2.0, %x + %sin = call float @llvm.sin.f32(float %y) + store float %sin, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_unsafe_2sin_f32: +; SI: 0x3ea2f983 +; SI: v_mul_f32 +; SI: v_fract_f32 +; SI: v_sin_f32 +; SI-NOT: v_sin_f32 +define void @test_unsafe_2sin_f32(float addrspace(1)* %out, float %x) #2 { %y = fmul float 2.0, %x %sin = call float @llvm.sin.f32(float %y) store float %sin, float addrspace(1)* %out @@ -74,17 +104,21 @@ ; EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} ; EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} ; EG-NOT: SIN + ; SI: v_sin_f32 ; SI: v_sin_f32 ; SI: v_sin_f32 ; SI: v_sin_f32 ; SI-NOT: v_sin_f32 - define void @sin_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %vx) #1 { %sin = call <4 x float> @llvm.sin.v4f32( <4 x float> %vx) store <4 x float> %sin, <4 x float> addrspace(1)* %out ret void } -declare float @llvm.sin.f32(float) readnone -declare <4 x float> @llvm.sin.v4f32(<4 x float>) readnone +declare float @llvm.sin.f32(float) #0 +declare <4 x float> @llvm.sin.v4f32(<4 x float>) #0 + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind "unsafe-fp-math"="false" } +attributes #2 = { nounwind "unsafe-fp-math"="true" }