Index: llvm/docs/ReleaseNotes.rst =================================================================== --- llvm/docs/ReleaseNotes.rst +++ llvm/docs/ReleaseNotes.rst @@ -151,6 +151,11 @@ Changes to the AMDGPU Target ----------------------------- +* The backend now enables f32 subnormal (denormal) handling by default + for all targets and all compute function types. Frontends wishing to + retain the old behavior should explicitly request f32 subnormal + flushing. + Changes to the AVR Target ----------------------------- Index: llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -92,7 +92,7 @@ // denormals, but should be checked. Should we issue a warning somewhere // if someone tries to enable these? if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { - FullFS += "+fp64-fp16-denormals,"; + FullFS += "+fp64-fp16-denormals,+fp32-denormals,"; } else { FullFS += "-fp32-denormals,"; } Index: llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -702,8 +702,8 @@ SIModeRegisterDefaults Mode; Mode.DX10Clamp = true; Mode.IEEE = IsCompute; - Mode.FP32InputDenormals = false; // FIXME: Should be on by default. - Mode.FP32OutputDenormals = false; // FIXME: Should be on by default. 
+ Mode.FP32InputDenormals = true; + Mode.FP32OutputDenormals = true; Mode.FP64FP16InputDenormals = true; Mode.FP64FP16OutputDenormals = true; return Mode; Index: llvm/test/CodeGen/AMDGPU/amdgcn-ieee.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/amdgcn-ieee.ll +++ llvm/test/CodeGen/AMDGPU/amdgcn-ieee.ll @@ -3,8 +3,8 @@ ; GCN-LABEL: {{^}}kernel_ieee_mode_default: ; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]] ; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]] -; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]] -; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]] +; GCN-DAG: v_max_f32_e32 [[QUIET0:v[0-9]+]], [[VAL0]], [[VAL0]] +; GCN-DAG: v_max_f32_e32 [[QUIET1:v[0-9]+]], [[VAL1]], [[VAL1]] ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]] ; GCN-NOT: v_mul_f32 define amdgpu_kernel void @kernel_ieee_mode_default() #0 { @@ -18,8 +18,8 @@ ; GCN-LABEL: {{^}}kernel_ieee_mode_on: ; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]] ; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]] -; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]] -; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]] +; GCN-DAG: v_max_f32_e32 [[QUIET0:v[0-9]+]], [[VAL0]], [[VAL0]] +; GCN-DAG: v_max_f32_e32 [[QUIET1:v[0-9]+]], [[VAL1]], [[VAL1]] ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]] ; GCN-NOT: v_mul_f32 define amdgpu_kernel void @kernel_ieee_mode_on() #1 { @@ -48,8 +48,8 @@ ; GCN-LABEL: {{^}}func_ieee_mode_default: ; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]] ; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]] -; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]] -; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]] +; GCN-DAG: v_max_f32_e32 [[QUIET0:v[0-9]+]], [[VAL0]], [[VAL0]] +; GCN-DAG: v_max_f32_e32 [[QUIET1:v[0-9]+]], [[VAL1]], [[VAL1]] ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]] ; GCN-NOT: 
v_mul_f32 define void @func_ieee_mode_default() #0 { @@ -63,8 +63,8 @@ ; GCN-LABEL: {{^}}func_ieee_mode_on: ; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]] ; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]] -; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]] -; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]] +; GCN-DAG: v_max_f32_e32 [[QUIET0:v[0-9]+]], [[VAL0]], [[VAL0]] +; GCN-DAG: v_max_f32_e32 [[QUIET1:v[0-9]+]], [[VAL1]], [[VAL1]] ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]] ; GCN-NOT: v_mul_f32 define void @func_ieee_mode_on() #1 { @@ -93,8 +93,8 @@ ; GCN-LABEL: {{^}}cs_ieee_mode_default: ; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]] ; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]] -; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]] -; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]] +; GCN-DAG: v_max_f32_e32 [[QUIET0:v[0-9]+]], [[VAL0]], [[VAL0]] +; GCN-DAG: v_max_f32_e32 [[QUIET1:v[0-9]+]], [[VAL1]], [[VAL1]] ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]] ; GCN-NOT: v_mul_f32 define amdgpu_cs void @cs_ieee_mode_default() #0 { @@ -108,8 +108,8 @@ ; GCN-LABEL: {{^}}cs_ieee_mode_on: ; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]] ; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]] -; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]] -; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]] +; GCN-DAG: v_max_f32_e32 [[QUIET0:v[0-9]+]], [[VAL0]], [[VAL0]] +; GCN-DAG: v_max_f32_e32 [[QUIET1:v[0-9]+]], [[VAL1]], [[VAL1]] ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]] ; GCN-NOT: v_mul_f32 define amdgpu_cs void @cs_ieee_mode_on() #1 { @@ -150,11 +150,12 @@ ret void } +; FIXME: Should have denormals off by default. 
; GCN-LABEL: {{^}}ps_ieee_mode_on: ; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]] ; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]] -; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]] -; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]] +; GCN-DAG: v_max_f32_e32 [[QUIET0:v[0-9]+]], [[VAL0]], [[VAL0]] +; GCN-DAG: v_max_f32_e32 [[QUIET1:v[0-9]+]], [[VAL1]], [[VAL1]] ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]] ; GCN-NOT: v_mul_f32 define amdgpu_ps void @ps_ieee_mode_on() #1 { Index: llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll +++ llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll @@ -214,7 +214,7 @@ } attributes #0 = { nounwind optnone noinline } -attributes #1 = { nounwind } +attributes #1 = { nounwind "target-features"="-fp32-denormals" } attributes #2 = { nounwind "target-features"="+fp32-denormals" } ; CHECK: !0 = !{float 2.500000e+00} Index: llvm/test/CodeGen/AMDGPU/clamp-modifier.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/clamp-modifier.ll +++ llvm/test/CodeGen/AMDGPU/clamp-modifier.ll @@ -389,7 +389,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1 -attributes #0 = { nounwind } +attributes #0 = { nounwind "target-features"="-fp32-denormals" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind "target-features"="+fp32-denormals" } attributes #3 = { nounwind "target-features"="-fp64-fp16-denormals" } Index: llvm/test/CodeGen/AMDGPU/clamp.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/clamp.ll +++ llvm/test/CodeGen/AMDGPU/clamp.ll @@ -767,8 +767,8 @@ declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) #1 declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) #1 -attributes #0 = { nounwind } +attributes #0 = { 
nounwind "target-features"="-fp32-denormals" } attributes #1 = { nounwind readnone } -attributes #2 = { nounwind "amdgpu-dx10-clamp"="false" "target-features"="-fp-exceptions" "no-nans-fp-math"="false" } -attributes #3 = { nounwind "amdgpu-dx10-clamp"="true" "target-features"="+fp-exceptions" "no-nans-fp-math"="false" } -attributes #4 = { nounwind "amdgpu-dx10-clamp"="false" "target-features"="+fp-exceptions" "no-nans-fp-math"="false" } +attributes #2 = { nounwind "amdgpu-dx10-clamp"="false" "target-features"="-fp32-denormals,-fp-exceptions" "no-nans-fp-math"="false" } +attributes #3 = { nounwind "amdgpu-dx10-clamp"="true" "target-features"="-fp32-denormals,+fp-exceptions" "no-nans-fp-math"="false" } +attributes #4 = { nounwind "amdgpu-dx10-clamp"="false" "target-features"="-fp32-denormals,+fp-exceptions" "no-nans-fp-math"="false" } Index: llvm/test/CodeGen/AMDGPU/default-fp-mode.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/default-fp-mode.ll +++ llvm/test/CodeGen/AMDGPU/default-fp-mode.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}test_default_si: -; GCN: FloatMode: 192 +; GCN: FloatMode: 240 ; GCN: IeeeMode: 1 define amdgpu_kernel void @test_default_si(float addrspace(1)* %out0, double addrspace(1)* %out1) #0 { store float 0.0, float addrspace(1)* %out0 @@ -10,7 +10,7 @@ } ; GCN-LABEL: {{^}}test_default_vi: -; GCN: FloatMode: 192 +; GCN: FloatMode: 240 ; GCN: IeeeMode: 1 define amdgpu_kernel void @test_default_vi(float addrspace(1)* %out0, double addrspace(1)* %out1) #1 { store float 0.0, float addrspace(1)* %out0 @@ -19,7 +19,7 @@ } ; GCN-LABEL: {{^}}test_f64_denormals: -; GCN: FloatMode: 192 +; GCN: FloatMode: 240 ; GCN: IeeeMode: 1 define amdgpu_kernel void @test_f64_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #2 { store float 0.0, float addrspace(1)* %out0 @@ -55,7 +55,7 @@ } ; GCN-LABEL: 
{{^}}test_f16_f64_denormals: -; GCN: FloatMode: 192 +; GCN: FloatMode: 240 ; GCN: IeeeMode: 1 define amdgpu_kernel void @test_f16_f64_denormals(half addrspace(1)* %out0, double addrspace(1)* %out1) #6 { store half 0.0, half addrspace(1)* %out0 @@ -64,7 +64,7 @@ } ; GCN-LABEL: {{^}}test_no_f16_f64_denormals: -; GCN: FloatMode: 0 +; GCN: FloatMode: 48 ; GCN: IeeeMode: 1 define amdgpu_kernel void @test_no_f16_f64_denormals(half addrspace(1)* %out0, double addrspace(1)* %out1) #7 { store half 0.0, half addrspace(1)* %out0 @@ -82,7 +82,9 @@ ret void } +; FIXME: Denormals should be off by default ; GCN-LABEL: {{^}}kill_gs_const: +; GCN: FloatMode: 240 ; GCN: IeeeMode: 0 define amdgpu_gs void @kill_gs_const() { main_body: @@ -94,6 +96,7 @@ } ; GCN-LABEL: {{^}}kill_vcc_implicit_def: +; GCN: FloatMode: 240 ; GCN: IeeeMode: 0 define amdgpu_ps float @kill_vcc_implicit_def([6 x <16 x i8>] addrspace(4)* inreg, [17 x <16 x i8>] addrspace(4)* inreg, [17 x <4 x i32>] addrspace(4)* inreg, [34 x <8 x i32>] addrspace(4)* inreg, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) { entry: Index: llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll +++ llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll @@ -739,6 +739,6 @@ } attributes #0 = { nounwind readnone } -attributes #1 = { nounwind } -attributes #2 = { nounwind "target-features"="-fp64-fp16-denormals" } -attributes #3 = { nounwind "target-features"="+fp64-fp16-denormals" } +attributes #1 = { nounwind "target-features"="-fp32-denormals" } +attributes #2 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" } +attributes #3 = { nounwind "target-features"="-fp32-denormals,+fp64-fp16-denormals" } Index: llvm/test/CodeGen/AMDGPU/fcanonicalize.ll 
=================================================================== --- llvm/test/CodeGen/AMDGPU/fcanonicalize.ll +++ llvm/test/CodeGen/AMDGPU/fcanonicalize.ll @@ -625,7 +625,7 @@ } attributes #0 = { nounwind readnone } -attributes #1 = { nounwind } +attributes #1 = { nounwind "target-features"="-fp32-denormals" } attributes #2 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" } attributes #3 = { nounwind "target-features"="+fp32-denormals,+fp64-fp16-denormals" } attributes #4 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" "target-cpu"="tonga" } Index: llvm/test/CodeGen/AMDGPU/fdiv.f16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fdiv.f16.ll +++ llvm/test/CodeGen/AMDGPU/fdiv.f16.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -mattr=+fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -mattr=-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s +; RUN: llc -march=amdgcn -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s ; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s Index: 
llvm/test/CodeGen/AMDGPU/fdot2.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fdot2.ll +++ llvm/test/CodeGen/AMDGPU/fdot2.ll @@ -1,8 +1,8 @@ -; RUN: llc -march=amdgcn -mcpu=gfx900 -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX900 -; RUN: llc -march=amdgcn -mcpu=gfx906 -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX906-DL-UNSAFE -; RUN: llc -march=amdgcn -mcpu=gfx1011 -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT -; RUN: llc -march=amdgcn -mcpu=gfx1012 -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT -; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906 +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX900 +; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX906-DL-UNSAFE +; RUN: llc -march=amdgcn -mcpu=gfx1011 -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT +; RUN: llc -march=amdgcn -mcpu=gfx1012 -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT +; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906 ; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=-fp64-fp16-denormals,-fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906-CONTRACT ; RUN: llc -march=amdgcn -mcpu=gfx906 
-mattr=+fp64-fp16-denormals,+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906-DENORM-CONTRACT ; (fadd (fmul S1.x, S2.x), (fadd (fmul (S1.y, S2.y), z))) -> (fdot2 S1, S2, z) Index: llvm/test/CodeGen/AMDGPU/fma-combine.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fma-combine.ll +++ llvm/test/CodeGen/AMDGPU/fma-combine.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -mattr=-fp32-denormals -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast -enable-no-infs-fp-math -enable-unsafe-fp-math -mattr=+fp32-denormals < %s | FileCheck -enable-var-scope -check-prefix=SI-FMA -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s ; Note: The SI-FMA conversions of type x * (y + 1) --> x * y + x would be Index: llvm/test/CodeGen/AMDGPU/fmaxnum.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fmaxnum.ll +++ llvm/test/CodeGen/AMDGPU/fmaxnum.ll 
@@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s ; GCN-LABEL: {{^}}test_fmax_f32_ieee_mode_on: ; GCN: v_mul_f32_e64 [[QUIET0:v[0-9]+]], 1.0, s{{[0-9]+}} Index: llvm/test/CodeGen/AMDGPU/fminnum.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fminnum.ll +++ llvm/test/CodeGen/AMDGPU/fminnum.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s ; GCN-LABEL: {{^}}test_fmin_f32_ieee_mode_on: ; GCN: v_mul_f32_e64 [[QUIET0:v[0-9]+]], 1.0, s{{[0-9]+}} Index: llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll +++ llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll @@ -1,8 +1,8 @@ -; XUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -mattr=+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck 
-enable-var-scope -check-prefixes=GCN,VI,SIVI,VI-DENORM %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SIVI,VI-FLUSH %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10,GFX8_10,GFX10-DENORM %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10,GFX8_10,GFX10-FLUSH %s +; XUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -mattr=-fp32-denormals,+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SIVI,VI-DENORM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -mattr=-fp32-denormals,-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SIVI,VI-FLUSH %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-fp32-denormals,+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10,GFX8_10,GFX10-DENORM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-fp32-denormals,-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10,GFX8_10,GFX10-FLUSH %s ; Make sure (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c)) doesn't ; make add an instruction if the fadd has more than one use. 
Index: llvm/test/CodeGen/AMDGPU/fneg-combines.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fneg-combines.ll +++ llvm/test/CodeGen/AMDGPU/fneg-combines.ll @@ -1,8 +1,8 @@ -; RUN: llc -march=amdgcn -mcpu=hawaii -start-after=sink -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=hawaii -mattr=+flat-for-global -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=hawaii -start-after=sink -mattr=-fp32-denormals,+flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=hawaii -mattr=-fp32-denormals,+flat-for-global -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=fiji -start-after=sink --verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=VI -check-prefix=FUNC %s -; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=fiji -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=VI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp32-denormals -start-after=sink --verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=VI -check-prefix=FUNC %s +; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=fiji -mattr=-fp32-denormals -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope 
-check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=VI -check-prefix=FUNC %s ; -------------------------------------------------------------------------------- ; fadd tests Index: llvm/test/CodeGen/AMDGPU/frem.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/frem.ll +++ llvm/test/CodeGen/AMDGPU/frem.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=bonaire -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}frem_f32: ; GCN-DAG: buffer_load_dword [[X:v[0-9]+]], {{.*$}} Index: llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll +++ llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll @@ -1,7 +1,7 @@ ; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}test_default_ci: -; GCN: float_mode = 192 +; GCN: float_mode = 240 ; GCN: enable_dx10_clamp = 1 ; 
GCN: enable_ieee_mode = 1 define amdgpu_kernel void @test_default_ci(float addrspace(1)* %out0, double addrspace(1)* %out1) #0 { @@ -11,7 +11,7 @@ } ; GCN-LABEL: {{^}}test_default_vi: -; GCN: float_mode = 192 +; GCN: float_mode = 240 ; GCN: enable_dx10_clamp = 1 ; GCN: enable_ieee_mode = 1 define amdgpu_kernel void @test_default_vi(float addrspace(1)* %out0, double addrspace(1)* %out1) #1 { @@ -61,7 +61,7 @@ } ; GCN-LABEL: {{^}}test_no_dx10_clamp_vi: -; GCN: float_mode = 192 +; GCN: float_mode = 240 ; GCN: enable_dx10_clamp = 0 ; GCN: enable_ieee_mode = 1 define amdgpu_kernel void @test_no_dx10_clamp_vi(float addrspace(1)* %out0, double addrspace(1)* %out1) #6 { @@ -71,7 +71,7 @@ } ; GCN-LABEL: {{^}}test_no_ieee_mode_vi: -; GCN: float_mode = 192 +; GCN: float_mode = 240 ; GCN: enable_dx10_clamp = 1 ; GCN: enable_ieee_mode = 0 define amdgpu_kernel void @test_no_ieee_mode_vi(float addrspace(1)* %out0, double addrspace(1)* %out1) #7 { @@ -81,7 +81,7 @@ } ; GCN-LABEL: {{^}}test_no_ieee_mode_no_dx10_clamp_vi: -; GCN: float_mode = 192 +; GCN: float_mode = 240 ; GCN: enable_dx10_clamp = 0 ; GCN: enable_ieee_mode = 0 define amdgpu_kernel void @test_no_ieee_mode_no_dx10_clamp_vi(float addrspace(1)* %out0, double addrspace(1)* %out1) #8 { Index: llvm/test/CodeGen/AMDGPU/known-never-snan.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/known-never-snan.ll +++ llvm/test/CodeGen/AMDGPU/known-never-snan.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s ; Mostly overlaps with fmed3.ll to stress specific cases of ; isKnownNeverSNaN. 
Index: llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll +++ llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll @@ -1,9 +1,9 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=SI-FLUSH %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -mattr=+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=SI-DENORM %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1010 -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX10 -check-prefix=GFX10-FLUSH %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1010 -mattr=+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX10 -check-prefix=GFX10-DENORM %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals,-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=SI-FLUSH %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji 
-mattr=-fp32-denormals,-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals,+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=SI-DENORM %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-fp32-denormals,+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1010 -mattr=-fp32-denormals,-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX10 -check-prefix=GFX10-FLUSH %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1010 -mattr=-fp32-denormals,+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX10 -check-prefix=GFX10-DENORM %s declare half @llvm.fmuladd.f16(half %a, half %b, half %c) declare <2 x half> @llvm.fmuladd.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) Index: llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll +++ llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI %s -; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI,SIVI %s -; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global 
-verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI %s +; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI,SIVI %s +; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s declare half @llvm.maxnum.f16(half %a, half %b) declare <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> %b) Index: llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll +++ llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI %s -; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI %s -; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI %s +; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI %s +; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s declare half @llvm.minnum.f16(half %a, half %b) declare <2 x half> @llvm.minnum.v2f16(<2 x half> %a, <2 x half> %b) Index: 
llvm/test/CodeGen/AMDGPU/mad-combine.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/mad-combine.ll +++ llvm/test/CodeGen/AMDGPU/mad-combine.ll @@ -1,8 +1,8 @@ ; Make sure we still form mad even when unsafe math or fp-contract is allowed instead of fma. -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-UNSAFE -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-UNSAFE -check-prefix=FUNC %s ; Make sure we don't form mad with denormals ; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-FASTFMAF -check-prefix=FUNC %s Index: llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll +++ 
llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll @@ -143,5 +143,5 @@ declare float @llvm.fmuladd.f32(float, float, float) #1 declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #1 -attributes #0 = { nounwind } +attributes #0 = { nounwind "target-features"="-fp32-denormals" } attributes #1 = { nounwind readnone speculatable } Index: llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll +++ llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll @@ -310,5 +310,5 @@ declare <3 x float> @llvm.fmuladd.v3f32(<3 x float>, <3 x float>, <3 x float>) #1 declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #1 -attributes #0 = { nounwind } +attributes #0 = { nounwind "target-features"="-fp32-denormals" } attributes #1 = { nounwind readnone speculatable } Index: llvm/test/CodeGen/AMDGPU/madak.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/madak.ll +++ llvm/test/CodeGen/AMDGPU/madak.ll @@ -1,8 +1,8 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6_8_9,MAD %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8,GFX6_8_9,GFX8_9,GFX8_9_10,MAD %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX9,GFX6_8_9,GFX8_9,GFX8_9_10,MAD %s -; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,GFX10-MAD %s -; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -fp-contract=fast -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,FMA %s +; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6_8_9,MAD %s +; RUN: llc -march=amdgcn 
-mcpu=tonga -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8,GFX6_8_9,GFX8_9,GFX8_9_10,MAD %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX9,GFX6_8_9,GFX8_9,GFX8_9_10,MAD %s +; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-fp32-denormals -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,GFX10-MAD %s +; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-fp32-denormals -verify-machineinstrs -fp-contract=fast -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,FMA %s declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone declare float @llvm.fabs.f32(float) nounwind readnone Index: llvm/test/CodeGen/AMDGPU/madmk.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/madmk.ll +++ llvm/test/CodeGen/AMDGPU/madmk.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; XUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; XUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; FIXME: None of these trigger madmk emission anymore. 
It is still ; possible, but requires the correct registers to be used which is Index: llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll +++ llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s ; Make sure that AMDGPUCodeGenPrepare introduces mul24 intrinsics ; after SLSR, as the intrinsics would interfere. It's unclear if these Index: llvm/test/CodeGen/AMDGPU/omod.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/omod.ll +++ llvm/test/CodeGen/AMDGPU/omod.ll @@ -275,7 +275,7 @@ declare half @llvm.maxnum.f16(half, half) #1 declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1 -attributes #0 = { nounwind "no-signed-zeros-fp-math"="true" } +attributes #0 = { nounwind "target-features"="-fp32-denormals" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind "target-features"="+fp32-denormals" "no-signed-zeros-fp-math"="true" } attributes #3 = { nounwind "target-features"="-fp64-fp16-denormals" "no-signed-zeros-fp-math"="true" } Index: llvm/test/CodeGen/AMDGPU/operand-folding.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/operand-folding.ll +++ llvm/test/CodeGen/AMDGPU/operand-folding.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck %s ; CHECK-LABEL: {{^}}fold_sgpr: ; CHECK: v_add_i32_e32 v{{[0-9]+}}, vcc, s Index: 
llvm/test/CodeGen/AMDGPU/rcp-pattern.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/rcp-pattern.ll +++ llvm/test/CodeGen/AMDGPU/rcp-pattern.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s Index: llvm/test/CodeGen/AMDGPU/rcp_iflag.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/rcp_iflag.ll +++ llvm/test/CodeGen/AMDGPU/rcp_iflag.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s +; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s ; GCN-LABEL: {{^}}rcp_uint: ; GCN: v_rcp_iflag_f32_e32 Index: llvm/test/CodeGen/AMDGPU/sdivrem64.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/sdivrem64.ll +++ llvm/test/CodeGen/AMDGPU/sdivrem64.ll @@ -1,5 +1,5 @@ -;RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC %s -;RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC %s +;RUN: llc -march=amdgcn -mattr=-fp32-denormals 
-verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC %s +;RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC %s ;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s ;FUNC-LABEL: {{^}}s_test_sdiv: Index: llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll +++ llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll @@ -35,7 +35,7 @@ ; VI-NEXT: .amdhsa_reserve_vcc 0 ; VI-NEXT: .amdhsa_float_round_mode_32 0 ; VI-NEXT: .amdhsa_float_round_mode_16_64 0 -; VI-NEXT: .amdhsa_float_denorm_mode_32 0 +; VI-NEXT: .amdhsa_float_denorm_mode_32 3 ; VI-NEXT: .amdhsa_float_denorm_mode_16_64 3 ; VI-NEXT: .amdhsa_dx10_clamp 1 ; VI-NEXT: .amdhsa_ieee_mode 1 @@ -79,7 +79,7 @@ ; GFX9-NEXT: .amdhsa_reserve_vcc 0 ; GFX9-NEXT: .amdhsa_float_round_mode_32 0 ; GFX9-NEXT: .amdhsa_float_round_mode_16_64 0 -; GFX9-NEXT: .amdhsa_float_denorm_mode_32 0 +; GFX9-NEXT: .amdhsa_float_denorm_mode_32 3 ; GFX9-NEXT: .amdhsa_float_denorm_mode_16_64 3 ; GFX9-NEXT: .amdhsa_dx10_clamp 1 ; GFX9-NEXT: .amdhsa_ieee_mode 1 @@ -130,7 +130,7 @@ ; VI-NEXT: .amdhsa_reserve_vcc 0 ; VI-NEXT: .amdhsa_float_round_mode_32 0 ; VI-NEXT: .amdhsa_float_round_mode_16_64 0 -; VI-NEXT: .amdhsa_float_denorm_mode_32 0 +; VI-NEXT: .amdhsa_float_denorm_mode_32 3 ; VI-NEXT: .amdhsa_float_denorm_mode_16_64 3 ; VI-NEXT: .amdhsa_dx10_clamp 1 ; VI-NEXT: .amdhsa_ieee_mode 1 @@ -174,7 +174,7 @@ ; GFX9-NEXT: .amdhsa_reserve_vcc 0 ; GFX9-NEXT: .amdhsa_float_round_mode_32 0 ; GFX9-NEXT: .amdhsa_float_round_mode_16_64 0 -; GFX9-NEXT: .amdhsa_float_denorm_mode_32 0 +; GFX9-NEXT: .amdhsa_float_denorm_mode_32 3 ; GFX9-NEXT: .amdhsa_float_denorm_mode_16_64 3 ; GFX9-NEXT: .amdhsa_dx10_clamp 1 ; GFX9-NEXT: .amdhsa_ieee_mode 1 @@ -225,7 
+225,7 @@ ; VI-NEXT: .amdhsa_reserve_vcc 0 ; VI-NEXT: .amdhsa_float_round_mode_32 0 ; VI-NEXT: .amdhsa_float_round_mode_16_64 0 -; VI-NEXT: .amdhsa_float_denorm_mode_32 0 +; VI-NEXT: .amdhsa_float_denorm_mode_32 3 ; VI-NEXT: .amdhsa_float_denorm_mode_16_64 3 ; VI-NEXT: .amdhsa_dx10_clamp 1 ; VI-NEXT: .amdhsa_ieee_mode 1 @@ -269,7 +269,7 @@ ; GFX9-NEXT: .amdhsa_reserve_vcc 0 ; GFX9-NEXT: .amdhsa_float_round_mode_32 0 ; GFX9-NEXT: .amdhsa_float_round_mode_16_64 0 -; GFX9-NEXT: .amdhsa_float_denorm_mode_32 0 +; GFX9-NEXT: .amdhsa_float_denorm_mode_32 3 ; GFX9-NEXT: .amdhsa_float_denorm_mode_16_64 3 ; GFX9-NEXT: .amdhsa_dx10_clamp 1 ; GFX9-NEXT: .amdhsa_ieee_mode 1 Index: llvm/test/CodeGen/AMDGPU/udivrem64.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/udivrem64.ll +++ llvm/test/CodeGen/AMDGPU/udivrem64.ll @@ -1,6 +1,6 @@ -;RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC %s -;RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC %s -;RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC %s +;RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC %s +;RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC %s +;RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC %s ;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s ;FUNC-LABEL: {{^}}test_udiv: Index: llvm/test/CodeGen/AMDGPU/v_mac.ll 
=================================================================== --- llvm/test/CodeGen/AMDGPU/v_mac.ll +++ llvm/test/CodeGen/AMDGPU/v_mac.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-FLUSH -check-prefix=GCN %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-DENORM -check-prefix=GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-FLUSH -check-prefix=GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-DENORM -check-prefix=GCN %s ; GCN-LABEL: {{^}}mac_vvv: ; GCN: buffer_load_dword [[A:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0{{$}} Index: llvm/test/CodeGen/AMDGPU/v_mac_f16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/v_mac_f16.ll +++ llvm/test/CodeGen/AMDGPU/v_mac_f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji 
-mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals,-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-fp32-denormals,-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}mac_f16: ; GCN: {{buffer|flat}}_load_ushort v[[A_F16:[0-9]+]] Index: llvm/test/CodeGen/AMDGPU/v_madak_f16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/v_madak_f16.ll +++ llvm/test/CodeGen/AMDGPU/v_madak_f16.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=amdgcn-- -mcpu=tahiti -mattr=-fp64-fp16-denormals -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,SI -; RUN: llc < %s -mtriple=amdgcn-- -mcpu=fiji -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,VI +; RUN: llc < %s -mtriple=amdgcn-- -mcpu=tahiti -mattr=-fp32-denormals,-fp64-fp16-denormals -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,SI +; RUN: llc < %s -mtriple=amdgcn-- -mcpu=fiji -mattr=-fp32-denormals,-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,VI define amdgpu_kernel void @madak_f16( ; SI-LABEL: madak_f16: Index: llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll =================================================================== --- llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll +++ llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll @@ -28,8 +28,8 @@ ; CHECK-NEXT: mode: ; CHECK-NEXT: ieee: true ; CHECK-NEXT: dx10-clamp: 
true -; CHECK-NEXT: fp32-input-denormals: false -; CHECK-NEXT: fp32-output-denormals: false +; CHECK-NEXT: fp32-input-denormals: true +; CHECK-NEXT: fp32-output-denormals: true ; CHECK-NEXT: fp64-fp16-input-denormals: true ; CHECK-NEXT: fp64-fp16-output-denormals: true ; CHECK-NEXT: highBitsOf32BitAddress: 0 @@ -88,8 +88,8 @@ ; CHECK-NEXT: mode: ; CHECK-NEXT: ieee: true ; CHECK-NEXT: dx10-clamp: true -; CHECK-NEXT: fp32-input-denormals: false -; CHECK-NEXT: fp32-output-denormals: false +; CHECK-NEXT: fp32-input-denormals: true +; CHECK-NEXT: fp32-output-denormals: true ; CHECK-NEXT: fp64-fp16-input-denormals: true ; CHECK-NEXT: fp64-fp16-output-denormals: true ; CHECK-NEXT: highBitsOf32BitAddress: 0 @@ -117,8 +117,8 @@ ; CHECK-NEXT: mode: ; CHECK-NEXT: ieee: true ; CHECK-NEXT: dx10-clamp: true -; CHECK-NEXT: fp32-input-denormals: false -; CHECK-NEXT: fp32-output-denormals: false +; CHECK-NEXT: fp32-input-denormals: true +; CHECK-NEXT: fp32-output-denormals: true ; CHECK-NEXT: fp64-fp16-input-denormals: true ; CHECK-NEXT: fp64-fp16-output-denormals: true ; CHECK-NEXT: highBitsOf32BitAddress: 0 @@ -131,8 +131,8 @@ ; CHECK: mode: ; CHECK-NEXT: ieee: true ; CHECK-NEXT: dx10-clamp: false -; CHECK-NEXT: fp32-input-denormals: false -; CHECK-NEXT: fp32-output-denormals: false +; CHECK-NEXT: fp32-input-denormals: true +; CHECK-NEXT: fp32-output-denormals: true ; CHECK-NEXT: fp64-fp16-input-denormals: true ; CHECK-NEXT: fp64-fp16-output-denormals: true define void @function_dx10_clamp_off() #1 { @@ -143,8 +143,8 @@ ; CHECK: mode: ; CHECK-NEXT: ieee: false ; CHECK-NEXT: dx10-clamp: true -; CHECK-NEXT: fp32-input-denormals: false -; CHECK-NEXT: fp32-output-denormals: false +; CHECK-NEXT: fp32-input-denormals: true +; CHECK-NEXT: fp32-output-denormals: true ; CHECK-NEXT: fp64-fp16-input-denormals: true ; CHECK-NEXT: fp64-fp16-output-denormals: true define void @function_ieee_off() #2 { @@ -155,8 +155,8 @@ ; CHECK: mode: ; CHECK-NEXT: ieee: false ; CHECK-NEXT: dx10-clamp: false 
-; CHECK-NEXT: fp32-input-denormals: false -; CHECK-NEXT: fp32-output-denormals: false +; CHECK-NEXT: fp32-input-denormals: true +; CHECK-NEXT: fp32-output-denormals: true ; CHECK-NEXT: fp64-fp16-input-denormals: true ; CHECK-NEXT: fp64-fp16-output-denormals: true define void @function_ieee_off_dx10_clamp_off() #3 {