Index: lib/Target/AMDGPU/AMDGPU.td
===================================================================
--- lib/Target/AMDGPU/AMDGPU.td
+++ lib/Target/AMDGPU/AMDGPU.td
@@ -194,12 +194,6 @@
 // Subtarget Features (options and debugging)
 //===------------------------------------------------------------===//

-def FeatureFP16Denormals : SubtargetFeature<"fp16-denormals",
-  "FP16Denormals",
-  "true",
-  "Enable half precision denormal handling"
->;
-
 // Some instructions do not support denormals despite this flag. Using
 // fp32 denormals also causes instructions to run at the double
 // precision rate for the device.
@@ -209,13 +203,30 @@
   "Enable single precision denormal handling"
 >;

-def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
-  "FP64Denormals",
+// Denormal handling for fp64 and fp16 is controlled by the same
+// config register when fp16 is supported.
+// TODO: Do we need a separate f16 setting when not legal?
+def FeatureFP64FP16Denormals : SubtargetFeature<"fp64-fp16-denormals",
+  "FP64FP16Denormals",
   "true",
-  "Enable double precision denormal handling",
+  "Enable double and half precision denormal handling",
   [FeatureFP64]
 >;

+def : SubtargetFeature<"fp64-denormals",
+  "FP64FP16Denormals",
+  "true",
+  "Enable double and half precision denormal handling",
+  [FeatureFP64]
+>;
+
+def : SubtargetFeature<"fp16-denormals",
+  "FP64FP16Denormals",
+  "true",
+  "Enable half precision denormal handling"
+>;
+
+
 def FeatureFPExceptions : SubtargetFeature<"fp-exceptions",
   "FPExceptions",
   "true",
Index: lib/Target/AMDGPU/AMDGPUSubtarget.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -81,9 +81,8 @@
   bool HalfRate64Ops;

   // Dynamially set bits that enable features.
-  bool FP16Denormals;
   bool FP32Denormals;
-  bool FP64Denormals;
+  bool FP64FP16Denormals;
   bool FPExceptions;
   bool FlatForGlobal;
   bool UnalignedScratchAccess;
@@ -280,7 +279,7 @@
   unsigned getOccupancyWithLocalMemSize(uint32_t Bytes) const;

   bool hasFP16Denormals() const {
-    return FP16Denormals;
+    return FP64FP16Denormals;
   }

   bool hasFP32Denormals() const {
@@ -288,7 +287,7 @@
   }

   bool hasFP64Denormals() const {
-    return FP64Denormals;
+    return FP64FP16Denormals;
   }

   bool hasFPExceptions() const {
Index: lib/Target/AMDGPU/AMDGPUSubtarget.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -41,9 +41,10 @@
   // for SI has the unhelpful behavior that it unsets everything else if you
   // disable it.

-  SmallString<256> FullFS("+promote-alloca,+fp64-denormals,+load-store-opt,");
+  SmallString<256> FullFS("+promote-alloca,+fp64-fp16-denormals,+load-store-opt,");
   if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
     FullFS += "+flat-for-global,+unaligned-buffer-access,";
+
   FullFS += FS;

   ParseSubtargetFeatures(GPU, FullFS);
@@ -52,9 +53,8 @@
   // denormals, but should be checked. Should we issue a warning somewhere
   // if someone tries to enable these?
   if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
-    FP16Denormals = false;
+    FP64FP16Denormals = false;
     FP32Denormals = false;
-    FP64Denormals = false;
   }

   // Set defaults if needed.
@@ -78,9 +78,8 @@
     FastFMAF32(false),
     HalfRate64Ops(false),

-    FP16Denormals(false),
     FP32Denormals(false),
-    FP64Denormals(false),
+    FP64FP16Denormals(false),
     FPExceptions(false),
     FlatForGlobal(false),
     UnalignedScratchAccess(false),
Index: test/CodeGen/AMDGPU/default-fp-mode.ll
===================================================================
--- test/CodeGen/AMDGPU/default-fp-mode.ll
+++ test/CodeGen/AMDGPU/default-fp-mode.ll
@@ -54,6 +54,52 @@
   ret void
 }

+; GCN-LABEL: {{^}}test_f16_f64_denormals:
+; GCN: FloatMode: 192
+; GCN: IeeeMode: 1
+define void @test_f16_f64_denormals(half addrspace(1)* %out0, double addrspace(1)* %out1) #6 {
+  store half 0.0, half addrspace(1)* %out0
+  store double 0.0, double addrspace(1)* %out1
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_no_f16_f64_denormals:
+; GCN: FloatMode: 0
+; GCN: IeeeMode: 1
+define void @test_no_f16_f64_denormals(half addrspace(1)* %out0, double addrspace(1)* %out1) #7 {
+  store half 0.0, half addrspace(1)* %out0
+  store double 0.0, double addrspace(1)* %out1
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_f32_f16_f64_denormals:
+; GCN: FloatMode: 240
+; GCN: IeeeMode: 1
+define void @test_f32_f16_f64_denormals(half addrspace(1)* %out0, float addrspace(1)* %out1, double addrspace(1)* %out2) #8 {
+  store half 0.0, half addrspace(1)* %out0
+  store float 0.0, float addrspace(1)* %out1
+  store double 0.0, double addrspace(1)* %out2
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_no_f64_denormals_legacy:
+; GCN: FloatMode: 0
+; GCN: IeeeMode: 1
+define void @test_no_f64_denormals_legacy(half addrspace(1)* %out0, double addrspace(1)* %out1) #9 {
+  store half 0.0, half addrspace(1)* %out0
+  store double 0.0, double addrspace(1)* %out1
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_no_f16_denormals_legacy:
+; GCN: FloatMode: 0
+; GCN: IeeeMode: 1
+define void @test_no_f16_denormals_legacy(half addrspace(1)* %out0, double addrspace(1)* %out1) #10 {
+  store half 0.0, half addrspace(1)* %out0
+  store double 0.0, double addrspace(1)* %out1
+  ret void
+}
+
 ; GCN-LABEL: {{^}}kill_gs_const:
 ; GCN: IeeeMode: 0
 define amdgpu_gs void @kill_gs_const() {
@@ -87,4 +133,9 @@
 attributes #2 = { nounwind "target-features"="+fp64-denormals" }
 attributes #3 = { nounwind "target-features"="+fp32-denormals" }
 attributes #4 = { nounwind "target-features"="+fp32-denormals,+fp64-denormals" }
-attributes #5 = { nounwind "target-features"="-fp32-denormals,-fp64-denormals" }
+attributes #5 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" }
+attributes #6 = { nounwind "target-features"="+fp64-fp16-denormals" }
+attributes #7 = { nounwind "target-features"="-fp64-fp16-denormals" }
+attributes #8 = { nounwind "target-features"="+fp32-denormals,+fp64-fp16-denormals" }
+attributes #9 = { nounwind "target-features"="-fp64-denormals" }
+attributes #10 = { nounwind "target-features"="-fp16-denormals" }
Index: test/CodeGen/AMDGPU/fcanonicalize.f16.ll
===================================================================
--- test/CodeGen/AMDGPU/fcanonicalize.f16.ll
+++ test/CodeGen/AMDGPU/fcanonicalize.f16.ll
@@ -67,10 +67,10 @@
   ret void
 }

-; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
+; GCN-LABEL: {{^}}test_default_denormals_fold_canonicalize_denormal0_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff{{$}}
 ; GCN: buffer_store_short [[REG]]
-define void @test_no_denormals_fold_canonicalize_denormal0_f16(half addrspace(1)* %out) #1 {
+define void @test_default_denormals_fold_canonicalize_denormal0_f16(half addrspace(1)* %out) #1 {
   %canonicalized = call half @llvm.canonicalize.f16(half 0xH03FF)
   store half %canonicalized, half addrspace(1)* %out
   ret void
@@ -85,10 +85,10 @@
   ret void
 }

-; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
+; GCN-LABEL: {{^}}test_default_denormals_fold_canonicalize_denormal1_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff83ff{{$}}
 ; GCN: buffer_store_short [[REG]]
-define void @test_no_denormals_fold_canonicalize_denormal1_f16(half addrspace(1)* %out) #1 {
+define void @test_default_denormals_fold_canonicalize_denormal1_f16(half addrspace(1)* %out) #1 {
   %canonicalized = call half @llvm.canonicalize.f16(half 0xH83FF)
   store half %canonicalized, half addrspace(1)* %out
   ret void
@@ -168,5 +168,5 @@

 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
-attributes #2 = { nounwind "target-features"="-fp16-denormals,-fp16-denormals" }
-attributes #3 = { nounwind "target-features"="+fp16-denormals,+fp64-denormals" }
+attributes #2 = { nounwind "target-features"="-fp64-fp16-denormals" }
+attributes #3 = { nounwind "target-features"="+fp64-fp16-denormals" }
Index: test/CodeGen/AMDGPU/fcanonicalize.ll
===================================================================
--- test/CodeGen/AMDGPU/fcanonicalize.ll
+++ test/CodeGen/AMDGPU/fcanonicalize.ll
@@ -347,5 +347,5 @@

 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
-attributes #2 = { nounwind "target-features"="-fp32-denormals,-fp64-denormals" }
-attributes #3 = { nounwind "target-features"="+fp32-denormals,+fp64-denormals" }
+attributes #2 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" }
+attributes #3 = { nounwind "target-features"="+fp32-denormals,+fp64-fp16-denormals" }
Index: test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
===================================================================
--- test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
+++ test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
@@ -1,5 +1,7 @@
 ; XUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s
+

 ; Make sure (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c)) doesn't
 ; make add an instruction if the fadd has more than one use.
@@ -115,7 +117,8 @@
 ; VI: v_cndmask_b32_e32
 ; VI: v_add_f16_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, |v{{[0-9]+}}|
 ; VI: v_mul_f16_e64 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
-; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 1.0
+; VI-FLUSH: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 1.0
+; VI-DENORM: v_fma_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 1.0
 define void @multiple_fadd_use_test_f16(half addrspace(1)* %out, i16 zeroext %x.arg, i16 zeroext %y.arg, i16 zeroext %z.arg) #0 {
   %x = bitcast i16 %x.arg to half
   %y = bitcast i16 %y.arg to half
@@ -136,7 +139,10 @@

 ; GCN-LABEL: {{^}}multiple_use_fadd_fmac_f16:
 ; GCN-DAG: v_add_f16_e64 [[MUL2:v[0-9]+]], [[X:s[0-9]+]], s{{[0-9]+}}
-; GCN-DAG: v_mac_f16_e64 [[MAD:v[0-9]+]], [[X]], 2.0
+
+; VI-FLUSH-DAG: v_mac_f16_e64 [[MAD:v[0-9]+]], [[X]], 2.0
+; VI-DENORM-DAG: v_fma_f16 [[MAD:v[0-9]+]], [[X]], 2.0, v{{[0-9]+}}
+
 ; GCN-DAG: buffer_store_short [[MUL2]]
 ; GCN-DAG: buffer_store_short [[MAD]]
 ; GCN: s_endpgm
@@ -153,7 +159,10 @@

 ; GCN-LABEL: {{^}}multiple_use_fadd_fmad_f16:
 ; GCN-DAG: v_add_f16_e64 [[MUL2:v[0-9]+]], |[[X:s[0-9]+]]|, |s{{[0-9]+}}|
-; GCN-DAG: v_mad_f16 [[MAD:v[0-9]+]], |[[X]]|, 2.0, v{{[0-9]+}}
+
+; VI-FLUSH-DAG: v_mad_f16 [[MAD:v[0-9]+]], |[[X]]|, 2.0, v{{[0-9]+}}
+; VI-DENORM-DAG: v_fma_f16 [[MAD:v[0-9]+]], |[[X]]|, 2.0, v{{[0-9]+}}
+
 ; GCN-DAG: buffer_store_short [[MUL2]]
 ; GCN-DAG: buffer_store_short [[MAD]]
 ; GCN: s_endpgm
@@ -170,8 +179,12 @@
 }

 ; GCN-LABEL: {{^}}multiple_use_fadd_multi_fmad_f16:
-; GCN: v_mad_f16 {{v[0-9]+}}, |[[X:s[0-9]+]]|, 2.0, v{{[0-9]+}}
-; GCN: v_mad_f16 {{v[0-9]+}}, |[[X]]|, 2.0, v{{[0-9]+}}
+; VI-FLUSH: v_mad_f16 {{v[0-9]+}}, |[[X:s[0-9]+]]|, 2.0, v{{[0-9]+}}
+; VI-FLUSH: v_mad_f16 {{v[0-9]+}}, |[[X]]|, 2.0, v{{[0-9]+}}
+
+; VI-DENORM: v_fma_f16 {{v[0-9]+}}, |[[X:s[0-9]+]]|, 2.0, v{{[0-9]+}}
+; VI-DENORM: v_fma_f16 {{v[0-9]+}}, |[[X]]|, 2.0, v{{[0-9]+}}
+
 define void @multiple_use_fadd_multi_fmad_f16(half addrspace(1)* %out, i16 zeroext %x.arg, i16 zeroext %y.arg, i16 zeroext %z.arg) #0 {
   %x = bitcast i16 %x.arg to half
   %y = bitcast i16 %y.arg to half
Index: test/CodeGen/AMDGPU/fmuladd.f16.ll
===================================================================
--- test/CodeGen/AMDGPU/fmuladd.f16.ll
+++ test/CodeGen/AMDGPU/fmuladd.f16.ll
@@ -1,12 +1,12 @@
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s
-
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s
+
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s

 declare i32 @llvm.amdgcn.workitem.id.x() #1
 declare half @llvm.fmuladd.f16(half, half, half) #1
Index: test/CodeGen/AMDGPU/hsa-fp-mode.ll
===================================================================
--- test/CodeGen/AMDGPU/hsa-fp-mode.ll
+++ test/CodeGen/AMDGPU/hsa-fp-mode.ll
@@ -62,7 +62,7 @@

 attributes #0 = { nounwind "target-cpu"="kaveri" }
 attributes #1 = { nounwind "target-cpu"="fiji" }
-attributes #2 = { nounwind "target-features"="-fp32-denormals,+fp64-denormals" }
-attributes #3 = { nounwind "target-features"="+fp32-denormals,-fp64-denormals" }
-attributes #4 = { nounwind "target-features"="+fp32-denormals,+fp64-denormals" }
-attributes #5 = { nounwind "target-features"="-fp32-denormals,-fp64-denormals" }
+attributes #2 = { nounwind "target-features"="-fp32-denormals,+fp64-fp16-denormals" }
+attributes #3 = { nounwind "target-features"="+fp32-denormals,-fp64-fp16-denormals" }
+attributes #4 = { nounwind "target-features"="+fp32-denormals,+fp64-fp16-denormals" }
+attributes #5 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" }
Index: test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
+++ test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
@@ -1,7 +1,7 @@
-; RUN: llc -march=amdgcn -mattr=-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-FLUSH %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s
-; RUN: llc -march=amdgcn -mattr=+fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-DENORM %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s
+; RUN: llc -march=amdgcn -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-FLUSH %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s
+; RUN: llc -march=amdgcn -mattr=+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-DENORM %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s

 declare half @llvm.fmuladd.f16(half %a, half %b, half %c)
 declare <2 x half> @llvm.fmuladd.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c)
Index: test/CodeGen/AMDGPU/v_mac.ll
===================================================================
--- test/CodeGen/AMDGPU/v_mac.ll
+++ test/CodeGen/AMDGPU/v_mac.ll
@@ -1,5 +1,6 @@
 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-FLUSH -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-DENORM -check-prefix=GCN %s

 ; GCN-LABEL: {{^}}mac_vvv:
 ; GCN: buffer_load_dword [[A:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0{{$}}
@@ -250,8 +251,8 @@

 ; FIXME: How is this not folded?
 ; SI: v_cvt_f32_f16_e32 v{{[0-9]+}}, 0x3c00
-; VI: v_add_f16_e32 [[TMP2:v[0-9]+]], [[A]], [[A]]
-; VI: v_mad_f16 v{{[0-9]+}}, [[TMP2]], -4.0, 1.0
+; VI-FLUSH: v_add_f16_e32 [[TMP2:v[0-9]+]], [[A]], [[A]]
+; VI-FLUSH: v_mad_f16 v{{[0-9]+}}, [[TMP2]], -4.0, 1.0
 define void @fold_inline_imm_into_mac_src2_f16(half addrspace(1)* %out, half addrspace(1)* %a, half addrspace(1)* %b) #3 {
 bb:
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
Index: test/CodeGen/AMDGPU/v_mac_f16.ll
===================================================================
--- test/CodeGen/AMDGPU/v_mac_f16.ll
+++ test/CodeGen/AMDGPU/v_mac_f16.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -march=amdgcn -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

 ; GCN-LABEL: {{^}}mac_f16
 ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
@@ -604,5 +604,5 @@
   ret void
 }

-attributes #0 = {"unsafe-fp-math"="false"}
-attributes #1 = {"unsafe-fp-math"="true"}
+attributes #0 = { nounwind "unsafe-fp-math"="false" }
+attributes #1 = { nounwind "unsafe-fp-math"="true" }
Index: test/CodeGen/AMDGPU/v_madak_f16.ll
===================================================================
--- test/CodeGen/AMDGPU/v_madak_f16.ll
+++ test/CodeGen/AMDGPU/v_madak_f16.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -march=amdgcn -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

 ; GCN-LABEL: {{^}}madak_f16
 ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]