Index: test/CodeGen/AMDGPU/commute-shifts.ll =================================================================== --- test/CodeGen/AMDGPU/commute-shifts.ll +++ test/CodeGen/AMDGPU/commute-shifts.ll @@ -25,5 +25,5 @@ declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) -attributes #0 = { "enable-no-nans-fp-math"="true" } +attributes #0 = { nounwind } attributes #1 = { nounwind readnone } Index: test/CodeGen/AMDGPU/debugger-insert-nops.ll =================================================================== --- test/CodeGen/AMDGPU/debugger-insert-nops.ll +++ test/CodeGen/AMDGPU/debugger-insert-nops.ll @@ -34,7 +34,7 @@ ; Function Attrs: nounwind readnone declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 -attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="fiji" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind } attributes #1 = { nounwind readnone } !llvm.dbg.cu = !{!0} Index: test/CodeGen/AMDGPU/debugger-reserve-regs.ll =================================================================== --- test/CodeGen/AMDGPU/debugger-reserve-regs.ll +++ test/CodeGen/AMDGPU/debugger-reserve-regs.ll @@ -25,7 +25,7 @@ ; Function Attrs: nounwind readnone declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 -attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="fiji" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind } attributes #1 = { nounwind readnone } !llvm.dbg.cu = !{!0} Index: test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll =================================================================== --- test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll +++ test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll @@ -67,4 +67,4 @@ attributes #0 = { nounwind readnone } attributes #1 = { convergent nounwind } -attributes #2 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind } Index: test/CodeGen/AMDGPU/ds_read2.ll =================================================================== --- test/CodeGen/AMDGPU/ds_read2.ll +++ test/CodeGen/AMDGPU/ds_read2.ll @@ -508,6 +508,6 @@ ; Function Attrs: convergent nounwind declare void @llvm.amdgcn.s.barrier() #2 -attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind } attributes #1 = { nounwind readnone } attributes #2 = { convergent nounwind } Index: test/CodeGen/AMDGPU/ds_read2_superreg.ll =================================================================== --- test/CodeGen/AMDGPU/ds_read2_superreg.ll +++ test/CodeGen/AMDGPU/ds_read2_superreg.ll @@ -204,6 +204,6 @@ ; Function Attrs: nounwind readnone declare i32 @llvm.amdgcn.workitem.id.y() #1 -attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind } attributes #1 = { nounwind readnone } attributes #2 = { convergent nounwind } Index: test/CodeGen/AMDGPU/ds_read2st64.ll =================================================================== --- test/CodeGen/AMDGPU/ds_read2st64.ll +++ test/CodeGen/AMDGPU/ds_read2st64.ll @@ -258,5 +258,5 @@ ; Function Attrs: nounwind readnone declare i32 @llvm.amdgcn.workitem.id.y() #1 -attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind } attributes #1 = { nounwind readnone } Index: test/CodeGen/AMDGPU/ds_write2.ll =================================================================== --- test/CodeGen/AMDGPU/ds_write2.ll +++ test/CodeGen/AMDGPU/ds_write2.ll @@ -431,6 +431,6 @@ ; Function Attrs: nounwind readnone declare i32 @llvm.amdgcn.workitem.id.y() #1 -attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind } attributes #1 = { nounwind readnone } attributes #2 = { convergent nounwind } Index: test/CodeGen/AMDGPU/ds_write2st64.ll =================================================================== --- test/CodeGen/AMDGPU/ds_write2st64.ll +++ test/CodeGen/AMDGPU/ds_write2st64.ll @@ -103,6 +103,6 @@ ; Function Attrs: nounwind readnone declare i32 @llvm.amdgcn.workitem.id.y() #1 -attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind } attributes #1 = { nounwind readnone } attributes #2 = { convergent nounwind } Index: test/CodeGen/AMDGPU/fmed3.ll =================================================================== --- test/CodeGen/AMDGPU/fmed3.ll +++ test/CodeGen/AMDGPU/fmed3.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=NOSNAN -check-prefix=GCN %s ; RUN: llc -march=amdgcn -mattr=+fp-exceptions -verify-machineinstrs < %s | FileCheck -check-prefix=SNAN -check-prefix=GCN %s -declare i32 @llvm.r600.read.tidig.x() #0 +declare i32 @llvm.amdgcn.workitem.id.x() #0 declare float @llvm.minnum.f32(float, float) #0 declare float @llvm.maxnum.f32(float, float) #0 declare double @llvm.minnum.f64(double, double) #0 @@ -13,7 +13,7 @@ ; SNAN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}} ; SNAN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}} define void @v_test_fmed3_r_i_i_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid %a = load float, float addrspace(1)* %gep0 @@ -31,7 +31,7 @@ ; SNAN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}} ; SNAN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}} define void @v_test_fmed3_r_i_i_commute0_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid %a = load float, float addrspace(1)* %gep0 @@ -49,7 +49,7 @@ ; SNAN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}} ; SNAN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}} define void @v_test_fmed3_r_i_i_commute1_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid %a = load float, float addrspace(1)* %gep0 @@ -65,7 +65,7 @@ ; GCN: v_max_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}} ; GCN: v_min_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}} define void @v_test_fmed3_r_i_i_constant_order_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid %a = load float, float addrspace(1)* %gep0 @@ -82,7 +82,7 @@ ; GCN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}} ; GCN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}} define void @v_test_fmed3_r_i_i_multi_use_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid %a = load float, float addrspace(1)* %gep0 @@ -99,7 +99,7 @@ ; GCN: v_max_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, 2.0 ; GCN: v_min_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, 4.0 define void @v_test_fmed3_r_i_i_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #1 { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid %outgep = getelementptr double, double addrspace(1)* %out, i32 %tid %a = load double, double addrspace(1)* %gep0 @@ -114,7 +114,7 @@ ; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_no_nans_f32: ; GCN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0 define void @v_test_fmed3_r_i_i_no_nans_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid %a = load float, float addrspace(1)* %gep0 @@ -132,7 +132,7 @@ ; SNAN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}} ; SNAN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}} define void @v_test_legacy_fmed3_r_i_i_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid %a = load float, float addrspace(1)* %gep0 Index: test/CodeGen/AMDGPU/fmul.ll =================================================================== --- test/CodeGen/AMDGPU/fmul.ll +++ test/CodeGen/AMDGPU/fmul.ll @@ -1,12 +1,11 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s - ; FUNC-LABEL: {{^}}fmul_f32: -; R600: MUL_IEEE {{\** *}}{{T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W +; GCN: v_mul_f32 -; SI: v_mul_f32 +; R600: MUL_IEEE {{\** *}}{{T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W define void @fmul_f32(float addrspace(1)* %out, float %a, float %b) { entry: %0 = fmul float %a, %b @@ -19,11 +18,11 @@ declare void @llvm.AMDGPU.store.output(float, i32) ; FUNC-LABEL: {{^}}fmul_v2f32: +; GCN: v_mul_f32 +; GCN: v_mul_f32 + ; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}} ; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}} - -; SI: v_mul_f32 -; SI: v_mul_f32 define void @fmul_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) { entry: %0 = fmul <2 x float> %a, %b @@ -32,15 +31,15 @@ } ; FUNC-LABEL: {{^}}fmul_v4f32: +; GCN: v_mul_f32 +; GCN: v_mul_f32 +; GCN: v_mul_f32 +; GCN: v_mul_f32 + ; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} - -; SI: v_mul_f32 -; SI: v_mul_f32 -; SI: v_mul_f32 -; SI: v_mul_f32 define void @fmul_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1 %a = load <4 x float>, <4 x float> addrspace(1) * %in @@ -51,9 +50,9 @@ } ; FUNC-LABEL: {{^}}test_mul_2_k: -; SI: v_mul_f32 -; SI-NOT: v_mul_f32 -; SI: s_endpgm +; GCN: v_mul_f32 +; GCN-NOT: v_mul_f32 +; GCN: s_endpgm define void @test_mul_2_k(float addrspace(1)* %out, float %x) #0 { %y = fmul float %x, 2.0 %z = fmul float %y, 3.0 @@ -62,10 +61,10 @@ } ; FUNC-LABEL: {{^}}test_mul_2_k_inv: -; SI: v_mul_f32 -; SI-NOT: v_mul_f32 -; SI-NOT: v_mad_f32 -; SI: s_endpgm +; GCN: v_mul_f32 +; GCN-NOT: v_mul_f32 +; GCN-NOT: v_mad_f32 +; GCN: s_endpgm define void @test_mul_2_k_inv(float addrspace(1)* %out, float %x) #0 { %y = fmul float %x, 3.0 %z = fmul float %y, 2.0 @@ -76,10 +75,10 @@ ; There should be three multiplies here; %a should be used twice (once ; negated), not duplicated into mul x, 5.0 and mul x, -5.0. ; FUNC-LABEL: {{^}}test_mul_twouse: -; SI: v_mul_f32 -; SI: v_mul_f32 -; SI: v_mul_f32 -; SI-NOT: v_mul_f32 +; GCN: v_mul_f32 +; GCN: v_mul_f32 +; GCN: v_mul_f32 +; GCN-NOT: v_mul_f32 define void @test_mul_twouse(float addrspace(1)* %out, float %x, float %y) #0 { %a = fmul float %x, 5.0 %b = fsub float -0.0, %a @@ -89,4 +88,4 @@ ret void } -attributes #0 = { "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" } +attributes #0 = { nounwind } Index: test/CodeGen/AMDGPU/llvm.SI.sendmsg-m0.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.SI.sendmsg-m0.ll +++ test/CodeGen/AMDGPU/llvm.SI.sendmsg-m0.ll @@ -1,11 +1,11 @@ -;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=BOTH %s -;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=BOTH %s +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s -; BOTH-LABEL: {{^}}main: -; BOTH: s_mov_b32 m0, s0 +; GCN-LABEL: {{^}}main: +; GCN: s_mov_b32 m0, s0 ; VI-NEXT: s_nop 0 -; BOTH-NEXT: sendmsg(MSG_GS_DONE, GS_OP_NOP) -; BOTH-NEXT: s_endpgm +; GCN-NEXT: sendmsg(MSG_GS_DONE, GS_OP_NOP) +; GCN-NEXT: s_endpgm define amdgpu_gs void @main(i32 inreg %a) #0 { main_body: @@ -16,5 +16,4 @@ ; Function Attrs: nounwind declare void @llvm.SI.sendmsg(i32, i32) #1 -attributes #0 = { "unsafe-fp-math"="true" } -attributes #1 = { nounwind } +attributes #0 = { nounwind } Index: test/CodeGen/AMDGPU/llvm.dbg.value.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.dbg.value.ll +++ test/CodeGen/AMDGPU/llvm.dbg.value.ll @@ -14,7 +14,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1 -attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind } attributes #1 = { nounwind readnone } !llvm.dbg.cu = !{!0} Index: test/CodeGen/AMDGPU/mubuf.ll =================================================================== --- test/CodeGen/AMDGPU/mubuf.ll +++ test/CodeGen/AMDGPU/mubuf.ll @@ -174,8 +174,7 @@ ret void } -declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #3 +declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #0 declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -attributes #1 = { "unsafe-fp-math"="true" } -attributes #3 = { nounwind readonly } +attributes #0 = { nounwind readonly } Index: test/CodeGen/AMDGPU/sgpr-copy.ll =================================================================== --- test/CodeGen/AMDGPU/sgpr-copy.ll +++ test/CodeGen/AMDGPU/sgpr-copy.ll @@ -394,10 +394,10 @@ declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1 -attributes #0 = { "unsafe-fp-math"="true" } +attributes #0 = { nounwind } attributes #1 = { nounwind readnone } -attributes #2 = { readonly } -attributes #3 = { readnone } +attributes #2 = { nounwind readonly } +attributes #3 = { nounwind readnone } !0 = !{!1, !1, i64 0, i32 1} !1 = !{!"const", null} Index: test/CodeGen/AMDGPU/si-spill-cf.ll =================================================================== --- test/CodeGen/AMDGPU/si-spill-cf.ll +++ test/CodeGen/AMDGPU/si-spill-cf.ll @@ -6,7 +6,7 @@ ; SI: s_or_b64 exec, exec, [[SAVED:s\[[0-9]+:[0-9]+\]|[a-z]+]] ; SI-NOT: v_readlane_b32 [[SAVED]] -define amdgpu_ps void @main() { +define amdgpu_ps void @main() #0 { main_body: %0 = call float @llvm.SI.load.const(<16 x i8> undef, i32 16) %1 = call float @llvm.SI.load.const(<16 x i8> undef, i32 32) @@ -80,7 +80,7 @@ LOOP: ; preds = %ENDIF2795, %main_body %temp894.0 = phi float [ 0.000000e+00, %main_body ], [ %temp894.1, %ENDIF2795 ] %temp18.0 = phi float [ undef, %main_body ], [ %temp18.1, %ENDIF2795 ] - %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #2 + %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) %67 = icmp sgt i32 %tid, 4 br i1 %67, label %ENDLOOP, label %ENDIF @@ -490,25 +490,24 @@ br label %ENDIF2795 } -declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #2 +declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1 ; Function Attrs: nounwind readnone -declare float @llvm.SI.load.const(<16 x i8>, i32) #2 +declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone -declare float @llvm.floor.f32(float) #2 +declare float @llvm.floor.f32(float) #1 ; Function Attrs: nounwind readnone -declare float @llvm.sqrt.f32(float) #2 +declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone -declare float @llvm.minnum.f32(float, float) #2 +declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone -declare float @llvm.maxnum.f32(float, float) #2 +declare float @llvm.maxnum.f32(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) -attributes #0 = { alwaysinline nounwind readnone } -attributes #1 = { "enable-no-nans-fp-math"="true" } -attributes #2 = { nounwind readnone } +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } Index: test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll =================================================================== --- test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll +++ test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll @@ -229,5 +229,5 @@ ; ret void ; } -attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #0 = { nounwind } attributes #1 = { nounwind convergent } Index: test/CodeGen/AMDGPU/si-vector-hang.ll =================================================================== --- test/CodeGen/AMDGPU/si-vector-hang.ll +++ test/CodeGen/AMDGPU/si-vector-hang.ll @@ -90,7 +90,7 @@ ret void } -attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind } !opencl.kernels = !{!0, !1, !2, !3, !4, !5, !6, !7, !8} Index: test/CodeGen/AMDGPU/split-smrd.ll =================================================================== --- test/CodeGen/AMDGPU/split-smrd.ll +++ test/CodeGen/AMDGPU/split-smrd.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s ; FIXME: Move this to sgpr-copy.ll when this is fixed on VI. ; Make sure that when we split an smrd instruction in order to move it to @@ -38,7 +38,7 @@ declare i32 @llvm.SI.packf16(float, float) #1 -attributes #0 = { "unsafe-fp-math"="true" } +attributes #0 = { nounwind } attributes #1 = { nounwind readnone } !0 = !{!1, !1, i64 0, i32 1} Index: test/CodeGen/AMDGPU/store.ll =================================================================== --- test/CodeGen/AMDGPU/store.ll +++ test/CodeGen/AMDGPU/store.ll @@ -377,4 +377,4 @@ ret void } -attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind } Index: test/CodeGen/AMDGPU/subreg-coalescer-crash.ll =================================================================== --- test/CodeGen/AMDGPU/subreg-coalescer-crash.ll +++ test/CodeGen/AMDGPU/subreg-coalescer-crash.ll @@ -105,5 +105,5 @@ declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) -attributes #0 = { "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } +attributes #0 = { nounwind } attributes #1 = { nounwind readnone } Index: test/CodeGen/AMDGPU/v_mac.ll =================================================================== --- test/CodeGen/AMDGPU/v_mac.ll +++ test/CodeGen/AMDGPU/v_mac.ll @@ -7,7 +7,7 @@ ; GCN: buffer_load_dword [[C:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:8 ; GCN: v_mac_f32_e32 [[C]], [[B]], [[A]] ; GCN: buffer_store_dword [[C]] -define void @mac_vvv(float addrspace(1)* %out, float addrspace(1)* %in) { +define void @mac_vvv(float addrspace(1)* %out, float addrspace(1)* %in) #0 { entry: %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1 %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2 @@ -25,7 +25,7 @@ ; GCN-LABEL: {{^}}mad_inline_sgpr_inline: ; GCN-NOT: v_mac_f32 ; GCN: v_mad_f32 v{{[0-9]}}, s{{[0-9]+}}, 0.5, 0.5 -define void @mad_inline_sgpr_inline(float addrspace(1)* %out, float %in) { +define void @mad_inline_sgpr_inline(float addrspace(1)* %out, float %in) #0 { entry: %tmp0 = fmul float 0.5, %in %tmp1 = fadd float %tmp0, 0.5 @@ -36,7 +36,7 @@ ; GCN-LABEL: {{^}}mad_vvs: ; GCN-NOT: v_mac_f32 ; GCN: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}} -define void @mad_vvs(float addrspace(1)* %out, float addrspace(1)* %in, float %c) { +define void @mad_vvs(float addrspace(1)* %out, float addrspace(1)* %in, float %c) #0 { entry: %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1 @@ -51,7 +51,7 @@ ; GCN-LABEL: {{^}}mac_ssv: ; GCN: v_mac_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} -define void @mac_ssv(float addrspace(1)* %out, float addrspace(1)* %in, float %a) { +define void @mac_ssv(float addrspace(1)* %out, float addrspace(1)* %in, float %a) #0 { entry: %c = load float, float addrspace(1)* %in @@ -64,7 +64,7 @@ ; GCN-LABEL: {{^}}mac_mad_same_add: ; GCN: v_mad_f32 v{{[0-9]}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD:v[0-9]+]] ; GCN: v_mac_f32_e32 [[ADD]], v{{[0-9]+}}, v{{[0-9]+}} -define void @mac_mad_same_add(float addrspace(1)* %out, float addrspace(1)* %in) { +define void @mac_mad_same_add(float addrspace(1)* %out, float addrspace(1)* %in) #0 { entry: %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1 %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2 @@ -104,6 +104,46 @@ %b = load float, float addrspace(1)* %b_ptr %c = load float, float addrspace(1)* %c_ptr + %neg_a = fsub float -0.0, %a + %tmp0 = fmul float %neg_a, %b + %tmp1 = fadd float %tmp0, %c + + store float %tmp1, float addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}unsafe_mad_sub0_src0: +; GCN-NOT: v_mac_f32 +; GCN: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}} +define void @unsafe_mad_sub0_src0(float addrspace(1)* %out, float addrspace(1)* %in) #1 { +entry: + %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1 + %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2 + + %a = load float, float addrspace(1)* %in + %b = load float, float addrspace(1)* %b_ptr + %c = load float, float addrspace(1)* %c_ptr + + %neg_a = fsub float 0.0, %a + %tmp0 = fmul float %neg_a, %b + %tmp1 = fadd float %tmp0, %c + + store float %tmp1, float addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}safe_mad_sub0_src0: +; GCN: v_sub_f32_e32 [[SUB0:v[0-9]+]], 0, +; GCN: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[SUB0]] +define void @safe_mad_sub0_src0(float addrspace(1)* %out, float addrspace(1)* %in) #0 { +entry: + %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1 + %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2 + + %a = load float, float addrspace(1)* %in + %b = load float, float addrspace(1)* %b_ptr + %c = load float, float addrspace(1)* %c_ptr + %neg_a = fsub float 0.0, %a %tmp0 = fmul float %neg_a, %b %tmp1 = fadd float %tmp0, %c @@ -124,6 +164,26 @@ %b = load float, float addrspace(1)* %b_ptr %c = load float, float addrspace(1)* %c_ptr + %neg_b = fsub float -0.0, %b + %tmp0 = fmul float %a, %neg_b + %tmp1 = fadd float %tmp0, %c + + store float %tmp1, float addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}unsafe_mad_sub0_src1: +; GCN-NOT: v_mac_f32 +; GCN: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}} +define void @unsafe_mad_sub0_src1(float addrspace(1)* %out, float addrspace(1)* %in) #1 { +entry: + %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1 + %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2 + + %a = load float, float addrspace(1)* %in + %b = load float, float addrspace(1)* %b_ptr + %c = load float, float addrspace(1)* %c_ptr + %neg_b = fsub float 0.0, %b %tmp0 = fmul float %a, %neg_b %tmp1 = fadd float %tmp0, %c @@ -144,7 +204,7 @@ %b = load float, float addrspace(1)* %b_ptr %c = load float, float addrspace(1)* %c_ptr - %neg_c = fsub float 0.0, %c + %neg_c = fsub float -0.0, %c %tmp0 = fmul float %a, %b %tmp1 = fadd float %tmp0, %neg_c @@ -152,4 +212,5 @@ ret void } -attributes #0 = { "true" "unsafe-fp-math"="true" } +attributes #0 = { nounwind "unsafe-fp-math"="false" } +attributes #1 = { nounwind "unsafe-fp-math"="true" } Index: test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll =================================================================== --- test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll +++ test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll @@ -26,7 +26,7 @@ ; GCN: NumVgprs: 256 ; GCN: ScratchSize: 1024 -define amdgpu_vs void @main([9 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, [16 x <16 x i8>] addrspace(2)* byval %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) { +define amdgpu_vs void @main([9 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, [16 x <16 x i8>] addrspace(2)* byval %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 { bb: %tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg1, i64 0, i64 0 %tmp11 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, align 16, !tbaa !0 @@ -493,7 +493,7 @@ declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1 -attributes #0 = { "enable-no-nans-fp-math"="true" } +attributes #0 = { nounwind } attributes #1 = { nounwind readnone } !0 = !{!1, !1, i64 0, i32 1} Index: test/CodeGen/AMDGPU/wrong-transalu-pos-fix.ll =================================================================== --- test/CodeGen/AMDGPU/wrong-transalu-pos-fix.ll +++ test/CodeGen/AMDGPU/wrong-transalu-pos-fix.ll @@ -71,7 +71,7 @@ ; Function Attrs: nounwind readnone declare i32 @llvm.r600.read.global.size.z() #1 -attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind } attributes #1 = { nounwind readnone } !opencl.kernels = !{!0, !1, !2}