diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log.ll b/llvm/test/CodeGen/AMDGPU/llvm.log.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.log.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log.ll @@ -1,10 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SI,GCN-SDAG,SI-SDAG %s -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SI,GCN-GISEL,SI-GISEL %s -; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VI,GCN-SDAG,VI-SDAG %s -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VI,GCN-GISEL,VI-GISEL %s -; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX900,GCN-SDAG,GFX900-SDAG %s -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX900,GCN-GISEL,GFX900-GISEL %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GFX689,SI,GFX689-SDAG,SI-SDAG %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GFX689,SI,GFX689-GISEL,SI-GISEL %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GFX689,VI,GFX689-SDAG,VI-SDAG %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GFX689,VI,GFX689-GISEL,VI-GISEL %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX689,GFX900,GFX689-SDAG,GFX900-SDAG %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX689,GFX900,GFX689-GISEL,GFX900-GISEL %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX1100,GFX1100-SDAG %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX1100,GFX1100-GISEL %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 %s ; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=CM %s @@ -56,6 +58,33 @@ ; GFX900-GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX900-GISEL-NEXT: s_endpgm ; +; GFX1100-SDAG-LABEL: s_log_f32: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_clause 0x1 +; GFX1100-SDAG-NEXT: s_load_b32 s2, s[0:1], 0x2c +; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, s2 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mul_f32 v0, 0x3f317218, v0 +; GFX1100-SDAG-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX1100-SDAG-NEXT: s_endpgm +; +; GFX1100-GISEL-LABEL: s_log_f32: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_clause 0x1 +; GFX1100-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x2c +; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, s2 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX1100-GISEL-NEXT: s_endpgm +; ; R600-LABEL: s_log_f32: ; R600: ; %bb.0: ; R600-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[] @@ -169,6 +198,32 @@ ; GFX900-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX900-GISEL-NEXT: s_endpgm ; +; GFX1100-SDAG-LABEL: s_log_v2f32: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 +; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, s3 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v2, s2 +; GFX1100-SDAG-NEXT: v_mov_b32_e32 v3, 0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_dual_mul_f32 v1, 0x3f317218, v0 :: v_dual_mul_f32 v0, 0x3f317218, v2 +; GFX1100-SDAG-NEXT: global_store_b64 v3, v[0:1], s[0:1] +; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX1100-SDAG-NEXT: s_endpgm +; +; GFX1100-GISEL-LABEL: s_log_v2f32: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 +; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, s2 +; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, s3 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, 0x3f317218, v0 :: v_dual_mul_f32 v1, 0x3f317218, v1 +; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX1100-GISEL-NEXT: s_endpgm +; ; R600-LABEL: s_log_v2f32: ; R600: ; %bb.0: ; R600-NEXT: ALU 6, @4, KC0[CB0:0-32], KC1[] @@ -307,6 +362,40 @@ ; GFX900-GISEL-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] ; GFX900-GISEL-NEXT: s_endpgm ; +; GFX1100-SDAG-LABEL: s_log_v3f32: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_clause 0x1 +; GFX1100-SDAG-NEXT: s_load_b128 s[4:7], s[0:1], 0x34 +; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, s6 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, s5 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v3, s4 +; GFX1100-SDAG-NEXT: v_mov_b32_e32 v4, 0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_dual_mul_f32 v2, 0x3f317218, v0 :: v_dual_mul_f32 v1, 0x3f317218, v1 +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v3 +; GFX1100-SDAG-NEXT: global_store_b96 v4, v[0:2], s[0:1] +; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX1100-SDAG-NEXT: s_endpgm +; +; GFX1100-GISEL-LABEL: s_log_v3f32: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_clause 0x1 +; GFX1100-GISEL-NEXT: s_load_b128 s[4:7], s[0:1], 0x34 +; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX1100-GISEL-NEXT: v_mov_b32_e32 v3, 0 +; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, s4 +; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, s5 +; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, s6 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, 0x3f317218, v0 :: v_dual_mul_f32 v1, 0x3f317218, v1 +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2 +; GFX1100-GISEL-NEXT: global_store_b96 v3, v[0:2], s[0:1] +; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX1100-GISEL-NEXT: s_endpgm +; ; R600-LABEL: s_log_v3f32: ; R600: ; %bb.0: ; R600-NEXT: ALU 13, @4, KC0[CB0:0-32], KC1[] @@ -473,6 +562,43 @@ ; GFX900-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] ; GFX900-GISEL-NEXT: s_endpgm ; +; GFX1100-SDAG-LABEL: s_log_v4f32: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_clause 0x1 +; GFX1100-SDAG-NEXT: s_load_b128 s[4:7], s[0:1], 0x34 +; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, s7 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, s6 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v4, s5 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v5, s4 +; GFX1100-SDAG-NEXT: v_dual_mov_b32 v6, 0 :: v_dual_mul_f32 v3, 0x3f317218, v0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_dual_mul_f32 v2, 0x3f317218, v1 :: v_dual_mul_f32 v1, 0x3f317218, v4 +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v5 +; GFX1100-SDAG-NEXT: global_store_b128 v6, v[0:3], s[0:1] +; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX1100-SDAG-NEXT: s_endpgm +; +; GFX1100-GISEL-LABEL: s_log_v4f32: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_clause 0x1 +; GFX1100-GISEL-NEXT: s_load_b128 s[4:7], s[0:1], 0x34 +; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX1100-GISEL-NEXT: v_mov_b32_e32 v4, 0 +; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, s4 +; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, s5 +; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, s6 +; GFX1100-GISEL-NEXT: v_log_f32_e32 v3, s7 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(TRANS32_DEP_3) +; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, 0x3f317218, v0 :: v_dual_mul_f32 v1, 0x3f317218, v1 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_dual_mul_f32 v2, 0x3f317218, v2 :: v_dual_mul_f32 v3, 0x3f317218, v3 +; GFX1100-GISEL-NEXT: global_store_b128 v4, v[0:3], s[0:1] +; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX1100-GISEL-NEXT: s_endpgm +; ; R600-LABEL: s_log_v4f32: ; R600: ; %bb.0: ; R600-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[] @@ -533,12 +659,21 @@ } define float @v_log_f32(float %in) { -; GCN-LABEL: v_log_f32: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log_f32: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log_f32: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32: ; R600: ; %bb.0: @@ -554,12 +689,21 @@ } define float @v_log_fabs_f32(float %in) { -; GCN-LABEL: v_log_fabs_f32: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e64 v0, |v0| -; GCN-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log_fabs_f32: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e64 v0, |v0| +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log_fabs_f32: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e64 v0, |v0| +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_fabs_f32: ; R600: ; %bb.0: @@ -576,12 +720,21 @@ } define float @v_log_fneg_fabs_f32(float %in) { -; GCN-LABEL: v_log_fneg_fabs_f32: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e64 v0, -|v0| -; GCN-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log_fneg_fabs_f32: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e64 v0, -|v0| +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log_fneg_fabs_f32: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e64 v0, -|v0| +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_fneg_fabs_f32: ; R600: ; %bb.0: @@ -599,12 +752,21 @@ } define float @v_log_fneg_f32(float %in) { -; GCN-LABEL: v_log_fneg_f32: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e64 v0, -v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log_fneg_f32: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e64 v0, -v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log_fneg_f32: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e64 v0, -v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_fneg_f32: ; R600: ; %bb.0: @@ -621,12 +783,21 @@ } define float @v_log_f32_fast(float %in) { -; GCN-LABEL: v_log_f32_fast: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log_f32_fast: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log_f32_fast: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_fast: ; R600: ; %bb.0: @@ -642,12 +813,21 @@ } define float @v_log_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { -; GCN-LABEL: v_log_f32_unsafe_math_attr: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log_f32_unsafe_math_attr: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log_f32_unsafe_math_attr: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_unsafe_math_attr: ; R600: ; %bb.0: @@ -663,12 +843,21 @@ } define float @v_log_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" { -; GCN-LABEL: v_log_f32_approx_fn_attr: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log_f32_approx_fn_attr: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log_f32_approx_fn_attr: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_approx_fn_attr: ; R600: ; %bb.0: @@ -684,12 +873,21 @@ } define float @v_log_f32_ninf(float %in) { -; GCN-LABEL: v_log_f32_ninf: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log_f32_ninf: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log_f32_ninf: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_ninf: ; R600: ; %bb.0: @@ -705,12 +903,21 @@ } define float @v_log_f32_afn(float %in) { -; GCN-LABEL: v_log_f32_afn: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log_f32_afn: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log_f32_afn: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_afn: ; R600: ; %bb.0: @@ -726,12 +933,21 @@ } define float @v_log_f32_afn_daz(float %in) #0 { -; GCN-LABEL: v_log_f32_afn_daz: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log_f32_afn_daz: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log_f32_afn_daz: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_afn_daz: ; R600: ; %bb.0: @@ -747,12 +963,21 @@ } define float @v_log_f32_afn_dynamic(float %in) #1 { -; GCN-LABEL: v_log_f32_afn_dynamic: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log_f32_afn_dynamic: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log_f32_afn_dynamic: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_afn_dynamic: ; R600: ; %bb.0: @@ -768,12 +993,21 @@ } define float @v_fabs_log_f32_afn(float %in) { -; GCN-LABEL: v_fabs_log_f32_afn: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e64 v0, |v0| -; GCN-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_fabs_log_f32_afn: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e64 v0, |v0| +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_fabs_log_f32_afn: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e64 v0, |v0| +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_fabs_log_f32_afn: ; R600: ; %bb.0: @@ -790,12 +1024,21 @@ } define float @v_log_f32_daz(float %in) #0 { -; GCN-LABEL: v_log_f32_daz: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log_f32_daz: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log_f32_daz: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_daz: ; R600: ; %bb.0: @@ -811,12 +1054,21 @@ } define float @v_log_f32_nnan(float %in) { -; GCN-LABEL: v_log_f32_nnan: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log_f32_nnan: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log_f32_nnan: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_nnan: ; R600: ; %bb.0: @@ -832,12 +1084,21 @@ } define float @v_log_f32_nnan_daz(float %in) #0 { -; GCN-LABEL: v_log_f32_nnan_daz: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log_f32_nnan_daz: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log_f32_nnan_daz: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_nnan_daz: ; R600: ; %bb.0: @@ -853,12 +1114,21 @@ } define float @v_log_f32_nnan_dynamic(float %in) #1 { -; GCN-LABEL: v_log_f32_nnan_dynamic: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log_f32_nnan_dynamic: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log_f32_nnan_dynamic: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_nnan_dynamic: ; R600: ; %bb.0: @@ -874,12 +1144,21 @@ } define float @v_log_f32_ninf_daz(float %in) #0 { -; GCN-LABEL: v_log_f32_ninf_daz: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log_f32_ninf_daz: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log_f32_ninf_daz: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_ninf_daz: ; R600: ; %bb.0: @@ -895,12 +1174,21 @@ } define float @v_log_f32_ninf_dynamic(float %in) #1 { -; GCN-LABEL: v_log_f32_ninf_dynamic: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log_f32_ninf_dynamic: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log_f32_ninf_dynamic: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_ninf_dynamic: ; R600: ; %bb.0: @@ -916,12 +1204,21 @@ } define float @v_log_f32_nnan_ninf(float %in) { -; GCN-LABEL: v_log_f32_nnan_ninf: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log_f32_nnan_ninf: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log_f32_nnan_ninf: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_nnan_ninf: ; R600: ; %bb.0: @@ -937,12 +1234,21 @@ } define float @v_log_f32_nnan_ninf_daz(float %in) #0 { -; GCN-LABEL: v_log_f32_nnan_ninf_daz: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log_f32_nnan_ninf_daz: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log_f32_nnan_ninf_daz: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_nnan_ninf_daz: ; R600: ; %bb.0: @@ -958,12 +1264,21 @@ } define float @v_log_f32_nnan_ninf_dynamic(float %in) #1 { -; GCN-LABEL: v_log_f32_nnan_ninf_dynamic: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log_f32_nnan_ninf_dynamic: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log_f32_nnan_ninf_dynamic: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_nnan_ninf_dynamic: ; R600: ; %bb.0: @@ -979,12 +1294,21 @@ } define float @v_log_f32_fast_daz(float %in) #0 { -; GCN-LABEL: v_log_f32_fast_daz: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log_f32_fast_daz: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log_f32_fast_daz: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_fast_daz: ; R600: ; %bb.0: @@ -1000,12 +1324,21 @@ } define float @v_log_f32_dynamic_mode(float %in) #1 { -; GCN-LABEL: v_log_f32_dynamic_mode: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log_f32_dynamic_mode: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log_f32_dynamic_mode: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_dynamic_mode: ; R600: ; %bb.0: @@ -1021,12 +1354,21 @@ } define float @v_log_f32_undef() { -; GCN-LABEL: v_log_f32_undef: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, s4 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log_f32_undef: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, s4 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log_f32_undef: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, s0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_undef: ; R600: ; %bb.0: @@ -1042,19 +1384,37 @@ } define float @v_log_f32_0() { -; GCN-SDAG-LABEL: v_log_f32_0: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: v_log_f32_e32 v0, 0 -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GCN-GISEL-LABEL: v_log_f32_0: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_mov_b32_e32 v0, 0x3f317218 -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0xff800000, v0 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX689-SDAG-LABEL: v_log_f32_0: +; GFX689-SDAG: ; %bb.0: +; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-SDAG-NEXT: v_log_f32_e32 v0, 0 +; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX689-GISEL-LABEL: v_log_f32_0: +; GFX689-GISEL: ; %bb.0: +; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-GISEL-NEXT: v_mov_b32_e32 v0, 0x3f317218 +; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0xff800000, v0 +; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_log_f32_0: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, 0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log_f32_0: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, 0x3f317218 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0xff800000, v0 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_0: ; R600: ; %bb.0: @@ -1070,13 +1430,24 @@ } define float @v_log_f32_from_fpext_f16(i16 %src.i) { -; GCN-LABEL: v_log_f32_from_fpext_f16: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log_f32_from_fpext_f16: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log_f32_from_fpext_f16: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_from_fpext_f16: ; R600: ; %bb.0: @@ -1134,6 +1505,18 @@ ; GFX900-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; +; GFX1100-LABEL: v_log_f32_from_fpext_math_f16: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_add_f16_e32 v0, v0, v1 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_log_f32_from_fpext_math_f16: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1152,20 +1535,40 @@ } define float @v_log_f32_from_fpext_bf16(bfloat %src) { -; GCN-SDAG-LABEL: v_log_f32_from_fpext_bf16: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GCN-GISEL-LABEL: v_log_f32_from_fpext_bf16: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GCN-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX689-SDAG-LABEL: v_log_f32_from_fpext_bf16: +; GFX689-SDAG: ; %bb.0: +; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX689-GISEL-LABEL: v_log_f32_from_fpext_bf16: +; GFX689-GISEL: ; %bb.0: +; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_log_f32_from_fpext_bf16: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log_f32_from_fpext_bf16: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_from_fpext_bf16: ; R600: ; %bb.0: @@ -1214,6 +1617,15 @@ ; GFX900-NEXT: v_mul_f16_e32 v0, 0x398c, v0 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; +; GFX1100-LABEL: v_log_f16: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f16_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f16_e32 v0, 0x398c, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_log_f16: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1260,6 +1672,15 @@ ; GFX900-NEXT: v_mul_f16_e32 v0, 0x398c, v0 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; +; GFX1100-LABEL: v_log_fabs_f16: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f16_e64 v0, |v0| +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f16_e32 v0, 0x398c, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_log_fabs_f16: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1307,6 +1728,15 @@ ; GFX900-NEXT: v_mul_f16_e32 v0, 0x398c, v0 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; +; GFX1100-LABEL: v_log_fneg_fabs_f16: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f16_e64 v0, -|v0| +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f16_e32 v0, 0x398c, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_log_fneg_fabs_f16: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1355,6 +1785,15 @@ ; GFX900-NEXT: v_mul_f16_e32 v0, 0x398c, v0 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; +; GFX1100-LABEL: v_log_fneg_f16: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f16_e64 v0, -v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f16_e32 v0, 0x398c, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_log_fneg_f16: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1402,6 +1841,15 @@ ; GFX900-NEXT: v_mul_f16_e32 v0, 0x398c, v0 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; +; GFX1100-LABEL: v_log_f16_fast: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f16_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f16_e32 v0, 0x398c, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_log_f16_fast: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1474,6 +1922,20 @@ ; GFX900-NEXT: v_pack_b32_f16 v0, v1, v0 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; +; GFX1100-LABEL: v_log_v2f16: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1100-NEXT: v_log_f16_e32 v0, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_log_f16_e32 v1, v1 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f16_e32 v0, 0x398c, v0 +; GFX1100-NEXT: v_mul_f16_e32 v1, 0x398c, v1 +; GFX1100-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_log_v2f16: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1563,6 +2025,36 @@ ; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1100-SDAG-LABEL: v_log_fabs_v2f16: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1100-SDAG-NEXT: v_log_f16_e64 v0, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_log_f16_e64 v1, |v1| +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_mul_f16_e32 v0, 0x398c, v0 +; GFX1100-SDAG-NEXT: v_mul_f16_e32 v1, 0x398c, v1 +; GFX1100-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log_fabs_v2f16: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1100-GISEL-NEXT: v_log_f16_e32 v0, v0 +; GFX1100-GISEL-NEXT: v_log_f16_e32 v1, v1 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f16_e32 v0, 0x398c, v0 +; GFX1100-GISEL-NEXT: v_mul_f16_e32 v1, 0x398c, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_log_fabs_v2f16: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1657,6 +2149,36 @@ ; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1100-SDAG-LABEL: v_log_fneg_fabs_v2f16: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1100-SDAG-NEXT: v_log_f16_e64 v0, -|v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_log_f16_e64 v1, -|v1| +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_mul_f16_e32 v0, 0x398c, v0 +; GFX1100-SDAG-NEXT: v_mul_f16_e32 v1, 0x398c, v1 +; GFX1100-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log_fneg_fabs_v2f16: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1100-GISEL-NEXT: v_log_f16_e32 v0, v0 +; GFX1100-GISEL-NEXT: v_log_f16_e32 v1, v1 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f16_e32 v0, 0x398c, v0 +; GFX1100-GISEL-NEXT: v_mul_f16_e32 v1, 0x398c, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_log_fneg_fabs_v2f16: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1752,6 +2274,36 @@ ; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1100-SDAG-LABEL: v_log_fneg_v2f16: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1100-SDAG-NEXT: v_log_f16_e64 v0, -v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_log_f16_e64 v1, -v1 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_mul_f16_e32 v0, 0x398c, v0 +; GFX1100-SDAG-NEXT: v_mul_f16_e32 v1, 0x398c, v1 +; GFX1100-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log_fneg_v2f16: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1100-GISEL-NEXT: v_log_f16_e32 v0, v0 +; GFX1100-GISEL-NEXT: v_log_f16_e32 v1, v1 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f16_e32 v0, 0x398c, v0 +; GFX1100-GISEL-NEXT: v_mul_f16_e32 v1, 0x398c, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_log_fneg_v2f16: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1825,6 +2377,20 @@ ; GFX900-NEXT: v_pack_b32_f16 v0, v1, v0 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; +; GFX1100-LABEL: v_log_v2f16_fast: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1100-NEXT: v_log_f16_e32 v0, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_log_f16_e32 v1, v1 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f16_e32 v0, 0x398c, v0 +; GFX1100-NEXT: v_mul_f16_e32 v1, 0x398c, v1 +; GFX1100-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_log_v2f16_fast: ; R600: ; %bb.0: ; R600-NEXT: CF_END diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll @@ -1,10 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SI,GCN-SDAG,SI-SDAG %s -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SI,GCN-GISEL,SI-GISEL %s -; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VI,GCN-SDAG,VI-SDAG %s -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VI,GCN-GISEL,VI-GISEL %s -; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX900,GCN-SDAG,GFX900-SDAG %s -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX900,GCN-GISEL,GFX900-GISEL %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GFX689,SI,GFX689-SDAG,SI-SDAG %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GFX689,SI,GFX689-GISEL,SI-GISEL %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GFX689,VI,GFX689-SDAG,VI-SDAG %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GFX689,VI,GFX689-GISEL,VI-GISEL %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX689,GFX900,GFX689-SDAG,GFX900-SDAG %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX689,GFX900,GFX689-GISEL,GFX900-GISEL %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX1100,GFX1100-SDAG %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX1100,GFX1100-GISEL %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 %s ; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=CM %s @@ -56,6 +58,33 @@ ; GFX900-GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX900-GISEL-NEXT: s_endpgm ; +; GFX1100-SDAG-LABEL: s_log10_f32: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_clause 0x1 +; GFX1100-SDAG-NEXT: s_load_b32 s2, s[0:1], 0x2c +; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, s2 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mul_f32 v0, 0x3e9a209b, v0 +; GFX1100-SDAG-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX1100-SDAG-NEXT: s_endpgm +; +; GFX1100-GISEL-LABEL: s_log10_f32: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_clause 0x1 +; GFX1100-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x2c +; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, s2 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX1100-GISEL-NEXT: s_endpgm +; ; R600-LABEL: s_log10_f32: ; R600: ; %bb.0: ; R600-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[] @@ -169,6 +198,32 @@ ; GFX900-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX900-GISEL-NEXT: s_endpgm ; +; GFX1100-SDAG-LABEL: s_log10_v2f32: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 +; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, s3 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v2, s2 +; GFX1100-SDAG-NEXT: v_mov_b32_e32 v3, 0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_dual_mul_f32 v1, 0x3e9a209b, v0 :: v_dual_mul_f32 v0, 0x3e9a209b, v2 +; GFX1100-SDAG-NEXT: global_store_b64 v3, v[0:1], s[0:1] +; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX1100-SDAG-NEXT: s_endpgm +; +; GFX1100-GISEL-LABEL: s_log10_v2f32: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 +; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, s2 +; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, s3 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, 0x3e9a209b, v0 :: v_dual_mul_f32 v1, 0x3e9a209b, v1 +; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX1100-GISEL-NEXT: s_endpgm +; ; R600-LABEL: s_log10_v2f32: ; R600: ; %bb.0: ; R600-NEXT: ALU 6, @4, KC0[CB0:0-32], KC1[] @@ -307,6 +362,40 @@ ; GFX900-GISEL-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] ; GFX900-GISEL-NEXT: s_endpgm ; +; GFX1100-SDAG-LABEL: s_log10_v3f32: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_clause 0x1 +; GFX1100-SDAG-NEXT: s_load_b128 s[4:7], s[0:1], 0x34 +; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, s6 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, s5 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v3, s4 +; GFX1100-SDAG-NEXT: v_mov_b32_e32 v4, 0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_dual_mul_f32 v2, 0x3e9a209b, v0 :: v_dual_mul_f32 v1, 0x3e9a209b, v1 +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v3 +; GFX1100-SDAG-NEXT: global_store_b96 v4, v[0:2], s[0:1] +; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX1100-SDAG-NEXT: s_endpgm +; +; GFX1100-GISEL-LABEL: s_log10_v3f32: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_clause 0x1 +; GFX1100-GISEL-NEXT: s_load_b128 s[4:7], s[0:1], 0x34 +; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX1100-GISEL-NEXT: v_mov_b32_e32 v3, 0 +; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, s4 +; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, s5 +; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, s6 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, 0x3e9a209b, v0 :: v_dual_mul_f32 v1, 0x3e9a209b, v1 +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2 +; GFX1100-GISEL-NEXT: global_store_b96 v3, v[0:2], s[0:1] +; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX1100-GISEL-NEXT: s_endpgm +; ; R600-LABEL: s_log10_v3f32: ; R600: ; %bb.0: ; R600-NEXT: ALU 13, @4, KC0[CB0:0-32], KC1[] @@ -473,6 +562,43 @@ ; GFX900-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] ; GFX900-GISEL-NEXT: s_endpgm ; +; GFX1100-SDAG-LABEL: s_log10_v4f32: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_clause 0x1 +; GFX1100-SDAG-NEXT: s_load_b128 s[4:7], s[0:1], 0x34 +; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, s7 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, s6 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v4, s5 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v5, s4 +; GFX1100-SDAG-NEXT: v_dual_mov_b32 v6, 0 :: v_dual_mul_f32 v3, 0x3e9a209b, v0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_dual_mul_f32 v2, 0x3e9a209b, v1 :: v_dual_mul_f32 v1, 0x3e9a209b, v4 +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v5 +; GFX1100-SDAG-NEXT: global_store_b128 v6, v[0:3], s[0:1] +; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX1100-SDAG-NEXT: s_endpgm +; +; GFX1100-GISEL-LABEL: s_log10_v4f32: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_clause 0x1 +; GFX1100-GISEL-NEXT: s_load_b128 s[4:7], s[0:1], 0x34 +; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX1100-GISEL-NEXT: v_mov_b32_e32 v4, 0 +; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, s4 +; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, s5 +; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, s6 +; GFX1100-GISEL-NEXT: v_log_f32_e32 v3, s7 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(TRANS32_DEP_3) +; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, 0x3e9a209b, v0 :: v_dual_mul_f32 v1, 0x3e9a209b, v1 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_dual_mul_f32 v2, 0x3e9a209b, v2 :: v_dual_mul_f32 v3, 0x3e9a209b, v3 +; GFX1100-GISEL-NEXT: global_store_b128 v4, v[0:3], s[0:1] +; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX1100-GISEL-NEXT: s_endpgm +; ; R600-LABEL: s_log10_v4f32: ; R600: ; %bb.0: ; R600-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[] @@ -533,12 +659,21 @@ } define float @v_log10_f32(float %in) { -; GCN-LABEL: v_log10_f32: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log10_f32: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log10_f32: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32: ; R600: ; %bb.0: @@ -554,12 +689,21 @@ } define float @v_log10_fabs_f32(float %in) { -; GCN-LABEL: v_log10_fabs_f32: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e64 v0, |v0| -; GCN-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log10_fabs_f32: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e64 v0, |v0| +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log10_fabs_f32: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e64 v0, |v0| +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_fabs_f32: ; R600: ; %bb.0: @@ -576,12 +720,21 @@ } define float @v_log10_fneg_fabs_f32(float %in) { -; GCN-LABEL: v_log10_fneg_fabs_f32: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e64 v0, -|v0| -; GCN-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log10_fneg_fabs_f32: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e64 v0, -|v0| +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log10_fneg_fabs_f32: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e64 v0, -|v0| +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_fneg_fabs_f32: ; R600: ; %bb.0: @@ -599,12 +752,21 @@ } define float @v_log10_fneg_f32(float %in) { -; GCN-LABEL: v_log10_fneg_f32: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e64 v0, -v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log10_fneg_f32: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e64 v0, -v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log10_fneg_f32: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e64 v0, -v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_fneg_f32: ; R600: ; %bb.0: @@ -621,12 +783,21 @@ } define float @v_log10_f32_fast(float %in) { -; GCN-LABEL: v_log10_f32_fast: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log10_f32_fast: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log10_f32_fast: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_fast: ; R600: ; %bb.0: @@ -642,12 +813,21 @@ } define float @v_log10_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { -; GCN-LABEL: v_log10_f32_unsafe_math_attr: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log10_f32_unsafe_math_attr: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log10_f32_unsafe_math_attr: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_unsafe_math_attr: ; R600: ; %bb.0: @@ -663,12 +843,21 @@ } define float @v_log10_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" { -; GCN-LABEL: v_log10_f32_approx_fn_attr: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log10_f32_approx_fn_attr: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log10_f32_approx_fn_attr: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_approx_fn_attr: ; R600: ; %bb.0: @@ -684,12 +873,21 @@ } define float @v_log10_f32_ninf(float %in) { -; GCN-LABEL: v_log10_f32_ninf: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log10_f32_ninf: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log10_f32_ninf: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_ninf: ; R600: ; %bb.0: @@ -705,12 +903,21 @@ } define float @v_log10_f32_afn(float %in) { -; GCN-LABEL: v_log10_f32_afn: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log10_f32_afn: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log10_f32_afn: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_afn: ; R600: ; %bb.0: @@ -726,12 +933,21 @@ } define float @v_log10_f32_afn_daz(float %in) #0 { -; GCN-LABEL: v_log10_f32_afn_daz: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log10_f32_afn_daz: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log10_f32_afn_daz: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_afn_daz: ; R600: ; %bb.0: @@ -747,12 +963,21 @@ } define float @v_log10_f32_afn_dynamic(float %in) #1 { -; GCN-LABEL: v_log10_f32_afn_dynamic: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log10_f32_afn_dynamic: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log10_f32_afn_dynamic: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_afn_dynamic: ; R600: ; %bb.0: @@ -768,12 +993,21 @@ } define float @v_fabs_log10_f32_afn(float %in) { -; GCN-LABEL: v_fabs_log10_f32_afn: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e64 v0, |v0| -; GCN-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_fabs_log10_f32_afn: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e64 v0, |v0| +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_fabs_log10_f32_afn: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e64 v0, |v0| +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_fabs_log10_f32_afn: ; R600: ; %bb.0: @@ -790,12 +1024,21 @@ } define float @v_log10_f32_daz(float %in) #0 { -; GCN-LABEL: v_log10_f32_daz: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log10_f32_daz: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log10_f32_daz: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_daz: ; R600: ; %bb.0: @@ -811,12 +1054,21 @@ } define float @v_log10_f32_nnan(float %in) { -; GCN-LABEL: v_log10_f32_nnan: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log10_f32_nnan: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log10_f32_nnan: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_nnan: ; R600: ; %bb.0: @@ -832,12 +1084,21 @@ } define float @v_log10_f32_nnan_daz(float %in) #0 { -; GCN-LABEL: v_log10_f32_nnan_daz: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log10_f32_nnan_daz: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log10_f32_nnan_daz: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_nnan_daz: ; R600: ; %bb.0: @@ -853,12 +1114,21 @@ } define float @v_log10_f32_nnan_dynamic(float %in) #1 { -; GCN-LABEL: v_log10_f32_nnan_dynamic: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log10_f32_nnan_dynamic: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log10_f32_nnan_dynamic: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_nnan_dynamic: ; R600: ; %bb.0: @@ -874,12 +1144,21 @@ } define float @v_log10_f32_ninf_daz(float %in) #0 { -; GCN-LABEL: v_log10_f32_ninf_daz: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log10_f32_ninf_daz: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log10_f32_ninf_daz: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_ninf_daz: ; R600: ; %bb.0: @@ -895,12 +1174,21 @@ } define float @v_log10_f32_ninf_dynamic(float %in) #1 { -; GCN-LABEL: v_log10_f32_ninf_dynamic: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log10_f32_ninf_dynamic: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log10_f32_ninf_dynamic: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_ninf_dynamic: ; R600: ; %bb.0: @@ -916,12 +1204,21 @@ } define float @v_log10_f32_nnan_ninf(float %in) { -; GCN-LABEL: v_log10_f32_nnan_ninf: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log10_f32_nnan_ninf: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log10_f32_nnan_ninf: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_nnan_ninf: ; R600: ; %bb.0: @@ -937,12 +1234,21 @@ } define float @v_log10_f32_nnan_ninf_daz(float %in) #0 { -; GCN-LABEL: v_log10_f32_nnan_ninf_daz: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log10_f32_nnan_ninf_daz: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log10_f32_nnan_ninf_daz: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_nnan_ninf_daz: ; R600: ; %bb.0: @@ -958,12 +1264,21 @@ } define float @v_log10_f32_nnan_ninf_dynamic(float %in) #1 { -; GCN-LABEL: v_log10_f32_nnan_ninf_dynamic: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log10_f32_nnan_ninf_dynamic: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log10_f32_nnan_ninf_dynamic: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_nnan_ninf_dynamic: ; R600: ; %bb.0: @@ -979,12 +1294,21 @@ } define float @v_log10_f32_fast_daz(float %in) #0 { -; GCN-LABEL: v_log10_f32_fast_daz: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log10_f32_fast_daz: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log10_f32_fast_daz: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_fast_daz: ; R600: ; %bb.0: @@ -1000,12 +1324,21 @@ } define float @v_log10_f32_dynamic_mode(float %in) #1 { -; GCN-LABEL: v_log10_f32_dynamic_mode: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log10_f32_dynamic_mode: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log10_f32_dynamic_mode: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_dynamic_mode: ; R600: ; %bb.0: @@ -1021,12 +1354,21 @@ } define float @v_log10_f32_undef() { -; GCN-LABEL: v_log10_f32_undef: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, s4 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log10_f32_undef: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, s4 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log10_f32_undef: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, s0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_undef: ; R600: ; %bb.0: @@ -1042,19 +1384,37 @@ } define float @v_log10_f32_0() { -; GCN-SDAG-LABEL: v_log10_f32_0: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: v_log_f32_e32 v0, 0 -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GCN-GISEL-LABEL: v_log10_f32_0: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_mov_b32_e32 v0, 0x3e9a209b -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0xff800000, v0 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX689-SDAG-LABEL: v_log10_f32_0: +; GFX689-SDAG: ; %bb.0: +; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-SDAG-NEXT: v_log_f32_e32 v0, 0 +; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX689-GISEL-LABEL: v_log10_f32_0: +; GFX689-GISEL: ; %bb.0: +; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-GISEL-NEXT: v_mov_b32_e32 v0, 0x3e9a209b +; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0xff800000, v0 +; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_log10_f32_0: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, 0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log10_f32_0: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, 0x3e9a209b +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0xff800000, v0 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_0: ; R600: ; %bb.0: @@ -1070,13 +1430,24 @@ } define float @v_log10_f32_from_fpext_f16(i16 %src.i) { -; GCN-LABEL: v_log10_f32_from_fpext_f16: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log10_f32_from_fpext_f16: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log10_f32_from_fpext_f16: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_from_fpext_f16: ; R600: ; %bb.0: @@ -1134,6 +1505,18 @@ ; GFX900-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; +; GFX1100-LABEL: v_log10_f32_from_fpext_math_f16: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_add_f16_e32 v0, v0, v1 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_log10_f32_from_fpext_math_f16: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1152,20 +1535,40 @@ } define float @v_log10_f32_from_fpext_bf16(bfloat %src) { -; GCN-SDAG-LABEL: v_log10_f32_from_fpext_bf16: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GCN-GISEL-LABEL: v_log10_f32_from_fpext_bf16: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GCN-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX689-SDAG-LABEL: v_log10_f32_from_fpext_bf16: +; GFX689-SDAG: ; %bb.0: +; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX689-GISEL-LABEL: v_log10_f32_from_fpext_bf16: +; GFX689-GISEL: ; %bb.0: +; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_log10_f32_from_fpext_bf16: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log10_f32_from_fpext_bf16: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_from_fpext_bf16: ; R600: ; %bb.0: @@ -1214,6 +1617,15 @@ ; GFX900-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; +; GFX1100-LABEL: v_log10_f16: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f16_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_log10_f16: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1260,6 +1672,15 @@ ; GFX900-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; +; GFX1100-LABEL: v_log10_fabs_f16: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f16_e64 v0, |v0| +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_log10_fabs_f16: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1307,6 +1728,15 @@ ; GFX900-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; +; GFX1100-LABEL: v_log10_fneg_fabs_f16: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f16_e64 v0, -|v0| +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_log10_fneg_fabs_f16: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1355,6 +1785,15 @@ ; GFX900-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; +; GFX1100-LABEL: v_log10_fneg_f16: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f16_e64 v0, -v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_log10_fneg_f16: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1402,6 +1841,15 @@ ; GFX900-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; +; GFX1100-LABEL: v_log10_f16_fast: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f16_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_log10_f16_fast: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1474,6 +1922,20 @@ ; GFX900-NEXT: v_pack_b32_f16 v0, v1, v0 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; +; GFX1100-LABEL: v_log10_v2f16: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1100-NEXT: v_log_f16_e32 v0, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_log_f16_e32 v1, v1 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 +; GFX1100-NEXT: v_mul_f16_e32 v1, 0x34d1, v1 +; GFX1100-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_log10_v2f16: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1563,6 +2025,36 @@ ; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1100-SDAG-LABEL: v_log10_fabs_v2f16: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1100-SDAG-NEXT: v_log_f16_e64 v0, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_log_f16_e64 v1, |v1| +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 +; GFX1100-SDAG-NEXT: v_mul_f16_e32 v1, 0x34d1, v1 +; GFX1100-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log10_fabs_v2f16: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1100-GISEL-NEXT: v_log_f16_e32 v0, v0 +; GFX1100-GISEL-NEXT: v_log_f16_e32 v1, v1 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 +; GFX1100-GISEL-NEXT: v_mul_f16_e32 v1, 0x34d1, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_log10_fabs_v2f16: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1657,6 +2149,36 @@ ; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1100-SDAG-LABEL: v_log10_fneg_fabs_v2f16: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1100-SDAG-NEXT: v_log_f16_e64 v0, -|v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_log_f16_e64 v1, -|v1| +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 +; GFX1100-SDAG-NEXT: v_mul_f16_e32 v1, 0x34d1, v1 +; GFX1100-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log10_fneg_fabs_v2f16: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1100-GISEL-NEXT: v_log_f16_e32 v0, v0 +; GFX1100-GISEL-NEXT: v_log_f16_e32 v1, v1 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 +; GFX1100-GISEL-NEXT: v_mul_f16_e32 v1, 0x34d1, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_log10_fneg_fabs_v2f16: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1752,6 +2274,36 @@ ; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1100-SDAG-LABEL: v_log10_fneg_v2f16: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1100-SDAG-NEXT: v_log_f16_e64 v0, -v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_log_f16_e64 v1, -v1 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 +; GFX1100-SDAG-NEXT: v_mul_f16_e32 v1, 0x34d1, v1 +; GFX1100-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log10_fneg_v2f16: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1100-GISEL-NEXT: v_log_f16_e32 v0, v0 +; GFX1100-GISEL-NEXT: v_log_f16_e32 v1, v1 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 +; GFX1100-GISEL-NEXT: v_mul_f16_e32 v1, 0x34d1, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_log10_fneg_v2f16: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1825,6 +2377,20 @@ ; GFX900-NEXT: v_pack_b32_f16 v0, v1, v0 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; +; GFX1100-LABEL: v_log10_v2f16_fast: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1100-NEXT: v_log_f16_e32 v0, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_log_f16_e32 v1, v1 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 +; GFX1100-NEXT: v_mul_f16_e32 v1, 0x34d1, v1 +; GFX1100-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_log10_v2f16_fast: ; R600: ; %bb.0: ; R600-NEXT: CF_END diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log2.ll b/llvm/test/CodeGen/AMDGPU/llvm.log2.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.log2.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log2.ll @@ -1,10 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SI,GCN-SDAG,SI-SDAG %s -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SI,GCN-GISEL,SI-GISEL %s -; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VI,GCN-SDAG,VI-SDAG %s -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VI,GCN-GISEL,VI-GISEL %s -; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX900,GCN-SDAG,GFX900-SDAG %s -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX900,GCN-GISEL,GFX900-GISEL %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GFX689,SI,GFX689-SDAG,SI-SDAG %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GFX689,SI,GFX689-GISEL,SI-GISEL %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GFX689,VI,GFX689-SDAG,VI-SDAG %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GFX689,VI,GFX689-GISEL,VI-GISEL %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX689,GFX900,GFX689-SDAG,GFX900-SDAG %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX689,GFX900,GFX689-GISEL,GFX900-GISEL %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX1100,GFX1100-SDAG %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX1100,GFX1100-GISEL %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 %s ; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=CM %s @@ -42,6 +44,18 @@ ; GFX900-NEXT: global_store_dword v1, v0, s[2:3] ; GFX900-NEXT: s_endpgm ; +; GFX1100-LABEL: s_log2_f32: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_clause 0x1 +; GFX1100-NEXT: s_load_b32 s2, s[0:1], 0x2c +; GFX1100-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX1100-NEXT: v_mov_b32_e32 v1, 0 +; GFX1100-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-NEXT: v_log_f32_e32 v0, s2 +; GFX1100-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX1100-NEXT: s_endpgm +; ; R600-LABEL: s_log2_f32: ; R600: ; %bb.0: ; R600-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[] @@ -140,6 +154,28 @@ ; GFX900-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX900-GISEL-NEXT: s_endpgm ; +; GFX1100-SDAG-LABEL: s_log2_v2f32: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 +; GFX1100-SDAG-NEXT: v_mov_b32_e32 v2, 0 +; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, s3 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, s2 +; GFX1100-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX1100-SDAG-NEXT: s_endpgm +; +; GFX1100-GISEL-LABEL: s_log2_v2f32: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 +; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, s2 +; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, s3 +; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX1100-GISEL-NEXT: s_endpgm +; ; R600-LABEL: s_log2_v2f32: ; R600: ; %bb.0: ; R600-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[] @@ -253,6 +289,34 @@ ; GFX900-GISEL-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] ; GFX900-GISEL-NEXT: s_endpgm ; +; GFX1100-SDAG-LABEL: s_log2_v3f32: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_clause 0x1 +; GFX1100-SDAG-NEXT: s_load_b128 s[4:7], s[0:1], 0x34 +; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX1100-SDAG-NEXT: v_mov_b32_e32 v3, 0 +; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_log_f32_e32 v2, s6 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, s5 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, s4 +; GFX1100-SDAG-NEXT: global_store_b96 v3, v[0:2], s[0:1] +; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX1100-SDAG-NEXT: s_endpgm +; +; GFX1100-GISEL-LABEL: s_log2_v3f32: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_clause 0x1 +; GFX1100-GISEL-NEXT: s_load_b128 s[4:7], s[0:1], 0x34 +; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX1100-GISEL-NEXT: v_mov_b32_e32 v3, 0 +; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, s4 +; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, s5 +; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, s6 +; GFX1100-GISEL-NEXT: global_store_b96 v3, v[0:2], s[0:1] +; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX1100-GISEL-NEXT: s_endpgm +; ; R600-LABEL: s_log2_v3f32: ; R600: ; %bb.0: ; R600-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] @@ -384,6 +448,36 @@ ; GFX900-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] ; GFX900-GISEL-NEXT: s_endpgm ; +; GFX1100-SDAG-LABEL: s_log2_v4f32: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_clause 0x1 +; GFX1100-SDAG-NEXT: s_load_b128 s[4:7], s[0:1], 0x34 +; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX1100-SDAG-NEXT: v_mov_b32_e32 v4, 0 +; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_log_f32_e32 v3, s7 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v2, s6 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, s5 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, s4 +; GFX1100-SDAG-NEXT: global_store_b128 v4, v[0:3], s[0:1] +; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX1100-SDAG-NEXT: s_endpgm +; +; GFX1100-GISEL-LABEL: s_log2_v4f32: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_clause 0x1 +; GFX1100-GISEL-NEXT: s_load_b128 s[4:7], s[0:1], 0x34 +; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX1100-GISEL-NEXT: v_mov_b32_e32 v4, 0 +; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, s4 +; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, s5 +; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, s6 +; GFX1100-GISEL-NEXT: v_log_f32_e32 v3, s7 +; GFX1100-GISEL-NEXT: global_store_b128 v4, v[0:3], s[0:1] +; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX1100-GISEL-NEXT: s_endpgm +; ; R600-LABEL: s_log2_v4f32: ; R600: ; %bb.0: ; R600-NEXT: ALU 5, @4, KC0[CB0:0-32], KC1[] @@ -429,11 +523,18 @@ } define float @v_log2_f32(float %in) { -; GCN-LABEL: v_log2_f32: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log2_f32: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log2_f32: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32: ; R600: ; %bb.0: @@ -449,11 +550,18 @@ } define float @v_log2_fabs_f32(float %in) { -; GCN-LABEL: v_log2_fabs_f32: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e64 v0, |v0| -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log2_fabs_f32: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e64 v0, |v0| +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log2_fabs_f32: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e64 v0, |v0| +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_fabs_f32: ; R600: ; %bb.0: @@ -470,11 +578,18 @@ } define float @v_log2_fneg_fabs_f32(float %in) { -; GCN-LABEL: v_log2_fneg_fabs_f32: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e64 v0, -|v0| -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log2_fneg_fabs_f32: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e64 v0, -|v0| +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log2_fneg_fabs_f32: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e64 v0, -|v0| +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_fneg_fabs_f32: ; R600: ; %bb.0: @@ -492,11 +607,18 @@ } define float @v_log2_fneg_f32(float %in) { -; GCN-LABEL: v_log2_fneg_f32: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e64 v0, -v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log2_fneg_f32: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e64 v0, -v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log2_fneg_f32: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e64 v0, -v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_fneg_f32: ; R600: ; %bb.0: @@ -513,11 +635,18 @@ } define float @v_log2_f32_fast(float %in) { -; GCN-LABEL: v_log2_f32_fast: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log2_f32_fast: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log2_f32_fast: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_fast: ; R600: ; %bb.0: @@ -533,11 +662,18 @@ } define float @v_log2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { -; GCN-LABEL: v_log2_f32_unsafe_math_attr: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log2_f32_unsafe_math_attr: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log2_f32_unsafe_math_attr: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_unsafe_math_attr: ; R600: ; %bb.0: @@ -553,11 +689,18 @@ } define float @v_log2_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" { -; GCN-LABEL: v_log2_f32_approx_fn_attr: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log2_f32_approx_fn_attr: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log2_f32_approx_fn_attr: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_approx_fn_attr: ; R600: ; %bb.0: @@ -573,11 +716,18 @@ } define float @v_log2_f32_ninf(float %in) { -; GCN-LABEL: v_log2_f32_ninf: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log2_f32_ninf: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log2_f32_ninf: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_ninf: ; R600: ; %bb.0: @@ -593,11 +743,18 @@ } define float @v_log2_f32_afn(float %in) { -; GCN-LABEL: v_log2_f32_afn: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log2_f32_afn: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log2_f32_afn: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_afn: ; R600: ; %bb.0: @@ -613,11 +770,18 @@ } define float @v_log2_f32_afn_daz(float %in) #0 { -; GCN-LABEL: v_log2_f32_afn_daz: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log2_f32_afn_daz: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log2_f32_afn_daz: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_afn_daz: ; R600: ; %bb.0: @@ -633,11 +797,18 @@ } define float @v_log2_f32_afn_dynamic(float %in) #1 { -; GCN-LABEL: v_log2_f32_afn_dynamic: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log2_f32_afn_dynamic: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log2_f32_afn_dynamic: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_afn_dynamic: ; R600: ; %bb.0: @@ -653,11 +824,18 @@ } define float @v_fabs_log2_f32_afn(float %in) { -; GCN-LABEL: v_fabs_log2_f32_afn: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e64 v0, |v0| -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_fabs_log2_f32_afn: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e64 v0, |v0| +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_fabs_log2_f32_afn: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e64 v0, |v0| +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_fabs_log2_f32_afn: ; R600: ; %bb.0: @@ -674,11 +852,18 @@ } define float @v_log2_f32_daz(float %in) #0 { -; GCN-LABEL: v_log2_f32_daz: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log2_f32_daz: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log2_f32_daz: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_daz: ; R600: ; %bb.0: @@ -694,11 +879,18 @@ } define float @v_log2_f32_nnan(float %in) { -; GCN-LABEL: v_log2_f32_nnan: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log2_f32_nnan: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log2_f32_nnan: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_nnan: ; R600: ; %bb.0: @@ -714,11 +906,18 @@ } define float @v_log2_f32_nnan_daz(float %in) #0 { -; GCN-LABEL: v_log2_f32_nnan_daz: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log2_f32_nnan_daz: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log2_f32_nnan_daz: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_nnan_daz: ; R600: ; %bb.0: @@ -734,11 +933,18 @@ } define float @v_log2_f32_nnan_dynamic(float %in) #1 { -; GCN-LABEL: v_log2_f32_nnan_dynamic: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log2_f32_nnan_dynamic: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log2_f32_nnan_dynamic: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_nnan_dynamic: ; R600: ; %bb.0: @@ -754,11 +960,18 @@ } define float @v_log2_f32_ninf_daz(float %in) #0 { -; GCN-LABEL: v_log2_f32_ninf_daz: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log2_f32_ninf_daz: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log2_f32_ninf_daz: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_ninf_daz: ; R600: ; %bb.0: @@ -774,11 +987,18 @@ } define float @v_log2_f32_ninf_dynamic(float %in) #1 { -; GCN-LABEL: v_log2_f32_ninf_dynamic: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log2_f32_ninf_dynamic: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log2_f32_ninf_dynamic: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_ninf_dynamic: ; R600: ; %bb.0: @@ -794,11 +1014,18 @@ } define float @v_log2_f32_nnan_ninf(float %in) { -; GCN-LABEL: v_log2_f32_nnan_ninf: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log2_f32_nnan_ninf: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log2_f32_nnan_ninf: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_nnan_ninf: ; R600: ; %bb.0: @@ -814,11 +1041,18 @@ } define float @v_log2_f32_nnan_ninf_daz(float %in) #0 { -; GCN-LABEL: v_log2_f32_nnan_ninf_daz: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log2_f32_nnan_ninf_daz: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log2_f32_nnan_ninf_daz: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_nnan_ninf_daz: ; R600: ; %bb.0: @@ -834,11 +1068,18 @@ } define float @v_log2_f32_nnan_ninf_dynamic(float %in) #1 { -; GCN-LABEL: v_log2_f32_nnan_ninf_dynamic: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log2_f32_nnan_ninf_dynamic: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log2_f32_nnan_ninf_dynamic: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_nnan_ninf_dynamic: ; R600: ; %bb.0: @@ -854,11 +1095,18 @@ } define float @v_log2_f32_fast_daz(float %in) #0 { -; GCN-LABEL: v_log2_f32_fast_daz: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log2_f32_fast_daz: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log2_f32_fast_daz: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_fast_daz: ; R600: ; %bb.0: @@ -874,11 +1122,18 @@ } define float @v_log2_f32_dynamic_mode(float %in) #1 { -; GCN-LABEL: v_log2_f32_dynamic_mode: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log2_f32_dynamic_mode: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log2_f32_dynamic_mode: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_dynamic_mode: ; R600: ; %bb.0: @@ -894,11 +1149,18 @@ } define float @v_log2_f32_undef() { -; GCN-LABEL: v_log2_f32_undef: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, s4 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log2_f32_undef: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_log_f32_e32 v0, s4 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log2_f32_undef: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f32_e32 v0, s0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_undef: ; R600: ; %bb.0: @@ -914,17 +1176,31 @@ } define float @v_log2_f32_0() { -; GCN-SDAG-LABEL: v_log2_f32_0: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: v_log_f32_e32 v0, 0 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GCN-GISEL-LABEL: v_log2_f32_0: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_mov_b32_e32 v0, 0xff800000 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX689-SDAG-LABEL: v_log2_f32_0: +; GFX689-SDAG: ; %bb.0: +; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-SDAG-NEXT: v_log_f32_e32 v0, 0 +; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX689-GISEL-LABEL: v_log2_f32_0: +; GFX689-GISEL: ; %bb.0: +; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-GISEL-NEXT: v_mov_b32_e32 v0, 0xff800000 +; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_log2_f32_0: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, 0 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log2_f32_0: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, 0xff800000 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_0: ; R600: ; %bb.0: @@ -950,12 +1226,21 @@ } define float @v_log2_f32_from_fpext_f16(i16 %src.i) { -; GCN-LABEL: v_log2_f32_from_fpext_f16: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log2_f32_from_fpext_f16: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: v_log2_f32_from_fpext_f16: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_from_fpext_f16: ; R600: ; %bb.0: @@ -1009,6 +1294,16 @@ ; GFX900-NEXT: v_log_f32_e32 v0, v0 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; +; GFX1100-LABEL: v_log2_f32_from_fpext_math_f16: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_add_f16_e32 v0, v0, v1 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_log2_f32_from_fpext_math_f16: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1034,18 +1329,34 @@ } define float @v_log2_f32_from_fpext_bf16(bfloat %src) { -; GCN-SDAG-LABEL: v_log2_f32_from_fpext_bf16: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GCN-GISEL-LABEL: v_log2_f32_from_fpext_bf16: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GCN-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX689-SDAG-LABEL: v_log2_f32_from_fpext_bf16: +; GFX689-SDAG: ; %bb.0: +; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX689-GISEL-LABEL: v_log2_f32_from_fpext_bf16: +; GFX689-GISEL: ; %bb.0: +; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-LABEL: v_log2_f32_from_fpext_bf16: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log2_f32_from_fpext_bf16: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_from_fpext_bf16: ; R600: ; %bb.0: @@ -1101,6 +1412,13 @@ ; GFX900-NEXT: v_log_f16_e32 v0, v0 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; +; GFX1100-LABEL: v_log2_f16: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f16_e32 v0, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_log2_f16: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1148,6 +1466,13 @@ ; GFX900-NEXT: v_log_f16_e64 v0, |v0| ; GFX900-NEXT: s_setpc_b64 s[30:31] ; +; GFX1100-LABEL: v_log2_fabs_f16: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f16_e64 v0, |v0| +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_log2_fabs_f16: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1196,6 +1521,13 @@ ; GFX900-NEXT: v_log_f16_e64 v0, -|v0| ; GFX900-NEXT: s_setpc_b64 s[30:31] ; +; GFX1100-LABEL: v_log2_fneg_fabs_f16: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f16_e64 v0, -|v0| +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_log2_fneg_fabs_f16: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1245,6 +1577,13 @@ ; GFX900-NEXT: v_log_f16_e64 v0, -v0 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; +; GFX1100-LABEL: v_log2_fneg_f16: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f16_e64 v0, -v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_log2_fneg_f16: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1293,6 +1632,13 @@ ; GFX900-NEXT: v_log_f16_e32 v0, v0 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; +; GFX1100-LABEL: v_log2_f16_fast: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_log_f16_e32 v0, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_log2_f16_fast: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1366,6 +1712,18 @@ ; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1100-LABEL: v_log2_v2f16: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1100-NEXT: v_log_f16_e32 v0, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_log_f16_e32 v1, v1 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_log2_v2f16: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1441,6 +1799,31 @@ ; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1100-SDAG-LABEL: v_log2_fabs_v2f16: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1100-SDAG-NEXT: v_log_f16_e64 v0, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_log_f16_e64 v1, |v1| +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log2_fabs_v2f16: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1100-GISEL-NEXT: v_log_f16_e32 v0, v0 +; GFX1100-GISEL-NEXT: v_log_f16_e32 v1, v1 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_log2_fabs_v2f16: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1521,6 +1904,31 @@ ; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1100-SDAG-LABEL: v_log2_fneg_fabs_v2f16: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1100-SDAG-NEXT: v_log_f16_e64 v0, -|v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_log_f16_e64 v1, -|v1| +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log2_fneg_fabs_v2f16: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1100-GISEL-NEXT: v_log_f16_e32 v0, v0 +; GFX1100-GISEL-NEXT: v_log_f16_e32 v1, v1 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_log2_fneg_fabs_v2f16: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1602,6 +2010,31 @@ ; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1100-SDAG-LABEL: v_log2_fneg_v2f16: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1100-SDAG-NEXT: v_log_f16_e64 v0, -v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_log_f16_e64 v1, -v1 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-LABEL: v_log2_fneg_v2f16: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1100-GISEL-NEXT: v_log_f16_e32 v0, v0 +; GFX1100-GISEL-NEXT: v_log_f16_e32 v1, v1 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_log2_fneg_v2f16: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1671,6 +2104,18 @@ ; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX1100-LABEL: v_log2_v2f16_fast: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX1100-NEXT: v_log_f16_e32 v0, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_log_f16_e32 v1, v1 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_log2_v2f16_fast: ; R600: ; %bb.0: ; R600-NEXT: CF_END diff --git a/llvm/test/CodeGen/AMDGPU/llvm.powi.ll b/llvm/test/CodeGen/AMDGPU/llvm.powi.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.powi.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.powi.ll @@ -1,18 +1,34 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX7 %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s +; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefixes=GFX78,GFX7 %s +; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GFX78,GFX8 %s +; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s define i16 @v_powi_f16(i16 %l, i32 %r) { -; GCN-LABEL: v_powi_f16: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GCN-NEXT: v_cvt_f32_i32_e32 v1, v1 -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 -; GCN-NEXT: v_exp_f32_e32 v0, v0 -; GCN-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX78-LABEL: v_powi_f16: +; GFX78: ; %bb.0: +; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX78-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX78-NEXT: v_cvt_f32_i32_e32 v1, v1 +; GFX78-NEXT: v_log_f32_e32 v0, v0 +; GFX78-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 +; GFX78-NEXT: v_exp_f32_e32 v0, v0 +; GFX78-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX78-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_powi_f16: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX11-NEXT: v_cvt_f32_i32_e32 v1, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_log_f32_e32 v0, v0 +; GFX11-NEXT: s_waitcnt_depctr 0xfff +; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v1, v0 +; GFX11-NEXT: v_exp_f32_e32 v0, v0 +; GFX11-NEXT: s_waitcnt_depctr 0xfff +; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX11-NEXT: s_setpc_b64 s[30:31] %l.cast = bitcast i16 %l to half %res = call half @llvm.powi.f16.i32(half %l.cast, i32 %r) %res.cast = bitcast half %res to i16 @@ -20,33 +36,58 @@ } define float @v_powi_f32(float %l, i32 %r) { -; GCN-LABEL: v_powi_f32: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_cvt_f32_i32_e32 v1, v1 -; GCN-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 -; GCN-NEXT: v_exp_f32_e32 v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX78-LABEL: v_powi_f32: +; GFX78: ; %bb.0: +; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX78-NEXT: v_log_f32_e32 v0, v0 +; GFX78-NEXT: v_cvt_f32_i32_e32 v1, v1 +; GFX78-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 +; GFX78-NEXT: v_exp_f32_e32 v0, v0 +; GFX78-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_powi_f32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_log_f32_e32 v0, v0 +; GFX11-NEXT: v_cvt_f32_i32_e32 v1, v1 +; GFX11-NEXT: s_waitcnt_depctr 0xfff +; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v1, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_exp_f32_e32 v0, v0 +; GFX11-NEXT: s_setpc_b64 s[30:31] %res = call float @llvm.powi.f32.i32(float %l, i32 %r) ret float %res } define float @v_powi_0_f32(float %l) { -; GCN-LABEL: v_powi_0_f32: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v0, 1.0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX78-LABEL: v_powi_0_f32: +; GFX78: ; %bb.0: +; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX78-NEXT: v_mov_b32_e32 v0, 1.0 +; GFX78-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_powi_0_f32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_mov_b32_e32 v0, 1.0 +; GFX11-NEXT: s_setpc_b64 s[30:31] %res = call float @llvm.powi.f32.i32(float %l, i32 0) ret float %res } define float @v_powi_1_f32(float %l) { -; GCN-LABEL: v_powi_1_f32: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX78-LABEL: v_powi_1_f32: +; GFX78: ; %bb.0: +; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX78-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_powi_1_f32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_setpc_b64 s[30:31] %res = call float @llvm.powi.f32.i32(float %l, i32 1) ret float %res } @@ -83,16 +124,45 @@ ; GFX8-NEXT: v_div_fmas_f32 v1, v1, v3, v4 ; GFX8-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0 ; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_powi_neg1_f32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_div_scale_f32 v1, null, v0, v0, 1.0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_rcp_f32_e32 v2, v1 +; GFX11-NEXT: s_waitcnt_depctr 0xfff +; GFX11-NEXT: v_fma_f32 v3, -v1, v2, 1.0 +; GFX11-NEXT: v_fmac_f32_e32 v2, v3, v2 +; GFX11-NEXT: v_div_scale_f32 v3, vcc_lo, 1.0, v0, 1.0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_mul_f32_e32 v4, v3, v2 +; GFX11-NEXT: v_fma_f32 v5, -v1, v4, v3 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_fmac_f32_e32 v4, v5, v2 +; GFX11-NEXT: v_fma_f32 v1, -v1, v4, v3 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_div_fmas_f32 v1, v1, v2, v4 +; GFX11-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0 +; GFX11-NEXT: s_setpc_b64 s[30:31] %res = call float @llvm.powi.f32.i32(float %l, i32 -1) ret float %res } define float @v_powi_2_f32(float %l) { -; GCN-LABEL: v_powi_2_f32: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mul_f32_e32 v0, v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX78-LABEL: v_powi_2_f32: +; GFX78: ; %bb.0: +; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX78-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_powi_2_f32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX11-NEXT: s_setpc_b64 s[30:31] %res = call float @llvm.powi.f32.i32(float %l, i32 2) ret float %res } @@ -131,58 +201,129 @@ ; GFX8-NEXT: v_div_fmas_f32 v1, v1, v3, v4 ; GFX8-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0 ; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_powi_neg2_f32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_div_scale_f32 v1, null, v0, v0, 1.0 +; GFX11-NEXT: v_rcp_f32_e32 v2, v1 +; GFX11-NEXT: s_waitcnt_depctr 0xfff +; GFX11-NEXT: v_fma_f32 v3, -v1, v2, 1.0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_fmac_f32_e32 v2, v3, v2 +; GFX11-NEXT: v_div_scale_f32 v3, vcc_lo, 1.0, v0, 1.0 +; GFX11-NEXT: v_mul_f32_e32 v4, v3, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_fma_f32 v5, -v1, v4, v3 +; GFX11-NEXT: v_fmac_f32_e32 v4, v5, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_fma_f32 v1, -v1, v4, v3 +; GFX11-NEXT: v_div_fmas_f32 v1, v1, v2, v4 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0 +; GFX11-NEXT: s_setpc_b64 s[30:31] %res = call float @llvm.powi.f32.i32(float %l, i32 -2) ret float %res } define float @v_powi_4_f32(float %l) { -; GCN-LABEL: v_powi_4_f32: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mul_f32_e32 v0, v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX78-LABEL: v_powi_4_f32: +; GFX78: ; %bb.0: +; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX78-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_powi_4_f32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX11-NEXT: s_setpc_b64 s[30:31] %res = call float @llvm.powi.f32.i32(float %l, i32 4) ret float %res } define float @v_powi_8_f32(float %l) { -; GCN-LABEL: v_powi_8_f32: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mul_f32_e32 v0, v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX78-LABEL: v_powi_8_f32: +; GFX78: ; %bb.0: +; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX78-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_powi_8_f32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX11-NEXT: s_setpc_b64 s[30:31] %res = call float @llvm.powi.f32.i32(float %l, i32 8) ret float %res } define float @v_powi_16_f32(float %l) { -; GCN-LABEL: v_powi_16_f32: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mul_f32_e32 v0, v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX78-LABEL: v_powi_16_f32: +; GFX78: ; %bb.0: +; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX78-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_powi_16_f32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX11-NEXT: s_setpc_b64 s[30:31] %res = call float @llvm.powi.f32.i32(float %l, i32 16) ret float %res } define float @v_powi_128_f32(float %l) { -; GCN-LABEL: v_powi_128_f32: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mul_f32_e32 v0, v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, v0, v0 -; GCN-NEXT: v_mul_f32_e32 v0, v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX78-LABEL: v_powi_128_f32: +; GFX78: ; %bb.0: +; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX78-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_powi_128_f32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX11-NEXT: s_setpc_b64 s[30:31] %res = call float @llvm.powi.f32.i32(float %l, i32 128) ret float %res } @@ -233,6 +374,39 @@ ; GFX8-NEXT: v_div_fmas_f32 v1, v1, v3, v4 ; GFX8-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0 ; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_powi_neg128_f32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_div_scale_f32 v1, null, v0, v0, 1.0 +; GFX11-NEXT: v_rcp_f32_e32 v2, v1 +; GFX11-NEXT: s_waitcnt_depctr 0xfff +; GFX11-NEXT: v_fma_f32 v3, -v1, v2, 1.0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_fmac_f32_e32 v2, v3, v2 +; GFX11-NEXT: v_div_scale_f32 v3, vcc_lo, 1.0, v0, 1.0 +; GFX11-NEXT: v_mul_f32_e32 v4, v3, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_fma_f32 v5, -v1, v4, v3 +; GFX11-NEXT: v_fmac_f32_e32 v4, v5, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_fma_f32 v1, -v1, v4, v3 +; GFX11-NEXT: v_div_fmas_f32 v1, v1, v2, v4 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0 +; GFX11-NEXT: s_setpc_b64 s[30:31] %res = call float @llvm.powi.f32.i32(float %l, i32 -128) ret float %res }