diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1681,14 +1681,9 @@ /********** Intrinsic Patterns **********/ /********** ================== **********/ -let OtherPredicates = [isNotGFX90APlus] in -// FIXME: Should use _e64 and select source modifiers. -def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32>; - -let OtherPredicates = [isGFX90APlus] in def : GCNPat < - (fpow f32:$src0, f32:$src1), - (V_EXP_F32_e32 (V_MUL_LEGACY_F32_e64 0, f32:$src1, SRCMODS.NONE, (V_LOG_F32_e32 f32:$src0), 0, 0)) + (f32 (fpow (VOP3Mods f32:$src0, i32:$src0_mods), (VOP3Mods f32:$src1, i32:$src1_mods))), + (V_EXP_F32_e64 SRCMODS.NONE, (V_MUL_LEGACY_F32_e64 $src1_mods, $src1, SRCMODS.NONE, (V_LOG_F32_e64 $src0_mods, $src0), 0, 0)) >; def : GCNPat < diff --git a/llvm/test/CodeGen/AMDGPU/fpow.ll b/llvm/test/CodeGen/AMDGPU/fpow.ll --- a/llvm/test/CodeGen/AMDGPU/fpow.ll +++ b/llvm/test/CodeGen/AMDGPU/fpow.ll @@ -2,6 +2,7 @@ ; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s ; RUN: llc -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s ; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s ; RUN: llc -march=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s define float @v_pow_f32(float %x, float %y) { @@ -29,6 +30,14 @@ ; GFX9-NEXT: v_exp_f32_e32 v0, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX90A-LABEL: v_pow_f32: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_log_f32_e32 v0, v0 +; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0 +; GFX90A-NEXT: v_exp_f32_e32 v0, v0 +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: v_pow_f32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -75,6 +84,17 @@ ; GFX9-NEXT: v_exp_f32_e32 v1, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; 
GFX90A-LABEL: v_pow_v2f32: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_log_f32_e32 v0, v0 +; GFX90A-NEXT: v_log_f32_e32 v1, v1 +; GFX90A-NEXT: v_mul_legacy_f32 v0, v2, v0 +; GFX90A-NEXT: v_mul_legacy_f32 v1, v3, v1 +; GFX90A-NEXT: v_exp_f32_e32 v0, v0 +; GFX90A-NEXT: v_exp_f32_e32 v1, v1 +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: v_pow_v2f32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -125,6 +145,17 @@ ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX90A-LABEL: v_pow_f16: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX90A-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX90A-NEXT: v_log_f32_e32 v0, v0 +; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0 +; GFX90A-NEXT: v_exp_f32_e32 v0, v0 +; GFX90A-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: v_pow_f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -196,6 +227,24 @@ ; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX90A-LABEL: v_pow_v2f16: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX90A-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX90A-NEXT: v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX90A-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX90A-NEXT: v_log_f32_e32 v2, v2 +; GFX90A-NEXT: v_log_f32_e32 v0, v0 +; GFX90A-NEXT: v_mul_legacy_f32 v2, v3, v2 +; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0 +; GFX90A-NEXT: v_exp_f32_e32 v2, v2 +; GFX90A-NEXT: v_exp_f32_e32 v0, v0 +; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v2 +; GFX90A-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX90A-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: v_pow_v2f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) @@ -278,6 +327,24 @@ ; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX90A-LABEL: v_pow_v2f16_fneg_lhs: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX90A-NEXT: v_cvt_f32_f16_e64 v0, -v0 +; GFX90A-NEXT: v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX90A-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX90A-NEXT: v_log_f32_e32 v2, v2 +; GFX90A-NEXT: v_log_f32_e32 v0, v0 +; GFX90A-NEXT: v_mul_legacy_f32 v2, v3, v2 +; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0 +; GFX90A-NEXT: v_exp_f32_e32 v2, v2 +; GFX90A-NEXT: v_exp_f32_e32 v0, v0 +; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v2 +; GFX90A-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX90A-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: v_pow_v2f16_fneg_lhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -361,6 +428,24 @@ ; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX90A-LABEL: v_pow_v2f16_fneg_rhs: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX90A-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX90A-NEXT: v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX90A-NEXT: v_cvt_f32_f16_e64 v1, -v1 +; GFX90A-NEXT: v_log_f32_e32 v2, v2 +; GFX90A-NEXT: v_log_f32_e32 v0, v0 +; GFX90A-NEXT: v_mul_legacy_f32 v2, v3, v2 +; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0 +; GFX90A-NEXT: v_exp_f32_e32 v2, v2 +; GFX90A-NEXT: v_exp_f32_e32 v0, v0 +; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v2 +; GFX90A-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX90A-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: v_pow_v2f16_fneg_rhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) @@ -449,6 +534,24 @@ ; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX90A-LABEL: v_pow_v2f16_fneg_lhs_rhs: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX90A-NEXT: v_cvt_f32_f16_e64 v0, -v0 +; GFX90A-NEXT: v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX90A-NEXT: v_cvt_f32_f16_e64 v1, -v1 +; GFX90A-NEXT: v_log_f32_e32 v2, v2 +; GFX90A-NEXT: v_log_f32_e32 v0, v0 +; GFX90A-NEXT: v_mul_legacy_f32 v2, v3, v2 +; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0 +; GFX90A-NEXT: v_exp_f32_e32 v2, v2 +; GFX90A-NEXT: v_exp_f32_e32 v0, v0 +; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v2 +; GFX90A-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX90A-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: v_pow_v2f16_fneg_lhs_rhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -483,8 +586,7 @@ ; GFX6-LABEL: v_pow_f32_fabs_lhs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0 -; GFX6-NEXT: v_log_f32_e32 v0, v0 +; GFX6-NEXT: v_log_f32_e64 v0, |v0| ; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 ; GFX6-NEXT: v_exp_f32_e32 v0, v0 ; GFX6-NEXT: s_setpc_b64 s[30:31] @@ -492,8 +594,7 @@ ; GFX8-LABEL: v_pow_f32_fabs_lhs: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0 -; GFX8-NEXT: v_log_f32_e32 v0, v0 +; GFX8-NEXT: v_log_f32_e64 v0, |v0| ; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 ; GFX8-NEXT: v_exp_f32_e32 v0, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -501,18 +602,24 @@ ; GFX9-LABEL: v_pow_f32_fabs_lhs: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0 -; GFX9-NEXT: v_log_f32_e32 v0, v0 +; GFX9-NEXT: v_log_f32_e64 v0, |v0| ; GFX9-NEXT: 
v_mul_legacy_f32_e32 v0, v1, v0 ; GFX9-NEXT: v_exp_f32_e32 v0, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX90A-LABEL: v_pow_f32_fabs_lhs: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_log_f32_e64 v0, |v0| +; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0 +; GFX90A-NEXT: v_exp_f32_e32 v0, v0 +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: v_pow_f32_fabs_lhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0 -; GFX10-NEXT: v_log_f32_e32 v0, v0 +; GFX10-NEXT: v_log_f32_e64 v0, |v0| ; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 ; GFX10-NEXT: v_exp_f32_e32 v0, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -526,8 +633,7 @@ ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_log_f32_e32 v0, v0 -; GFX6-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1 -; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 +; GFX6-NEXT: v_mul_legacy_f32_e64 v0, |v1|, v0 ; GFX6-NEXT: v_exp_f32_e32 v0, v0 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; @@ -535,8 +641,7 @@ ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_log_f32_e32 v0, v0 -; GFX8-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1 -; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 +; GFX8-NEXT: v_mul_legacy_f32_e64 v0, |v1|, v0 ; GFX8-NEXT: v_exp_f32_e32 v0, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; @@ -544,18 +649,24 @@ ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_log_f32_e32 v0, v0 -; GFX9-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1 -; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 +; GFX9-NEXT: v_mul_legacy_f32_e64 v0, |v1|, v0 ; GFX9-NEXT: v_exp_f32_e32 v0, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX90A-LABEL: v_pow_f32_fabs_rhs: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_log_f32_e32 v0, v0 +; GFX90A-NEXT: v_mul_legacy_f32 v0, |v1|, v0 +; GFX90A-NEXT: v_exp_f32_e32 v0, v0 
+; GFX90A-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: v_pow_f32_fabs_rhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_log_f32_e32 v0, v0 -; GFX10-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1 -; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 +; GFX10-NEXT: v_mul_legacy_f32_e64 v0, |v1|, v0 ; GFX10-NEXT: v_exp_f32_e32 v0, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %fabs.y = call float @llvm.fabs.f32(float %y) @@ -567,45 +678,41 @@ ; GFX6-LABEL: v_pow_f32_fabs_lhs_rhs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: s_brev_b32 s4, -2 -; GFX6-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX6-NEXT: v_log_f32_e32 v0, v0 -; GFX6-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 +; GFX6-NEXT: v_log_f32_e64 v0, |v0| +; GFX6-NEXT: v_mul_legacy_f32_e64 v0, |v1|, v0 ; GFX6-NEXT: v_exp_f32_e32 v0, v0 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_pow_f32_fabs_lhs_rhs: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_brev_b32 s4, -2 -; GFX8-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX8-NEXT: v_log_f32_e32 v0, v0 -; GFX8-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 +; GFX8-NEXT: v_log_f32_e64 v0, |v0| +; GFX8-NEXT: v_mul_legacy_f32_e64 v0, |v1|, v0 ; GFX8-NEXT: v_exp_f32_e32 v0, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_pow_f32_fabs_lhs_rhs: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_brev_b32 s4, -2 -; GFX9-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX9-NEXT: v_log_f32_e32 v0, v0 -; GFX9-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 +; GFX9-NEXT: v_log_f32_e64 v0, |v0| +; GFX9-NEXT: v_mul_legacy_f32_e64 v0, |v1|, v0 ; GFX9-NEXT: v_exp_f32_e32 v0, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; +; GFX90A-LABEL: v_pow_f32_fabs_lhs_rhs: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: 
v_log_f32_e64 v0, |v0| +; GFX90A-NEXT: v_mul_legacy_f32 v0, |v1|, v0 +; GFX90A-NEXT: v_exp_f32_e32 v0, v0 +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: v_pow_f32_fabs_lhs_rhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_brev_b32 s4, -2 -; GFX10-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX10-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX10-NEXT: v_log_f32_e32 v0, v0 -; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 +; GFX10-NEXT: v_log_f32_e64 v0, |v0| +; GFX10-NEXT: v_mul_legacy_f32_e64 v0, |v1|, v0 ; GFX10-NEXT: v_exp_f32_e32 v0, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %fabs.x = call float @llvm.fabs.f32(float %x) @@ -636,6 +743,13 @@ ; GFX9-NEXT: v_exp_f32_e32 v0, v0 ; GFX9-NEXT: ; return to shader part epilog ; +; GFX90A-LABEL: v_pow_f32_sgpr_vgpr: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: v_log_f32_e32 v1, s0 +; GFX90A-NEXT: v_mul_legacy_f32 v0, v0, v1 +; GFX90A-NEXT: v_exp_f32_e32 v0, v0 +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: v_pow_f32_sgpr_vgpr: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_log_f32_e32 v1, s0 @@ -668,6 +782,13 @@ ; GFX9-NEXT: v_exp_f32_e32 v0, v0 ; GFX9-NEXT: ; return to shader part epilog ; +; GFX90A-LABEL: v_pow_f32_vgpr_sgpr: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: v_log_f32_e32 v0, v0 +; GFX90A-NEXT: v_mul_legacy_f32 v0, s0, v0 +; GFX90A-NEXT: v_exp_f32_e32 v0, v0 +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: v_pow_f32_vgpr_sgpr: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_log_f32_e32 v0, v0 @@ -700,6 +821,13 @@ ; GFX9-NEXT: v_exp_f32_e32 v0, v0 ; GFX9-NEXT: ; return to shader part epilog ; +; GFX90A-LABEL: v_pow_f32_sgpr_sgpr: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: v_log_f32_e32 v0, s0 +; GFX90A-NEXT: v_mul_legacy_f32 v0, s1, v0 +; GFX90A-NEXT: v_exp_f32_e32 v0, v0 +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: v_pow_f32_sgpr_sgpr: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_log_f32_e32 v0, s0