Index: llvm/include/llvm/Support/TargetOpcodes.def =================================================================== --- llvm/include/llvm/Support/TargetOpcodes.def +++ llvm/include/llvm/Support/TargetOpcodes.def @@ -622,6 +622,9 @@ /// Floating point extract fraction and exponent. HANDLE_TARGET_OPCODE(G_FFREXP) +/// Read the current floating-point environment. +HANDLE_TARGET_OPCODE(G_GET_FPENV) + /// Generic FP negation. HANDLE_TARGET_OPCODE(G_FNEG) Index: llvm/include/llvm/Target/GenericOpcodes.td =================================================================== --- llvm/include/llvm/Target/GenericOpcodes.td +++ llvm/include/llvm/Target/GenericOpcodes.td @@ -989,6 +989,13 @@ let hasSideEffects = false; } +// Reads the floating-point environment. Modeled as a side effect so the read is not CSE'd or moved across instructions that modify the environment. +def G_GET_FPENV : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins); + let hasSideEffects = true; +} + //------------------------------------------------------------------------------ // Opcodes for LLVM Intrinsics //------------------------------------------------------------------------------ Index: llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td =================================================================== --- llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -104,6 +104,7 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; Index: llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1766,6 +1766,8 @@ return TargetOpcode::G_FLOG2; case Intrinsic::log10: return TargetOpcode::G_FLOG10; + case Intrinsic::get_fpenv: + return TargetOpcode::G_GET_FPENV; case Intrinsic::ldexp: return TargetOpcode::G_FLDEXP; case Intrinsic::nearbyint: Index: 
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1907,6 +1907,9 @@ getActionDefinitionsBuilder(G_READCYCLECOUNTER) .legalFor({S64}); + getActionDefinitionsBuilder(G_GET_FPENV) + .legalFor({S32}); + getActionDefinitionsBuilder(G_FENCE) .alwaysLegal(); Index: llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -3715,6 +3715,7 @@ case AMDGPU::G_SHUFFLE_VECTOR: case AMDGPU::G_SBFX: case AMDGPU::G_UBFX: + case AMDGPU::G_GET_FPENV: if (isSALUMapping(MI)) return getDefaultMappingSOP(MI); [[fallthrough]]; Index: llvm/test/CodeGen/AMDGPU/llvm.get.fpenv.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/llvm.get.fpenv.ll @@ -0,0 +1,732 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -march=amdgcn -global-isel -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX678,GFX6 %s +; RUN: llc -march=amdgcn -global-isel -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX678,GFX7 %s +; RUN: llc -march=amdgcn -global-isel -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX678,GFX8 %s +; RUN: llc -march=amdgcn -global-isel -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s +; RUN: llc -march=amdgcn -global-isel -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX10 %s +; RUN: llc -march=amdgcn -global-isel -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11 %s +; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX678,GFX6 %s +; RUN: llc -march=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX678,GFX7 %s +; RUN: llc 
-march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX678,GFX8 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX10 %s +; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11 %s + +declare i32 @llvm.get.fpenv.i32() + +define i32 @func_fpenv_i32() { +; GFX678-LABEL: func_fpenv_i32: +; GFX678: ; %bb.0: +; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19) +; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4 +; GFX678-NEXT: v_mov_b32_e32 v0, s4 +; GFX678-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: func_fpenv_i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: func_fpenv_i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: func_fpenv_i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24) +; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %fpenv = call i32 @llvm.get.fpenv.i32() + ret i32 %fpenv +} + +define i32 @strictfp_func_fpenv_i32() strictfp { +; GFX678-LABEL: strictfp_func_fpenv_i32: +; GFX678: ; %bb.0: +; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19) +; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4 +; GFX678-NEXT: 
v_mov_b32_e32 v0, s4 +; GFX678-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: strictfp_func_fpenv_i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: strictfp_func_fpenv_i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: strictfp_func_fpenv_i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24) +; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %fpenv = call i32 @llvm.get.fpenv.i32() + ret i32 %fpenv +} + +define amdgpu_kernel void @kernel_fpenv_i32(ptr addrspace(1) %ptr) { +; GFX6-LABEL: kernel_fpenv_i32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19) +; GFX6-NEXT: s_and_b32 s4, 0x7f3ff, s4 +; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_mov_b32 s2, -1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: kernel_fpenv_i32: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19) +; GFX7-NEXT: s_and_b32 s4, 0x7f3ff, s4 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, -1 +; GFX7-NEXT: v_mov_b32_e32 v0, s4 +; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GFX7-NEXT: s_endpgm +; +; GFX8-LABEL: kernel_fpenv_i32: +; 
GFX8: ; %bb.0: +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX8-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 19) +; GFX8-NEXT: s_and_b32 s2, 0x7f3ff, s2 +; GFX8-NEXT: v_mov_b32_e32 v2, s2 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v0, s0 +; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: flat_store_dword v[0:1], v2 +; GFX8-NEXT: s_endpgm +; +; GFX9-LABEL: kernel_fpenv_i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 24) +; GFX9-NEXT: s_and_b32 s2, 0x87f3ff, s2 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: v_mov_b32_e32 v1, s2 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: kernel_fpenv_i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 24) +; GFX10-NEXT: v_mov_b32_e32 v0, 0 +; GFX10-NEXT: s_and_b32 s2, 0x87f3ff, s2 +; GFX10-NEXT: v_mov_b32_e32 v1, s2 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: global_store_dword v0, v1, s[0:1] +; GFX10-NEXT: s_endpgm +; +; GFX11-LABEL: kernel_fpenv_i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX11-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 24) +; GFX11-NEXT: s_and_b32 s2, 0x87f3ff, s2 +; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11-NEXT: s_endpgm + %fpenv = call i32 @llvm.get.fpenv.i32() + store i32 %fpenv, ptr addrspace(1) %ptr + ret void +} + +; TODO: We should be able to reduce the demanded bits and ask for less +; from s_getreg_b32 +define i32 @func_fpenv_i32_denormonly() { +; GFX678-LABEL: func_fpenv_i32_denormonly: +; GFX678: ; %bb.0: +; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19) +; GFX678-NEXT: s_and_b32 s4, 
0x7f3ff, s4 +; GFX678-NEXT: s_and_b32 s4, s4, 0xf0 +; GFX678-NEXT: v_mov_b32_e32 v0, s4 +; GFX678-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: func_fpenv_i32_denormonly: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX9-NEXT: s_and_b32 s4, s4, 0xf0 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: func_fpenv_i32_denormonly: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX10-NEXT: s_and_b32 s4, s4, 0xf0 +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: func_fpenv_i32_denormonly: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24) +; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0 +; GFX11-NEXT: s_and_b32 s0, s0, 0xf0 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %fpenv = call i32 @llvm.get.fpenv.i32() + %denorm.only = and i32 %fpenv, 240 + ret i32 %denorm.only +} + +define i32 @func_fpenv_i32_roundonly() { +; GFX678-LABEL: func_fpenv_i32_roundonly: +; GFX678: ; %bb.0: +; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19) +; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4 +; GFX678-NEXT: s_and_b32 s4, s4, 15 +; GFX678-NEXT: v_mov_b32_e32 v0, s4 +; GFX678-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: func_fpenv_i32_roundonly: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX9-NEXT: s_and_b32 s4, s4, 15 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX10-LABEL: func_fpenv_i32_roundonly: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX10-NEXT: s_and_b32 s4, s4, 15 +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: func_fpenv_i32_roundonly: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24) +; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0 +; GFX11-NEXT: s_and_b32 s0, s0, 15 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %fpenv = call i32 @llvm.get.fpenv.i32() + %round.only = and i32 %fpenv, 15 + ret i32 %round.only +} + +define i32 @func_fpenv_i32_round_denorm_only() { +; GFX678-LABEL: func_fpenv_i32_round_denorm_only: +; GFX678: ; %bb.0: +; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19) +; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4 +; GFX678-NEXT: s_and_b32 s4, s4, 0xff +; GFX678-NEXT: v_mov_b32_e32 v0, s4 +; GFX678-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: func_fpenv_i32_round_denorm_only: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX9-NEXT: s_and_b32 s4, s4, 0xff +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: func_fpenv_i32_round_denorm_only: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX10-NEXT: s_and_b32 s4, s4, 0xff +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: func_fpenv_i32_round_denorm_only: +; GFX11: ; 
%bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24) +; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0 +; GFX11-NEXT: s_and_b32 s0, s0, 0xff +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %fpenv = call i32 @llvm.get.fpenv.i32() + %round.denorm.only = and i32 %fpenv, 255 + ret i32 %round.denorm.only +} + +define i32 @func_fpenv_i32_round_denorm_dx10_ieee() { +; GFX678-LABEL: func_fpenv_i32_round_denorm_dx10_ieee: +; GFX678: ; %bb.0: +; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19) +; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4 +; GFX678-NEXT: s_and_b32 s4, s4, 0x3ff +; GFX678-NEXT: v_mov_b32_e32 v0, s4 +; GFX678-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: func_fpenv_i32_round_denorm_dx10_ieee: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX9-NEXT: s_and_b32 s4, s4, 0x3ff +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: func_fpenv_i32_round_denorm_dx10_ieee: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX10-NEXT: s_and_b32 s4, s4, 0x3ff +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: func_fpenv_i32_round_denorm_dx10_ieee: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24) +; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0 +; GFX11-NEXT: s_and_b32 s0, s0, 0x3ff +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %fpenv = call i32 @llvm.get.fpenv.i32() + %core.mode = and 
i32 %fpenv, 1023 + ret i32 %core.mode +} + +define i32 @func_fpenv_i32_excp_en() { +; GFX678-LABEL: func_fpenv_i32_excp_en: +; GFX678: ; %bb.0: +; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19) +; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4 +; GFX678-NEXT: s_and_b32 s4, s4, 0x7f000 +; GFX678-NEXT: v_mov_b32_e32 v0, s4 +; GFX678-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: func_fpenv_i32_excp_en: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX9-NEXT: s_and_b32 s4, s4, 0x7f000 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: func_fpenv_i32_excp_en: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX10-NEXT: s_and_b32 s4, s4, 0x7f000 +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: func_fpenv_i32_excp_en: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24) +; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0 +; GFX11-NEXT: s_and_b32 s0, s0, 0x7f000 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %fpenv = call i32 @llvm.get.fpenv.i32() + %core.mode = and i32 %fpenv, 520192 + ret i32 %core.mode +} + +; Mask for all bits used on gfx6+ +define i32 @func_fpenv_i32_environment_gfx6() { +; GFX678-LABEL: func_fpenv_i32_environment_gfx6: +; GFX678: ; %bb.0: +; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19) +; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4 +; GFX678-NEXT: s_and_b32 s4, s4, 0x7f3ff +; GFX678-NEXT: v_mov_b32_e32 v0, s4 +; GFX678-NEXT: 
s_setpc_b64 s[30:31] +; +; GFX9-LABEL: func_fpenv_i32_environment_gfx6: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX9-NEXT: s_and_b32 s4, s4, 0x7f3ff +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: func_fpenv_i32_environment_gfx6: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX10-NEXT: s_and_b32 s4, s4, 0x7f3ff +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: func_fpenv_i32_environment_gfx6: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24) +; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0 +; GFX11-NEXT: s_and_b32 s0, s0, 0x7f3ff +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %fpenv = call i32 @llvm.get.fpenv.i32() + %core.mode = and i32 %fpenv, 521215 + ret i32 %core.mode +} + +; Mask for all bits used on gfx9+ +define i32 @func_fpenv_i32_environment_gfx9() { +; GFX678-LABEL: func_fpenv_i32_environment_gfx9: +; GFX678: ; %bb.0: +; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19) +; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4 +; GFX678-NEXT: s_and_b32 s4, s4, 0x87f3ff +; GFX678-NEXT: v_mov_b32_e32 v0, s4 +; GFX678-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: func_fpenv_i32_environment_gfx9: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX9-NEXT: s_and_b32 s4, s4, 0x87f3ff +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: 
func_fpenv_i32_environment_gfx9: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX10-NEXT: s_and_b32 s4, s4, 0x87f3ff +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: func_fpenv_i32_environment_gfx9: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24) +; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0 +; GFX11-NEXT: s_and_b32 s0, s0, 0x87f3ff +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %fpenv = call i32 @llvm.get.fpenv.i32() + %core.mode = and i32 %fpenv, 8909823 + ret i32 %core.mode +} + +define i32 @func_fpenv_i32_denormf32only() { +; GFX678-LABEL: func_fpenv_i32_denormf32only: +; GFX678: ; %bb.0: +; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19) +; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4 +; GFX678-NEXT: s_and_b32 s4, s4, 48 +; GFX678-NEXT: v_mov_b32_e32 v0, s4 +; GFX678-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: func_fpenv_i32_denormf32only: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX9-NEXT: s_and_b32 s4, s4, 48 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: func_fpenv_i32_denormf32only: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX10-NEXT: s_and_b32 s4, s4, 48 +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: func_fpenv_i32_denormf32only: +; GFX11: ; %bb.0: +; 
GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24) +; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0 +; GFX11-NEXT: s_and_b32 s0, s0, 48 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %fpenv = call i32 @llvm.get.fpenv.i32() + %denorm.only = and i32 %fpenv, 48 + ret i32 %denorm.only +} + +define i32 @func_fpenv_i32_denormf32only_0() { +; GFX678-LABEL: func_fpenv_i32_denormf32only_0: +; GFX678: ; %bb.0: +; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19) +; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4 +; GFX678-NEXT: s_and_b32 s4, s4, 16 +; GFX678-NEXT: v_mov_b32_e32 v0, s4 +; GFX678-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: func_fpenv_i32_denormf32only_0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX9-NEXT: s_and_b32 s4, s4, 16 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: func_fpenv_i32_denormf32only_0: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX10-NEXT: s_and_b32 s4, s4, 16 +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: func_fpenv_i32_denormf32only_0: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24) +; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0 +; GFX11-NEXT: s_and_b32 s0, s0, 16 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %fpenv = call i32 @llvm.get.fpenv.i32() + %denorm.only = and i32 %fpenv, 16 + ret i32 %denorm.only +} + +define i32 
@func_fpenv_i32_denormf32only_1() { +; GFX678-LABEL: func_fpenv_i32_denormf32only_1: +; GFX678: ; %bb.0: +; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19) +; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4 +; GFX678-NEXT: s_and_b32 s4, s4, 32 +; GFX678-NEXT: v_mov_b32_e32 v0, s4 +; GFX678-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: func_fpenv_i32_denormf32only_1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX9-NEXT: s_and_b32 s4, s4, 32 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: func_fpenv_i32_denormf32only_1: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX10-NEXT: s_and_b32 s4, s4, 32 +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: func_fpenv_i32_denormf32only_1: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24) +; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0 +; GFX11-NEXT: s_and_b32 s0, s0, 32 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %fpenv = call i32 @llvm.get.fpenv.i32() + %denorm.only = and i32 %fpenv, 32 + ret i32 %denorm.only +} + +define i32 @func_fpenv_i32_denormf64f16only() { +; GFX678-LABEL: func_fpenv_i32_denormf64f16only: +; GFX678: ; %bb.0: +; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19) +; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4 +; GFX678-NEXT: s_and_b32 s4, s4, 0xc0 +; GFX678-NEXT: v_mov_b32_e32 v0, s4 +; GFX678-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: func_fpenv_i32_denormf64f16only: +; 
GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX9-NEXT: s_and_b32 s4, s4, 0xc0 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: func_fpenv_i32_denormf64f16only: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX10-NEXT: s_and_b32 s4, s4, 0xc0 +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: func_fpenv_i32_denormf64f16only: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24) +; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0 +; GFX11-NEXT: s_and_b32 s0, s0, 0xc0 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %fpenv = call i32 @llvm.get.fpenv.i32() + %denorm.only = and i32 %fpenv, 192 + ret i32 %denorm.only +} + +define i32 @func_fpenv_i32_dx10_clamp_only() { +; GFX678-LABEL: func_fpenv_i32_dx10_clamp_only: +; GFX678: ; %bb.0: +; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19) +; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4 +; GFX678-NEXT: s_and_b32 s4, s4, 0x100 +; GFX678-NEXT: v_mov_b32_e32 v0, s4 +; GFX678-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: func_fpenv_i32_dx10_clamp_only: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX9-NEXT: s_and_b32 s4, s4, 0x100 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: func_fpenv_i32_dx10_clamp_only: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: 
s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX10-NEXT: s_and_b32 s4, s4, 0x100 +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: func_fpenv_i32_dx10_clamp_only: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24) +; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0 +; GFX11-NEXT: s_and_b32 s0, s0, 0x100 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %fpenv = call i32 @llvm.get.fpenv.i32() + %dx10.only = and i32 %fpenv, 256 + ret i32 %dx10.only +} + +define i32 @func_fpenv_i32_ieee_only() { +; GFX678-LABEL: func_fpenv_i32_ieee_only: +; GFX678: ; %bb.0: +; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19) +; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4 +; GFX678-NEXT: s_and_b32 s4, s4, 0x200 +; GFX678-NEXT: v_mov_b32_e32 v0, s4 +; GFX678-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: func_fpenv_i32_ieee_only: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX9-NEXT: s_and_b32 s4, s4, 0x200 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: func_fpenv_i32_ieee_only: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX10-NEXT: s_and_b32 s4, s4, 0x200 +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: func_fpenv_i32_ieee_only: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 
24) +; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0 +; GFX11-NEXT: s_and_b32 s0, s0, 0x200 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %fpenv = call i32 @llvm.get.fpenv.i32() + %ieee.only = and i32 %fpenv, 512 + ret i32 %ieee.only +} + +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GCN: {{.*}} +; GFX1011: {{.*}}