Index: llvm/docs/AMDGPUUsage.rst
===================================================================
--- llvm/docs/AMDGPUUsage.rst
+++ llvm/docs/AMDGPUUsage.rst
@@ -999,6 +999,13 @@
   :ref:`llvm.stacksave.p5 <int_stacksave>`           Implemented, must use the alloca address space.
   :ref:`llvm.stackrestore.p5 <int_stackrestore>`     Implemented, must use the alloca address space.
 
+  :ref:`llvm.get.fpmode.i32 <int_get_fpmode>`        The natural floating-point mode type is i32. This is
+                                                     implemented by extracting relevant bits out of the MODE
+                                                     register with s_getreg_b32. The first 10 bits are the
+                                                     core floating-point mode. Bits 12:18 are the exception
+                                                     mask. On gfx9+, bit 23 is FP16_OVFL. Bitfields not
+                                                     relevant to floating-point instructions are zeroed.
+
   llvm.amdgcn.wave.reduce.umin                       Performs an arithmetic unsigned min reduction on the
                                                      unsigned values provided by each lane in the wavefront.
                                                      Intrinsic takes a hint for reduction strategy using second operand
Index: llvm/docs/LangRef.rst
===================================================================
--- llvm/docs/LangRef.rst
+++ llvm/docs/LangRef.rst
@@ -25568,6 +25568,7 @@
 to default state. It is similar to the call 'fesetenv(FE_DFL_ENV)', except it
 does not return any value.
 
+.. _int_get_fpmode:
 
 '``llvm.get.fpmode``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -756,6 +756,10 @@
   setOperationAction(ISD::STACKSAVE, MVT::Other, Custom);
 
+  // TODO: Could move this to custom lowering, could benefit from combines on
+  // extract of relevant bits.
+  setOperationAction(ISD::GET_FPMODE, MVT::i32, Legal);
+
   setTargetDAGCombine({ISD::ADD,
                        ISD::UADDO_CARRY,
                        ISD::SUB,
Index: llvm/lib/Target/AMDGPU/SOPInstructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -870,6 +870,8 @@
 
 // This is hasSideEffects to allow its use in readcyclecounter selection.
 // FIXME: Need to truncate immediate to 16-bits.
+// FIXME: Missing mode register use. Should have separate pseudos for
+// known may read MODE and only read MODE.
 def S_GETREG_B32 : SOPK_Pseudo <
   "s_getreg_b32",
   (outs SReg_32:$sdst), (ins hwreg:$simm16),
@@ -1421,6 +1423,66 @@
   (S_WAIT_EVENT (i16 0))
 >;
 
+// The first 10 bits of the mode register are the core FP mode on all
+// subtargets.
+//
+// The high bits include additional fields, intermixed with some
+// non-floating point environment information. We extract the full
+// register and clear non-relevant bits.
+//
+// EXCP_EN covers floating point exceptions, but also some other
+// non-FP exceptions.
+//
+// Bits 12-18 cover the relevant exception mask on all subtargets.
+//
+// FIXME: Bit 18 is int_div0, should this be in the FP environment? I
+// think the only source is v_rcp_iflag_i32.
+//
+// On GFX9+:
+// Bit 23 is the additional FP16_OVFL mode.
+//
+// Bits 19, 20, and 21 cover non-FP exceptions and differ between
+// gfx9/10/11, so we ignore them here.
+
+// TODO: Would it be cheaper to emit multiple s_getreg_b32 calls for
+// the ranges and combine the results?
+
+defvar fp_round_mask = !add(!shl(1, 4), -1);
+defvar fp_denorm_mask = !shl(!add(!shl(1, 4), -1), 4);
+defvar dx10_clamp_mask = !shl(1, 8);
+defvar ieee_mode_mask = !shl(1, 9);
+
+// Covers fp_round, fp_denorm, dx10_clamp, and IEEE bit.
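+// For reference, the masks defined in this block evaluate to the
+// following constants: fp_round_mask = 0xf, fp_denorm_mask = 0xf0,
+// dx10_clamp_mask = 0x100 and ieee_mode_mask = 0x200, so fpmode_mask is
+// 0x3ff. Or-ing in fp_excp_en_mask (0x7f000) gives fpmode_mask_gfx6plus =
+// 0x7f3ff, and additionally or-ing in fp16_ovfl (0x800000) gives
+// fpmode_mask_gfx9plus = 0x87f3ff; these are the s_and_b32 constants and
+// the 19- and 24-bit s_getreg_b32 widths seen in the test checks below.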
+defvar fpmode_mask =
+  !or(fp_round_mask, fp_denorm_mask, dx10_clamp_mask, ieee_mode_mask);
+
+defvar fp_excp_en_mask = !shl(!add(!shl(1, 7), -1), 12);
+defvar fp16_ovfl = !shl(1, 23);
+defvar fpmode_mask_gfx6plus = !or(fpmode_mask, fp_excp_en_mask);
+defvar fpmode_mask_gfx9plus = !or(fpmode_mask_gfx6plus, fp16_ovfl);
+
+class GetFPModePat<int fpmode_mask> : GCNPat<
+  (i32 get_fpmode),
+  (S_AND_B32 (i32 fpmode_mask),
+             (S_GETREG_B32 getHwRegImm<
+                             HWREG.MODE, 0,
+                             !add(!logtwo(fpmode_mask), 1)>.ret))
+>;
+
+// TODO: Might be worth moving to custom lowering so the and is
+// exposed to demanded bits optimizations. Most users probably only
+// care about the rounding or denorm mode bits. We also can reduce the
+// demanded read from the getreg immediate.
+let SubtargetPredicate = isGFX9Plus in {
+// Last bit = FP16_OVFL
+def : GetFPModePat<fpmode_mask_gfx9plus>;
+}
+
+// Last bit = EXCP_EN.int_div0
+let SubtargetPredicate = isNotGFX9Plus in {
+def : GetFPModePat<fpmode_mask_gfx6plus>;
+}
+
 //===----------------------------------------------------------------------===//
 // SOP2 Patterns
 //===----------------------------------------------------------------------===//
Index: llvm/test/CodeGen/AMDGPU/llvm.get.fpmode.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/llvm.get.fpmode.ll
@@ -0,0 +1,697 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX678,GFX6 %s
+; RUN: llc -march=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX678,GFX7 %s
+; RUN: llc -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX678,GFX8 %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX10 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11 %s
+
+declare i32 @llvm.get.fpmode.i32()
+
+define i32 @func_fpmode_i32() {
+; GFX678-LABEL: func_fpmode_i32:
+; GFX678:       ; %bb.0:
+; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX678-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
+; GFX678-NEXT:    s_and_b32 s4, 0x7f3ff, s4
+; GFX678-NEXT:    v_mov_b32_e32 v0, s4
+; GFX678-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: func_fpmode_i32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
+; GFX9-NEXT:    s_and_b32 s4, 0x87f3ff, s4
+; GFX9-NEXT:    v_mov_b32_e32 v0, s4
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: func_fpmode_i32:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
+; GFX10-NEXT:    s_and_b32 s4, 0x87f3ff, s4
+; GFX10-NEXT:    v_mov_b32_e32 v0, s4
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: func_fpmode_i32:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
+; GFX11-NEXT:    s_and_b32 s0, 0x87f3ff, s0
+; GFX11-NEXT:    v_mov_b32_e32 v0, s0
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %fpmode = call i32 @llvm.get.fpmode.i32()
+  ret i32 %fpmode
+}
+
+define i32 @strictfp_func_fpmode_i32() strictfp {
+; GFX678-LABEL: strictfp_func_fpmode_i32:
+; GFX678:       ; %bb.0:
+; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX678-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
+; GFX678-NEXT:    s_and_b32 s4, 0x7f3ff, s4
+; GFX678-NEXT:    
v_mov_b32_e32 v0, s4 +; GFX678-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: strictfp_func_fpmode_i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: strictfp_func_fpmode_i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: strictfp_func_fpmode_i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24) +; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %fpmode = call i32 @llvm.get.fpmode.i32() + ret i32 %fpmode +} + +define amdgpu_kernel void @kernel_fpmode_i32(ptr addrspace(1) %ptr) { +; GFX6-LABEL: kernel_fpmode_i32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19) +; GFX6-NEXT: s_and_b32 s4, 0x7f3ff, s4 +; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_mov_b32 s2, -1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: kernel_fpmode_i32: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19) +; GFX7-NEXT: s_and_b32 s4, 0x7f3ff, s4 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, -1 +; GFX7-NEXT: v_mov_b32_e32 v0, s4 +; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GFX7-NEXT: s_endpgm +; +; GFX8-LABEL: kernel_fpmode_i32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX8-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 19) +; GFX8-NEXT: s_and_b32 s2, 0x7f3ff, s2 +; GFX8-NEXT: v_mov_b32_e32 v2, s2 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v0, s0 +; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: flat_store_dword v[0:1], v2 +; GFX8-NEXT: s_endpgm +; +; GFX9-LABEL: kernel_fpmode_i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 24) +; GFX9-NEXT: s_and_b32 s2, 0x87f3ff, s2 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: v_mov_b32_e32 v1, s2 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: kernel_fpmode_i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 24) +; GFX10-NEXT: v_mov_b32_e32 v0, 0 +; GFX10-NEXT: s_and_b32 s2, 0x87f3ff, s2 +; GFX10-NEXT: v_mov_b32_e32 v1, s2 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: global_store_dword v0, v1, s[0:1] +; GFX10-NEXT: s_endpgm +; +; GFX11-LABEL: kernel_fpmode_i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX11-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 24) +; GFX11-NEXT: s_and_b32 s2, 0x87f3ff, s2 +; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX11-NEXT: s_nop 0 +; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11-NEXT: s_endpgm + %fpmode = call i32 @llvm.get.fpmode.i32() + store 
i32 %fpmode, ptr addrspace(1) %ptr + ret void +} + +; TODO: We should be able to reduce the demanded bits and ask for less +; from s_getreg_b32 +define i32 @func_fpmode_i32_denormonly() { +; GFX678-LABEL: func_fpmode_i32_denormonly: +; GFX678: ; %bb.0: +; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19) +; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4 +; GFX678-NEXT: s_and_b32 s4, s4, 0xf0 +; GFX678-NEXT: v_mov_b32_e32 v0, s4 +; GFX678-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: func_fpmode_i32_denormonly: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX9-NEXT: s_and_b32 s4, s4, 0xf0 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: func_fpmode_i32_denormonly: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX10-NEXT: s_and_b32 s4, s4, 0xf0 +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: func_fpmode_i32_denormonly: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24) +; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0 +; GFX11-NEXT: s_and_b32 s0, s0, 0xf0 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %fpmode = call i32 @llvm.get.fpmode.i32() + %denorm.only = and i32 %fpmode, 240 + ret i32 %denorm.only +} + +define i32 @func_fpmode_i32_roundonly() { +; GFX678-LABEL: func_fpmode_i32_roundonly: +; GFX678: ; %bb.0: +; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19) +; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4 +; GFX678-NEXT: s_and_b32 s4, s4, 15 +; GFX678-NEXT: v_mov_b32_e32 v0, s4 +; GFX678-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: func_fpmode_i32_roundonly: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX9-NEXT: s_and_b32 s4, s4, 15 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: func_fpmode_i32_roundonly: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX10-NEXT: s_and_b32 s4, s4, 15 +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: func_fpmode_i32_roundonly: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24) +; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0 +; GFX11-NEXT: s_and_b32 s0, s0, 15 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %fpmode = call i32 @llvm.get.fpmode.i32() + %round.only = and i32 %fpmode, 15 + ret i32 %round.only +} + +define i32 @func_fpmode_i32_round_denorm_only() { +; GFX678-LABEL: func_fpmode_i32_round_denorm_only: +; GFX678: ; %bb.0: +; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19) +; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4 +; GFX678-NEXT: s_and_b32 s4, s4, 0xff +; GFX678-NEXT: v_mov_b32_e32 v0, s4 +; GFX678-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: func_fpmode_i32_round_denorm_only: +; GFX9: ; %bb.0: +; GFX9-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX9-NEXT: s_and_b32 s4, s4, 0xff +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: func_fpmode_i32_round_denorm_only: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX10-NEXT: s_and_b32 s4, s4, 0xff +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: func_fpmode_i32_round_denorm_only: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24) +; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0 +; GFX11-NEXT: s_and_b32 s0, s0, 0xff +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %fpmode = call i32 @llvm.get.fpmode.i32() + %round.denorm.only = and i32 %fpmode, 255 + ret i32 %round.denorm.only +} + +define i32 @func_fpmode_i32_round_denorm_dx10_ieee() { +; GFX678-LABEL: func_fpmode_i32_round_denorm_dx10_ieee: +; GFX678: ; %bb.0: +; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19) +; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4 +; GFX678-NEXT: s_and_b32 s4, s4, 0x3ff +; GFX678-NEXT: v_mov_b32_e32 v0, s4 +; GFX678-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: func_fpmode_i32_round_denorm_dx10_ieee: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX9-NEXT: s_and_b32 s4, s4, 0x3ff +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: func_fpmode_i32_round_denorm_dx10_ieee: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX10-NEXT: s_and_b32 s4, s4, 0x3ff +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: func_fpmode_i32_round_denorm_dx10_ieee: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24) +; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0 +; GFX11-NEXT: s_and_b32 s0, s0, 0x3ff +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %fpmode = call i32 @llvm.get.fpmode.i32() + %core.mode = and i32 %fpmode, 1023 + ret i32 %core.mode +} + +define i32 @func_fpmode_i32_excp_en() { +; GFX678-LABEL: func_fpmode_i32_excp_en: +; GFX678: ; %bb.0: +; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19) +; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4 +; GFX678-NEXT: s_and_b32 s4, s4, 0x7f000 +; GFX678-NEXT: v_mov_b32_e32 v0, s4 +; GFX678-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: func_fpmode_i32_excp_en: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX9-NEXT: s_and_b32 s4, s4, 0x7f000 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: func_fpmode_i32_excp_en: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX10-NEXT: s_and_b32 s4, s4, 0x7f000 +; GFX10-NEXT: 
v_mov_b32_e32 v0, s4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: func_fpmode_i32_excp_en: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24) +; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0 +; GFX11-NEXT: s_and_b32 s0, s0, 0x7f000 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %fpmode = call i32 @llvm.get.fpmode.i32() + %core.mode = and i32 %fpmode, 520192 + ret i32 %core.mode +} + +; Mask for all bits used on gfx6+ +define i32 @func_fpmode_i32_environment_gfx6() { +; GFX678-LABEL: func_fpmode_i32_environment_gfx6: +; GFX678: ; %bb.0: +; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19) +; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4 +; GFX678-NEXT: s_and_b32 s4, s4, 0x7f3ff +; GFX678-NEXT: v_mov_b32_e32 v0, s4 +; GFX678-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: func_fpmode_i32_environment_gfx6: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX9-NEXT: s_and_b32 s4, s4, 0x7f3ff +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: func_fpmode_i32_environment_gfx6: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX10-NEXT: s_and_b32 s4, s4, 0x7f3ff +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: func_fpmode_i32_environment_gfx6: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24) +; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0 +; GFX11-NEXT: s_and_b32 s0, s0, 0x7f3ff +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %fpmode = call i32 @llvm.get.fpmode.i32() + %core.mode = and i32 %fpmode, 521215 + ret i32 %core.mode +} + +; Mask for all bits used on gfx9+ +define i32 @func_fpmode_i32_environment_gfx9() { +; GFX678-LABEL: func_fpmode_i32_environment_gfx9: +; GFX678: ; %bb.0: +; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19) +; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4 +; GFX678-NEXT: s_and_b32 s4, s4, 0x87f3ff +; GFX678-NEXT: v_mov_b32_e32 v0, s4 +; GFX678-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: func_fpmode_i32_environment_gfx9: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX9-NEXT: s_and_b32 s4, s4, 0x87f3ff +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: func_fpmode_i32_environment_gfx9: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX10-NEXT: s_and_b32 s4, s4, 0x87f3ff +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: func_fpmode_i32_environment_gfx9: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24) +; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0 +; GFX11-NEXT: s_and_b32 s0, s0, 0x87f3ff +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %fpmode = call i32 @llvm.get.fpmode.i32() + %core.mode = and 
i32 %fpmode, 8909823 + ret i32 %core.mode +} + +define i32 @func_fpmode_i32_denormf32only() { +; GFX678-LABEL: func_fpmode_i32_denormf32only: +; GFX678: ; %bb.0: +; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19) +; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4 +; GFX678-NEXT: s_and_b32 s4, s4, 48 +; GFX678-NEXT: v_mov_b32_e32 v0, s4 +; GFX678-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: func_fpmode_i32_denormf32only: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX9-NEXT: s_and_b32 s4, s4, 48 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: func_fpmode_i32_denormf32only: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX10-NEXT: s_and_b32 s4, s4, 48 +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: func_fpmode_i32_denormf32only: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24) +; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0 +; GFX11-NEXT: s_and_b32 s0, s0, 48 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %fpmode = call i32 @llvm.get.fpmode.i32() + %denorm.only = and i32 %fpmode, 48 + ret i32 %denorm.only +} + +define i32 @func_fpmode_i32_denormf32only_0() { +; GFX678-LABEL: func_fpmode_i32_denormf32only_0: +; GFX678: ; %bb.0: +; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19) +; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4 +; GFX678-NEXT: s_and_b32 s4, s4, 32 +; GFX678-NEXT: v_mov_b32_e32 v0, s4 +; GFX678-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: func_fpmode_i32_denormf32only_0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX9-NEXT: s_and_b32 s4, s4, 32 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: func_fpmode_i32_denormf32only_0: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX10-NEXT: s_and_b32 s4, s4, 32 +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: func_fpmode_i32_denormf32only_0: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24) +; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0 +; GFX11-NEXT: s_and_b32 s0, s0, 32 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %fpmode = call i32 @llvm.get.fpmode.i32() + %denorm.only = and i32 %fpmode, 32 + ret i32 %denorm.only +} + +define i32 @func_fpmode_i32_denormf32only_1() { +; GFX678-LABEL: func_fpmode_i32_denormf32only_1: +; GFX678: ; %bb.0: +; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19) +; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4 +; GFX678-NEXT: s_and_b32 s4, s4, 64 +; GFX678-NEXT: v_mov_b32_e32 v0, s4 +; GFX678-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: func_fpmode_i32_denormf32only_1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_getreg_b32 
s4, hwreg(HW_REG_MODE, 0, 24) +; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX9-NEXT: s_and_b32 s4, s4, 64 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: func_fpmode_i32_denormf32only_1: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX10-NEXT: s_and_b32 s4, s4, 64 +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: func_fpmode_i32_denormf32only_1: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24) +; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0 +; GFX11-NEXT: s_and_b32 s0, s0, 64 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %fpmode = call i32 @llvm.get.fpmode.i32() + %denorm.only = and i32 %fpmode, 64 + ret i32 %denorm.only +} + +define i32 @func_fpmode_i32_denormf64f16only() { +; GFX678-LABEL: func_fpmode_i32_denormf64f16only: +; GFX678: ; %bb.0: +; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19) +; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4 +; GFX678-NEXT: s_and_b32 s4, s4, 0xc0 +; GFX678-NEXT: v_mov_b32_e32 v0, s4 +; GFX678-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: func_fpmode_i32_denormf64f16only: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX9-NEXT: s_and_b32 s4, s4, 0xc0 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: func_fpmode_i32_denormf64f16only: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX10-NEXT: s_and_b32 s4, s4, 0xc0 +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: func_fpmode_i32_denormf64f16only: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24) +; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0 +; GFX11-NEXT: s_and_b32 s0, s0, 0xc0 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %fpmode = call i32 @llvm.get.fpmode.i32() + %denorm.only = and i32 %fpmode, 192 + ret i32 %denorm.only +} + +define i32 @func_fpmode_i32_dx10_clamp_only() { +; GFX678-LABEL: func_fpmode_i32_dx10_clamp_only: +; GFX678: ; %bb.0: +; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19) +; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4 +; GFX678-NEXT: s_and_b32 s4, s4, 0x100 +; GFX678-NEXT: v_mov_b32_e32 v0, s4 +; GFX678-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: func_fpmode_i32_dx10_clamp_only: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX9-NEXT: s_and_b32 s4, s4, 0x100 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: func_fpmode_i32_dx10_clamp_only: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX10-NEXT: s_and_b32 s4, s4, 0x100 +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: 
func_fpmode_i32_dx10_clamp_only: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24) +; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0 +; GFX11-NEXT: s_and_b32 s0, s0, 0x100 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %fpmode = call i32 @llvm.get.fpmode.i32() + %dx10.only = and i32 %fpmode, 256 + ret i32 %dx10.only +} + +define i32 @func_fpmode_i32_ieee_only() { +; GFX678-LABEL: func_fpmode_i32_ieee_only: +; GFX678: ; %bb.0: +; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19) +; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4 +; GFX678-NEXT: s_and_b32 s4, s4, 0x200 +; GFX678-NEXT: v_mov_b32_e32 v0, s4 +; GFX678-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: func_fpmode_i32_ieee_only: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX9-NEXT: s_and_b32 s4, s4, 0x200 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: func_fpmode_i32_ieee_only: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24) +; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4 +; GFX10-NEXT: s_and_b32 s4, s4, 0x200 +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: func_fpmode_i32_ieee_only: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24) +; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0 +; GFX11-NEXT: s_and_b32 s0, s0, 0x200 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %fpmode = call i32 @llvm.get.fpmode.i32() + %ieee.only = and i32 %fpmode, 512 + ret i32 %ieee.only +} + +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GCN: {{.*}} +; GFX1011: {{.*}}