diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td --- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td @@ -64,6 +64,15 @@ [{ return RegBankHelper.matchIntMinMaxToMed3(*${min_or_max}, ${matchinfo}); }]), (apply [{ RegBankHelper.applyMed3(*${min_or_max}, ${matchinfo}); }])>; +def fp_minmax_to_med3 : GICombineRule< + (defs root:$min_or_max, med3_matchdata:$matchinfo), + (match (wip_match_opcode G_FMAXNUM, + G_FMINNUM, + G_FMAXNUM_IEEE, + G_FMINNUM_IEEE):$min_or_max, + [{ return RegBankHelper.matchFPMinMaxToMed3(*${min_or_max}, ${matchinfo}); }]), + (apply [{ RegBankHelper.applyMed3(*${min_or_max}, ${matchinfo}); }])>; + def remove_fcanonicalize_matchinfo : GIDefMatchData<"Register">; def remove_fcanonicalize : GICombineRule< @@ -102,7 +111,9 @@ } def AMDGPURegBankCombinerHelper : GICombinerHelper< - "AMDGPUGenRegBankCombinerHelper", [zext_trunc_fold, int_minmax_to_med3, ptr_add_immed_chain]> { + "AMDGPUGenRegBankCombinerHelper", + [zext_trunc_fold, int_minmax_to_med3, ptr_add_immed_chain, + fp_minmax_to_med3]> { let DisableRuleOption = "amdgpuregbankcombiner-disable-rule"; let StateClass = "AMDGPURegBankCombinerHelperState"; let AdditionalArguments = []; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td --- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -172,6 +172,7 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp @@ -16,6 +16,7 @@ #include "AMDGPURegisterBankInfo.h" #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/GlobalISel/Combiner.h" 
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h" #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" @@ -36,13 +37,15 @@ MachineRegisterInfo &MRI; const RegisterBankInfo &RBI; const TargetRegisterInfo &TRI; + const SIInstrInfo &TII; CombinerHelper &Helper; public: AMDGPURegBankCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper) : B(B), MF(B.getMF()), MRI(*B.getMRI()), RBI(*MF.getSubtarget().getRegBankInfo()), - TRI(*MF.getSubtarget().getRegisterInfo()), Helper(Helper){}; + TRI(*MF.getSubtarget().getRegisterInfo()), + TII(*MF.getSubtarget().getInstrInfo()), Helper(Helper){}; bool isVgprRegBank(Register Reg); Register getAsVgpr(Register Reg); @@ -63,7 +66,13 @@ Register &Val, CstTy &K0, CstTy &K1); bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo); + bool matchFPMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo); void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo); + +private: + AMDGPU::SIModeRegisterDefaults getMode(); + bool getIEEE(); + bool isFminnumIeee(const MachineInstr &MI); }; bool AMDGPURegBankCombinerHelper::isVgprRegBank(Register Reg) { @@ -98,6 +107,13 @@ case AMDGPU::G_UMAX: case AMDGPU::G_UMIN: return {AMDGPU::G_UMIN, AMDGPU::G_UMAX, AMDGPU::G_AMDGPU_UMED3}; + case AMDGPU::G_FMAXNUM: + case AMDGPU::G_FMINNUM: + return {AMDGPU::G_FMINNUM, AMDGPU::G_FMAXNUM, AMDGPU::G_AMDGPU_FMED3}; + case AMDGPU::G_FMAXNUM_IEEE: + case AMDGPU::G_FMINNUM_IEEE: + return {AMDGPU::G_FMINNUM_IEEE, AMDGPU::G_FMAXNUM_IEEE, + AMDGPU::G_AMDGPU_FMED3}; } } @@ -148,6 +164,59 @@ return true; } +// fmed3(NaN, K0, K1) = min(min(NaN, K0), K1) +// ieee = true : min/max(SNaN, K) = QNaN, min/max(QNaN, K) = K +// ieee = false : min/max(NaN, K) = K +// Consider values of min(max(Val, K0), K1) and max(min(Val, K1), K0) as input. +// Other operand commutes (see matchMed) give same result since min and max are +// commutative. + +// Try to replace fp min(max(Val, K0), K1) or max(min(Val, K1), K0), K0<=K1 +// with fmed3(Val, K0, K1). 
+// Val = SNaN only for ieee = true +// fmed3(SNaN, K0, K1) = min(min(SNaN, K0), K1) = min(QNaN, K1) = K1 +// min(max(SNaN, K0), K1) = min(QNaN, K1) = K1 +// max(min(SNaN, K1), K0) = max(K1, K0) = K1 +// Val = NaN,ieee = false or Val = QNaN,ieee = true +// fmed3(NaN, K0, K1) = min(min(NaN, K0), K1) = min(K0, K1) = K0 +// min(max(NaN, K0), K1) = min(K0, K1) = K0 +// max(min(NaN, K1), K0) = max(K1, K0) = K1 != K0 +bool AMDGPURegBankCombinerHelper::matchFPMinMaxToMed3( + MachineInstr &MI, Med3MatchInfo &MatchInfo) { + Register Dst = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(Dst); + if (Ty != LLT::scalar(16) && Ty != LLT::scalar(32)) + return false; + + auto OpcodeTriple = getMinMaxPair(MI.getOpcode()); + + Register Val; + Optional K0, K1; + // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1. + if (!matchMed(MI, MRI, OpcodeTriple, Val, K0, K1)) + return false; + + if (K0->Value > K1->Value) + return false; + + // For IEEE=false perform combine only when it's safe to assume that there are + // no NaN inputs. Most often MI is marked with nnan fast math flag. + // For IEEE=true consider NaN inputs. fmed3(NaN, K0, K1) is equivalent to + // min(min(NaN, K0), K1). Safe to fold for min(max(Val, K0), K1) since inner + // nodes(max/min) have same behavior when one input is NaN and other isn't. + // Don't consider max(min(SNaN, K1), K0) since there is no isKnownNeverQNaN, + // also post-legalizer inputs to min/max are fcanonicalized (never SNaN). + if ((getIEEE() && isFminnumIeee(MI)) || isKnownNeverNaN(Dst, MRI)) { + // Don't fold single use constant that can't be inlined. 
+ if ((!MRI.hasOneNonDBGUse(K0->VReg) || TII.isInlineConstant(K0->Value)) && + (!MRI.hasOneNonDBGUse(K1->VReg) || TII.isInlineConstant(K1->Value))) { + MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg}; + return true; + } + } + + return false; +} void AMDGPURegBankCombinerHelper::applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) { B.setInstrAndDebugLoc(MI); @@ -158,6 +227,16 @@ MI.eraseFromParent(); } +AMDGPU::SIModeRegisterDefaults AMDGPURegBankCombinerHelper::getMode() { + return MF.getInfo()->getMode(); +} + +bool AMDGPURegBankCombinerHelper::getIEEE() { return getMode().IEEE; } + +bool AMDGPURegBankCombinerHelper::isFminnumIeee(const MachineInstr &MI) { + return MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE; +} + class AMDGPURegBankCombinerHelperState { protected: CombinerHelper &Helper; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -2866,6 +2866,12 @@ let hasSideEffects = 0; } +def G_AMDGPU_FMED3 : AMDGPUGenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src0, type0:$src1, type0:$src2); + let hasSideEffects = 0; +} + // Atomic cmpxchg. $cmpval ad $newval are packed in a single vector // operand Expects a MachineMemOperand in addition to explicit // operands. 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll @@ -0,0 +1,260 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s + +define float @test_min_max_ValK0_K1_f32(float %a) #0 { +; GFX10-LABEL: test_min_max_ValK0_K1_f32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %maxnum = call nnan float @llvm.maxnum.f32(float %a, float 2.0) + %fmed = call nnan float @llvm.minnum.f32(float %maxnum, float 4.0) + ret float %fmed +} + +define float @test_min_max_K0Val_K1_f32(float %a) #1 { +; GFX10-LABEL: test_min_max_K0Val_K1_f32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %maxnum = call nnan float @llvm.maxnum.f32(float 2.0, float %a) + %fmed = call nnan float @llvm.minnum.f32(float %maxnum, float 4.0) + ret float %fmed +} + +; min-max patterns for ieee=true do not have to check for NaNs +; 'v_max_f16_e32 v0, v0, v0' is from fcanonicalize of the input to fmin/fmax with ieee=true +define half @test_min_K1max_ValK0_f16(half %a) #0 { +; GFX10-LABEL: test_min_K1max_ValK0_f16: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %maxnum = call half @llvm.maxnum.f16(half %a, half 2.0) + %fmed = call half @llvm.minnum.f16(half 4.0, half %maxnum) + ret 
half %fmed +} + +define half @test_min_K1max_K0Val_f16(half %a) #1 { +; GFX10-LABEL: test_min_K1max_K0Val_f16: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %maxnum = call nnan half @llvm.maxnum.f16(half 2.0, half %a) + %fmed = call nnan half @llvm.minnum.f16(half 4.0, half %maxnum) + ret half %fmed +} + +; max-min patterns work only for non-NaN inputs +define float @test_max_min_ValK1_K0_f32(float %a) #0 { +; GFX10-LABEL: test_max_min_ValK1_K0_f32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %minnum = call nnan float @llvm.minnum.f32(float %a, float 4.0) + %fmed = call nnan float @llvm.maxnum.f32(float %minnum, float 2.0) + ret float %fmed +} + +define float @test_max_min_K1Val_K0_f32(float %a) #1 { +; GFX10-LABEL: test_max_min_K1Val_K0_f32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %minnum = call nnan float @llvm.minnum.f32(float 4.0, float %a) + %fmed = call nnan float @llvm.maxnum.f32(float %minnum, float 2.0) + ret float %fmed +} + +define half @test_max_K0min_ValK1_f16(half %a) #0 { +; GFX10-LABEL: test_max_K0min_ValK1_f16: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %minnum = call nnan half @llvm.minnum.f16(half %a, half 4.0) + %fmed = call nnan half @llvm.maxnum.f16(half 2.0, half %minnum) + ret half %fmed +} + +define half @test_max_K0min_K1Val_f16(half %a) #1 { +; GFX10-LABEL: test_max_K0min_K1Val_f16: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %minnum = call nnan half @llvm.minnum.f16(half 4.0, half %a) + %fmed = call nnan half @llvm.maxnum.f16(half 2.0, half %minnum) + ret half %fmed +} + +; global nnan function attribute always forces fmed3 combine + +define float @test_min_max_global_nnan(float %a) #2 { +; GFX10-LABEL: test_min_max_global_nnan: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %maxnum = call float @llvm.maxnum.f32(float %a, float 2.0) + %fmed = call float @llvm.minnum.f32(float %maxnum, float 4.0) + ret float %fmed +} + +define float @test_max_min_global_nnan(float %a) #2 { +; GFX10-LABEL: test_max_min_global_nnan: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %minnum = call float @llvm.minnum.f32(float %a, float 4.0) + %fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0) + ret float %fmed +} + +; ------------------------------------------------------------------------------ +; Negative patterns +; ------------------------------------------------------------------------------ + +; min(max(Val, K0), K1) K0 > K1, should be K0<=K1 +define float @test_min_max_K0_gt_K1(float %a) #0 { +; GFX10-LABEL: test_min_max_K0_gt_K1: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_max_f32_e32 v0, 4.0, v0 +; GFX10-NEXT: v_min_f32_e32 v0, 2.0, v0 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %maxnum = call nnan float @llvm.maxnum.f32(float %a, float 4.0) + %fmed = call nnan float @llvm.minnum.f32(float %maxnum, float 2.0) + ret float %fmed +} + +; max(min(Val, K1), K0) K0 > K1, should be 
K0<=K1 +define float @test_max_min_K0_gt_K1(float %a) #0 { +; GFX10-LABEL: test_max_min_K0_gt_K1: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_min_f32_e32 v0, 2.0, v0 +; GFX10-NEXT: v_max_f32_e32 v0, 4.0, v0 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %minnum = call nnan float @llvm.minnum.f32(float %a, float 2.0) + %fmed = call nnan float @llvm.maxnum.f32(float %minnum, float 4.0) + ret float %fmed +} + +; non-inline constant +define float @test_min_max_non_inline_const(float %a) #0 { +; GFX10-LABEL: test_min_max_non_inline_const: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0 +; GFX10-NEXT: v_min_f32_e32 v0, 0x41000000, v0 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %maxnum = call nnan float @llvm.maxnum.f32(float %a, float 2.0) + %fmed = call nnan float @llvm.minnum.f32(float %maxnum, float 8.0) + ret float %fmed +} + +; there is no fmed3 for f64 or v2f16 types + +define double @test_min_max_f64(double %a) #0 { +; GFX10-LABEL: test_min_max_f64: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], 2.0 +; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], 4.0 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %maxnum = call nnan double @llvm.maxnum.f64(double %a, double 2.0) + %fmed = call nnan double @llvm.minnum.f64(double %maxnum, double 4.0) + ret double %fmed +} + +define <2 x half> @test_min_max_v2f16(<2 x half> %a) #0 { +; GFX10-LABEL: test_min_max_v2f16: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_pk_max_f16 v0, v0, 2.0 op_sel_hi:[1,0] +; GFX10-NEXT: v_pk_min_f16 v0, v0, 4.0 op_sel_hi:[1,0] +; GFX10-NEXT: s_setpc_b64 s[30:31] + %maxnum = call nnan <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> ) + %fmed = 
call nnan <2 x half> @llvm.minnum.v2f16(<2 x half> %maxnum, <2 x half> ) + ret <2 x half> %fmed +} + +; input that can be NaN + +; min-max patterns for ieee=false require known non-NaN input +define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 { +; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_false: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0 +; GFX10-NEXT: v_min_f32_e32 v0, 4.0, v0 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %maxnum = call float @llvm.maxnum.f32(float %a, float 2.0) + %fmed = call float @llvm.minnum.f32(float %maxnum, float 4.0) + ret float %fmed +} + +; max-min patterns always require known non-NaN input + +define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 { +; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_false: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_min_f32_e32 v0, 4.0, v0 +; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %minnum = call float @llvm.minnum.f32(float %a, float 4.0) + %fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0) + ret float %fmed +} + +; 'v_max_f32_e32 v0, v0, v0' is from fcanonicalize of the input to fmin/fmax with ieee=true +define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 { +; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_true: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX10-NEXT: v_min_f32_e32 v0, 4.0, v0 +; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %minnum = call float @llvm.minnum.f32(float %a, float 4.0) + %fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0) + ret float %fmed +} + +declare half @llvm.minnum.f16(half, half) +declare half @llvm.maxnum.f16(half, half) +declare 
float @llvm.minnum.f32(float, float) +declare float @llvm.maxnum.f32(float, float) +declare double @llvm.minnum.f64(double, double) +declare double @llvm.maxnum.f64(double, double) +declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) +declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) +attributes #0 = {"amdgpu-ieee"="true"} +attributes #1 = {"amdgpu-ieee"="false"} +attributes #2 = {"no-nans-fp-math"="true"} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-fmed3-minmax-const.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-fmed3-minmax-const.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-fmed3-minmax-const.mir @@ -0,0 +1,553 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: test_min_max_ValK0_K1_f32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + mode: + ieee: true +body: | + bb.1: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_min_max_ValK0_K1_f32 + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = nnan G_AMDGPU_FMED3 [[COPY]], [[COPY1]], [[COPY2]] + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32) + %0:vgpr(s32) = COPY $vgpr0 + %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00 + %7:vgpr(s32) = COPY %2(s32) + %3:vgpr(s32) = nnan G_FMAXNUM_IEEE %0, %7 + %4:sgpr(s32) = G_FCONSTANT float 4.000000e+00 + %8:vgpr(s32) = COPY %4(s32) + %5:vgpr(s32) = nnan G_FMINNUM_IEEE %3, %8 + $vgpr0 = 
COPY %5(s32) +... + +--- +name: test_min_max_K0Val_K1_f32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + mode: + ieee: false +body: | + bb.1: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_min_max_K0Val_K1_f32 + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = nnan G_AMDGPU_FMED3 [[COPY]], [[COPY1]], [[COPY2]] + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32) + %0:vgpr(s32) = COPY $vgpr0 + %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00 + %7:vgpr(s32) = COPY %2(s32) + %3:vgpr(s32) = nnan G_FMAXNUM %7, %0 + %4:sgpr(s32) = G_FCONSTANT float 4.000000e+00 + %8:vgpr(s32) = COPY %4(s32) + %5:vgpr(s32) = nnan G_FMINNUM %3, %8 + $vgpr0 = COPY %5(s32) +... 
+ +--- +name: test_min_K1max_ValK0_f16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + mode: + ieee: true +body: | + bb.1: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_min_K1max_ValK0_f16 + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4000 + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s16) = G_FCANONICALIZE [[TRUNC]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s16) = COPY [[C]](s16) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4400 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[C1]](s16) + ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s16) = G_AMDGPU_FMED3 [[FCANONICALIZE]], [[COPY1]], [[COPY2]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[AMDGPU_FMED3_]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + %2:vgpr(s32) = COPY $vgpr0 + %0:vgpr(s16) = G_TRUNC %2(s32) + %3:sgpr(s16) = G_FCONSTANT half 0xH4000 + %9:vgpr(s16) = G_FCANONICALIZE %0 + %10:vgpr(s16) = COPY %3(s16) + %4:vgpr(s16) = G_FMAXNUM_IEEE %9, %10 + %5:sgpr(s16) = G_FCONSTANT half 0xH4400 + %11:vgpr(s16) = COPY %5(s16) + %6:vgpr(s16) = G_FMINNUM_IEEE %11, %4 + %8:vgpr(s32) = G_ANYEXT %6(s16) + $vgpr0 = COPY %8(s32) + +... 
+ +--- +name: test_min_K1max_K0Val_f16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + mode: + ieee: false +body: | + bb.1: + liveins: $vgpr0, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: test_min_K1max_K0Val_f16 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4000 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s16) = COPY [[C]](s16) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4400 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[C1]](s16) + ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s16) = nnan G_AMDGPU_FMED3 [[TRUNC]], [[COPY1]], [[COPY2]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[AMDGPU_FMED3_]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + %2:vgpr(s32) = COPY $vgpr0 + %0:vgpr(s16) = G_TRUNC %2(s32) + %3:sgpr(s16) = G_FCONSTANT half 0xH4000 + %9:vgpr(s16) = COPY %3(s16) + %4:vgpr(s16) = nnan G_FMAXNUM %9, %0 + %5:sgpr(s16) = G_FCONSTANT half 0xH4400 + %10:vgpr(s16) = COPY %5(s16) + %6:vgpr(s16) = nnan G_FMINNUM %10, %4 + %8:vgpr(s32) = G_ANYEXT %6(s16) + $vgpr0 = COPY %8(s32) +... 
+ +--- +name: test_max_min_ValK1_K0_f32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + mode: + ieee: true +body: | + bb.1: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_max_min_ValK1_K0_f32 + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = nnan G_AMDGPU_FMED3 [[COPY]], [[COPY2]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32) + %0:vgpr(s32) = COPY $vgpr0 + %2:sgpr(s32) = G_FCONSTANT float 4.000000e+00 + %7:vgpr(s32) = COPY %2(s32) + %3:vgpr(s32) = nnan G_FMINNUM_IEEE %0, %7 + %4:sgpr(s32) = G_FCONSTANT float 2.000000e+00 + %8:vgpr(s32) = COPY %4(s32) + %5:vgpr(s32) = nnan G_FMAXNUM_IEEE %3, %8 + $vgpr0 = COPY %5(s32) +... 
+ +--- +name: test_max_min_K1Val_K0_f32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + mode: + ieee: false +body: | + bb.1: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_max_min_K1Val_K0_f32 + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = nnan G_AMDGPU_FMED3 [[COPY]], [[COPY2]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32) + %0:vgpr(s32) = COPY $vgpr0 + %2:sgpr(s32) = G_FCONSTANT float 4.000000e+00 + %7:vgpr(s32) = COPY %2(s32) + %3:vgpr(s32) = nnan G_FMINNUM %7, %0 + %4:sgpr(s32) = G_FCONSTANT float 2.000000e+00 + %8:vgpr(s32) = COPY %4(s32) + %5:vgpr(s32) = nnan G_FMAXNUM %3, %8 + $vgpr0 = COPY %5(s32) +... 
+ +--- +name: test_max_K0min_ValK1_f16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + mode: + ieee: true +body: | + bb.1: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_max_K0min_ValK1_f16 + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4400 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s16) = COPY [[C]](s16) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4000 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[C1]](s16) + ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s16) = nnan G_AMDGPU_FMED3 [[TRUNC]], [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[AMDGPU_FMED3_]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + %2:vgpr(s32) = COPY $vgpr0 + %0:vgpr(s16) = G_TRUNC %2(s32) + %3:sgpr(s16) = G_FCONSTANT half 0xH4400 + %9:vgpr(s16) = COPY %3(s16) + %4:vgpr(s16) = nnan G_FMINNUM_IEEE %0, %9 + %5:sgpr(s16) = G_FCONSTANT half 0xH4000 + %10:vgpr(s16) = COPY %5(s16) + %6:vgpr(s16) = nnan G_FMAXNUM_IEEE %10, %4 + %8:vgpr(s32) = G_ANYEXT %6(s16) + $vgpr0 = COPY %8(s32) +... 
+ +--- +name: test_max_K0min_K1Val_f16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + mode: + ieee: false +body: | + bb.1: + liveins: $vgpr0, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: test_max_K0min_K1Val_f16 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4400 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s16) = COPY [[C]](s16) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4000 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[C1]](s16) + ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s16) = nnan G_AMDGPU_FMED3 [[TRUNC]], [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[AMDGPU_FMED3_]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + %2:vgpr(s32) = COPY $vgpr0 + %0:vgpr(s16) = G_TRUNC %2(s32) + %3:sgpr(s16) = G_FCONSTANT half 0xH4400 + %9:vgpr(s16) = COPY %3(s16) + %4:vgpr(s16) = nnan G_FMINNUM %9, %0 + %5:sgpr(s16) = G_FCONSTANT half 0xH4000 + %10:vgpr(s16) = COPY %5(s16) + %6:vgpr(s16) = nnan G_FMAXNUM %10, %4 + %8:vgpr(s32) = G_ANYEXT %6(s16) + $vgpr0 = COPY %8(s32) +... 
+ +# FixMe: add tests with attributes #2 = {"no-nans-fp-math"="true"} + +--- +name: test_min_max_K0_gt_K1 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + mode: + ieee: true +body: | + bb.1: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_min_max_K0_gt_K1 + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(s32) = nnan G_FMAXNUM_IEEE [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s32) = nnan G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[COPY2]] + ; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + %0:vgpr(s32) = COPY $vgpr0 + %2:sgpr(s32) = G_FCONSTANT float 4.000000e+00 + %7:vgpr(s32) = COPY %2(s32) + %3:vgpr(s32) = nnan G_FMAXNUM_IEEE %0, %7 + %4:sgpr(s32) = G_FCONSTANT float 2.000000e+00 + %8:vgpr(s32) = COPY %4(s32) + %5:vgpr(s32) = nnan G_FMINNUM_IEEE %3, %8 + $vgpr0 = COPY %5(s32) +... 
+ +--- +name: test_max_min_K0_gt_K1 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + mode: + ieee: true +body: | + bb.1: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_max_min_K0_gt_K1 + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s32) = nnan G_FMINNUM_IEEE [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(s32) = nnan G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[COPY2]] + ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + %0:vgpr(s32) = COPY $vgpr0 + %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00 + %7:vgpr(s32) = COPY %2(s32) + %3:vgpr(s32) = nnan G_FMINNUM_IEEE %0, %7 + %4:sgpr(s32) = G_FCONSTANT float 4.000000e+00 + %8:vgpr(s32) = COPY %4(s32) + %5:vgpr(s32) = nnan G_FMAXNUM_IEEE %3, %8 + $vgpr0 = COPY %5(s32) +... 
+ +--- +name: test_min_max_non_inline_const +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + mode: + ieee: true +body: | + bb.1: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_min_max_non_inline_const + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(s32) = nnan G_FMAXNUM_IEEE [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 8.000000e+00 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s32) = nnan G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[COPY2]] + ; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + %0:vgpr(s32) = COPY $vgpr0 + %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00 + %7:vgpr(s32) = COPY %2(s32) + %3:vgpr(s32) = nnan G_FMAXNUM_IEEE %0, %7 + %4:sgpr(s32) = G_FCONSTANT float 8.000000e+00 + %8:vgpr(s32) = COPY %4(s32) + %5:vgpr(s32) = nnan G_FMINNUM_IEEE %3, %8 + $vgpr0 = COPY %5(s32) +... 
+ +--- +name: test_min_max_f64 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + mode: + ieee: true +body: | + bb.1: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_min_max_f64 + ; CHECK: liveins: $vgpr0_vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_FCONSTANT double 2.000000e+00 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY [[C]](s64) + ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(s64) = nnan G_FMAXNUM_IEEE [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s64) = G_FCONSTANT double 4.000000e+00 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[C1]](s64) + ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s64) = nnan G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[COPY2]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FMINNUM_IEEE]](s64) + %0:vgpr(s64) = COPY $vgpr0_vgpr1 + %4:sgpr(s64) = G_FCONSTANT double 2.000000e+00 + %11:vgpr(s64) = COPY %4(s64) + %5:vgpr(s64) = nnan G_FMAXNUM_IEEE %0, %11 + %6:sgpr(s64) = G_FCONSTANT double 4.000000e+00 + %12:vgpr(s64) = COPY %6(s64) + %7:vgpr(s64) = nnan G_FMINNUM_IEEE %5, %12 + $vgpr0_vgpr1 = COPY %7(s64) +... 
+
+# Expected-unchanged case, machineFunctionInfo mode ieee: true.
+# Input is nnan G_FMINNUM_IEEE(G_FMAXNUM_IEEE(%0, K0), K1) on <2 x s16> values,
+# where K0 and K1 are built by G_BUILD_VECTOR_TRUNC from two copies of the
+# half constants 0xH4000 and 0xH4400 respectively. The CHECK lines expect the
+# same instruction sequence back and no G_AMDGPU_FMED3.
+# NOTE(review): going by the test name, the combine is presumably rejected
+# because the operands are <2 x s16> vectors -- confirm against
+# matchFPMinMaxToMed3.
+---
+name: test_min_max_v2f16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  mode:
+    ieee: true
+body: |
+  bb.1:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_min_max_v2f16
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4000
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[C]](s16)
+    ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4400
+    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[C1]](s16)
+    ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT1]](s32), [[ANYEXT1]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(<2 x s16>) = nnan G_FMAXNUM_IEEE [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(<2 x s16>) = nnan G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[COPY2]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](<2 x s16>)
+    %0:vgpr(<2 x s16>) = COPY $vgpr0
+    %3:sgpr(s16) = G_FCONSTANT half 0xH4000
+    %9:sgpr(s32) = G_ANYEXT %3(s16)
+    %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %9(s32), %9(s32)
+    %6:sgpr(s16) = G_FCONSTANT half 0xH4400
+    %10:sgpr(s32) = G_ANYEXT %6(s16)
+    %5:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %10(s32), %10(s32)
+    %11:vgpr(<2 x s16>) = COPY %2(<2 x s16>)
+    %4:vgpr(<2 x s16>) = nnan G_FMAXNUM_IEEE %0, %11
+    %12:vgpr(<2 x s16>) = COPY %5(<2 x s16>)
+    %7:vgpr(<2 x s16>) = nnan G_FMINNUM_IEEE %4, %12
+    $vgpr0 = COPY %7(<2 x s16>)
+...
+
+# Expected-unchanged case, machineFunctionInfo mode ieee: false.
+# Input is G_FMINNUM(G_FMAXNUM(%0, 2.0), 4.0) on s32 VGPRs with no nnan flag on
+# either instruction; the CHECK lines expect the same max/min pair back and no
+# G_AMDGPU_FMED3.
+# NOTE(review): going by the test name, the combine is presumably rejected
+# because %0 (a plain COPY of $vgpr0) may be NaN -- confirm against
+# matchFPMinMaxToMed3.
+---
+name: test_min_max_maybe_NaN_input_ieee_false
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  mode:
+    ieee: false
+body: |
+  bb.1:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_min_max_maybe_NaN_input_ieee_false
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(s32) = G_FMAXNUM [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(s32) = G_FMINNUM [[FMAXNUM]], [[COPY2]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM]](s32)
+    %0:vgpr(s32) = COPY $vgpr0
+    %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+    %7:vgpr(s32) = COPY %2(s32)
+    %3:vgpr(s32) = G_FMAXNUM %0, %7
+    %4:sgpr(s32) = G_FCONSTANT float 4.000000e+00
+    %8:vgpr(s32) = COPY %4(s32)
+    %5:vgpr(s32) = G_FMINNUM %3, %8
+    $vgpr0 = COPY %5(s32)
+...
+
+# Expected-unchanged case, machineFunctionInfo mode ieee: false.
+# Same constants as a min(max(...)) clamp but in the other nesting order:
+# input is G_FMAXNUM(G_FMINNUM(%0, 4.0), 2.0) on s32 VGPRs with no nnan flag.
+# The CHECK lines expect the same min/max pair back and no G_AMDGPU_FMED3.
+# NOTE(review): going by the test name, the combine is presumably rejected
+# because %0 (a plain COPY of $vgpr0) may be NaN -- confirm against
+# matchFPMinMaxToMed3.
+---
+name: test_max_min_maybe_NaN_input_ieee_false
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  mode:
+    ieee: false
+body: |
+  bb.1:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_max_min_maybe_NaN_input_ieee_false
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(s32) = G_FMINNUM [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(s32) = G_FMAXNUM [[FMINNUM]], [[COPY2]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM]](s32)
+    %0:vgpr(s32) = COPY $vgpr0
+    %2:sgpr(s32) = G_FCONSTANT float 4.000000e+00
+    %7:vgpr(s32) = COPY %2(s32)
+    %3:vgpr(s32) = G_FMINNUM %0, %7
+    %4:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+    %8:vgpr(s32) = COPY %4(s32)
+    %5:vgpr(s32) = G_FMAXNUM %3, %8
+    $vgpr0 = COPY %5(s32)
+...
+
+# Expected-unchanged case, machineFunctionInfo mode ieee: true.
+# Input is G_FMAXNUM_IEEE(G_FMINNUM_IEEE(G_FCANONICALIZE(%0), 4.0), 2.0) on s32
+# VGPRs with no nnan flag; the CHECK lines expect the G_FCANONICALIZE and the
+# min/max pair back unchanged, with no G_AMDGPU_FMED3.
+# NOTE(review): going by the test name, the combine is presumably rejected
+# because the input may be NaN and the outer operation is a max rather than a
+# min -- confirm against matchFPMinMaxToMed3 / isFminnumIeee.
+---
+name: test_max_min_maybe_NaN_input_ieee_true
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  mode:
+    ieee: true
+body: |
+  bb.1:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_max_min_maybe_NaN_input_ieee_true
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00
+    ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[COPY]]
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]]
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[COPY2]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32)
+    %0:vgpr(s32) = COPY $vgpr0
+    %2:sgpr(s32) = G_FCONSTANT float 4.000000e+00
+    %7:vgpr(s32) = G_FCANONICALIZE %0
+    %8:vgpr(s32) = COPY %2(s32)
+    %3:vgpr(s32) = G_FMINNUM_IEEE %7, %8
+    %4:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+    %9:vgpr(s32) = COPY %4(s32)
+    %5:vgpr(s32) = G_FMAXNUM_IEEE %3, %9
+    $vgpr0 = COPY %5(s32)
+...