Index: llvm/lib/Target/AMDGPU/AMDGPUCombine.td
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -8,8 +8,29 @@
 
 include "llvm/Target/GlobalISel/Combine.td"
 
+// TODO: This really belongs after legalization after scalarization.
+// TODO: GICombineRules should accept subtarget predicates
+
+// Match data handed from matchFMinFMaxLegacy to the apply step.
+def fmin_fmax_legacy_matchdata : GIDefMatchData<"FMinFMaxLegacyInfo">;
+
+// Fold a G_SELECT whose condition is a single-use G_FCMP of the select's own
+// two values into G_AMDGPU_FMIN_LEGACY / G_AMDGPU_FMAX_LEGACY. The subtarget
+// and type restrictions are enforced by the C++ matcher.
+def fcmp_select_to_fmin_fmax_legacy : GICombineRule<
+  (defs root:$select, fmin_fmax_legacy_matchdata:$matchinfo),
+  (match (wip_match_opcode G_SELECT):$select,
+         [{ return matchFMinFMaxLegacy(*${select}, MRI, *MF, ${matchinfo}); }]),
+  (apply [{ applySelectFCmpToFMinToFMaxLegacy(*${select}, ${matchinfo}); }])>;
+
+
+// Combines which should only apply on SI/CI (gfx6/gfx7)
+def gfx6gfx7_combines : GICombineGroup<[fcmp_select_to_fmin_fmax_legacy]>;
+
+
 def AMDGPUPreLegalizerCombinerHelper: GICombinerHelper<
   "AMDGPUGenPreLegalizerCombinerHelper", [all_combines,
-                                          elide_br_by_inverting_cond]> {
+                                          elide_br_by_inverting_cond,
+                                          gfx6gfx7_combines]> {
   let DisableRuleOption = "amdgpuprelegalizercombiner-disable-rule";
 }
Index: llvm/lib/Target/AMDGPU/AMDGPUGISel.td
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -126,6 +126,8 @@
 
 def : GINodeEquiv;
 def : GINodeEquiv;
+def : GINodeEquiv<G_AMDGPU_FMIN_LEGACY, AMDGPUfmin_legacy>;
+def : GINodeEquiv<G_AMDGPU_FMAX_LEGACY, AMDGPUfmax_legacy>;
 
 class GISelSop2Pat <
   SDPatternOperator node,
Index: llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
@@ -21,12 +21,141 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/Support/Debug.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 
 #define DEBUG_TYPE "amdgpu-prelegalizer-combiner"
 
 using namespace llvm;
 using namespace MIPatternMatch;
 
+/// Operands recorded while matching a G_SELECT of a G_FCMP; passed from
+/// matchFMinFMaxLegacy to applySelectFCmpToFMinToFMaxLegacy.
+struct FMinFMaxLegacyInfo {
+  Register LHS;            ///< First compared value of the G_FCMP.
+  Register RHS;            ///< Second compared value of the G_FCMP.
+  Register True;           ///< G_SELECT operand 2.
+  Register False;          ///< G_SELECT operand 3.
+  CmpInst::Predicate Pred; ///< Predicate of the G_FCMP.
+};
+
+/// Decide whether a G_SELECT can be replaced by a legacy fmin/fmax.
+///
+/// The select must produce a 32-bit scalar, its condition must be a G_FCMP
+/// with no other non-debug use, and its two value operands must be the two
+/// compared registers (in either order). Only the ordering predicates
+/// (olt/ole/ogt/oge/ult/ule/ugt/uge) are accepted.
+///
+/// \param MI   The G_SELECT under consideration.
+/// \param MRI  Register info used for the type, use-count and def lookups.
+/// \param MF   Function containing \p MI; queried for the subtarget.
+/// \param Info Receives the compare operands, predicate and select operands.
+///             Its contents are only meaningful when true is returned.
+/// \returns true when the fold applies.
+static bool matchFMinFMaxLegacy(MachineInstr &MI, MachineRegisterInfo &MRI,
+                                MachineFunction &MF, FMinFMaxLegacyInfo &Info) {
+  // FIXME: Combines should have subtarget predicates, and we shouldn't need
+  // this here.
+  if (!MF.getSubtarget<GCNSubtarget>().hasFminFmaxLegacy())
+    return false;
+
+  // FIXME: Type predicate on pattern
+  if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32))
+    return false;
+
+  // The condition must be a G_FCMP that has no other non-debug user.
+  Register Cond = MI.getOperand(1).getReg();
+  if (!MRI.hasOneNonDBGUse(Cond) ||
+      !mi_match(Cond, MRI,
+                m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS))))
+    return false;
+
+  Info.True = MI.getOperand(2).getReg();
+  Info.False = MI.getOperand(3).getReg();
+
+  // The select has to choose between exactly the two compared values.
+  if (!(Info.LHS == Info.True && Info.RHS == Info.False) &&
+      !(Info.LHS == Info.False && Info.RHS == Info.True))
+    return false;
+
+  // Equality, ordered/unordered tests and constant predicates do not
+  // describe a min or max.
+  switch (Info.Pred) {
+  case CmpInst::FCMP_FALSE:
+  case CmpInst::FCMP_OEQ:
+  case CmpInst::FCMP_ONE:
+  case CmpInst::FCMP_ORD:
+  case CmpInst::FCMP_UNO:
+  case CmpInst::FCMP_UEQ:
+  case CmpInst::FCMP_UNE:
+  case CmpInst::FCMP_TRUE:
+    return false;
+  default:
+    return true;
+  }
+}
+
+/// Rewrite a G_SELECT accepted by matchFMinFMaxLegacy into
+/// G_AMDGPU_FMIN_LEGACY or G_AMDGPU_FMAX_LEGACY and erase the select.
+///
+/// The opcode and operand order are picked from the predicate and from which
+/// compared value sits in the select's true slot; the order matters for NaN
+/// inputs.
+///
+/// \param MI   The matched G_SELECT; erased before returning.
+/// \param Info Match data filled in by matchFMinFMaxLegacy.
+static void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
+                                              const FMinFMaxLegacyInfo &Info) {
+
+  // Builds the replacement with a builder constructed from MI: it defines
+  // MI's result operand from (X, Y) and carries over MI's flags.
+  auto buildNewInst = [&MI](unsigned Opc, Register X, Register Y) {
+    MachineIRBuilder MIB(MI);
+    MIB.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());
+  };
+
+  switch (Info.Pred) {
+  case CmpInst::FCMP_ULT:
+  case CmpInst::FCMP_ULE:
+    if (Info.LHS == Info.True)
+      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
+    else
+      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
+    break;
+  case CmpInst::FCMP_OLE:
+  case CmpInst::FCMP_OLT: {
+    // We need to permute the operands to get the correct NaN behavior. The
+    // selected operand is the second one based on the failing compare with NaN,
+    // so permute it based on the compare type the hardware uses.
+    if (Info.LHS == Info.True)
+      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
+    else
+      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
+    break;
+  }
+  case CmpInst::FCMP_UGE:
+  case CmpInst::FCMP_UGT: {
+    if (Info.LHS == Info.True)
+      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
+    else
+      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
+    break;
+  }
+  case CmpInst::FCMP_OGT:
+  case CmpInst::FCMP_OGE: {
+    if (Info.LHS == Info.True)
+      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
+    else
+      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
+    break;
+  }
+  default:
+    llvm_unreachable("predicate should not have matched");
+  }
+
+  MI.eraseFromParent();
+}
+
+
 #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
 #include "AMDGPUGenGICombiner.inc"
 #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
Index: llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -2791,6 +2791,8 @@
   case AMDGPU::G_FCANONICALIZE:
   case AMDGPU::G_INTRINSIC_TRUNC:
   case AMDGPU::G_AMDGPU_FFBH_U32:
+  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
+  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
     return getDefaultMappingVOP(MI);
   case AMDGPU::G_UMULH:
   case AMDGPU::G_SMULH: {
Index: llvm/lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstructions.td
+++ llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2146,6 +2146,18 @@
   let hasSideEffects = 0;
 }
 
+def G_AMDGPU_FMIN_LEGACY : AMDGPUGenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src0, type0:$src1);
+  let hasSideEffects = 0;
+} + +def G_AMDGPU_FMAX_LEGACY : AMDGPUGenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src0, type0:$src1); + let hasSideEffects = 0; +} + // Atomic cmpxchg. $cmpval ad $newval are packed in a single vector // operand Expects a MachineMemOperand in addition to explicit // operands. Index: llvm/test/CodeGen/AMDGPU/GlobalISel/fmax_legacy.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/fmax_legacy.ll @@ -0,0 +1,257 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefix=GFX6 %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s + +define float @v_test_fmax_legacy_ogt_f32(float %a, float %b) { +; GFX6-LABEL: v_test_fmax_legacy_ogt_f32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_max_legacy_f32_e32 v0, v0, v1 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_test_fmax_legacy_ogt_f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v0, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX8-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ogt float %a, %b + %val = select i1 %cmp, float %a, float %b + ret float %val +} + +define float @v_test_fmax_legacy_oge_f32(float %a, float %b) { +; GFX6-LABEL: v_test_fmax_legacy_oge_f32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_max_legacy_f32_e32 v0, v0, v1 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_test_fmax_legacy_oge_f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cmp_ge_f32_e32 vcc, v0, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, 
v0, vcc +; GFX8-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp oge float %a, %b + %val = select i1 %cmp, float %a, float %b + ret float %val +} + +define float @v_test_fmax_legacy_uge_f32(float %a, float %b) { +; GFX6-LABEL: v_test_fmax_legacy_uge_f32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_max_legacy_f32_e32 v0, v1, v0 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_test_fmax_legacy_uge_f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX8-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge float %a, %b + %val = select i1 %cmp, float %a, float %b + ret float %val +} + +define float @v_test_fmax_legacy_ugt_f32(float %a, float %b) { +; GFX6-LABEL: v_test_fmax_legacy_ugt_f32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_max_legacy_f32_e32 v0, v1, v0 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_test_fmax_legacy_ugt_f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cmp_nle_f32_e32 vcc, v0, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX8-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ugt float %a, %b + %val = select i1 %cmp, float %a, float %b + ret float %val +} + +define float @v_test_fmax_legacy_ole_f32(float %a, float %b) { +; GFX6-LABEL: v_test_fmax_legacy_ole_f32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_max_legacy_f32_e32 v0, v1, v0 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_test_fmax_legacy_ole_f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cmp_le_f32_e32 vcc, v0, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX8-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ole float %a, %b + %val = select i1 %cmp, float %b, float %a + ret float %val +} + +define float @v_test_fmax_legacy_olt_f32(float %a, 
float %b) { +; GFX6-LABEL: v_test_fmax_legacy_olt_f32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_max_legacy_f32_e32 v0, v1, v0 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_test_fmax_legacy_olt_f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX8-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp olt float %a, %b + %val = select i1 %cmp, float %b, float %a + ret float %val +} + +define float @v_test_fmax_legacy_ule_f32(float %a, float %b) { +; GFX6-LABEL: v_test_fmax_legacy_ule_f32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_max_legacy_f32_e32 v0, v0, v1 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_test_fmax_legacy_ule_f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX8-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule float %a, %b + %val = select i1 %cmp, float %b, float %a + ret float %val +} + +define float @v_test_fmax_legacy_ult_f32(float %a, float %b) { +; GFX6-LABEL: v_test_fmax_legacy_ult_f32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_max_legacy_f32_e32 v0, v0, v1 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_test_fmax_legacy_ult_f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cmp_nge_f32_e32 vcc, v0, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX8-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ult float %a, %b + %val = select i1 %cmp, float %b, float %a + ret float %val +} + +define float @v_test_fmax_legacy_oge_f32_fneg_lhs(float %a, float %b) { +; GFX6-LABEL: v_test_fmax_legacy_oge_f32_fneg_lhs: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_max_legacy_f32_e64 v0, -v0, v1 +; 
GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_test_fmax_legacy_oge_f32_fneg_lhs: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_xor_b32_e32 v2, 0x80000000, v0 +; GFX8-NEXT: v_cmp_ge_f32_e64 vcc, -v0, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc +; GFX8-NEXT: s_setpc_b64 s[30:31] + %a.neg = fneg float %a + %cmp = fcmp oge float %a.neg, %b + %val = select i1 %cmp, float %a.neg, float %b + ret float %val +} + +define float @v_test_fmax_legacy_oge_f32_fneg_rhs(float %a, float %b) { +; GFX6-LABEL: v_test_fmax_legacy_oge_f32_fneg_rhs: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_max_legacy_f32_e64 v0, v0, -v1 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_test_fmax_legacy_oge_f32_fneg_rhs: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_xor_b32_e32 v2, 0x80000000, v1 +; GFX8-NEXT: v_cmp_ge_f32_e64 vcc, v0, -v1 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX8-NEXT: s_setpc_b64 s[30:31] + %b.neg = fneg float %b + %cmp = fcmp oge float %a, %b.neg + %val = select i1 %cmp, float %a, float %b.neg + ret float %val +} + +define float @v_test_fcmp_select_ord(float %a, float %b) { +; GFX6-LABEL: v_test_fcmp_select_ord: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_test_fcmp_select_ord: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX8-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ord float %a, %b + %val = select i1 %cmp, float %a, float %b + ret float %val +} + +define float @v_test_fmax_legacy_ule_f32_multi_use(float %a, float %b) { +; GFX6-LABEL: v_test_fmax_legacy_ule_f32_multi_use: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX6-NEXT: v_cmp_gt_f32_e32 vcc, v0, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX6-NEXT: s_mov_b32 m0, -1 +; GFX6-NEXT: ds_write_b32 v0, v1 +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_test_fmax_legacy_ule_f32_multi_use: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v0, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX8-NEXT: s_mov_b32 m0, -1 +; GFX8-NEXT: ds_write_b32 v0, v1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ogt float %a, %b + %val0 = select i1 %cmp, float %a, float %b + %val1 = zext i1 %cmp to i32 + store i32 %val1, i32 addrspace(3)* undef + ret float %val0 +} + +define double @v_test_fmax_legacy_ult_f64(double %a, double %b) { +; GFX6-LABEL: v_test_fmax_legacy_ult_f64: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_cmp_nge_f64_e32 vcc, v[0:1], v[2:3] +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_test_fmax_legacy_ult_f64: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cmp_nge_f64_e32 vcc, v[0:1], v[2:3] +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX8-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ult double %a, %b + %val = select i1 %cmp, double %b, double %a + ret double %val +} Index: llvm/test/CodeGen/AMDGPU/GlobalISel/fmin_legacy.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/fmin_legacy.ll @@ -0,0 +1,386 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck 
-check-prefix=GFX6 %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefix=GFX6 %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s + +; TODO: Merge with DAG test + +define float @v_test_fmin_legacy_ole_f32(float %a, float %b) { +; GFX6-LABEL: v_test_fmin_legacy_ole_f32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_min_legacy_f32_e32 v0, v0, v1 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_test_fmin_legacy_ole_f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cmp_le_f32_e32 vcc, v0, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX8-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ole float %a, %b + %val = select i1 %cmp, float %a, float %b + ret float %val +} + +define float @v_test_fmin_legacy_olt_f32(float %a, float %b) { +; GFX6-LABEL: v_test_fmin_legacy_olt_f32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_min_legacy_f32_e32 v0, v0, v1 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_test_fmin_legacy_olt_f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX8-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp olt float %a, %b + %val = select i1 %cmp, float %a, float %b + ret float %val +} + +define float @v_test_fmin_legacy_ule_f32(float %a, float %b) { +; GFX6-LABEL: v_test_fmin_legacy_ule_f32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_min_legacy_f32_e32 v0, v1, v0 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_test_fmin_legacy_ule_f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX8-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule float %a, 
%b + %val = select i1 %cmp, float %a, float %b + ret float %val +} + +define float @v_test_fmin_legacy_ult_f32(float %a, float %b) { +; GFX6-LABEL: v_test_fmin_legacy_ult_f32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_min_legacy_f32_e32 v0, v1, v0 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_test_fmin_legacy_ult_f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cmp_nge_f32_e32 vcc, v0, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX8-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ult float %a, %b + %val = select i1 %cmp, float %a, float %b + ret float %val +} + +define float @v_test_fmin_legacy_ogt_f32(float %a, float %b) { +; GFX6-LABEL: v_test_fmin_legacy_ogt_f32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_min_legacy_f32_e32 v0, v1, v0 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_test_fmin_legacy_ogt_f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v0, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX8-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ogt float %a, %b + %val = select i1 %cmp, float %b, float %a + ret float %val +} + +define float @v_test_fmin_legacy_oge_f32(float %a, float %b) { +; GFX6-LABEL: v_test_fmin_legacy_oge_f32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_min_legacy_f32_e32 v0, v1, v0 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_test_fmin_legacy_oge_f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cmp_ge_f32_e32 vcc, v0, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX8-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp oge float %a, %b + %val = select i1 %cmp, float %b, float %a + ret float %val +} + +define float @v_test_fmin_legacy_uge_f32(float %a, float %b) { +; GFX6-LABEL: v_test_fmin_legacy_uge_f32: +; GFX6: ; %bb.0: 
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_min_legacy_f32_e32 v0, v0, v1 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_test_fmin_legacy_uge_f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX8-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge float %a, %b + %val = select i1 %cmp, float %b, float %a + ret float %val +} + +define float @v_test_fmin_legacy_ugt_f32(float %a, float %b) { +; GFX6-LABEL: v_test_fmin_legacy_ugt_f32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_min_legacy_f32_e32 v0, v0, v1 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_test_fmin_legacy_ugt_f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cmp_nle_f32_e32 vcc, v0, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX8-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ugt float %a, %b + %val = select i1 %cmp, float %b, float %a + ret float %val +} + +define float @v_test_fmin_legacy_ole_f32_fneg_lhs(float %a, float %b) { +; GFX6-LABEL: v_test_fmin_legacy_ole_f32_fneg_lhs: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_min_legacy_f32_e64 v0, -v0, v1 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_test_fmin_legacy_ole_f32_fneg_lhs: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_xor_b32_e32 v2, 0x80000000, v0 +; GFX8-NEXT: v_cmp_le_f32_e64 vcc, -v0, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc +; GFX8-NEXT: s_setpc_b64 s[30:31] + %a.neg = fneg float %a + %cmp = fcmp ole float %a.neg, %b + %val = select i1 %cmp, float %a.neg, float %b + ret float %val +} + +define float @v_test_fmin_legacy_ole_f32_fneg_rhs(float %a, float %b) { +; GFX6-LABEL: v_test_fmin_legacy_ole_f32_fneg_rhs: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: 
v_min_legacy_f32_e64 v0, v0, -v1 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_test_fmin_legacy_ole_f32_fneg_rhs: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_xor_b32_e32 v2, 0x80000000, v1 +; GFX8-NEXT: v_cmp_le_f32_e64 vcc, v0, -v1 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX8-NEXT: s_setpc_b64 s[30:31] + %b.neg = fneg float %b + %cmp = fcmp ole float %a, %b.neg + %val = select i1 %cmp, float %a, float %b.neg + ret float %val +} + +define float @v_test_fmin_legacy_ule_f32_multi_use(float %a, float %b) { +; GFX6-LABEL: v_test_fmin_legacy_ule_f32_multi_use: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX6-NEXT: s_mov_b32 m0, -1 +; GFX6-NEXT: ds_write_b32 v0, v1 +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_test_fmin_legacy_ule_f32_multi_use: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX8-NEXT: s_mov_b32 m0, -1 +; GFX8-NEXT: ds_write_b32 v0, v1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule float %a, %b + %val0 = select i1 %cmp, float %a, float %b + %val1 = zext i1 %cmp to i32 + store i32 %val1, i32 addrspace(3)* undef + ret float %val0 +} + +define double @v_test_fmin_legacy_ole_f64(double %a, double %b) { +; GFX6-LABEL: v_test_fmin_legacy_ole_f64: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_cmp_le_f64_e32 vcc, v[0:1], v[2:3] +; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_test_fmin_legacy_ole_f64: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cmp_le_f64_e32 vcc, v[0:1], v[2:3] +; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX8-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ole double %a, %b + %val = select i1 %cmp, double %a, double %b + ret double %val +} + +define float @v_test_fcmp_select_oeq(float %a, float %b) { +; GFX6-LABEL: v_test_fcmp_select_oeq: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_cmp_eq_f32_e32 vcc, v0, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_test_fcmp_select_oeq: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, v0, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX8-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp oeq float %a, %b + %val = select i1 %cmp, float %a, float %b + ret float %val +} + +define float @v_test_fcmp_select_one(float %a, float %b) { +; GFX6-LABEL: v_test_fcmp_select_one: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_cmp_lg_f32_e32 vcc, v0, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_test_fcmp_select_one: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cmp_lg_f32_e32 vcc, v0, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX8-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp one float %a, %b + %val = select i1 %cmp, float %a, float %b + ret float %val +} + +define float @v_test_fcmp_select_ord(float %a, float %b) { +; GFX6-LABEL: v_test_fcmp_select_ord: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_test_fcmp_select_ord: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX8-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ord float %a, %b + %val = select i1 %cmp, float %a, float %b + ret float %val +} + +define float @v_test_fcmp_select_uno(float %a, float %b) { +; GFX6-LABEL: v_test_fcmp_select_uno: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v0, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_test_fcmp_select_uno: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v0, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX8-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uno float %a, %b + %val = select i1 %cmp, float %a, float %b + ret float %val +} + +define float @v_test_fcmp_select_ueq(float %a, float %b) { +; GFX6-LABEL: v_test_fcmp_select_ueq: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_cmp_nlg_f32_e32 vcc, v0, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_test_fcmp_select_ueq: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cmp_nlg_f32_e32 vcc, v0, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX8-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ueq float %a, %b + %val = select i1 %cmp, float %a, float %b + ret float %val +} + +define float @v_test_fcmp_select_une(float %a, float %b) { +; GFX6-LABEL: v_test_fcmp_select_une: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_cmp_neq_f32_e32 vcc, v0, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_test_fcmp_select_une: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cmp_neq_f32_e32 vcc, v0, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; 
GFX8-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp une float %a, %b + %val = select i1 %cmp, float %a, float %b + ret float %val +} + +define float @v_test_fcmp_select_true(float %a, float %b) { +; GFX6-LABEL: v_test_fcmp_select_true: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_test_fcmp_select_true: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp true float %a, %b + %val = select i1 %cmp, float %a, float %b + ret float %val +} + +define float @v_test_fcmp_select_false(float %a, float %b) { +; GFX6-LABEL: v_test_fcmp_select_false: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_mov_b32_e32 v0, v1 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_test_fcmp_select_false: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v0, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp false float %a, %b + %val = select i1 %cmp, float %a, float %b + ret float %val +}