Index: llvm/lib/Target/AMDGPU/AMDGPUCombine.td
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -37,6 +37,16 @@
          [{ return matchCvtF32UByteN(*${cvt_f32_ubyteN}, MRI, *MF, ${matchinfo}); }]),
   (apply [{ applyCvtF32UByteN(*${cvt_f32_ubyteN}, ${matchinfo}); }])>;
 
+def minmax_to_med3_matchdata : GIDefMatchData<"MinMaxToMed3MatchInfo">;
+def minmax_to_med3 : GICombineRule<
+  (defs root:$min_or_max, minmax_to_med3_matchdata:$matchinfo),
+  (match (wip_match_opcode G_SMAX,
+                           G_SMIN,
+                           G_UMAX,
+                           G_UMIN):$min_or_max,
+         [{ return matchMinMaxToMed3(*${min_or_max}, MRI, ${matchinfo}); }]),
+  (apply [{ applyMinMaxToMed3(*${min_or_max}, ${matchinfo}); }])>;
+
 // Combines which should only apply on SI/VI
 def gfx6gfx7_combines : GICombineGroup<[fcmp_select_to_fmin_fmax_legacy]>;
 
@@ -49,7 +59,7 @@
 def AMDGPUPostLegalizerCombinerHelper: GICombinerHelper<
   "AMDGPUGenPostLegalizerCombinerHelper", [all_combines,
                                            gfx6gfx7_combines,
-                                           uchar_to_float, cvt_f32_ubyteN]> {
+                                           uchar_to_float, cvt_f32_ubyteN, minmax_to_med3]> {
   let DisableRuleOption = "amdgpupostlegalizercombiner-disable-rule";
 }
 
Index: llvm/lib/Target/AMDGPU/AMDGPUGISel.td
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -210,6 +210,8 @@
 def : GINodeEquiv;
 def : GINodeEquiv;
 def : GINodeEquiv;
+def : GINodeEquiv<G_AMDGPU_SMED3, AMDGPUsmed3>;
+def : GINodeEquiv<G_AMDGPU_UMED3, AMDGPUumed3>;
 
 class GISelSop2Pat <
   SDPatternOperator node,
Index: llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
@@ -29,6 +29,167 @@
 using namespace llvm;
 using namespace MIPatternMatch;
 
+namespace {
+
+/// Opcodes for one signedness: the min and max opcodes that are matched and
+/// the med3 opcode that replaces a matched min/max pair.
+struct MinMaxMedOpc {
+  unsigned Min, Max, Med;
+};
+
+using MIPtr = const MachineInstr *;
+
+/// Returns true and stores \p MI in \p Cst if it represents constant. A null
+/// \p MI is never a constant.
+bool isConst(MIPtr MI, MachineRegisterInfo &MRI, MIPtr &Cst) {
+  if (MI && MI->getOpcode() == AMDGPU::G_CONSTANT) {
+    Cst = MI;
+    return true;
+  }
+  // TODO: Check for fp vector splat constants(consider ignoring undef in splat
+  // check). Used for clamp in f16 packed instructions.
+  return false;
+}
+
+/// Checks \p MI's operands for constant values. When operands don't hold
+/// constants returns false, otherwise stores the instruction defining the
+/// constant in \p Cst and the register of the other operand in \p Val and
+/// returns true.
+///
+/// \p Val is the operand register itself, not operand 0 of its defining
+/// instruction: when the definition has several results (for example
+/// G_UNMERGE_VALUES) operand 0 can be a different register.
+bool matchOneOpWithCst(const MachineInstr &MI, MachineRegisterInfo &MRI,
+                       Register &Val, MIPtr &Cst) {
+  Register Op1 = MI.getOperand(1).getReg();
+  Register Op2 = MI.getOperand(2).getReg();
+  if (isConst(MRI.getVRegDef(Op1), MRI, Cst)) {
+    Val = Op2;
+    return true;
+  }
+  if (isConst(MRI.getVRegDef(Op2), MRI, Cst)) {
+    Val = Op1;
+    return true;
+  }
+  return false;
+}
+
+/// Check \p MI's operands, one needs to be defined using \p Opc and have a
+/// single non-debug use, other needs to hold constant value. On success,
+/// stores the defining instructions in \p InstWithOpc and \p Cst
+/// respectively.
+bool matchOpcAndCst(const MachineInstr &MI, MachineRegisterInfo &MRI,
+                    unsigned Opc, MIPtr &InstWithOpc, MIPtr &Cst) {
+  Register Op1 = MI.getOperand(1).getReg();
+  Register Op2 = MI.getOperand(2).getReg();
+  const MachineInstr *Op1Def = MRI.getVRegDef(Op1);
+  const MachineInstr *Op2Def = MRI.getVRegDef(Op2);
+  if (Op1Def && MRI.hasOneNonDBGUse(Op1) && Op1Def->getOpcode() == Opc &&
+      isConst(Op2Def, MRI, Cst)) {
+    InstWithOpc = Op1Def;
+    return true;
+  }
+  if (Op2Def && MRI.hasOneNonDBGUse(Op2) && Op2Def->getOpcode() == Opc &&
+      isConst(Op1Def, MRI, Cst)) {
+    InstWithOpc = Op2Def;
+    return true;
+  }
+  return false;
+}
+
+/// Returns the min/max/med3 opcodes with the same signedness as \p Opc, or
+/// {0, 0, 0} when \p Opc is not an integer min or max.
+MinMaxMedOpc getMinMaxPair(unsigned Opc) {
+  switch (Opc) {
+  default:
+    return {0, 0, 0};
+  case AMDGPU::G_SMAX:
+  case AMDGPU::G_SMIN:
+    return {AMDGPU::G_SMIN, AMDGPU::G_SMAX, AMDGPU::G_AMDGPU_SMED3};
+  case AMDGPU::G_UMAX:
+  case AMDGPU::G_UMIN:
+    return {AMDGPU::G_UMIN, AMDGPU::G_UMAX, AMDGPU::G_AMDGPU_UMED3};
+  }
+}
+
+/// Matches \p MI against min(max(Val, K0), K1) or max(min(Val, K1), K0) with
+/// operands in any order, using the opcodes from \p MMMOpc. On success stores
+/// the non-constant register in \p Val, the inner min/max in \p InnerInst and
+/// the instructions defining the constants in \p K0 and \p K1.
+bool matchMed(MachineInstr &MI, MachineRegisterInfo &MRI, MinMaxMedOpc MMMOpc,
+              Register &Val, MIPtr &InnerInst, MIPtr &K0, MIPtr &K1) {
+  const unsigned Opc = MI.getOpcode();
+
+  // 4 operand commutes of: min(max(Val, K0), K1)
+  // Find K1 from outer instruction: min(max(...), K1) or min(K1, max(...))
+  // Find K0 and Val from inner instruction: max(K0, Val) or max(Val, K0)
+  if (Opc == MMMOpc.Min)
+    return matchOpcAndCst(MI, MRI, MMMOpc.Max, InnerInst, K1) &&
+           matchOneOpWithCst(*InnerInst, MRI, Val, K0);
+
+  // 4 operand commutes of: max(min(Val, K1), K0)
+  // Find K0 from outer instruction: max(min(...), K0) or max(K0, min(...))
+  // Find K1 and Val from inner instruction: min(K1, Val) or min(Val, K1)
+  // The outer opcode must be checked here too, otherwise min(min(Val, K1), K0)
+  // would be accepted although it is not a median.
+  if (Opc == MMMOpc.Max)
+    return matchOpcAndCst(MI, MRI, MMMOpc.Min, InnerInst, K0) &&
+           matchOneOpWithCst(*InnerInst, MRI, Val, K1);
+
+  return false;
+}
+
+} // end anonymous namespace
+
+/// Data passed from matchMinMaxToMed3 to applyMinMaxToMed3: the med3 opcode
+/// to build and its three source registers.
+struct MinMaxToMed3MatchInfo {
+  unsigned Opc;
+  Register Val0, Val1, Val2;
+};
+
+/// Matches a 32-bit integer min(max(Val, K0), K1) or max(min(Val, K1), K0)
+/// rooted at \p MI where K0 and K1 are G_CONSTANTs with K0 < K1 (signed or
+/// unsigned compare, following the min/max opcodes) and fills \p MatchInfo
+/// with med3(Val, K0, K1).
+static bool matchMinMaxToMed3(MachineInstr &MI, MachineRegisterInfo &MRI,
+                              MinMaxToMed3MatchInfo &MatchInfo) {
+  MinMaxMedOpc OpcodeTriple = getMinMaxPair(MI.getOpcode());
+  if (!OpcodeTriple.Min)
+    return false;
+
+  // TODO: check for s16. Also s64 and v2s16 for clamp on floating point instrs.
+  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+  if (Ty != LLT::scalar(32))
+    return false;
+
+  Register Val;
+  MIPtr InnerInst = nullptr, K0 = nullptr, K1 = nullptr;
+  // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 < K1.
+  if (!matchMed(MI, MRI, OpcodeTriple, Val, InnerInst, K0, K1))
+    return false;
+
+  const APInt &K0_Imm = K0->getOperand(1).getCImm()->getValue();
+  const APInt &K1_Imm = K1->getOperand(1).getCImm()->getValue();
+  const bool IsSigned = OpcodeTriple.Med == AMDGPU::G_AMDGPU_SMED3;
+  if (IsSigned ? K0_Imm.sge(K1_Imm) : K0_Imm.uge(K1_Imm))
+    return false;
+
+  MatchInfo = {OpcodeTriple.Med, Val, K0->getOperand(0).getReg(),
+               K1->getOperand(0).getReg()};
+  return true;
+}
+
+/// Replaces \p MI with the med3 instruction described by \p MatchInfo,
+/// reusing \p MI's result operand.
+static void applyMinMaxToMed3(MachineInstr &MI,
+                              MinMaxToMed3MatchInfo &MatchInfo) {
+  MachineIRBuilder B(MI);
+  B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)},
+               {MatchInfo.Val0, MatchInfo.Val1, MatchInfo.Val2});
+  MI.eraseFromParent();
+}
+
 struct FMinFMaxLegacyInfo {
   Register LHS;
   Register RHS;
Index: llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -3640,6 +3640,8 @@
   case AMDGPU::G_AMDGPU_CVT_F32_UBYTE1:
   case AMDGPU::G_AMDGPU_CVT_F32_UBYTE2:
   case AMDGPU::G_AMDGPU_CVT_F32_UBYTE3:
+  case AMDGPU::G_AMDGPU_SMED3:
+  case AMDGPU::G_AMDGPU_UMED3:
     return getDefaultMappingVOP(MI);
   case AMDGPU::G_UMULH:
   case AMDGPU::G_SMULH: {
Index: 
llvm/lib/Target/AMDGPU/SIInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/SIInstructions.td +++ llvm/lib/Target/AMDGPU/SIInstructions.td @@ -2681,3 +2681,15 @@ let mayLoad = 1; let mayStore = 0; } + +def G_AMDGPU_SMED3 : AMDGPUGenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src0, type0:$src1, type0:$src2); + let hasSideEffects = 0; +} + +def G_AMDGPU_UMED3 : AMDGPUGenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src0, type0:$src1, type0:$src2); + let hasSideEffects = 0; +} Index: llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-smed3.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-smed3.mir @@ -0,0 +1,234 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: test_min_max_ValK0_K1_i32 +legalized: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $vgpr0, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: test_min_max_ValK0_K1_i32 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -12 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]] + ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 + %2:_(s32) = G_CONSTANT i32 -12 + %3:_(s32) = G_SMAX %0, %2 + %4:_(s32) = G_CONSTANT i32 17 + %5:_(s32) = G_SMIN %3, %4 + $vgpr0 = COPY %5(s32) + 
%6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 +... + +--- +name: min_max_ValK0_K1_i32 +legalized: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $vgpr0, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: min_max_ValK0_K1_i32 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -12 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]] + ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 + %2:_(s32) = G_CONSTANT i32 -12 + %3:_(s32) = G_SMAX %2, %0 + %4:_(s32) = G_CONSTANT i32 17 + %5:_(s32) = G_SMIN %3, %4 + $vgpr0 = COPY %5(s32) + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 +... + +--- +name: test_min_K1max_ValK0__i32 +legalized: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $vgpr0, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: test_min_K1max_ValK0__i32 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -12 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]] + ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 + %2:_(s32) = G_CONSTANT i32 -12 + %3:_(s32) = G_SMAX %0, %2 + %4:_(s32) = G_CONSTANT i32 17 + %5:_(s32) = G_SMIN %4, %3 + $vgpr0 = COPY %5(s32) + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 +... 
+ +--- +name: test_min_K1max_K0Val__i32 +legalized: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $vgpr0, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: test_min_K1max_K0Val__i32 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -12 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]] + ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 + %2:_(s32) = G_CONSTANT i32 -12 + %3:_(s32) = G_SMAX %2, %0 + %4:_(s32) = G_CONSTANT i32 17 + %5:_(s32) = G_SMIN %4, %3 + $vgpr0 = COPY %5(s32) + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 +... + +--- +name: test_max_min_ValK1_K0_i32 +legalized: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $vgpr0, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: test_max_min_ValK1_K0_i32 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -12 + ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_SMED3 [[COPY]], [[C1]], [[C]] + ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 + %2:_(s32) = G_CONSTANT i32 17 + %3:_(s32) = G_SMIN %0, %2 + %4:_(s32) = G_CONSTANT i32 -12 + %5:_(s32) = G_SMAX %3, %4 + $vgpr0 = COPY %5(s32) + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 +... 
+ +--- +name: test_max_min_K1Val_K0_i32 +legalized: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $vgpr0, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: test_max_min_K1Val_K0_i32 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -12 + ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_SMED3 [[COPY]], [[C1]], [[C]] + ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 + %2:_(s32) = G_CONSTANT i32 17 + %3:_(s32) = G_SMIN %2, %0 + %4:_(s32) = G_CONSTANT i32 -12 + %5:_(s32) = G_SMAX %3, %4 + $vgpr0 = COPY %5(s32) + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 +... + +--- +name: test_max_K0min_ValK1__i32 +legalized: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $vgpr0, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: test_max_K0min_ValK1__i32 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -12 + ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_SMED3 [[COPY]], [[C1]], [[C]] + ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 + %2:_(s32) = G_CONSTANT i32 17 + %3:_(s32) = G_SMIN %0, %2 + %4:_(s32) = G_CONSTANT i32 -12 + %5:_(s32) = G_SMAX %4, %3 + $vgpr0 = COPY %5(s32) + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 +... 
+ +--- +name: test_max_K0min_K1Val__i32 +legalized: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $vgpr0, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: test_max_K0min_K1Val__i32 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -12 + ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_SMED3 [[COPY]], [[C1]], [[C]] + ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 + %2:_(s32) = G_CONSTANT i32 17 + %3:_(s32) = G_SMIN %2, %0 + %4:_(s32) = G_CONSTANT i32 -12 + %5:_(s32) = G_SMAX %4, %3 + $vgpr0 = COPY %5(s32) + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 +... Index: llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-umed3.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-umed3.mir @@ -0,0 +1,234 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: test_min_max_ValK0_K1_u32 +legalized: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $vgpr0, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: test_min_max_ValK0_K1_u32 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]] + ; CHECK: $vgpr0 = COPY 
[[AMDGPU_UMED3_]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 + %2:_(s32) = G_CONSTANT i32 12 + %3:_(s32) = G_UMAX %0, %2 + %4:_(s32) = G_CONSTANT i32 17 + %5:_(s32) = G_UMIN %3, %4 + $vgpr0 = COPY %5(s32) + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 +... + +--- +name: min_max_ValK0_K1_i32 +legalized: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $vgpr0, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: min_max_ValK0_K1_i32 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]] + ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 + %2:_(s32) = G_CONSTANT i32 12 + %3:_(s32) = G_UMAX %2, %0 + %4:_(s32) = G_CONSTANT i32 17 + %5:_(s32) = G_UMIN %3, %4 + $vgpr0 = COPY %5(s32) + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 +... 
+ +--- +name: test_min_K1max_ValK0__u32 +legalized: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $vgpr0, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: test_min_K1max_ValK0__u32 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]] + ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 + %2:_(s32) = G_CONSTANT i32 12 + %3:_(s32) = G_UMAX %0, %2 + %4:_(s32) = G_CONSTANT i32 17 + %5:_(s32) = G_UMIN %4, %3 + $vgpr0 = COPY %5(s32) + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 +... + +--- +name: test_min_K1max_K0Val__u32 +legalized: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $vgpr0, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: test_min_K1max_K0Val__u32 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]] + ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 + %2:_(s32) = G_CONSTANT i32 12 + %3:_(s32) = G_UMAX %2, %0 + %4:_(s32) = G_CONSTANT i32 17 + %5:_(s32) = G_UMIN %4, %3 + $vgpr0 = COPY %5(s32) + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 +... 
+ +--- +name: test_max_min_ValK1_K0_u32 +legalized: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $vgpr0, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: test_max_min_ValK1_K0_u32 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_UMED3 [[COPY]], [[C1]], [[C]] + ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 + %2:_(s32) = G_CONSTANT i32 17 + %3:_(s32) = G_UMIN %0, %2 + %4:_(s32) = G_CONSTANT i32 12 + %5:_(s32) = G_UMAX %3, %4 + $vgpr0 = COPY %5(s32) + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 +... + +--- +name: test_max_min_K1Val_K0_u32 +legalized: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $vgpr0, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: test_max_min_K1Val_K0_u32 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_UMED3 [[COPY]], [[C1]], [[C]] + ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 + %2:_(s32) = G_CONSTANT i32 17 + %3:_(s32) = G_UMIN %2, %0 + %4:_(s32) = G_CONSTANT i32 12 + %5:_(s32) = G_UMAX %3, %4 + $vgpr0 = COPY %5(s32) + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 +... 
+ +--- +name: test_max_K0min_ValK1__u32 +legalized: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $vgpr0, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: test_max_K0min_ValK1__u32 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_UMED3 [[COPY]], [[C1]], [[C]] + ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 + %2:_(s32) = G_CONSTANT i32 17 + %3:_(s32) = G_UMIN %0, %2 + %4:_(s32) = G_CONSTANT i32 12 + %5:_(s32) = G_UMAX %4, %3 + $vgpr0 = COPY %5(s32) + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 +... + +--- +name: test_max_K0min_K1Val__u32 +legalized: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $vgpr0, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: test_max_K0min_K1Val__u32 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_UMED3 [[COPY]], [[C1]], [[C]] + ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 + %2:_(s32) = G_CONSTANT i32 17 + %3:_(s32) = G_UMIN %2, %0 + %4:_(s32) = G_CONSTANT i32 12 + %5:_(s32) = G_UMAX %4, %3 + $vgpr0 = COPY %5(s32) + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 +... 
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/smed3.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/smed3.ll @@ -0,0 +1,109 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s + +define i32 @test_min_max_ValK0_K1_i32(i32 %a) { +; GFX10-LABEL: test_min_max_ValK0_K1_i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_setpc_b64 s[30:31] + %smax = call i32 @llvm.smax.i32(i32 %a, i32 -12) + %smed = call i32 @llvm.smin.i32(i32 %smax, i32 17) + ret i32 %smed +} + +define i32 @min_max_ValK0_K1_i32(i32 %a) { +; GFX10-LABEL: min_max_ValK0_K1_i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_setpc_b64 s[30:31] + %smax = call i32 @llvm.smax.i32(i32 -12, i32 %a) + %smed = call i32 @llvm.smin.i32(i32 %smax, i32 17) + ret i32 %smed +} + +define i32 @test_min_K1max_ValK0__i32(i32 %a) { +; GFX10-LABEL: test_min_K1max_ValK0__i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_setpc_b64 s[30:31] + %smax = call i32 @llvm.smax.i32(i32 %a, i32 -12) + %smed = call i32 @llvm.smin.i32(i32 17, i32 %smax) + ret i32 %smed +} + +define i32 @test_min_K1max_K0Val__i32(i32 %a) { +; GFX10-LABEL: test_min_K1max_K0Val__i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17 +; 
GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_setpc_b64 s[30:31] + %smax = call i32 @llvm.smax.i32(i32 -12, i32 %a) + %smed = call i32 @llvm.smin.i32(i32 17, i32 %smax) + ret i32 %smed +} + +define i32 @test_max_min_ValK1_K0_i32(i32 %a) { +; GFX10-LABEL: test_max_min_ValK1_K0_i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_setpc_b64 s[30:31] + %smin = call i32 @llvm.smin.i32(i32 %a, i32 17) + %smed = call i32 @llvm.smax.i32(i32 %smin, i32 -12) + ret i32 %smed +} + +define i32 @test_max_min_K1Val_K0_i32(i32 %a) { +; GFX10-LABEL: test_max_min_K1Val_K0_i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_setpc_b64 s[30:31] + %smin = call i32 @llvm.smin.i32(i32 17, i32 %a) + %smed = call i32 @llvm.smax.i32(i32 %smin, i32 -12) + ret i32 %smed +} + +define i32 @test_max_K0min_ValK1__i32(i32 %a) { +; GFX10-LABEL: test_max_K0min_ValK1__i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_setpc_b64 s[30:31] + %smin = call i32 @llvm.smin.i32(i32 %a, i32 17) + %smed = call i32 @llvm.smax.i32(i32 -12, i32 %smin) + ret i32 %smed +} + +define i32 @test_max_K0min_K1Val__i32(i32 %a) { +; GFX10-LABEL: test_max_K0min_K1Val__i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_setpc_b64 s[30:31] + %smin = call i32 @llvm.smin.i32(i32 17, i32 %a) + %smed = call i32 @llvm.smax.i32(i32 -12, i32 %smin) + ret i32 %smed +} + 
+declare i32 @llvm.smin.i32(i32, i32) +declare i32 @llvm.smax.i32(i32, i32) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/umed3.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/umed3.ll @@ -0,0 +1,109 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s + +define i32 @test_min_max_ValK0_K1_u32(i32 %a) { +; GFX10-LABEL: test_min_max_ValK0_K1_u32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_setpc_b64 s[30:31] + %umax = call i32 @llvm.umax.i32(i32 %a, i32 12) + %umed = call i32 @llvm.umin.i32(i32 %umax, i32 17) + ret i32 %umed +} + +define i32 @min_max_ValK0_K1_i32(i32 %a) { +; GFX10-LABEL: min_max_ValK0_K1_i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_setpc_b64 s[30:31] + %umax = call i32 @llvm.umax.i32(i32 12, i32 %a) + %umed = call i32 @llvm.umin.i32(i32 %umax, i32 17) + ret i32 %umed +} + +define i32 @test_min_K1max_ValK0__u32(i32 %a) { +; GFX10-LABEL: test_min_K1max_ValK0__u32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_setpc_b64 s[30:31] + %umax = call i32 @llvm.umax.i32(i32 %a, i32 12) + %umed = call i32 @llvm.umin.i32(i32 17, i32 %umax) + ret i32 %umed +} + +define i32 @test_min_K1max_K0Val__u32(i32 %a) { +; GFX10-LABEL: test_min_K1max_K0Val__u32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: 
s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_setpc_b64 s[30:31] + %umax = call i32 @llvm.umax.i32(i32 12, i32 %a) + %umed = call i32 @llvm.umin.i32(i32 17, i32 %umax) + ret i32 %umed +} + +define i32 @test_max_min_ValK1_K0_u32(i32 %a) { +; GFX10-LABEL: test_max_min_ValK1_K0_u32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_setpc_b64 s[30:31] + %umin = call i32 @llvm.umin.i32(i32 %a, i32 17) + %umed = call i32 @llvm.umax.i32(i32 %umin, i32 12) + ret i32 %umed +} + +define i32 @test_max_min_K1Val_K0_u32(i32 %a) { +; GFX10-LABEL: test_max_min_K1Val_K0_u32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_setpc_b64 s[30:31] + %umin = call i32 @llvm.umin.i32(i32 17, i32 %a) + %umed = call i32 @llvm.umax.i32(i32 %umin, i32 12) + ret i32 %umed +} + +define i32 @test_max_K0min_ValK1__u32(i32 %a) { +; GFX10-LABEL: test_max_K0min_ValK1__u32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_setpc_b64 s[30:31] + %umin = call i32 @llvm.umin.i32(i32 %a, i32 17) + %umed = call i32 @llvm.umax.i32(i32 12, i32 %umin) + ret i32 %umed +} + +define i32 @test_max_K0min_K1Val__u32(i32 %a) { +; GFX10-LABEL: test_max_K0min_K1Val__u32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_setpc_b64 s[30:31] + %umin = call i32 @llvm.umin.i32(i32 17, i32 %a) + %umed = call i32 
@llvm.umax.i32(i32 12, i32 %umin) + ret i32 %umed +} + +declare i32 @llvm.umin.i32(i32, i32) +declare i32 @llvm.umax.i32(i32, i32)