Index: llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
+++ llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
@@ -24,6 +24,14 @@
   return P.match(MRI, R);
 }
 
+/// Match \p P against the instruction \p MI itself rather than against a
+/// register. The MachineRegisterInfo is taken from the function that owns
+/// \p MI, so \p MI must be non-null and belong to a function.
+template <typename Pattern>
+bool mi_match(MachineInstr *MI, Pattern &&P) {
+  return P.match(MI->getMF()->getRegInfo(), MI);
+}
+
 // TODO: Extend for N use.
 template <typename SubPatternT> struct OneUse_match {
   SubPatternT SubPat;
@@ -76,6 +84,27 @@
 }
 ///}
 
+/// Matcher for a register whose value is a known integer constant. On
+/// success it binds the register reported by
+/// getConstantVRegValWithLookThrough (ValueAndVReg::VReg), not the constant's
+/// value. Instructions are looked through, so the bound register may differ
+/// from the register that was matched.
+struct CstRegMatch {
+  Register &CR;
+  CstRegMatch(Register &C) : CR(C) {}
+  bool match(const MachineRegisterInfo &MRI, Register Reg) {
+    auto MaybeCst = getConstantVRegValWithLookThrough(
+        Reg, MRI, /*LookThroughInstrs*/ true, /*HandleFConstants*/ false);
+    if (!MaybeCst)
+      return false;
+    CR = MaybeCst->VReg;
+    return true;
+  }
+};
+
+/// Bind \p Reg to the register of a matched integer constant.
+inline CstRegMatch m_ICst(Register &Reg) { return CstRegMatch(Reg); }
+
 // TODO: Rework this for different kinds of MachineOperand.
 // Currently assumes the Src for a match is a register.
 // We might want to support taking in some MachineOperands and call getReg on
@@ -151,6 +180,13 @@
       return true;
     return false;
   }
+  /// Bind directly to an instruction that is being matched by itself instead
+  /// of through a register; fails only when \p Inst is null.
+  static bool bind(const MachineRegisterInfo &MRI, MachineInstr *&MI,
+                   MachineInstr *Inst) {
+    MI = Inst;
+    return MI != nullptr;
+  }
 };
 
 template <> struct bind_helper<LLT> {
@@ -214,6 +250,87 @@
   }
 };
 
+// Helper for (commutable) binary generic MI. Doesn't check opcode.
+//
+// Matches an instruction with exactly three operands whose operands 1 and 2
+// are registers. When Commutable is set, the sub-patterns are also tried
+// against those two operands in swapped order.
+template <typename LHS_P, typename RHS_P, bool Commutable = false>
+struct AnyBinaryOp_match {
+  LHS_P L;
+  RHS_P R;
+
+  AnyBinaryOp_match(const LHS_P &LHS, const RHS_P &RHS) : L(LHS), R(RHS) {}
+  template <typename OpTy>
+  bool match(const MachineRegisterInfo &MRI, OpTy &&Op) {
+    MachineInstr *TmpMI = nullptr;
+    if (!mi_match(Op, MRI, m_MInstr(TmpMI)) || TmpMI->getNumOperands() != 3)
+      return false;
+    // The opcode is not checked, so operands 1 and 2 are not guaranteed to be
+    // registers; getReg() must not be called on anything else.
+    const MachineOperand &Src0 = TmpMI->getOperand(1);
+    const MachineOperand &Src1 = TmpMI->getOperand(2);
+    if (!Src0.isReg() || !Src1.isReg())
+      return false;
+    return (L.match(MRI, Src0.getReg()) && R.match(MRI, Src1.getReg())) ||
+           (Commutable &&
+            (R.match(MRI, Src0.getReg()) && L.match(MRI, Src1.getReg())));
+  }
+};
+
+template <typename LHS, typename RHS>
+inline AnyBinaryOp_match<LHS, RHS, false> m_BinOp(const LHS &L, const RHS &R) {
+  return AnyBinaryOp_match<LHS, RHS, false>(L, R);
+}
+
+template <typename LHS, typename RHS>
+inline AnyBinaryOp_match<LHS, RHS, true> m_CommutableBinOp(const LHS &L,
+                                                           const RHS &R) {
+  return AnyBinaryOp_match<LHS, RHS, true>(L, R);
+}
+
+// Helper for (commutable) binary generic MI that checks Opcode.
+//
+// Same as AnyBinaryOp_match, but the instruction must also have the opcode
+// given at construction time.
+template <typename LHS_P, typename RHS_P, bool Commutable = false>
+struct BinaryOpWithOpcode_match {
+  unsigned Opcode;
+  LHS_P L;
+  RHS_P R;
+
+  BinaryOpWithOpcode_match(unsigned Opcode, const LHS_P &LHS, const RHS_P &RHS)
+      : Opcode(Opcode), L(LHS), R(RHS) {}
+  template <typename OpTy>
+  bool match(const MachineRegisterInfo &MRI, OpTy &&Op) {
+    MachineInstr *TmpMI = nullptr;
+    if (!mi_match(Op, MRI, m_MInstr(TmpMI)) ||
+        TmpMI->getOpcode() != Opcode || TmpMI->getNumOperands() != 3)
+      return false;
+    // Opcode is a runtime value, so still make sure both sources are registers
+    // before reading them.
+    const MachineOperand &Src0 = TmpMI->getOperand(1);
+    const MachineOperand &Src1 = TmpMI->getOperand(2);
+    if (!Src0.isReg() || !Src1.isReg())
+      return false;
+    return (L.match(MRI, Src0.getReg()) && R.match(MRI, Src1.getReg())) ||
+           (Commutable &&
+            (R.match(MRI, Src0.getReg()) && L.match(MRI, Src1.getReg())));
+  }
+};
+
+template <typename LHS, typename RHS>
+inline BinaryOpWithOpcode_match<LHS, RHS, false>
+m_BinOp(unsigned Opcode, const LHS &L, const RHS &R) {
+  return BinaryOpWithOpcode_match<LHS, RHS, false>(Opcode, L, R);
+}
+
+template <typename LHS, typename RHS>
+inline BinaryOpWithOpcode_match<LHS, RHS, true>
+m_CommutableBinOp(unsigned Opcode, const LHS &L, const RHS &R) {
+  return BinaryOpWithOpcode_match<LHS, RHS, true>(Opcode, L, R);
+}
+
 template <typename LHS, typename RHS>
 inline BinaryOp_match<LHS, RHS, TargetOpcode::G_ADD, true>
 m_GAdd(const LHS &L, const RHS &R) {
Index: llvm/include/llvm/CodeGen/GlobalISel/Utils.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -37,6 +37,7 @@
 class TargetPassConfig;
 class TargetRegisterInfo;
 class TargetRegisterClass;
+class ConstantInt;
 class ConstantFP;
 class APFloat;
 
@@ -142,6 +143,10 @@
 getConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI,
                                   bool LookThroughInstrs = true,
                                   bool HandleFConstants = true);
+/// If \p VReg is defined by a G_CONSTANT, return its ConstantInt operand;
+/// otherwise return nullptr.
+const ConstantInt *getConstantIntVRegVal(Register VReg,
+                                         const MachineRegisterInfo &MRI);
 const ConstantFP* getConstantFPVRegVal(Register VReg,
                                        const MachineRegisterInfo &MRI);
 
Index: llvm/lib/CodeGen/GlobalISel/Utils.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -343,6 +343,16 @@
   return ValueAndVReg{Val.getSExtValue(), VReg};
 }
 
+const ConstantInt *
+llvm::getConstantIntVRegVal(Register VReg, const MachineRegisterInfo &MRI) {
+  // getVRegDef() may return null; treat that like any other non-constant def
+  // instead of dereferencing it.
+  const MachineInstr *MI = MRI.getVRegDef(VReg);
+  if (!MI || MI->getOpcode() != TargetOpcode::G_CONSTANT)
+    return nullptr;
+  return MI->getOperand(1).getCImm();
+}
+
 const ConstantFP *
 llvm::getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI) {
   MachineInstr *MI = MRI.getVRegDef(VReg);
Index: llvm/lib/Target/AMDGPU/AMDGPUCombine.td
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -37,6 +37,20 @@
          [{ return PostLegalizerHelper.matchCvtF32UByteN(*${cvt_f32_ubyteN}, ${matchinfo}); }]),
   (apply [{ PostLegalizerHelper.applyCvtF32UByteN(*${cvt_f32_ubyteN}, ${matchinfo}); }])>;
 
+def med3_matchdata : GIDefMatchData<"AMDGPUPostLegalizerCombinerHelper::Med3MatchInfo">;
+
+// Fold min(max(x, K0), K1) or max(min(x, K1), K0) with constant K0 <= K1 into
+// G_AMDGPU_SMED3/G_AMDGPU_UMED3.
+def int_minmax_to_med3 : GICombineRule<
+  (defs root:$min_or_max, med3_matchdata:$matchinfo),
+  (match (wip_match_opcode G_SMAX,
+                           G_SMIN,
+                           G_UMAX,
+                           G_UMIN):$min_or_max,
+         [{ return PostLegalizerHelper.matchIntMinMaxToMed3(*${min_or_max}, ${matchinfo}); }]),
+  (apply [{ PostLegalizerHelper.applyMed3(*${min_or_max}, ${matchinfo}); }])>;
+
+
 // Combines which should only apply on SI/VI
 def gfx6gfx7_combines : GICombineGroup<[fcmp_select_to_fmin_fmax_legacy]>;
 
@@ -49,7 +63,8 @@
 def AMDGPUPostLegalizerCombinerHelper: GICombinerHelper<
   "AMDGPUGenPostLegalizerCombinerHelper",
   [all_combines, gfx6gfx7_combines,
-   uchar_to_float, cvt_f32_ubyteN]> {
+   uchar_to_float, cvt_f32_ubyteN,
+   int_minmax_to_med3]> {
   let DisableRuleOption = "amdgpupostlegalizercombiner-disable-rule";
   let StateClass = "AMDGPUPostLegalizerCombinerHelperState";
   let AdditionalArguments = [];
Index: llvm/lib/Target/AMDGPU/AMDGPUGISel.td
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -210,6 +210,8 @@
 def : GINodeEquiv;
 def : GINodeEquiv;
 def : GINodeEquiv;
+def : GINodeEquiv<G_AMDGPU_SMED3, AMDGPUsmed3>;
+def : GINodeEquiv<G_AMDGPU_UMED3, AMDGPUumed3>;
 
 class GISelSop2Pat <
   SDPatternOperator node,
Index: llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
@@ -66,6 +66,25 @@
   bool matchCvtF32UByteN(MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo);
   void applyCvtF32UByteN(MachineInstr &MI,
                          const CvtF32UByteMatchInfo &MatchInfo);
+
+  /// Min, max and med3 opcodes of one signedness.
+  struct MinMaxMedOpc {
+    unsigned Min, Max, Med;
+  };
+
+  /// Opcode and source registers of the med3 that replaces a matched min/max.
+  struct Med3MatchInfo {
+    unsigned Opc;
+    Register Val0, Val1, Val2;
+  };
+
+  Optional<MinMaxMedOpc> getMinMaxPair(unsigned Opc);
+
+  template <class m_Cst>
+  bool matchMed(MachineInstr &MI, MachineRegisterInfo &MRI, MinMaxMedOpc MMMOpc,
+                Register &Val, Register &K0, Register &K1);
+  bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
+  void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
 };
 
 bool AMDGPUPostLegalizerCombinerHelper::matchFMinFMaxLegacy(
@@ -245,6
+264,97 @@
   MI.eraseFromParent();
 }
 
+/// Return the min, max and med3 opcodes that share the signedness of \p Opc,
+/// or None if \p Opc is not an integer min/max.
+Optional<AMDGPUPostLegalizerCombinerHelper::MinMaxMedOpc>
+AMDGPUPostLegalizerCombinerHelper::getMinMaxPair(unsigned Opc) {
+  switch (Opc) {
+  default:
+    return None;
+  case AMDGPU::G_SMAX:
+  case AMDGPU::G_SMIN:
+    return MinMaxMedOpc{AMDGPU::G_SMIN, AMDGPU::G_SMAX, AMDGPU::G_AMDGPU_SMED3};
+  case AMDGPU::G_UMAX:
+  case AMDGPU::G_UMIN:
+    return MinMaxMedOpc{AMDGPU::G_UMIN, AMDGPU::G_UMAX, AMDGPU::G_AMDGPU_UMED3};
+  }
+}
+
+/// Match \p MI as min(max(Val, K0), K1) or max(min(Val, K1), K0), binding the
+/// non-constant input to \p Val and the constant registers to \p K0 / \p K1.
+/// \p MI is expected to be either MMMOpc.Min or MMMOpc.Max.
+template <class m_Cst>
+bool AMDGPUPostLegalizerCombinerHelper::matchMed(MachineInstr &MI,
+                                                 MachineRegisterInfo &MRI,
+                                                 MinMaxMedOpc MMMOpc,
+                                                 Register &Val, Register &K0,
+                                                 Register &K1) {
+  // 4 operand commutes of: min(max(Val, K0), K1). Find K1 from outer instr:
+  // min(max(...), K1) or min(K1, max(...)). Find K0 and Val from inner instr:
+  // max(K0, Val) or max(Val, K0).
+  if (MI.getOpcode() == MMMOpc.Min)
+    return mi_match(
+        &MI,
+        m_CommutableBinOp(m_CommutableBinOp(MMMOpc.Max, m_Reg(Val), m_Cst(K0)),
+                          m_Cst(K1)));
+  // 4 operand commutes of: max(min(Val, K1), K0). Find K0 from outer instr:
+  // max(min(...), K0) or max(K0, min(...)). Find K1 and Val from inner instr:
+  // min(K1, Val) or min(Val, K1).
+  return mi_match(&MI, m_CommutableBinOp(
+                           m_CommutableBinOp(MMMOpc.Min, m_Reg(Val), m_Cst(K1)),
+                           m_Cst(K0)));
+}
+
+/// Match an s16/s32 integer min/max that clamps a value between two constants
+/// K0 <= K1 and record the equivalent med3 in \p MatchInfo.
+bool AMDGPUPostLegalizerCombinerHelper::matchIntMinMaxToMed3(
+    MachineInstr &MI, Med3MatchInfo &MatchInfo) {
+  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+  if (Ty != LLT::scalar(16) && Ty != LLT::scalar(32))
+    return false;
+
+  auto OpcodeTriple = getMinMaxPair(MI.getOpcode());
+  assert(OpcodeTriple && "Opcode not supported");
+  assert((OpcodeTriple->Med == AMDGPU::G_AMDGPU_SMED3 ||
+          OpcodeTriple->Med == AMDGPU::G_AMDGPU_UMED3) &&
+         "Opcode not supported");
+
+  Register Val, K0, K1;
+  // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1.
+  if (!matchMed<CstRegMatch>(MI, MRI, *OpcodeTriple, Val, K0, K1))
+    return false;
+
+  // CstRegMatch looks through instructions, so K0/K1 are not guaranteed to
+  // have the type of the min/max. They are reused as med3 operands and their
+  // values are compared below, both of which require matching types.
+  if (MRI.getType(K0) != Ty || MRI.getType(K1) != Ty)
+    return false;
+
+  const ConstantInt *K0_CI = getConstantIntVRegVal(K0, MRI);
+  const ConstantInt *K1_CI = getConstantIntVRegVal(K1, MRI);
+  if (!K0_CI || !K1_CI)
+    return false;
+
+  const APInt &K0_Imm = K0_CI->getValue();
+  const APInt &K1_Imm = K1_CI->getValue();
+  if (OpcodeTriple->Med == AMDGPU::G_AMDGPU_SMED3 && K0_Imm.sgt(K1_Imm))
+    return false;
+  if (OpcodeTriple->Med == AMDGPU::G_AMDGPU_UMED3 && K0_Imm.ugt(K1_Imm))
+    return false;
+
+  MatchInfo = {OpcodeTriple->Med, Val, K0, K1};
+  return true;
+}
+
+/// Replace \p MI with the med3 instruction described by \p MatchInfo.
+void AMDGPUPostLegalizerCombinerHelper::applyMed3(MachineInstr &MI,
+                                                  Med3MatchInfo &MatchInfo) {
+  B.setInstrAndDebugLoc(MI);
+  B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)},
+               {MatchInfo.Val0, MatchInfo.Val1, MatchInfo.Val2}, MI.getFlags());
+  MI.eraseFromParent();
+}
+
 class AMDGPUPostLegalizerCombinerHelperState {
 protected:
   CombinerHelper &Helper;
Index: llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -3637,6 +3637,8 @@
   case AMDGPU::G_AMDGPU_CVT_F32_UBYTE1:
   case AMDGPU::G_AMDGPU_CVT_F32_UBYTE2:
   case AMDGPU::G_AMDGPU_CVT_F32_UBYTE3:
+  case AMDGPU::G_AMDGPU_SMED3:
+  case AMDGPU::G_AMDGPU_UMED3:
     return getDefaultMappingVOP(MI);
   case AMDGPU::G_UMULH:
   case AMDGPU::G_SMULH: {
Index: llvm/lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstructions.td
+++ llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2645,3 +2645,15 @@
   let mayLoad = 1;
   let mayStore = 0;
 }
+
+def G_AMDGPU_SMED3 : AMDGPUGenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src0, type0:$src1, type0:$src2);
+  let hasSideEffects = 0;
+}
+
+def G_AMDGPU_UMED3 : AMDGPUGenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src0, type0:$src1, type0:$src2);
+  let hasSideEffects = 0;
+}
Index:
llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-smed3.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-smed3.mir @@ -0,0 +1,234 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: test_min_max_ValK0_K1_i32 +legalized: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $vgpr0, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: test_min_max_ValK0_K1_i32 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -12 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]] + ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 + %2:_(s32) = G_CONSTANT i32 -12 + %3:_(s32) = G_SMAX %0, %2 + %4:_(s32) = G_CONSTANT i32 17 + %5:_(s32) = G_SMIN %3, %4 + $vgpr0 = COPY %5(s32) + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 +... 
+ +--- +name: min_max_ValK0_K1_i32 +legalized: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $vgpr0, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: min_max_ValK0_K1_i32 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -12 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]] + ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 + %2:_(s32) = G_CONSTANT i32 -12 + %3:_(s32) = G_SMAX %2, %0 + %4:_(s32) = G_CONSTANT i32 17 + %5:_(s32) = G_SMIN %3, %4 + $vgpr0 = COPY %5(s32) + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 +... + +--- +name: test_min_K1max_ValK0__i32 +legalized: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $vgpr0, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: test_min_K1max_ValK0__i32 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -12 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]] + ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 + %2:_(s32) = G_CONSTANT i32 -12 + %3:_(s32) = G_SMAX %0, %2 + %4:_(s32) = G_CONSTANT i32 17 + %5:_(s32) = G_SMIN %4, %3 + $vgpr0 = COPY %5(s32) + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 +... 
+ +--- +name: test_min_K1max_K0Val__i32 +legalized: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $vgpr0, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: test_min_K1max_K0Val__i32 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -12 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]] + ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 + %2:_(s32) = G_CONSTANT i32 -12 + %3:_(s32) = G_SMAX %2, %0 + %4:_(s32) = G_CONSTANT i32 17 + %5:_(s32) = G_SMIN %4, %3 + $vgpr0 = COPY %5(s32) + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 +... + +--- +name: test_max_min_ValK1_K0_i32 +legalized: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $vgpr0, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: test_max_min_ValK1_K0_i32 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -12 + ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_SMED3 [[COPY]], [[C1]], [[C]] + ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 + %2:_(s32) = G_CONSTANT i32 17 + %3:_(s32) = G_SMIN %0, %2 + %4:_(s32) = G_CONSTANT i32 -12 + %5:_(s32) = G_SMAX %3, %4 + $vgpr0 = COPY %5(s32) + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 +... 
+ +--- +name: test_max_min_K1Val_K0_i32 +legalized: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $vgpr0, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: test_max_min_K1Val_K0_i32 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -12 + ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_SMED3 [[COPY]], [[C1]], [[C]] + ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 + %2:_(s32) = G_CONSTANT i32 17 + %3:_(s32) = G_SMIN %2, %0 + %4:_(s32) = G_CONSTANT i32 -12 + %5:_(s32) = G_SMAX %3, %4 + $vgpr0 = COPY %5(s32) + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 +... + +--- +name: test_max_K0min_ValK1__i32 +legalized: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $vgpr0, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: test_max_K0min_ValK1__i32 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -12 + ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_SMED3 [[COPY]], [[C1]], [[C]] + ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 + %2:_(s32) = G_CONSTANT i32 17 + %3:_(s32) = G_SMIN %0, %2 + %4:_(s32) = G_CONSTANT i32 -12 + %5:_(s32) = G_SMAX %4, %3 + $vgpr0 = COPY %5(s32) + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 +... 
+ +--- +name: test_max_K0min_K1Val__i32 +legalized: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $vgpr0, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: test_max_K0min_K1Val__i32 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -12 + ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_SMED3 [[COPY]], [[C1]], [[C]] + ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 + %2:_(s32) = G_CONSTANT i32 17 + %3:_(s32) = G_SMIN %2, %0 + %4:_(s32) = G_CONSTANT i32 -12 + %5:_(s32) = G_SMAX %4, %3 + $vgpr0 = COPY %5(s32) + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 +... Index: llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-umed3.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-umed3.mir @@ -0,0 +1,234 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: test_min_max_ValK0_K1_u32 +legalized: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $vgpr0, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: test_min_max_ValK0_K1_u32 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]] + ; CHECK: $vgpr0 = COPY 
[[AMDGPU_UMED3_]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 + %2:_(s32) = G_CONSTANT i32 12 + %3:_(s32) = G_UMAX %0, %2 + %4:_(s32) = G_CONSTANT i32 17 + %5:_(s32) = G_UMIN %3, %4 + $vgpr0 = COPY %5(s32) + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 +... + +--- +name: min_max_ValK0_K1_i32 +legalized: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $vgpr0, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: min_max_ValK0_K1_i32 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]] + ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 + %2:_(s32) = G_CONSTANT i32 12 + %3:_(s32) = G_UMAX %2, %0 + %4:_(s32) = G_CONSTANT i32 17 + %5:_(s32) = G_UMIN %3, %4 + $vgpr0 = COPY %5(s32) + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 +... 
+ +--- +name: test_min_K1max_ValK0__u32 +legalized: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $vgpr0, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: test_min_K1max_ValK0__u32 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]] + ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 + %2:_(s32) = G_CONSTANT i32 12 + %3:_(s32) = G_UMAX %0, %2 + %4:_(s32) = G_CONSTANT i32 17 + %5:_(s32) = G_UMIN %4, %3 + $vgpr0 = COPY %5(s32) + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 +... + +--- +name: test_min_K1max_K0Val__u32 +legalized: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $vgpr0, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: test_min_K1max_K0Val__u32 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]] + ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 + %2:_(s32) = G_CONSTANT i32 12 + %3:_(s32) = G_UMAX %2, %0 + %4:_(s32) = G_CONSTANT i32 17 + %5:_(s32) = G_UMIN %4, %3 + $vgpr0 = COPY %5(s32) + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 +... 
+ +--- +name: test_max_min_ValK1_K0_u32 +legalized: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $vgpr0, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: test_max_min_ValK1_K0_u32 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_UMED3 [[COPY]], [[C1]], [[C]] + ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 + %2:_(s32) = G_CONSTANT i32 17 + %3:_(s32) = G_UMIN %0, %2 + %4:_(s32) = G_CONSTANT i32 12 + %5:_(s32) = G_UMAX %3, %4 + $vgpr0 = COPY %5(s32) + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 +... + +--- +name: test_max_min_K1Val_K0_u32 +legalized: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $vgpr0, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: test_max_min_K1Val_K0_u32 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_UMED3 [[COPY]], [[C1]], [[C]] + ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 + %2:_(s32) = G_CONSTANT i32 17 + %3:_(s32) = G_UMIN %2, %0 + %4:_(s32) = G_CONSTANT i32 12 + %5:_(s32) = G_UMAX %3, %4 + $vgpr0 = COPY %5(s32) + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 +... 
+ +--- +name: test_max_K0min_ValK1__u32 +legalized: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $vgpr0, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: test_max_K0min_ValK1__u32 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_UMED3 [[COPY]], [[C1]], [[C]] + ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 + %2:_(s32) = G_CONSTANT i32 17 + %3:_(s32) = G_UMIN %0, %2 + %4:_(s32) = G_CONSTANT i32 12 + %5:_(s32) = G_UMAX %4, %3 + $vgpr0 = COPY %5(s32) + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 +... + +--- +name: test_max_K0min_K1Val__u32 +legalized: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $vgpr0, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: test_max_K0min_K1Val__u32 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_UMED3 [[COPY]], [[C1]], [[C]] + ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 + %2:_(s32) = G_CONSTANT i32 17 + %3:_(s32) = G_UMIN %2, %0 + %4:_(s32) = G_CONSTANT i32 12 + %5:_(s32) = G_UMAX %4, %3 + $vgpr0 = COPY %5(s32) + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 +... 
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/smed3.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/smed3.ll @@ -0,0 +1,109 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s + +define i32 @test_min_max_ValK0_K1_i32(i32 %a) { +; GFX10-LABEL: test_min_max_ValK0_K1_i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_setpc_b64 s[30:31] + %smax = call i32 @llvm.smax.i32(i32 %a, i32 -12) + %smed = call i32 @llvm.smin.i32(i32 %smax, i32 17) + ret i32 %smed +} + +define i32 @min_max_ValK0_K1_i32(i32 %a) { +; GFX10-LABEL: min_max_ValK0_K1_i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_setpc_b64 s[30:31] + %smax = call i32 @llvm.smax.i32(i32 -12, i32 %a) + %smed = call i32 @llvm.smin.i32(i32 %smax, i32 17) + ret i32 %smed +} + +define i32 @test_min_K1max_ValK0__i32(i32 %a) { +; GFX10-LABEL: test_min_K1max_ValK0__i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_setpc_b64 s[30:31] + %smax = call i32 @llvm.smax.i32(i32 %a, i32 -12) + %smed = call i32 @llvm.smin.i32(i32 17, i32 %smax) + ret i32 %smed +} + +define i32 @test_min_K1max_K0Val__i32(i32 %a) { +; GFX10-LABEL: test_min_K1max_K0Val__i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17 +; 
GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_setpc_b64 s[30:31] + %smax = call i32 @llvm.smax.i32(i32 -12, i32 %a) + %smed = call i32 @llvm.smin.i32(i32 17, i32 %smax) + ret i32 %smed +} + +define i32 @test_max_min_ValK1_K0_i32(i32 %a) { +; GFX10-LABEL: test_max_min_ValK1_K0_i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_setpc_b64 s[30:31] + %smin = call i32 @llvm.smin.i32(i32 %a, i32 17) + %smed = call i32 @llvm.smax.i32(i32 %smin, i32 -12) + ret i32 %smed +} + +define i32 @test_max_min_K1Val_K0_i32(i32 %a) { +; GFX10-LABEL: test_max_min_K1Val_K0_i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_setpc_b64 s[30:31] + %smin = call i32 @llvm.smin.i32(i32 17, i32 %a) + %smed = call i32 @llvm.smax.i32(i32 %smin, i32 -12) + ret i32 %smed +} + +define i32 @test_max_K0min_ValK1__i32(i32 %a) { +; GFX10-LABEL: test_max_K0min_ValK1__i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_setpc_b64 s[30:31] + %smin = call i32 @llvm.smin.i32(i32 %a, i32 17) + %smed = call i32 @llvm.smax.i32(i32 -12, i32 %smin) + ret i32 %smed +} + +define i32 @test_max_K0min_K1Val__i32(i32 %a) { +; GFX10-LABEL: test_max_K0min_K1Val__i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_setpc_b64 s[30:31] + %smin = call i32 @llvm.smin.i32(i32 17, i32 %a) + %smed = call i32 @llvm.smax.i32(i32 -12, i32 %smin) + ret i32 %smed +} + 
+declare i32 @llvm.smin.i32(i32, i32) +declare i32 @llvm.smax.i32(i32, i32) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/umed3.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/umed3.ll @@ -0,0 +1,109 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s + +define i32 @test_min_max_ValK0_K1_u32(i32 %a) { +; GFX10-LABEL: test_min_max_ValK0_K1_u32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_setpc_b64 s[30:31] + %umax = call i32 @llvm.umax.i32(i32 %a, i32 12) + %umed = call i32 @llvm.umin.i32(i32 %umax, i32 17) + ret i32 %umed +} + +define i32 @min_max_ValK0_K1_i32(i32 %a) { +; GFX10-LABEL: min_max_ValK0_K1_i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_setpc_b64 s[30:31] + %umax = call i32 @llvm.umax.i32(i32 12, i32 %a) + %umed = call i32 @llvm.umin.i32(i32 %umax, i32 17) + ret i32 %umed +} + +define i32 @test_min_K1max_ValK0__u32(i32 %a) { +; GFX10-LABEL: test_min_K1max_ValK0__u32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_setpc_b64 s[30:31] + %umax = call i32 @llvm.umax.i32(i32 %a, i32 12) + %umed = call i32 @llvm.umin.i32(i32 17, i32 %umax) + ret i32 %umed +} + +define i32 @test_min_K1max_K0Val__u32(i32 %a) { +; GFX10-LABEL: test_min_K1max_K0Val__u32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: 
s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_setpc_b64 s[30:31] + %umax = call i32 @llvm.umax.i32(i32 12, i32 %a) + %umed = call i32 @llvm.umin.i32(i32 17, i32 %umax) + ret i32 %umed +} + +define i32 @test_max_min_ValK1_K0_u32(i32 %a) { +; GFX10-LABEL: test_max_min_ValK1_K0_u32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_setpc_b64 s[30:31] + %umin = call i32 @llvm.umin.i32(i32 %a, i32 17) + %umed = call i32 @llvm.umax.i32(i32 %umin, i32 12) + ret i32 %umed +} + +define i32 @test_max_min_K1Val_K0_u32(i32 %a) { +; GFX10-LABEL: test_max_min_K1Val_K0_u32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_setpc_b64 s[30:31] + %umin = call i32 @llvm.umin.i32(i32 17, i32 %a) + %umed = call i32 @llvm.umax.i32(i32 %umin, i32 12) + ret i32 %umed +} + +define i32 @test_max_K0min_ValK1__u32(i32 %a) { +; GFX10-LABEL: test_max_K0min_ValK1__u32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_setpc_b64 s[30:31] + %umin = call i32 @llvm.umin.i32(i32 %a, i32 17) + %umed = call i32 @llvm.umax.i32(i32 12, i32 %umin) + ret i32 %umed +} + +define i32 @test_max_K0min_K1Val__u32(i32 %a) { +; GFX10-LABEL: test_max_K0min_K1Val__u32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_setpc_b64 s[30:31] + %umin = call i32 @llvm.umin.i32(i32 17, i32 %a) + %umed = call i32 
@llvm.umax.i32(i32 12, i32 %umin) + ret i32 %umed +} + +declare i32 @llvm.umin.i32(i32, i32) +declare i32 @llvm.umax.i32(i32, i32)