Index: llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -170,8 +170,6 @@
   case AMDGPUISD::CLAMP:
   case AMDGPUISD::COS_HW:
   case AMDGPUISD::SIN_HW:
-  case AMDGPUISD::FMIN3:
-  case AMDGPUISD::FMAX3:
   case AMDGPUISD::FMED3:
   case AMDGPUISD::FMAD_FTZ:
   case AMDGPUISD::RCP:
Index: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -395,12 +395,6 @@
   FMAX_LEGACY,
   FMIN_LEGACY,
 
-  FMAX3,
-  SMAX3,
-  UMAX3,
-  FMIN3,
-  SMIN3,
-  UMIN3,
   FMED3,
   SMED3,
   UMED3,
Index: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4413,12 +4413,6 @@
   NODE_NAME_CASE(SIN_HW)
   NODE_NAME_CASE(FMAX_LEGACY)
   NODE_NAME_CASE(FMIN_LEGACY)
-  NODE_NAME_CASE(FMAX3)
-  NODE_NAME_CASE(SMAX3)
-  NODE_NAME_CASE(UMAX3)
-  NODE_NAME_CASE(FMIN3)
-  NODE_NAME_CASE(SMIN3)
-  NODE_NAME_CASE(UMIN3)
   NODE_NAME_CASE(FMED3)
   NODE_NAME_CASE(SMED3)
   NODE_NAME_CASE(UMED3)
@@ -4802,8 +4796,6 @@
            DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
   }
   case AMDGPUISD::FMED3:
-  case AMDGPUISD::FMIN3:
-  case AMDGPUISD::FMAX3:
   case AMDGPUISD::FMAD_FTZ: {
     if (SNaN)
       return true;
Index: llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -150,38 +150,6 @@
   []
 >;
 
-// FIXME: TableGen doesn't like commutative instructions with more
-// than 2 operands.
-// out = max(a, b, c) a, b and c are floats
-def AMDGPUfmax3 : SDNode<"AMDGPUISD::FMAX3", SDTFPTernaryOp,
-  [/*SDNPCommutative, SDNPAssociative*/]
->;
-
-// out = max(a, b, c) a, b, and c are signed ints
-def AMDGPUsmax3 : SDNode<"AMDGPUISD::SMAX3", AMDGPUDTIntTernaryOp,
-  [/*SDNPCommutative, SDNPAssociative*/]
->;
-
-// out = max(a, b, c) a, b and c are unsigned ints
-def AMDGPUumax3 : SDNode<"AMDGPUISD::UMAX3", AMDGPUDTIntTernaryOp,
-  [/*SDNPCommutative, SDNPAssociative*/]
->;
-
-// out = min(a, b, c) a, b and c are floats
-def AMDGPUfmin3 : SDNode<"AMDGPUISD::FMIN3", SDTFPTernaryOp,
-  [/*SDNPCommutative, SDNPAssociative*/]
->;
-
-// out = min(a, b, c) a, b and c are signed ints
-def AMDGPUsmin3 : SDNode<"AMDGPUISD::SMIN3", AMDGPUDTIntTernaryOp,
-  [/*SDNPCommutative, SDNPAssociative*/]
->;
-
-// out = min(a, b) a and b are unsigned ints
-def AMDGPUumin3 : SDNode<"AMDGPUISD::UMIN3", AMDGPUDTIntTernaryOp,
-  [/*SDNPCommutative, SDNPAssociative*/]
->;
-
 // out = (src0 + src1 > 0xFFFFFFFF) ? 1 : 0
 def AMDGPUcarry : SDNode<"AMDGPUISD::CARRY", SDTIntBinOp, []>;
 
Index: llvm/lib/Target/AMDGPU/GCNSubtarget.h
===================================================================
--- llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -353,10 +353,6 @@
     return getGeneration() >= AMDGPUSubtarget::GFX9;
   }
 
-  bool hasMin3Max3_16() const {
-    return getGeneration() >= AMDGPUSubtarget::GFX9;
-  }
-
   bool hasFmaMixInsts() const {
     return HasFmaMixInsts;
   }
Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -9728,9 +9728,7 @@
   case ISD::FMINNUM_IEEE:
   case ISD::FMAXNUM_IEEE:
   case AMDGPUISD::CLAMP:
-  case AMDGPUISD::FMED3:
-  case AMDGPUISD::FMAX3:
-  case AMDGPUISD::FMIN3: {
+  case AMDGPUISD::FMED3: {
     // FIXME: Shouldn't treat the generic operations differently based on
     // these. However, we aren't really required to flush the result from
     // minnum/maxnum.
@@ -9980,27 +9978,6 @@
   return isCanonicalized(DAG, N0) ? N0 : SDValue();
 }
 
-static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) {
-  switch (Opc) {
-  case ISD::FMAXNUM:
-  case ISD::FMAXNUM_IEEE:
-    return AMDGPUISD::FMAX3;
-  case ISD::SMAX:
-    return AMDGPUISD::SMAX3;
-  case ISD::UMAX:
-    return AMDGPUISD::UMAX3;
-  case ISD::FMINNUM:
-  case ISD::FMINNUM_IEEE:
-    return AMDGPUISD::FMIN3;
-  case ISD::SMIN:
-    return AMDGPUISD::SMIN3;
-  case ISD::UMIN:
-    return AMDGPUISD::UMIN3;
-  default:
-    llvm_unreachable("Not a min/max opcode");
-  }
-}
-
 SDValue SITargetLowering::performIntMed3ImmCombine(
   SelectionDAG &DAG, const SDLoc &SL,
   SDValue Op0, SDValue Op1, bool Signed) const {
@@ -10120,36 +10097,6 @@
   // Only do this if the inner op has one use since this will just increase
   // register pressure for no benefit.
 
-  if (Opc != AMDGPUISD::FMIN_LEGACY && Opc != AMDGPUISD::FMAX_LEGACY &&
-      !VT.isVector() &&
-      (VT == MVT::i32 || VT == MVT::f32 ||
-       ((VT == MVT::f16 || VT == MVT::i16) && Subtarget->hasMin3Max3_16()))) {
-    // max(max(a, b), c) -> max3(a, b, c)
-    // min(min(a, b), c) -> min3(a, b, c)
-    if (Op0.getOpcode() == Opc && Op0.hasOneUse()) {
-      SDLoc DL(N);
-      return DAG.getNode(minMaxOpcToMin3Max3Opc(Opc),
-                         DL,
-                         N->getValueType(0),
-                         Op0.getOperand(0),
-                         Op0.getOperand(1),
-                         Op1);
-    }
-
-    // Try commuted.
-    // max(a, max(b, c)) -> max3(a, b, c)
-    // min(a, min(b, c)) -> min3(a, b, c)
-    if (Op1.getOpcode() == Opc && Op1.hasOneUse()) {
-      SDLoc DL(N);
-      return DAG.getNode(minMaxOpcToMin3Max3Opc(Opc),
-                         DL,
-                         N->getValueType(0),
-                         Op0,
-                         Op1.getOperand(0),
-                         Op1.getOperand(1));
-    }
-  }
-
   // min(max(x, K0), K1), K0 < K1 -> med3(x, K0, K1)
   if (Opc == ISD::SMIN && Op0.getOpcode() == ISD::SMAX && Op0.hasOneUse()) {
     if (SDValue Med3 = performIntMed3ImmCombine(DAG, SDLoc(N), Op0, Op1, true))
Index: llvm/lib/Target/AMDGPU/VOP3Instructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -347,15 +347,15 @@
 // XXX - Not raising FPException seems suspect, but the manual doesn't say it does
 let mayRaiseFPException = 0 in {
   let isCommutable = 1 in {
-    defm V_MIN3_I32 : VOP3Inst <"v_min3_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUsmin3>;
-    defm V_MIN3_U32 : VOP3Inst <"v_min3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUumin3>;
-    defm V_MAX3_I32 : VOP3Inst <"v_max3_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUsmax3>;
-    defm V_MAX3_U32 : VOP3Inst <"v_max3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUumax3>;
+    defm V_MIN3_I32 : VOP3Inst <"v_min3_i32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+    defm V_MIN3_U32 : VOP3Inst <"v_min3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+    defm V_MAX3_I32 : VOP3Inst <"v_max3_i32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+    defm V_MAX3_U32 : VOP3Inst <"v_max3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
     defm V_MED3_I32 : VOP3Inst <"v_med3_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUsmed3>;
     defm V_MED3_U32 : VOP3Inst <"v_med3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUumed3>;
   } // End isCommutable = 1
-  defm V_MIN3_F32 : VOP3Inst <"v_min3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmin3>;
-  defm V_MAX3_F32 : VOP3Inst <"v_max3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmax3>;
+  defm V_MIN3_F32 : VOP3Inst <"v_min3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
+  defm V_MAX3_F32 : VOP3Inst <"v_max3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
   defm V_MED3_F32 : VOP3Inst <"v_med3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmed3>;
 } // End mayRaiseFPException = 0
 
@@ -609,6 +609,12 @@
   }];
 }
 
+class ThreeOp_i32_Pats <SDPatternOperator op1, SDPatternOperator op2, Instruction inst> : GCNPat <
+  // This matches (op2 (op1 i32:$src0, i32:$src1), i32:$src2) with conditions.
+  (ThreeOpFrag<op1, op2> i32:$src0, i32:$src1, i32:$src2),
+  (inst VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2)
+>;
+
 let SubtargetPredicate = isGFX9Plus in {
 let isCommutable = 1, isReMaterializable = 1 in {
   defm V_ADD3_U32 : VOP3Inst <"v_add3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
@@ -624,13 +630,13 @@
 defm V_MED3_I16 : VOP3Inst <"v_med3_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUsmed3>;
 defm V_MED3_U16 : VOP3Inst <"v_med3_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUumed3>;
 
-defm V_MIN3_F16 : VOP3Inst <"v_min3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmin3>;
-defm V_MIN3_I16 : VOP3Inst <"v_min3_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUsmin3>;
-defm V_MIN3_U16 : VOP3Inst <"v_min3_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUumin3>;
+defm V_MIN3_F16 : VOP3Inst <"v_min3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>>;
+defm V_MIN3_I16 : VOP3Inst <"v_min3_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>>;
+defm V_MIN3_U16 : VOP3Inst <"v_min3_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>>;
 
-defm V_MAX3_F16 : VOP3Inst <"v_max3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmax3>;
-defm V_MAX3_I16 : VOP3Inst <"v_max3_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUsmax3>;
-defm V_MAX3_U16 : VOP3Inst <"v_max3_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUumax3>;
+defm V_MAX3_F16 : VOP3Inst <"v_max3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>>;
+defm V_MAX3_I16 : VOP3Inst <"v_max3_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>>;
+defm V_MAX3_U16 : VOP3Inst <"v_max3_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>>;
 
 defm V_ADD_I16 : VOP3Inst <"v_add_i16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>>;
 defm V_SUB_I16 : VOP3Inst <"v_sub_i16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>>;
@@ -649,13 +655,6 @@
 defm V_LSHL_OR_B32 : VOP3Inst <"v_lshl_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
 } // End isReMaterializable = 1
 
-
-class ThreeOp_i32_Pats <SDPatternOperator op1, SDPatternOperator op2, Instruction inst> : GCNPat <
-  // This matches (op2 (op1 i32:$src0, i32:$src1), i32:$src2) with conditions.
-  (ThreeOpFrag<op1, op2> i32:$src0, i32:$src1, i32:$src2),
-  (inst VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2)
->;
-
 def : ThreeOp_i32_Pats<cshl_32, add, V_LSHL_ADD_U32_e64>;
 def : ThreeOp_i32_Pats<add, cshl_32, V_ADD_LSHL_U32_e64>;
 def : ThreeOp_i32_Pats<add, add, V_ADD3_U32_e64>;
@@ -688,6 +687,39 @@
 def : OpSelBinOpClampPat<ssubsat, V_SUB_I16_e64>;
 } // End SubtargetPredicate = isGFX9Plus
 
+class ThreeOp_i16_Pats <SDPatternOperator op1, SDPatternOperator op2,
+                        Instruction inst> : GCNPat <
+  (ThreeOpFrag<op1, op2> i16:$src0, i16:$src1, i16:$src2),
+  (inst SRCMODS.NONE, i16:$src0, SRCMODS.NONE, i16:$src1,
+        SRCMODS.NONE, i16:$src2, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+class ThreeOpFP_Pats<ValueType vt, SDPatternOperator op1, SDPatternOperator op2,
+                     Instruction inst> : GCNPat <
+  (DivergentBinFrag<op1> (HasOneUseBinOp<op2>
+                          (VOP3Mods vt:$src0, i32:$src0_mods),
+                          (VOP3Mods vt:$src1, i32:$src1_mods)),
+                         (vt (VOP3Mods vt:$src2, i32:$src2_mods))),
+  (inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2,
+        DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+def : ThreeOp_i32_Pats<smin, smin, V_MIN3_I32_e64>;
+def : ThreeOp_i32_Pats<umin, umin, V_MIN3_U32_e64>;
+def : ThreeOp_i32_Pats<smax, smax, V_MAX3_I32_e64>;
+def : ThreeOp_i32_Pats<umax, umax, V_MAX3_U32_e64>;
+def : ThreeOpFP_Pats<f32, fminnum_like, fminnum_like, V_MIN3_F32_e64>;
+def : ThreeOpFP_Pats<f32, fmaxnum_like, fmaxnum_like, V_MAX3_F32_e64>;
+
+let SubtargetPredicate = isGFX9Plus in {
+def : ThreeOp_i16_Pats<smin, smin, V_MIN3_I16_e64>;
+def : ThreeOp_i16_Pats<umin, umin, V_MIN3_U16_e64>;
+def : ThreeOp_i16_Pats<smax, smax, V_MAX3_I16_e64>;
+def : ThreeOp_i16_Pats<umax, umax, V_MAX3_U16_e64>;
+def : ThreeOpFP_Pats<f16, fminnum_like, fminnum_like, V_MIN3_F16_e64>;
+def : ThreeOpFP_Pats<f16, fmaxnum_like, fmaxnum_like, V_MAX3_F16_e64>;
+} // End SubtargetPredicate = isGFX9Plus
+
 def VOP3_PERMLANE_Profile : VOP3_Profile<VOPProfile <[i32, i32, i32, i32]>, VOP3_OPSEL> {
   let Src0RC64 = VRegSrc_32;
   let Src1RC64 = SCSrc_b32;
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/fmax3.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/fmax3.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/fmax3.ll
@@ -10,9 +10,8 @@
 ; SI_VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI_VI-NEXT:    v_mul_f32_e32 v0, 1.0, v0
 ; SI_VI-NEXT:    v_mul_f32_e32 v1, 1.0, v1
-; SI_VI-NEXT:    v_max_f32_e32 v0, v0, v1
-; SI_VI-NEXT:    v_mul_f32_e32 v1, 1.0, v2
-; SI_VI-NEXT:    v_max_f32_e32 v0, v0, v1
+; SI_VI-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; SI_VI-NEXT:    v_max3_f32 v0, v0, v1, v2
 ; SI_VI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_max3_f32_ieee_true:
@@ -20,9 +19,8 @@
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_max_f32_e32 v0, v0, v0
 ; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
-; GFX9-NEXT:    v_max_f32_e32 v1, v2, v2
-; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX9-NEXT:    v_max3_f32 v0, v0, v1, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_max3_f32_ieee_true:
@@ -31,9 +29,8 @@
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
 ; GFX10-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
-; GFX10-NEXT:    v_max_f32_e32 v1, v2, v2
-; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX10-NEXT:    v_max3_f32 v0, v0, v1, v2
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %max = call float @llvm.maxnum.f32(float %a, float %b)
   %max3 = call float @llvm.maxnum.f32(float %max, float %c)
@@ -46,9 +43,8 @@
 ; SI_VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI_VI-NEXT:    v_mul_f32_e32 v0, 1.0, v0
 ; SI_VI-NEXT:    v_mul_f32_e32 v1, 1.0, v1
-; SI_VI-NEXT:    v_max_f32_e32 v0, v0, v1
-; SI_VI-NEXT:    v_mul_f32_e32 v1, 1.0, v2
-; SI_VI-NEXT:    v_max_f32_e32 v0, v1, v0
+; SI_VI-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; SI_VI-NEXT:    v_max3_f32 v0, v0, v1, v2
 ; SI_VI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_max3_f32_commute_ieee_true:
@@ -56,9 +52,8 @@
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_max_f32_e32 v0, v0, v0
 ; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
-; GFX9-NEXT:    v_max_f32_e32 v1, v2, v2
-; GFX9-NEXT:    v_max_f32_e32 v0, v1, v0
+; GFX9-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX9-NEXT:    v_max3_f32 v0, v0, v1, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_max3_f32_commute_ieee_true:
@@ -67,9 +62,8 @@
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
 ; GFX10-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
-; GFX10-NEXT:    v_max_f32_e32 v1, v2, v2
-; GFX10-NEXT:    v_max_f32_e32 v0, v1, v0
+; GFX10-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX10-NEXT:    v_max3_f32 v0, v0, v1, v2
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %max = call float @llvm.maxnum.f32(float %a, float %b)
   %max3 = call float @llvm.maxnum.f32(float %c, float %max)
@@ -105,9 +99,8 @@
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_max_f16_e32 v0, v0, v0
 ; GFX9-NEXT:    v_max_f16_e32 v1, v1, v1
-; GFX9-NEXT:    v_max_f16_e32 v0, v0, v1
-; GFX9-NEXT:    v_max_f16_e32 v1, v2, v2
-; GFX9-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX9-NEXT:    v_max_f16_e32 v2, v2, v2
+; GFX9-NEXT:    v_max3_f16 v0, v0, v1, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_max3_f16_ieee_true:
@@ -116,9 +109,8 @@
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    v_max_f16_e32 v0, v0, v0
 ; GFX10-NEXT:    v_max_f16_e32 v1, v1, v1
-; GFX10-NEXT:    v_max_f16_e32 v0, v0, v1
-; GFX10-NEXT:    v_max_f16_e32 v1, v2, v2
-; GFX10-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX10-NEXT:    v_max_f16_e32 v2, v2, v2
+; GFX10-NEXT:    v_max3_f16 v0, v0, v1, v2
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %max = call half @llvm.maxnum.f16(half %a, half %b)
   %max3 = call half @llvm.maxnum.f16(half %max, half %c)
@@ -154,9 +146,8 @@
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_max_f16_e32 v0, v0, v0
 ; GFX9-NEXT:    v_max_f16_e32 v1, v1, v1
-; GFX9-NEXT:    v_max_f16_e32 v0, v0, v1
-; GFX9-NEXT:    v_max_f16_e32 v1, v2, v2
-; GFX9-NEXT:    v_max_f16_e32 v0, v1, v0
+; GFX9-NEXT:    v_max_f16_e32 v2, v2, v2
+; GFX9-NEXT:    v_max3_f16 v0, v0, v1, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_max3_f16_commute_ieee_true:
@@ -165,9 +156,8 @@
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    v_max_f16_e32 v0, v0, v0
 ; GFX10-NEXT:    v_max_f16_e32 v1, v1, v1
-; GFX10-NEXT:    v_max_f16_e32 v0, v0, v1
-; GFX10-NEXT:    v_max_f16_e32 v1, v2, v2
-; GFX10-NEXT:    v_max_f16_e32 v0, v1, v0
+; GFX10-NEXT:    v_max_f16_e32 v2, v2, v2
+; GFX10-NEXT:    v_max3_f16 v0, v0, v1, v2
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %max = call half @llvm.maxnum.f16(half %a, half %b)
   %max3 = call half @llvm.maxnum.f16(half %c, half %max)
@@ -177,8 +167,7 @@
 define amdgpu_ps float @test_max3_f32_ieee_false(float %a, float %b, float %c) {
 ; GCN-LABEL: test_max3_f32_ieee_false:
 ; GCN:       ; %bb.0:
-; GCN-NEXT:    v_max_f32_e32 v0, v0, v1
-; GCN-NEXT:    v_max_f32_e32 v0, v0, v2
+; GCN-NEXT:    v_max3_f32 v0, v0, v1, v2
 ; GCN-NEXT:    ; return to shader part epilog
   %max = call float @llvm.maxnum.f32(float %a, float %b)
   %max3 = call float @llvm.maxnum.f32(float %max, float %c)
@@ -188,8 +177,7 @@
 define amdgpu_ps float @test_max3_f32_commute_ieee_false(float %a, float %b, float %c) {
 ; GCN-LABEL: test_max3_f32_commute_ieee_false:
 ; GCN:       ; %bb.0:
-; GCN-NEXT:    v_max_f32_e32 v0, v0, v1
-; GCN-NEXT:    v_max_f32_e32 v0, v2, v0
+; GCN-NEXT:    v_max3_f32 v0, v0, v1, v2
 ; GCN-NEXT:    ; return to shader part epilog
   %max = call float @llvm.maxnum.f32(float %a, float %b)
   %max3 = call float @llvm.maxnum.f32(float %c, float %max)
@@ -217,8 +205,7 @@
 ;
 ; GFX9_10-LABEL: test_max3_f16_ieee_false:
 ; GFX9_10:       ; %bb.0:
-; GFX9_10-NEXT:    v_max_f16_e32 v0, v0, v1
-; GFX9_10-NEXT:    v_max_f16_e32 v0, v0, v2
+; GFX9_10-NEXT:    v_max3_f16 v0, v0, v1, v2
 ; GFX9_10-NEXT:    ; return to shader part epilog
   %max = call half @llvm.maxnum.f16(half %a, half %b)
   %max3 = call half @llvm.maxnum.f16(half %max, half %c)
@@ -246,8 +233,7 @@
 ;
 ; GFX9_10-LABEL: test_max3_f16_commute_ieee_false:
 ; GFX9_10:       ; %bb.0:
-; GFX9_10-NEXT:    v_max_f16_e32 v0, v0, v1
-; GFX9_10-NEXT:    v_max_f16_e32 v0, v2, v0
+; GFX9_10-NEXT:    v_max3_f16 v0, v0, v1, v2
 ; GFX9_10-NEXT:    ; return to shader part epilog
   %max = call half @llvm.maxnum.f16(half %a, half %b)
   %max3 = call half @llvm.maxnum.f16(half %c, half %max)
@@ -298,8 +284,7 @@
 ; SI-NEXT:    v_sub_f32_e32 v1, 0x80000000, v1
 ; SI-NEXT:    s_mov_b32 s2, 0
 ; SI-NEXT:    s_mov_b32 s3, 0xf000
-; SI-NEXT:    v_max_f32_e64 v0, |v0|, v1
-; SI-NEXT:    v_max_f32_e32 v0, v0, v2
+; SI-NEXT:    v_max3_f32 v0, |v0|, v1, v2
 ; SI-NEXT:    s_mov_b64 s[0:1], 0
 ; SI-NEXT:    buffer_store_dword v0, v[3:4], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
@@ -307,16 +292,14 @@
 ; VI-LABEL: test_max3_f32_fabs_fneg:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    v_sub_f32_e32 v1, 0x80000000, v1
-; VI-NEXT:    v_max_f32_e64 v0, |v0|, v1
-; VI-NEXT:    v_max_f32_e32 v0, v0, v2
+; VI-NEXT:    v_max3_f32 v0, |v0|, v1, v2
 ; VI-NEXT:    flat_store_dword v[3:4], v0
 ; VI-NEXT:    s_endpgm
 ;
 ; GFX9_10-LABEL: test_max3_f32_fabs_fneg:
 ; GFX9_10:       ; %bb.0:
 ; GFX9_10-NEXT:    v_sub_f32_e32 v1, 0x80000000, v1
-; GFX9_10-NEXT:    v_max_f32_e64 v0, |v0|, v1
-; GFX9_10-NEXT:    v_max_f32_e32 v0, v0, v2
+; GFX9_10-NEXT:    v_max3_f32 v0, |v0|, v1, v2
 ; GFX9_10-NEXT:    global_store_dword v[3:4], v0, off
 ; GFX9_10-NEXT:    s_endpgm
   %fabs = call float @llvm.fabs.f32(float %a)
@@ -332,25 +315,22 @@
 define amdgpu_ps void @test_fmax3_f32_vvv(float %a, float %b, float %c, float addrspace(1)* %out) {
 ; SI-LABEL: test_fmax3_f32_vvv:
 ; SI:       ; %bb.0:
-; SI-NEXT:    v_max_f32_e32 v0, v0, v1
+; SI-NEXT:    v_max3_f32 v0, v0, v1, v2
 ; SI-NEXT:    s_mov_b32 s2, 0
 ; SI-NEXT:    s_mov_b32 s3, 0xf000
-; SI-NEXT:    v_max_f32_e32 v0, v0, v2
 ; SI-NEXT:    s_mov_b64 s[0:1], 0
 ; SI-NEXT:    buffer_store_dword v0, v[3:4], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: test_fmax3_f32_vvv:
 ; VI:       ; %bb.0:
-; VI-NEXT:    v_max_f32_e32 v0, v0, v1
-; VI-NEXT:    v_max_f32_e32 v0, v0, v2
+; VI-NEXT:    v_max3_f32 v0, v0, v1, v2
 ; VI-NEXT:    flat_store_dword v[3:4], v0
 ; VI-NEXT:    s_endpgm
 ;
 ; GFX9_10-LABEL: test_fmax3_f32_vvv:
 ; GFX9_10:       ; %bb.0:
-; GFX9_10-NEXT:    v_max_f32_e32 v0, v0, v1
-; GFX9_10-NEXT:    v_max_f32_e32 v0, v0, v2
+; GFX9_10-NEXT:    v_max3_f32 v0, v0, v1, v2
 ; GFX9_10-NEXT:    global_store_dword v[3:4], v0, off
 ; GFX9_10-NEXT:    s_endpgm
   %fmax = call float @llvm.maxnum.f32(float %a, float %b)
@@ -362,25 +342,22 @@
 define amdgpu_ps void @test_fmax3_f32_svv(float inreg %a, float %b, float %c, float addrspace(1)* %out) {
 ; SI-LABEL: test_fmax3_f32_svv:
 ; SI:       ; %bb.0:
-; SI-NEXT:    v_max_f32_e32 v0, s2, v0
+; SI-NEXT:    v_max3_f32 v0, s2, v0, v1
 ; SI-NEXT:    s_mov_b32 s2, 0
 ; SI-NEXT:    s_mov_b32 s3, 0xf000
-; SI-NEXT:    v_max_f32_e32 v0, v0, v1
 ; SI-NEXT:    s_mov_b64 s[0:1], 0
 ; SI-NEXT:    buffer_store_dword v0, v[2:3], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: test_fmax3_f32_svv:
 ; VI:       ; %bb.0:
-; VI-NEXT:    v_max_f32_e32 v0, s2, v0
-; VI-NEXT:    v_max_f32_e32 v0, v0, v1
+; VI-NEXT:    v_max3_f32 v0, s2, v0, v1
 ; VI-NEXT:    flat_store_dword v[2:3], v0
 ; VI-NEXT:    s_endpgm
 ;
 ; GFX9_10-LABEL: test_fmax3_f32_svv:
 ; GFX9_10:       ; %bb.0:
-; GFX9_10-NEXT:    v_max_f32_e32 v0, s2, v0
-; GFX9_10-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX9_10-NEXT:    v_max3_f32 v0, s2, v0, v1
 ; GFX9_10-NEXT:    global_store_dword v[2:3], v0, off
 ; GFX9_10-NEXT:    s_endpgm
   %fmax = call float @llvm.maxnum.f32(float %a, float %b)
@@ -392,25 +369,22 @@
 define amdgpu_ps void @test_fmax3_f32_vvs(float %a, float %b, float inreg %c, float addrspace(1)* %out) {
 ; SI-LABEL: test_fmax3_f32_vvs:
 ; SI:       ; %bb.0:
-; SI-NEXT:    v_max_f32_e32 v0, v0, v1
-; SI-NEXT:    s_mov_b32 s6, 0
-; SI-NEXT:    s_mov_b32 s7, 0xf000
-; SI-NEXT:    v_max_f32_e32 v0, s2, v0
-; SI-NEXT:    s_mov_b64 s[4:5], 0
-; SI-NEXT:    buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; SI-NEXT:    v_max3_f32 v0, v0, v1, s2
+; SI-NEXT:    s_mov_b32 s2, 0
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b64 s[0:1], 0
+; SI-NEXT:    buffer_store_dword v0, v[2:3], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: test_fmax3_f32_vvs:
 ; VI:       ; %bb.0:
-; VI-NEXT:    v_max_f32_e32 v0, v0, v1
-; VI-NEXT:    v_max_f32_e32 v0, s2, v0
+; VI-NEXT:    v_max3_f32 v0, v0, v1, s2
 ; VI-NEXT:    flat_store_dword v[2:3], v0
 ; VI-NEXT:    s_endpgm
 ;
 ; GFX9_10-LABEL: test_fmax3_f32_vvs:
 ; GFX9_10:       ; %bb.0:
-; GFX9_10-NEXT:    v_max_f32_e32 v0, v0, v1
-; GFX9_10-NEXT:    v_max_f32_e32 v0, s2, v0
+; GFX9_10-NEXT:    v_max3_f32 v0, v0, v1, s2
 ; GFX9_10-NEXT:    global_store_dword v[2:3], v0, off
 ; GFX9_10-NEXT:    s_endpgm
   %fmax = call float @llvm.maxnum.f32(float %a, float %b)
@@ -423,10 +397,9 @@
 ; SI-LABEL: test_fmax3_f32_ssv:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    v_mov_b32_e32 v3, s3
-; SI-NEXT:    v_max_f32_e32 v3, s2, v3
+; SI-NEXT:    v_max3_f32 v0, s2, v3, v0
 ; SI-NEXT:    s_mov_b32 s2, 0
 ; SI-NEXT:    s_mov_b32 s3, 0xf000
-; SI-NEXT:    v_max_f32_e32 v0, v3, v0
 ; SI-NEXT:    s_mov_b64 s[0:1], 0
 ; SI-NEXT:    buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
@@ -434,23 +407,20 @@
 ; VI-LABEL: test_fmax3_f32_ssv:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    v_mov_b32_e32 v3, s3
-; VI-NEXT:    v_max_f32_e32 v3, s2, v3
-; VI-NEXT:    v_max_f32_e32 v0, v3, v0
+; VI-NEXT:    v_max3_f32 v0, s2, v3, v0
 ; VI-NEXT:    flat_store_dword v[1:2], v0
 ; VI-NEXT:    s_endpgm
 ;
 ; GFX9-LABEL: test_fmax3_f32_ssv:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s3
-; GFX9-NEXT:    v_max_f32_e32 v3, s2, v3
-; GFX9-NEXT:    v_max_f32_e32 v0, v3, v0
+; GFX9-NEXT:    v_max3_f32 v0, s2, v3, v0
 ; GFX9-NEXT:    global_store_dword v[1:2], v0, off
 ; GFX9-NEXT:    s_endpgm
 ;
 ; GFX10-LABEL: test_fmax3_f32_ssv:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_max_f32_e64 v3, s2, s3
-; GFX10-NEXT:    v_max_f32_e32 v0, v3, v0
+; GFX10-NEXT:    v_max3_f32 v0, s2, s3, v0
 ; GFX10-NEXT:    global_store_dword v[1:2], v0, off
 ; GFX10-NEXT:    s_endpgm
   %fmax = call float @llvm.maxnum.f32(float %a, float %b)
@@ -462,27 +432,33 @@
 define amdgpu_ps void @test_fmax3_f32_vss(float %a, float inreg %b, float inreg %c, float addrspace(1)* %out) {
 ; SI-LABEL: test_fmax3_f32_vss:
 ; SI:       ; %bb.0:
-; SI-NEXT:    v_max_f32_e32 v0, s2, v0
-; SI-NEXT:    s_mov_b32 s6, 0
-; SI-NEXT:    s_mov_b32 s7, 0xf000
-; SI-NEXT:    v_max_f32_e32 v0, s3, v0
-; SI-NEXT:    s_mov_b64 s[4:5], 0
-; SI-NEXT:    buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
+; SI-NEXT:    v_mov_b32_e32 v3, s3
+; SI-NEXT:    v_max3_f32 v0, v0, s2, v3
+; SI-NEXT:    s_mov_b32 s2, 0
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b64 s[0:1], 0
+; SI-NEXT:    buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: test_fmax3_f32_vss:
 ; VI:       ; %bb.0:
-; VI-NEXT:    v_max_f32_e32 v0, s2, v0
-; VI-NEXT:    v_max_f32_e32 v0, s3, v0
+; VI-NEXT:    v_mov_b32_e32 v3, s3
+; VI-NEXT:    v_max3_f32 v0, v0, s2, v3
 ; VI-NEXT:    flat_store_dword v[1:2], v0
 ; VI-NEXT:    s_endpgm
 ;
-; GFX9_10-LABEL: test_fmax3_f32_vss:
-; GFX9_10:       ; %bb.0:
-; GFX9_10-NEXT:    v_max_f32_e32 v0, s2, v0
-; GFX9_10-NEXT:    v_max_f32_e32 v0, s3, v0
-; GFX9_10-NEXT:    global_store_dword v[1:2], v0, off
-; GFX9_10-NEXT:    s_endpgm
+; GFX9-LABEL: test_fmax3_f32_vss:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    v_mov_b32_e32 v3, s3
+; GFX9-NEXT:    v_max3_f32 v0, v0, s2, v3
+; GFX9-NEXT:    global_store_dword v[1:2], v0, off
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: test_fmax3_f32_vss:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_max3_f32 v0, v0, s2, s3
+; GFX10-NEXT:    global_store_dword v[1:2], v0, off
+; GFX10-NEXT:    s_endpgm
   %fmax = call float @llvm.maxnum.f32(float %a, float %b)
   %fmax3 = call float @llvm.maxnum.f32(float %fmax, float %c)
   store float %fmax3, float addrspace(1)* %out, align 4
@@ -493,10 +469,10 @@
 ; SI-LABEL: test_fmax3_f32_sss:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    v_mov_b32_e32 v2, s3
-; SI-NEXT:    v_max_f32_e32 v2, s2, v2
+; SI-NEXT:    v_mov_b32_e32 v3, s4
+; SI-NEXT:    v_max3_f32 v2, s2, v2, v3
 ; SI-NEXT:    s_mov_b32 s2, 0
 ; SI-NEXT:    s_mov_b32 s3, 0xf000
-; SI-NEXT:    v_max_f32_e32 v2, s4, v2
 ; SI-NEXT:    s_mov_b64 s[0:1], 0
 ; SI-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
@@ -504,23 +480,23 @@
 ; VI-LABEL: test_fmax3_f32_sss:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    v_mov_b32_e32 v2, s3
-; VI-NEXT:    v_max_f32_e32 v2, s2, v2
-; VI-NEXT:    v_max_f32_e32 v2, s4, v2
+; VI-NEXT:    v_mov_b32_e32 v3, s4
+; VI-NEXT:    v_max3_f32 v2, s2, v2, v3
 ; VI-NEXT:    flat_store_dword v[0:1], v2
 ; VI-NEXT:    s_endpgm
 ;
 ; GFX9-LABEL: test_fmax3_f32_sss:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s3
-; GFX9-NEXT:    v_max_f32_e32 v2, s2, v2
-; GFX9-NEXT:    v_max_f32_e32 v2, s4, v2
+; GFX9-NEXT:    v_mov_b32_e32 v3, s4
+; GFX9-NEXT:    v_max3_f32 v2, s2, v2, v3
 ; GFX9-NEXT:    global_store_dword v[0:1], v2, off
 ; GFX9-NEXT:    s_endpgm
 ;
 ; GFX10-LABEL: test_fmax3_f32_sss:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_max_f32_e64 v2, s2, s3
-; GFX10-NEXT:    v_max_f32_e32 v2, s4, v2
+; GFX10-NEXT:    v_mov_b32_e32 v2, s4
+; GFX10-NEXT:    v_max3_f32 v2, s2, s3, v2
 ; GFX10-NEXT:    global_store_dword v[0:1], v2, off
 ; GFX10-NEXT:    s_endpgm
   %fmax = call float @llvm.maxnum.f32(float %a, float %b)
@@ -555,8 +531,7 @@
 ;
 ; GFX9_10-LABEL: test_max3_f16_vvv:
 ; GFX9_10:       ; %bb.0:
-; GFX9_10-NEXT:    v_max_f16_e32 v0, v0, v1
-; GFX9_10-NEXT:    v_max_f16_e32 v0, v0, v2
+; GFX9_10-NEXT:    v_max3_f16 v0, v0, v1, v2
 ; GFX9_10-NEXT:    global_store_short v[3:4], v0, off
 ; GFX9_10-NEXT:    s_endpgm
   %fmax = call half @llvm.maxnum.f16(half %a, half %b)
@@ -591,8 +566,7 @@
 ;
 ; GFX9_10-LABEL: test_max3_f16_svv:
 ; GFX9_10:       ; %bb.0:
-; GFX9_10-NEXT:    v_max_f16_e32 v0, s2, v0
-; GFX9_10-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX9_10-NEXT:    v_max3_f16 v0, s2, v0, v1
 ; GFX9_10-NEXT:    global_store_short v[2:3], v0, off
 ; GFX9_10-NEXT:    s_endpgm
   %fmax = call half @llvm.maxnum.f16(half %a, half %b)
@@ -627,8 +601,7 @@
 ;
 ; GFX9_10-LABEL: test_max3_f16_vvs:
 ; GFX9_10:       ; %bb.0:
-; GFX9_10-NEXT:    v_max_f16_e32 v0, v0, v1
-; GFX9_10-NEXT:    v_max_f16_e32 v0, s2, v0
+; GFX9_10-NEXT:    v_max3_f16 v0, v0, v1, s2
 ; GFX9_10-NEXT:    global_store_short v[2:3], v0, off
 ; GFX9_10-NEXT:    s_endpgm
   %fmax = call half @llvm.maxnum.f16(half %a, half %b)
@@ -665,15 +638,13 @@
 ; GFX9-LABEL: test_max3_f16_ssv:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s3
-; GFX9-NEXT:    v_max_f16_e32 v3, s2, v3
-; GFX9-NEXT:    v_max_f16_e32 v0, v3, v0
+; GFX9-NEXT:    v_max3_f16 v0, s2, v3, v0
 ; GFX9-NEXT:    global_store_short v[1:2], v0, off
 ; GFX9-NEXT:    s_endpgm
 ;
 ; GFX10-LABEL: test_max3_f16_ssv:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_max_f16_e64 v3, s2, s3
-; GFX10-NEXT:    v_max_f16_e32 v0, v3, v0
+; GFX10-NEXT:    v_max3_f16 v0, s2, s3, v0
 ; GFX10-NEXT:    global_store_short v[1:2], v0, off
 ; GFX10-NEXT:    s_endpgm
   %fmax = call half @llvm.maxnum.f16(half %a, half %b)
@@ -706,12 +677,18 @@
 ; VI-NEXT:    flat_store_short v[1:2], v0
 ; VI-NEXT:    s_endpgm
 ;
-; GFX9_10-LABEL: test_max3_f16_vss:
-; GFX9_10:       ; %bb.0:
-; GFX9_10-NEXT:    v_max_f16_e32 v0, s2, v0
-; GFX9_10-NEXT:    v_max_f16_e32 v0, s3, v0
-; GFX9_10-NEXT:    global_store_short v[1:2], v0, off
-; GFX9_10-NEXT:    s_endpgm
+; GFX9-LABEL: test_max3_f16_vss:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    v_mov_b32_e32 v3, s3
+; GFX9-NEXT:    v_max3_f16 v0, v0, s2, v3
+; GFX9-NEXT:    global_store_short v[1:2], v0, off
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: test_max3_f16_vss:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_max3_f16 v0, v0, s2, s3
+; GFX10-NEXT:    global_store_short v[1:2], v0, off
+; GFX10-NEXT:    s_endpgm
   %fmax = call half @llvm.maxnum.f16(half %a, half %b)
   %fmax3 = call half @llvm.maxnum.f16(half %fmax, half %c)
   store half %fmax3, half addrspace(1)* %out, align 4
@@ -746,15 +723,15 @@
 ; GFX9-LABEL: test_max3_f16_sss:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s3
-; GFX9-NEXT:    v_max_f16_e32 v2, s2, v2
-; GFX9-NEXT:    v_max_f16_e32 v2, s4, v2
+; GFX9-NEXT:    v_mov_b32_e32 v3, s4
+; GFX9-NEXT:    v_max3_f16 v2, s2, v2, v3
 ; GFX9-NEXT:    global_store_short v[0:1], v2, off
 ; GFX9-NEXT:    s_endpgm
 ;
 ; GFX10-LABEL: test_max3_f16_sss:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_max_f16_e64 v2, s2, s3
-; GFX10-NEXT:    v_max_f16_e32 v2, s4, v2
+; GFX10-NEXT:    v_mov_b32_e32 v2, s4
+; GFX10-NEXT:    v_max3_f16 v2, s2, s3, v2
 ; GFX10-NEXT:    global_store_short v[0:1], v2, off
 ; GFX10-NEXT:    s_endpgm
   %fmax = call half @llvm.maxnum.f16(half %a, half %b)
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/fmin3.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/fmin3.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/fmin3.ll
@@ -10,9 +10,8 @@
 ; SI_VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI_VI-NEXT:    v_mul_f32_e32 v0, 1.0, v0
 ; SI_VI-NEXT:    v_mul_f32_e32 v1, 1.0, v1
-; SI_VI-NEXT:    v_min_f32_e32 v0, v0, v1
-; SI_VI-NEXT:    v_mul_f32_e32 v1, 1.0, v2
-; SI_VI-NEXT:    v_min_f32_e32 v0, v0, v1
+; SI_VI-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; SI_VI-NEXT:    v_min3_f32 v0, v0, v1, v2
 ; SI_VI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_min3_f32_ieee_true:
@@ -20,9 +19,8 @@
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_max_f32_e32 v0, v0, v0
 ; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
-; GFX9-NEXT:    v_max_f32_e32 v1, v2, v2
-; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX9-NEXT:    v_min3_f32 v0, v0, v1, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_min3_f32_ieee_true:
@@ -31,9 +29,8 @@
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
 ; GFX10-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX10-NEXT:    v_min_f32_e32 v0, v0, v1
-; GFX10-NEXT:    v_max_f32_e32 v1, v2, v2
-; GFX10-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX10-NEXT:    v_min3_f32 v0, v0, v1, v2
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %min = call float @llvm.minnum.f32(float %a, float %b)
   %min3 = call float @llvm.minnum.f32(float %min, float %c)
@@ -46,9 +43,8 @@
 ; SI_VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI_VI-NEXT:    v_mul_f32_e32 v0, 1.0, v0
 ; SI_VI-NEXT:    v_mul_f32_e32 v1, 1.0, v1
-; SI_VI-NEXT:    v_min_f32_e32 v0, v0, v1
-; SI_VI-NEXT:    v_mul_f32_e32 v1, 1.0, v2
-; SI_VI-NEXT:    v_min_f32_e32 v0, v1, v0
+; SI_VI-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; SI_VI-NEXT:    v_min3_f32 v0, v0, v1, v2
 ; SI_VI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_min3_f32_commute_ieee_true:
@@ -56,9 +52,8 @@
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_max_f32_e32 v0, v0, v0
 ; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
-; GFX9-NEXT:    v_max_f32_e32 v1, v2, v2
-; GFX9-NEXT:    v_min_f32_e32 v0, v1, v0
+; GFX9-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX9-NEXT:    v_min3_f32 v0, v0, v1, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_min3_f32_commute_ieee_true:
@@ -67,9 +62,8 @@
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
 ; GFX10-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX10-NEXT:    v_min_f32_e32 v0, v0, v1
-; GFX10-NEXT:    v_max_f32_e32 v1, v2, v2
-; GFX10-NEXT:    v_min_f32_e32 v0, v1, v0
+; GFX10-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX10-NEXT:    v_min3_f32 v0, v0, v1, v2
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %min = call float @llvm.minnum.f32(float %a, float %b)
   %min3 = call float @llvm.minnum.f32(float %c, float %min)
@@ -105,9 +99,8 @@
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_max_f16_e32 v0, v0, v0
 ; GFX9-NEXT:    v_max_f16_e32 v1, v1, v1
-; GFX9-NEXT:    v_min_f16_e32 v0, v0, v1
-; GFX9-NEXT:    v_max_f16_e32 v1, v2, v2
-; GFX9-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX9-NEXT:    v_max_f16_e32 v2, v2, v2
+; GFX9-NEXT:    v_min3_f16 v0, v0, v1, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_min3_f16_ieee_true:
@@ -116,9 +109,8 @@
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    v_max_f16_e32 v0, v0, v0
 ; GFX10-NEXT:    v_max_f16_e32 v1, v1, v1
-; GFX10-NEXT:    v_min_f16_e32 v0, v0, v1
-; GFX10-NEXT:    v_max_f16_e32 v1, v2, v2
-; GFX10-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX10-NEXT:    v_max_f16_e32 v2, v2, v2
+; GFX10-NEXT:    v_min3_f16 v0, v0, v1, v2
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %min = call half @llvm.minnum.f16(half %a, half %b)
   %min3 = call half @llvm.minnum.f16(half %min, half %c)
@@ -154,9 +146,8 @@
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_max_f16_e32 v0, v0, v0
 ; GFX9-NEXT:    v_max_f16_e32 v1, v1, v1
-; GFX9-NEXT:    v_min_f16_e32 v0, v0, v1
-; GFX9-NEXT:    v_max_f16_e32 v1, v2, v2
-; GFX9-NEXT:    v_min_f16_e32 v0, v1, v0
+; GFX9-NEXT:    v_max_f16_e32 v2, v2, v2
+; GFX9-NEXT:    v_min3_f16 v0, v0, v1, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_min3_f16_commute_ieee_true:
@@ -165,9 +156,8 @@
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    v_max_f16_e32 v0, v0, v0
 ; GFX10-NEXT:    v_max_f16_e32 v1, v1, v1
-; GFX10-NEXT:    v_min_f16_e32 v0, v0, v1
-; GFX10-NEXT:    v_max_f16_e32 v1, v2, v2
-; GFX10-NEXT:    v_min_f16_e32 v0, v1, v0
+; GFX10-NEXT:    v_max_f16_e32 v2, v2, v2
+; GFX10-NEXT:    v_min3_f16 v0, v0, v1, v2
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %min = call half @llvm.minnum.f16(half %a, half %b)
   %min3 = call half @llvm.minnum.f16(half %c, half %min)
@@ -177,8 +167,7 @@
 define amdgpu_ps float @test_min3_f32_ieee_false(float %a, float %b, float %c) {
 ; GCN-LABEL: test_min3_f32_ieee_false:
 ; GCN:       ; %bb.0:
-; GCN-NEXT:    v_min_f32_e32 v0, v0, v1
-; GCN-NEXT:    v_min_f32_e32 v0, v0, v2
+; GCN-NEXT:    v_min3_f32 v0, v0, v1, v2
 ; GCN-NEXT:    ; return to shader part epilog
   %min = call float @llvm.minnum.f32(float %a, float %b)
   %min3 = call float @llvm.minnum.f32(float %min, float %c)
@@ -188,8 +177,7 @@
 define amdgpu_ps float @test_min3_f32_commute_ieee_false(float %a, float %b, float %c) {
 ; GCN-LABEL: test_min3_f32_commute_ieee_false:
 ; GCN:       ; %bb.0:
-; GCN-NEXT:    v_min_f32_e32 v0, v0, v1
-; GCN-NEXT:    v_min_f32_e32 v0, v2, v0
+; GCN-NEXT:    v_min3_f32 v0, v0, v1, v2
 ; GCN-NEXT:    ; return to shader part epilog
   %min = call float @llvm.minnum.f32(float %a, float %b)
   %min3 = call float @llvm.minnum.f32(float %c, float %min)
@@ -217,8 +205,7 @@
 ;
 ; GFX9_10-LABEL: test_min3_f16_ieee_false:
 ; GFX9_10:       ; %bb.0:
-; GFX9_10-NEXT:    v_min_f16_e32 v0, v0, v1
-; GFX9_10-NEXT:    v_min_f16_e32 v0, v0, v2
+; GFX9_10-NEXT:    v_min3_f16 v0, v0, v1, v2
 ; GFX9_10-NEXT:    ; return to shader part epilog
   %min = call half @llvm.minnum.f16(half %a, half %b)
   %min3 = call half @llvm.minnum.f16(half %min, half %c)
@@ -246,8 +233,7 @@
 ;
 ; GFX9_10-LABEL: test_min3_f16_commute_ieee_false:
 ; GFX9_10:       ; %bb.0:
-; GFX9_10-NEXT:    v_min_f16_e32 v0, v0, v1
-; GFX9_10-NEXT:    v_min_f16_e32 v0, v2, v0
+; GFX9_10-NEXT:    v_min3_f16 v0, v0, v1, v2
 ; GFX9_10-NEXT:    ; return to shader part epilog
   %min = call half @llvm.minnum.f16(half %a, half %b)
   %min3 = call half @llvm.minnum.f16(half %c, half %min)
@@ -298,8 +284,7 @@
 ; SI-NEXT:    v_sub_f32_e32 v1, 0x80000000, v1
 ; SI-NEXT:    s_mov_b32 s2, 0
 ; SI-NEXT:    s_mov_b32 s3, 0xf000
-; SI-NEXT:    v_min_f32_e64 v0, |v0|, v1
-; SI-NEXT:    v_min_f32_e32 v0, v0, v2
+; SI-NEXT:    v_min3_f32 v0, |v0|, v1, v2
 ; SI-NEXT:    s_mov_b64 s[0:1], 0
 ; SI-NEXT:    buffer_store_dword v0, v[3:4], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
@@ -307,16 +292,14 @@
 ; VI-LABEL: test_min3_f32_fabs_fneg:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    v_sub_f32_e32 v1, 0x80000000, v1
-; VI-NEXT:    v_min_f32_e64 v0, |v0|, v1
-; VI-NEXT:    v_min_f32_e32 v0, v0, v2
+; VI-NEXT:    v_min3_f32 v0, |v0|, v1, v2
 ; VI-NEXT:    flat_store_dword v[3:4], v0
 ; VI-NEXT:    s_endpgm
 ;
 ; GFX9_10-LABEL: test_min3_f32_fabs_fneg:
 ; GFX9_10:       ; %bb.0:
 ; GFX9_10-NEXT:    v_sub_f32_e32 v1, 0x80000000, v1
-; GFX9_10-NEXT:    v_min_f32_e64 v0, |v0|, v1
-; GFX9_10-NEXT:    v_min_f32_e32 v0, v0, v2
+; GFX9_10-NEXT:    v_min3_f32 v0, |v0|, v1, v2
 ; GFX9_10-NEXT:    global_store_dword v[3:4], v0, off
 ; GFX9_10-NEXT:    s_endpgm
   %fabs = call float @llvm.fabs.f32(float %a)
@@ -332,25 +315,22 @@
 define amdgpu_ps void @test_fmin3_f32_vvv(float %a, float %b, float %c, float addrspace(1)* %out) {
 ; SI-LABEL: test_fmin3_f32_vvv:
 ; SI:       ; %bb.0:
-; SI-NEXT:    v_min_f32_e32 v0, v0, v1
+; SI-NEXT:    v_min3_f32 v0, v0, v1, v2
 ; SI-NEXT:    s_mov_b32 s2, 0
 ; SI-NEXT:    s_mov_b32 s3, 0xf000
-; SI-NEXT:    v_min_f32_e32 v0, v0, v2
 ; SI-NEXT:    s_mov_b64 s[0:1], 0
 ; SI-NEXT:    buffer_store_dword v0, v[3:4], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: test_fmin3_f32_vvv:
 ; VI:       ; %bb.0:
-; VI-NEXT:    v_min_f32_e32 v0, v0, v1
-; VI-NEXT:    v_min_f32_e32 v0, v0, v2
+; VI-NEXT:    v_min3_f32 v0, v0, v1, v2
 ; VI-NEXT:    flat_store_dword v[3:4], v0
 ; VI-NEXT:    s_endpgm
 ;
 ; GFX9_10-LABEL: test_fmin3_f32_vvv:
 ; GFX9_10:       ; %bb.0:
-; GFX9_10-NEXT:    v_min_f32_e32 v0, v0, v1
-; GFX9_10-NEXT:    v_min_f32_e32 v0, v0, v2
+; GFX9_10-NEXT:    v_min3_f32 v0, v0, v1, v2
 ; GFX9_10-NEXT:    global_store_dword v[3:4], v0, off
 ; GFX9_10-NEXT:    s_endpgm
   %fmin = call float @llvm.minnum.f32(float %a, float %b)
@@ -362,25 +342,22 @@
 define amdgpu_ps void @test_fmin3_f32_svv(float inreg %a, float %b, float %c, float addrspace(1)* %out) {
 ; SI-LABEL: test_fmin3_f32_svv:
 ; SI:       ; %bb.0:
-; SI-NEXT:    v_min_f32_e32 v0, s2, v0
+; SI-NEXT:    v_min3_f32 v0, s2, v0, v1
 ; SI-NEXT:    s_mov_b32 s2, 0
 ; SI-NEXT:    s_mov_b32 s3, 0xf000
-; SI-NEXT:    v_min_f32_e32 v0, v0, v1
 ; SI-NEXT:    s_mov_b64 s[0:1], 0
 ; SI-NEXT:    buffer_store_dword v0, v[2:3], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: test_fmin3_f32_svv:
 ; VI:       ; %bb.0:
-; VI-NEXT:    v_min_f32_e32 v0, s2, v0
-; VI-NEXT:    v_min_f32_e32 v0, v0, v1
+; VI-NEXT:    v_min3_f32 v0, s2, v0, v1
 ; VI-NEXT:    flat_store_dword v[2:3], v0
 ; VI-NEXT:    s_endpgm
 ;
 ; GFX9_10-LABEL: test_fmin3_f32_svv:
 ; GFX9_10:       ; %bb.0:
-; GFX9_10-NEXT:    v_min_f32_e32 v0, s2, v0
-; GFX9_10-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9_10-NEXT:    v_min3_f32 v0, s2, v0, v1
 ; GFX9_10-NEXT:    global_store_dword v[2:3], v0, off
 ; GFX9_10-NEXT:    s_endpgm
   %fmin = call float @llvm.minnum.f32(float %a, float %b)
@@ -392,25 +369,22 @@
 define amdgpu_ps void @test_fmin3_f32_vvs(float %a, float %b, float inreg %c, float addrspace(1)* %out) {
 ; SI-LABEL: test_fmin3_f32_vvs:
 ; SI:       ; %bb.0:
-; SI-NEXT:    v_min_f32_e32 v0, v0, v1
-; SI-NEXT:    s_mov_b32 s6, 0
-; SI-NEXT:    s_mov_b32 s7, 0xf000
-; SI-NEXT:    v_min_f32_e32 v0, s2, v0
-; SI-NEXT:    s_mov_b64 s[4:5], 0
-; SI-NEXT:    buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; SI-NEXT:    v_min3_f32 v0, v0, v1, s2
+; SI-NEXT:    s_mov_b32 s2, 0
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b64 s[0:1], 0
+; SI-NEXT:    buffer_store_dword v0, v[2:3], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: test_fmin3_f32_vvs:
 ; VI:       ; %bb.0:
-; VI-NEXT:    v_min_f32_e32 v0, v0, v1
-; VI-NEXT:    v_min_f32_e32 v0, s2, v0
+; VI-NEXT:    v_min3_f32 v0, v0, v1, s2
 ; VI-NEXT:    flat_store_dword v[2:3], v0
 ; VI-NEXT:    s_endpgm
 ;
 ; GFX9_10-LABEL: test_fmin3_f32_vvs:
 ; GFX9_10:       ; %bb.0:
-; GFX9_10-NEXT:    v_min_f32_e32 v0, v0, v1
-; GFX9_10-NEXT:    v_min_f32_e32 v0, s2, v0
+; GFX9_10-NEXT:    v_min3_f32 v0, v0, v1, s2
 ; GFX9_10-NEXT:    global_store_dword v[2:3], v0, off
 ; GFX9_10-NEXT:    s_endpgm
   %fmin = call float @llvm.minnum.f32(float %a, float %b)
@@ -423,10 +397,9 @@
 ; SI-LABEL: test_fmin3_f32_ssv:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    v_mov_b32_e32 v3, s3
-; SI-NEXT:    v_min_f32_e32 v3, s2, v3
+; SI-NEXT:    v_min3_f32 v0, s2, v3, v0
 ; SI-NEXT:    s_mov_b32 s2, 0
 ; SI-NEXT:    s_mov_b32 s3, 0xf000
-; SI-NEXT:    v_min_f32_e32 v0, v3, v0
 ; SI-NEXT:    s_mov_b64 s[0:1], 0
 ; SI-NEXT:    buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
@@ -434,23 +407,20 @@
 ; VI-LABEL: test_fmin3_f32_ssv:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    v_mov_b32_e32 v3, s3
-; VI-NEXT:    v_min_f32_e32 v3, s2, v3
-; VI-NEXT:    v_min_f32_e32 v0, v3, v0
+; VI-NEXT:    v_min3_f32 v0, s2, v3, v0
 ; VI-NEXT:    flat_store_dword v[1:2], v0
 ; VI-NEXT:    s_endpgm
 ;
 ; GFX9-LABEL: test_fmin3_f32_ssv:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s3
-; GFX9-NEXT:    v_min_f32_e32 v3, s2, v3
-; GFX9-NEXT:    v_min_f32_e32 v0, v3, v0
+; GFX9-NEXT:    v_min3_f32 v0, s2, v3, v0
 ; GFX9-NEXT:    global_store_dword v[1:2], v0, off
 ; GFX9-NEXT:    s_endpgm
 ;
 ; GFX10-LABEL: test_fmin3_f32_ssv:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_min_f32_e64 v3, s2, s3
-; GFX10-NEXT:    v_min_f32_e32 v0, v3, v0
+; GFX10-NEXT:    v_min3_f32 v0, s2, s3, v0
 ; GFX10-NEXT:    global_store_dword v[1:2], v0, off
 ; GFX10-NEXT:    s_endpgm
   %fmin = call float @llvm.minnum.f32(float %a, float %b)
@@ -462,27 +432,33 @@
 define amdgpu_ps void @test_fmin3_f32_vss(float %a, float inreg %b, float inreg %c, float addrspace(1)* %out) {
 ; SI-LABEL: test_fmin3_f32_vss:
 ; SI:       ; %bb.0:
-; SI-NEXT:    v_min_f32_e32 v0, s2, v0
-; SI-NEXT:    s_mov_b32 s6, 0
-; SI-NEXT:    s_mov_b32 s7, 0xf000
-; SI-NEXT:    v_min_f32_e32 v0, s3, v0
-; SI-NEXT:    s_mov_b64 s[4:5], 0
-; SI-NEXT:    buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
+; SI-NEXT:    v_mov_b32_e32 v3, s3
+; SI-NEXT:    v_min3_f32 v0, v0, s2, v3
+; SI-NEXT:    s_mov_b32 s2, 0
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b64 s[0:1], 0
+; SI-NEXT:    buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: test_fmin3_f32_vss:
 ; VI:       ; %bb.0:
-; VI-NEXT:    v_min_f32_e32 v0, s2, v0
-; VI-NEXT:    v_min_f32_e32 v0, s3, v0
+; VI-NEXT:    v_mov_b32_e32 v3, s3
+; VI-NEXT:    v_min3_f32 v0, v0, s2, v3
 ; VI-NEXT:    flat_store_dword v[1:2], v0
 ; VI-NEXT:    s_endpgm
 ;
-; GFX9_10-LABEL: test_fmin3_f32_vss:
-; GFX9_10:       ; %bb.0:
-; GFX9_10-NEXT:    v_min_f32_e32 v0, s2, v0
-; GFX9_10-NEXT:    v_min_f32_e32 v0, s3, v0
-; GFX9_10-NEXT:    global_store_dword v[1:2], v0, off
-; GFX9_10-NEXT:    s_endpgm
+; GFX9-LABEL: test_fmin3_f32_vss:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    v_mov_b32_e32 v3, s3
+; GFX9-NEXT:    v_min3_f32 v0, v0, s2, v3
+; GFX9-NEXT:    global_store_dword v[1:2], v0, off
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: test_fmin3_f32_vss:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_min3_f32 v0, v0, s2, s3
+; GFX10-NEXT:    global_store_dword v[1:2], v0, off
+; GFX10-NEXT:    s_endpgm
   %fmin = call float @llvm.minnum.f32(float %a, float %b)
   %fmin3 = call float @llvm.minnum.f32(float %fmin, float %c)
   store float %fmin3, float addrspace(1)* %out, align 4
@@ -493,10 +469,10 @@
 ; SI-LABEL: test_fmin3_f32_sss:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    v_mov_b32_e32 v2, s3
-; SI-NEXT:    v_min_f32_e32 v2, s2, v2
+; SI-NEXT:    v_mov_b32_e32 v3, s4
+; SI-NEXT:    v_min3_f32 v2, s2, v2, v3
 ; SI-NEXT:    s_mov_b32 s2, 0
 ; SI-NEXT:    s_mov_b32 s3, 0xf000
-; SI-NEXT:    v_min_f32_e32 v2, s4, v2
 ; SI-NEXT:    s_mov_b64 s[0:1], 0
 ; SI-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
@@ -504,23 +480,23 @@
 ; VI-LABEL: test_fmin3_f32_sss:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    v_mov_b32_e32 v2, s3
-; VI-NEXT:    v_min_f32_e32 v2, s2, v2
-; VI-NEXT:    v_min_f32_e32 v2, s4, v2
+; VI-NEXT:    v_mov_b32_e32 v3, s4
+; VI-NEXT:    v_min3_f32 v2, s2, v2, v3
 ; VI-NEXT:    flat_store_dword v[0:1], v2
 ; VI-NEXT:    s_endpgm
 ;
 ; GFX9-LABEL: test_fmin3_f32_sss:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s3
-; GFX9-NEXT:    v_min_f32_e32 v2, s2, v2
-; GFX9-NEXT:    v_min_f32_e32 v2, s4, v2
+; GFX9-NEXT:    v_mov_b32_e32 v3, s4
+; GFX9-NEXT:    v_min3_f32 v2, s2, v2, v3
 ; GFX9-NEXT:    global_store_dword v[0:1], v2, off
 ; GFX9-NEXT:    s_endpgm
 ;
 ; GFX10-LABEL: test_fmin3_f32_sss:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_min_f32_e64 v2, s2, s3
-; GFX10-NEXT:    v_min_f32_e32 v2, s4, v2
+; GFX10-NEXT:    v_mov_b32_e32 v2, s4
+; GFX10-NEXT:    v_min3_f32 v2, s2, s3, v2
 ; GFX10-NEXT:    global_store_dword v[0:1], v2, off
 ; GFX10-NEXT:    s_endpgm
   %fmin = call float @llvm.minnum.f32(float %a, float %b)
@@ -555,8 +531,7 @@
 ;
 ; GFX9_10-LABEL: test_min3_f16_vvv:
 ; GFX9_10:       ; %bb.0:
-; GFX9_10-NEXT:    v_min_f16_e32 v0, v0, v1
-; GFX9_10-NEXT:    v_min_f16_e32 v0, v0, v2
+; GFX9_10-NEXT:    v_min3_f16 v0, v0, v1, v2
 ; GFX9_10-NEXT:    global_store_short v[3:4], v0, off
 ; GFX9_10-NEXT:    s_endpgm
   %fmin = call half @llvm.minnum.f16(half %a, half %b)
@@ -591,8 +566,7 @@
 ;
 ; GFX9_10-LABEL: test_min3_f16_svv:
 ; GFX9_10:       ; %bb.0:
-; GFX9_10-NEXT:    v_min_f16_e32 v0, s2, v0
-; GFX9_10-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX9_10-NEXT:    v_min3_f16 v0, s2, v0, v1
 ; GFX9_10-NEXT:    global_store_short v[2:3], v0, off
 ; GFX9_10-NEXT:    s_endpgm
   %fmin = call half @llvm.minnum.f16(half %a, half %b)
@@ -627,8 +601,7 @@
 ;
 ; GFX9_10-LABEL: test_min3_f16_vvs:
 ; GFX9_10:       ; %bb.0:
-; GFX9_10-NEXT:    v_min_f16_e32 v0, v0, v1
-; GFX9_10-NEXT:    v_min_f16_e32 v0, s2, v0
+; GFX9_10-NEXT:    v_min3_f16 v0, v0, v1, s2
 ; GFX9_10-NEXT:    global_store_short v[2:3], v0, off
 ; GFX9_10-NEXT:    s_endpgm
   %fmin = call half @llvm.minnum.f16(half %a, half %b)
@@ -665,15 +638,13 @@
 ; GFX9-LABEL: test_min3_f16_ssv:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s3
-; GFX9-NEXT:    v_min_f16_e32 v3, s2, v3
-; GFX9-NEXT:    v_min_f16_e32 v0, v3, v0
+; GFX9-NEXT:    v_min3_f16 v0, s2, v3, v0
 ; GFX9-NEXT:    global_store_short v[1:2], v0, off
 ; GFX9-NEXT:    s_endpgm
 ;
 ; GFX10-LABEL: test_min3_f16_ssv:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_min_f16_e64 v3, s2, s3
-; GFX10-NEXT:    v_min_f16_e32 v0, v3, v0
+; GFX10-NEXT:    v_min3_f16 v0, s2, s3, v0
 ; GFX10-NEXT:    global_store_short v[1:2], v0, off
 ; GFX10-NEXT:    s_endpgm
   %fmin = call half @llvm.minnum.f16(half %a, half %b)
@@ -706,12 +677,18 @@
 ; VI-NEXT:    flat_store_short v[1:2], v0
 ; VI-NEXT:    s_endpgm
 ;
-; GFX9_10-LABEL: test_min3_f16_vss:
-; GFX9_10:       ; %bb.0:
-; GFX9_10-NEXT:    v_min_f16_e32 v0, s2, v0
-; GFX9_10-NEXT:    v_min_f16_e32 v0, s3, v0
-; GFX9_10-NEXT:    global_store_short v[1:2], v0, off
-; GFX9_10-NEXT:    s_endpgm
+; GFX9-LABEL: test_min3_f16_vss:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    v_mov_b32_e32 v3, s3
+; GFX9-NEXT:    v_min3_f16 v0, v0, s2, v3
+; GFX9-NEXT:    global_store_short v[1:2], v0, off
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: test_min3_f16_vss:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_min3_f16 v0, v0, s2, s3
+; GFX10-NEXT:    global_store_short v[1:2], v0, off
+; GFX10-NEXT:    s_endpgm
   %fmin = call half @llvm.minnum.f16(half %a, half %b)
   %fmin3 = call half @llvm.minnum.f16(half %fmin, half %c)
   store half %fmin3, half addrspace(1)* %out, align 4
@@ -746,15 +723,15 @@
 ; GFX9-LABEL: test_min3_f16_sss:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s3
-; GFX9-NEXT:    v_min_f16_e32 v2, s2, v2
-; GFX9-NEXT:    v_min_f16_e32 v2, s4, v2
+; GFX9-NEXT:    v_mov_b32_e32 v3, s4
+; GFX9-NEXT:    v_min3_f16 v2, s2, v2, v3
 ; GFX9-NEXT:    global_store_short v[0:1], v2, off
 ; GFX9-NEXT:    s_endpgm
 ;
 ; GFX10-LABEL: test_min3_f16_sss:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_min_f16_e64 v2, s2, s3
-; GFX10-NEXT:    v_min_f16_e32 v2, s4, v2
+; GFX10-NEXT:    v_mov_b32_e32 v2, s4
+; GFX10-NEXT:    v_min3_f16 v2, s2, s3, v2
 ; GFX10-NEXT:    global_store_short v[0:1], v2, off
 ; GFX10-NEXT:    s_endpgm
   %fmin = call half @llvm.minnum.f16(half %a, half %b)
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/max3.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/max3.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/max3.ll
@@ -8,23 +8,20 @@
 ; SI_VI-LABEL: test_max3_u32:
 ; SI_VI:       ; %bb.0:
 ; SI_VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI_VI-NEXT:    v_max_u32_e32 v0, v0, v1
-; SI_VI-NEXT:    v_max_u32_e32 v0, v0, v2
+; SI_VI-NEXT:    v_max3_u32 v0, v0, v1, v2
 ; SI_VI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_max3_u32:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_u32_e32 v0, v0, v1
-; GFX9-NEXT:    v_max_u32_e32 v0, v0, v2
+; GFX9-NEXT:    v_max3_u32 v0, v0, v1, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_max3_u32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    v_max_u32_e32 v0, v0, v1
-; GFX10-NEXT:    v_max_u32_e32 v0, v0, v2
+; GFX10-NEXT:    v_max3_u32 v0, v0, v1, v2
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %umax = call i32 @llvm.umax.i32(i32 %a, i32 %b)
   %umax3 = call i32 @llvm.umax.i32(i32 %umax, i32 %c)
@@ -46,23 +43,20 @@
 ; SI_VI-LABEL: test_max3_u32_commute:
 ; SI_VI:       ; %bb.0:
 ; SI_VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI_VI-NEXT:    v_max_u32_e32 v0, v0, v1
-; SI_VI-NEXT:    v_max_u32_e32 v0, v2, v0
+; SI_VI-NEXT:    v_max3_u32 v0, v0, v1, v2
 ; SI_VI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_max3_u32_commute:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_u32_e32 v0, v0, v1
-; GFX9-NEXT:    v_max_u32_e32 v0, v2, v0
+; GFX9-NEXT:    v_max3_u32 v0, v0, v1, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_max3_u32_commute:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    v_max_u32_e32 v0, v0, v1
-; GFX10-NEXT:    v_max_u32_e32 v0, v2, v0
+; GFX10-NEXT:    v_max3_u32 v0, v0, v1, v2
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %umax = call i32 @llvm.umax.i32(i32 %a, i32 %b)
   %umax3 = call i32 @llvm.umax.i32(i32 %c, i32 %umax)
@@ -76,9 +70,8 @@
 ; SI-NEXT:    s_mov_b32 s4, 0xffff
 ; SI-NEXT:    v_and_b32_e32 v0, s4, v0
 ; SI-NEXT:    v_and_b32_e32 v1, s4, v1
-; SI-NEXT:    v_max_u32_e32 v0, v0, v1
-; SI-NEXT:    v_and_b32_e32 v1, s4, v2
-; SI-NEXT:    v_max_u32_e32 v0, v0, v1
+; SI-NEXT:    v_and_b32_e32 v2, s4, v2
+; SI-NEXT:    v_max3_u32 v0, v0, v1, v2
 ; SI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-LABEL: test_max3_u16:
@@ -91,16 +84,14 @@
 ; GFX9-LABEL: test_max3_u16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_u16_e32 v0, v0, v1
-; GFX9-NEXT:    v_max_u16_e32 v0, v0, v2
+; GFX9-NEXT:    v_max3_u16 v0, v0, v1, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_max3_u16:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    v_max_u16 v0, v0, v1
-; GFX10-NEXT:    v_max_u16 v0, v0, v2
+; GFX10-NEXT:    v_max3_u16 v0, v0, v1, v2
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %umax = call i16 @llvm.umax.i16(i16 %a, i16 %b)
   %umax3 = call i16 @llvm.umax.i16(i16 %umax, i16 %c)
@@ -114,9 +105,8 @@
 ; SI-NEXT:    s_mov_b32 s4, 0xffff
 ; SI-NEXT:    v_and_b32_e32 v0, s4, v0
 ; SI-NEXT:    v_and_b32_e32 v1, s4, v1
-; SI-NEXT:    v_max_u32_e32 v0, v0, v1
-; SI-NEXT:    v_and_b32_e32 v1, s4, v2
-; SI-NEXT:    v_max_u32_e32 v0, v1, v0
+; SI-NEXT:    v_and_b32_e32 v2, s4, v2
+; SI-NEXT:    v_max3_u32 v0, v0, v1, v2
 ; SI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-LABEL: test_max3_u16_commute:
@@ -129,16 +119,14 @@
 ; GFX9-LABEL: test_max3_u16_commute:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_u16_e32 v0, v0, v1
-; GFX9-NEXT:    v_max_u16_e32 v0, v2, v0
+; GFX9-NEXT:    v_max3_u16 v0, v0, v1, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_max3_u16_commute:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    v_max_u16 v0, v0, v1
-; GFX10-NEXT:    v_max_u16 v0, v2, v0
+; GFX10-NEXT:    v_max3_u16 v0, v0, v1, v2
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %umax = call i16 @llvm.umax.i16(i16 %a, i16 %b)
   %umax3 = call i16 @llvm.umax.i16(i16 %c, i16 %umax)
@@ -149,23 +137,20 @@
 ; SI_VI-LABEL: test_max3_i32:
 ; SI_VI:       ; %bb.0:
 ; SI_VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI_VI-NEXT:    v_max_i32_e32 v0, v0, v1
-; SI_VI-NEXT:    v_max_i32_e32 v0, v0, v2
+; SI_VI-NEXT:    v_max3_i32 v0, v0, v1, v2
 ; SI_VI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_max3_i32:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_i32_e32 v0, v0, v1
-; GFX9-NEXT:    v_max_i32_e32 v0, v0, v2
+; GFX9-NEXT:    v_max3_i32 v0, v0, v1, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_max3_i32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    v_max_i32_e32 v0, v0, v1
-; GFX10-NEXT:    v_max_i32_e32 v0, v0, v2
+; GFX10-NEXT:    v_max3_i32 v0, v0, v1, v2
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %smax = call i32 @llvm.smax.i32(i32 %a, i32 %b)
   %smax3 = call i32 @llvm.smax.i32(i32 %smax, i32 %c)
@@ -187,23 +172,20 @@
 ; SI_VI-LABEL: test_max3_i32_commute:
 ; SI_VI:       ; %bb.0:
 ; SI_VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI_VI-NEXT:    v_max_i32_e32 v0, v0, v1
-; SI_VI-NEXT:    v_max_i32_e32 v0, v2, v0
+; SI_VI-NEXT:    v_max3_i32 v0, v0, v1, v2
 ; SI_VI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_max3_i32_commute:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_i32_e32 v0, v0, v1
-; GFX9-NEXT:    v_max_i32_e32 v0, v2, v0
+; GFX9-NEXT:    v_max3_i32 v0, v0, v1, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_max3_i32_commute:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    v_max_i32_e32 v0, v0, v1
-; GFX10-NEXT:    v_max_i32_e32 v0, v2, v0
+; GFX10-NEXT:    v_max3_i32 v0, v0, v1, v2
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %smax = call i32 @llvm.smax.i32(i32 %a, i32 %b)
   %smax3 = call i32 @llvm.smax.i32(i32 %c, i32 %smax)
@@ -232,16 +214,14 @@
 ; GFX9-LABEL: test_max3_i16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_i16_e32 v0, v0, v1
-; GFX9-NEXT:    v_max_i16_e32 v0, v0, v2
+; GFX9-NEXT:    v_max3_i16 v0, v0, v1, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_max3_i16:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    v_max_i16 v0, v0, v1
-; GFX10-NEXT:    v_max_i16 v0, v0, v2
+; GFX10-NEXT:    v_max3_i16 v0, v0, v1, v2
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %smax = call i16 @llvm.smax.i16(i16 %a, i16 %b)
   %smax3 = call i16 @llvm.smax.i16(i16 %smax, i16 %c)
@@ -270,16 +250,14 @@
 ; GFX9-LABEL: test_max3_i16_commute:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_i16_e32 v0, v0, v1
-; GFX9-NEXT:    v_max_i16_e32 v0, v2, v0
+; GFX9-NEXT:    v_max3_i16 v0, v0, v1, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_max3_i16_commute:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    v_max_i16 v0, v0, v1
-; GFX10-NEXT:    v_max_i16 v0, v2, v0
+; GFX10-NEXT:    v_max3_i16 v0, v0, v1, v2
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %smax = call i16 @llvm.smax.i16(i16 %a, i16 %b)
   %smax3 = call i16 @llvm.smax.i16(i16 %c, i16 %smax)
@@ -329,25 +307,22 @@
 define amdgpu_ps void @test_max3_u32_vvv(i32 %a, i32 %b, i32 %c, i32 addrspace(1)* %out) {
 ; SI-LABEL: test_max3_u32_vvv:
 ; SI:       ; %bb.0:
-; SI-NEXT:    v_max_u32_e32 v0, v0, v1
+; SI-NEXT:    v_max3_u32 v0, v0, v1, v2
 ; SI-NEXT:    s_mov_b32 s2, 0
 ; SI-NEXT:    s_mov_b32 s3, 0xf000
-; SI-NEXT:    v_max_u32_e32 v0, v0, v2
 ; SI-NEXT:    s_mov_b64 s[0:1], 0
 ; SI-NEXT:    buffer_store_dword v0, v[3:4], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: test_max3_u32_vvv:
 ; VI:       ; %bb.0:
-; VI-NEXT:    v_max_u32_e32 v0, v0, v1
-; VI-NEXT:    v_max_u32_e32 v0, v0, v2
+; VI-NEXT:    v_max3_u32 v0, v0, v1, v2
 ; VI-NEXT:    flat_store_dword v[3:4], v0
 ; VI-NEXT:    s_endpgm
 ;
 ; GFX9_10-LABEL: test_max3_u32_vvv:
 ; GFX9_10:       ; %bb.0:
-; GFX9_10-NEXT:    v_max_u32_e32 v0, v0, v1
-; GFX9_10-NEXT:    v_max_u32_e32 v0, v0, v2
+; GFX9_10-NEXT:    v_max3_u32 v0, v0, v1, v2
 ; GFX9_10-NEXT:    global_store_dword v[3:4], v0, off
 ; GFX9_10-NEXT:    s_endpgm
   %max = call i32 @llvm.umax.i32(i32 %a, i32 %b)
@@ -359,25 +334,22 @@
 define amdgpu_ps void @test_max3_u32_svv(i32 inreg %a, i32 %b, i32 %c, i32 addrspace(1)* %out) {
 ; SI-LABEL: test_max3_u32_svv:
 ; SI:       ; %bb.0:
-; SI-NEXT:    v_max_u32_e32 v0, s2, v0
+; SI-NEXT:    v_max3_u32 v0, s2, v0, v1
 ; SI-NEXT:    s_mov_b32 s2, 0
 ; SI-NEXT:    s_mov_b32 s3, 0xf000
-; SI-NEXT:    v_max_u32_e32 v0, v0, v1
 ; SI-NEXT:    s_mov_b64 s[0:1], 0
 ; SI-NEXT:    buffer_store_dword v0, v[2:3], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: test_max3_u32_svv:
 ; VI:       ; %bb.0:
-; VI-NEXT:    v_max_u32_e32 v0, s2, v0
-; VI-NEXT:    v_max_u32_e32 v0, v0, v1
+; VI-NEXT:    v_max3_u32 v0, s2, v0, v1
 ; VI-NEXT:    flat_store_dword v[2:3], v0
 ; VI-NEXT:    s_endpgm
 ;
 ; GFX9_10-LABEL: test_max3_u32_svv:
 ; GFX9_10:       ; %bb.0:
-; GFX9_10-NEXT:    v_max_u32_e32 v0, s2, v0
-; GFX9_10-NEXT:    v_max_u32_e32 v0, v0, v1
+; GFX9_10-NEXT:    v_max3_u32 v0, s2, v0, v1
 ; GFX9_10-NEXT:    global_store_dword v[2:3], v0, off
 ; GFX9_10-NEXT:    s_endpgm
   %max = call i32 @llvm.umax.i32(i32 %a, i32 %b)
@@ -389,25 +361,22 @@
 define amdgpu_ps void @test_max3_u32_vvs(i32 %a, i32 %b, i32 inreg %c, i32 addrspace(1)* %out) {
 ; SI-LABEL: test_max3_u32_vvs:
 ; SI:       ; %bb.0:
-; SI-NEXT:    v_max_u32_e32 v0, v0, v1
-; SI-NEXT:    s_mov_b32 s6, 0
-; SI-NEXT:    s_mov_b32 s7, 0xf000
-; SI-NEXT:    v_max_u32_e32 v0, s2, v0
-; SI-NEXT:    s_mov_b64 s[4:5], 0
-; SI-NEXT:    buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; SI-NEXT:    v_max3_u32 v0, v0, v1, s2
+; SI-NEXT:    s_mov_b32 s2, 0
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b64 s[0:1], 0
+; SI-NEXT:    buffer_store_dword v0, v[2:3], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: test_max3_u32_vvs:
 ; VI:       ; %bb.0:
-; VI-NEXT:    v_max_u32_e32 v0, v0, v1
-; VI-NEXT:    v_max_u32_e32 v0, s2, v0
+; VI-NEXT:    v_max3_u32 v0, v0, v1, s2
 ; VI-NEXT:    flat_store_dword v[2:3], v0
 ; VI-NEXT:    s_endpgm
 ;
 ; GFX9_10-LABEL: test_max3_u32_vvs:
 ; GFX9_10:       ; %bb.0:
-; GFX9_10-NEXT:    v_max_u32_e32 v0, v0, v1
-; GFX9_10-NEXT:    v_max_u32_e32 v0, s2, v0
+; GFX9_10-NEXT:    v_max3_u32 v0, v0, v1, s2
 ; GFX9_10-NEXT:    global_store_dword v[2:3], v0, off
 ; GFX9_10-NEXT:    s_endpgm
   %max = call i32 @llvm.umax.i32(i32 %a, i32 %b)
@@ -449,27 +418,33 @@
 define amdgpu_ps void @test_max3_i32_vss(i32 %a, i32 inreg %b, i32 inreg %c, i32 addrspace(1)* %out) {
 ; SI-LABEL: test_max3_i32_vss:
 ; SI:       ; %bb.0:
-; SI-NEXT:    v_max_i32_e32 v0, s2, v0
-; SI-NEXT:    s_mov_b32 s6, 0
-; SI-NEXT:    s_mov_b32 s7, 0xf000
-; SI-NEXT:    v_max_i32_e32 v0, s3, v0
-; SI-NEXT:    s_mov_b64 s[4:5], 0
-; SI-NEXT:    buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
+; SI-NEXT:    v_mov_b32_e32 v3, s3
+; SI-NEXT:    v_max3_i32 v0, v0, s2, v3
+; SI-NEXT:    s_mov_b32 s2, 0
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b64 s[0:1], 0
+; SI-NEXT:    buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: test_max3_i32_vss:
 ; VI:       ; %bb.0:
-; VI-NEXT:    v_max_i32_e32 v0, s2, v0
-; VI-NEXT:    v_max_i32_e32 v0, s3, v0
+; VI-NEXT:    v_mov_b32_e32 v3, s3
+; VI-NEXT:    v_max3_i32 v0, v0, s2, v3
 ; VI-NEXT:    flat_store_dword v[1:2], v0
 ; VI-NEXT:    s_endpgm
 ;
-; GFX9_10-LABEL: test_max3_i32_vss:
-; GFX9_10:       ; %bb.0:
-; GFX9_10-NEXT:    v_max_i32_e32 v0, s2, v0
-; GFX9_10-NEXT:    v_max_i32_e32 v0, s3, v0
-; GFX9_10-NEXT:    global_store_dword v[1:2], v0, off
-; GFX9_10-NEXT:    s_endpgm
+; GFX9-LABEL: test_max3_i32_vss:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    v_mov_b32_e32 v3, s3
+; GFX9-NEXT:    v_max3_i32 v0, v0, s2, v3
+; GFX9-NEXT:    global_store_dword v[1:2], v0, off
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: test_max3_i32_vss:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_max3_i32 v0, v0, s2, s3
+; GFX10-NEXT:    global_store_dword v[1:2], v0, off
+; GFX10-NEXT:    s_endpgm
   %max = call i32 @llvm.smax.i32(i32 %a, i32 %b)
   %max3 = call i32 @llvm.smax.i32(i32 %max, i32 %c)
   store i32 %max3, i32 addrspace(1)* %out, align 4
@@ -518,8 +493,7 @@
 ; SI-NEXT:    v_and_b32_e32 v0, s0, v0
 ; SI-NEXT:    v_and_b32_e32 v1, s0, v1
 ; SI-NEXT:    v_and_b32_e32 v2, s0, v2
-; SI-NEXT:    v_max_u32_e32 v0, v0, v1
-; SI-NEXT:    v_max_u32_e32 v0, v0, v2
+; SI-NEXT:    v_max3_u32 v0, v0, v1, v2
 ; SI-NEXT:    s_mov_b64 s[0:1], 0
 ; SI-NEXT:    buffer_store_short v0, v[3:4], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
@@ -531,19 +505,11 @@
 ; VI-NEXT:    flat_store_short v[3:4], v0
 ; VI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_max3_u16_vvv:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_max_u16_e32 v0, v0, v1
-; GFX9-NEXT:    v_max_u16_e32 v0, v0, v2
-; GFX9-NEXT:    global_store_short v[3:4], v0, off
-; GFX9-NEXT:    s_endpgm
-;
-; GFX10-LABEL: test_max3_u16_vvv:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_max_u16 v0, v0, v1
-; GFX10-NEXT:    v_max_u16 v0, v0, v2
-; GFX10-NEXT:    global_store_short v[3:4], v0, off
-; GFX10-NEXT:    s_endpgm
+; GFX9_10-LABEL: test_max3_u16_vvv:
+; GFX9_10:       ; %bb.0:
+; GFX9_10-NEXT:    v_max3_u16 v0, v0, v1, v2
+; GFX9_10-NEXT:    global_store_short v[3:4], v0, off
+; GFX9_10-NEXT:    s_endpgm
   %max = call i16 @llvm.umax.i16(i16 %a, i16 %b)
   %max3 = call i16 @llvm.umax.i16(i16 %max, i16 %c)
   store i16 %max3, i16 addrspace(1)* %out, align 4
@@ -559,8 +525,7 @@
 ; SI-NEXT:    s_and_b32 s1, s2, s0
 ; SI-NEXT:    v_and_b32_e32 v0, s0, v0
 ; SI-NEXT:    v_and_b32_e32 v1, s0, v1
-; SI-NEXT:    v_max_u32_e32 v0, s1, v0
-; SI-NEXT:    v_max_u32_e32 v0, v0, v1
+; SI-NEXT:    v_max3_u32 v0, s1, v0, v1
 ; SI-NEXT:    s_mov_b64 s[4:5], 0
 ; SI-NEXT:    buffer_store_short v0, v[2:3], s[4:7], 0 addr64
 ; SI-NEXT:    s_endpgm
@@ -572,19 +537,11 @@
 ; VI-NEXT:    flat_store_short v[2:3], v0
 ; VI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_max3_u16_svv:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_max_u16_e32 v0, s2, v0
-; GFX9-NEXT:    v_max_u16_e32 v0, v0, v1
-; GFX9-NEXT:    global_store_short v[2:3], v0, off
-; GFX9-NEXT:    s_endpgm
-;
-; GFX10-LABEL: test_max3_u16_svv:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_max_u16 v0, s2, v0
-; GFX10-NEXT:    v_max_u16 v0, v0, v1
-; GFX10-NEXT:    global_store_short v[2:3], v0, off
-; GFX10-NEXT:    s_endpgm
+; GFX9_10-LABEL: test_max3_u16_svv:
+; GFX9_10:       ; %bb.0:
+; GFX9_10-NEXT:    v_max3_u16 v0, s2, v0, v1
+; GFX9_10-NEXT:    global_store_short v[2:3], v0, off
+; GFX9_10-NEXT:    s_endpgm
   %max = call i16 @llvm.umax.i16(i16 %a, i16 %b)
   %max3 = call i16 @llvm.umax.i16(i16 %max, i16 %c)
   store i16 %max3, i16 addrspace(1)* %out, align 4
@@ -600,8 +557,7 @@
 ; SI-NEXT:    v_and_b32_e32 v0, s0, v0
 ; SI-NEXT:    v_and_b32_e32 v1, s0, v1
 ; SI-NEXT:    s_and_b32 s0, s2, s0
-; SI-NEXT:    v_max_u32_e32 v0, v0, v1
-; SI-NEXT:    v_max_u32_e32 v0, s0, v0
+; SI-NEXT:    v_max3_u32 v0, v0, v1, s0
 ; SI-NEXT:    s_mov_b64 s[4:5], 0
 ; SI-NEXT:    buffer_store_short v0, v[2:3], s[4:7], 0 addr64
 ; SI-NEXT:    s_endpgm
@@ -613,19 +569,11 @@
 ; VI-NEXT:    flat_store_short v[2:3], v0
 ; VI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_max3_u16_vvs:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_max_u16_e32 v0, v0, v1
-; GFX9-NEXT:    v_max_u16_e32 v0, s2, v0
-; GFX9-NEXT:    global_store_short v[2:3], v0, off
-; GFX9-NEXT:    s_endpgm
-;
-; GFX10-LABEL: test_max3_u16_vvs:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_max_u16 v0, v0, v1
-; GFX10-NEXT:    v_max_u16 v0, v0, s2
-; GFX10-NEXT:    global_store_short v[2:3], v0, off
-; GFX10-NEXT:    s_endpgm
+; GFX9_10-LABEL: test_max3_u16_vvs:
+; GFX9_10:       ; %bb.0:
+; GFX9_10-NEXT:    v_max3_u16 v0, v0, v1, s2
+; GFX9_10-NEXT:    global_store_short v[2:3], v0, off
+; GFX9_10-NEXT:    s_endpgm
   %max = call i16 @llvm.umax.i16(i16 %a, i16 %b)
   %max3 = call i16 @llvm.umax.i16(i16 %max, i16 %c)
   store i16 %max3, i16 addrspace(1)* %out, align 4
@@ -703,15 +651,14 @@
 ;
 ; GFX9-LABEL: test_max3_i16_vss:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_max_i16_e32 v0, s2, v0
-; GFX9-NEXT:    v_max_i16_e32 v0, s3, v0
+; GFX9-NEXT:    v_mov_b32_e32 v3, s3
+; GFX9-NEXT:    v_max3_i16 v0, v0, s2, v3
 ; GFX9-NEXT:    global_store_short v[1:2], v0, off
 ; GFX9-NEXT:    s_endpgm
 ;
 ; GFX10-LABEL: test_max3_i16_vss:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_max_i16 v0, v0, s2
-; GFX10-NEXT:    v_max_i16 v0, v0, s3
+; GFX10-NEXT:    v_max3_i16 v0, v0, s2, s3
 ; GFX10-NEXT:    global_store_short v[1:2], v0, off
 ; GFX10-NEXT:    s_endpgm
   %max = call i16 @llvm.smax.i16(i16 %a, i16 %b)
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/min3.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/min3.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/min3.ll
@@ -8,23 +8,20 @@
 ; SI_VI-LABEL: test_min3_u32:
 ; SI_VI:       ; %bb.0:
 ; SI_VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI_VI-NEXT:    v_min_u32_e32 v0, v0, v1
-; SI_VI-NEXT:    v_min_u32_e32 v0, v0, v2
+; SI_VI-NEXT:    v_min3_u32 v0, v0, v1, v2
 ; SI_VI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_min3_u32:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_min_u32_e32 v0, v0, v1
-; GFX9-NEXT:    v_min_u32_e32 v0, v0, v2
+; GFX9-NEXT:    v_min3_u32 v0, v0, v1, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_min3_u32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    v_min_u32_e32 v0, v0, v1
-; GFX10-NEXT:    v_min_u32_e32 v0, v0, v2
+; GFX10-NEXT:    v_min3_u32 v0, v0, v1, v2
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %umin = call i32 @llvm.umin.i32(i32 %a, i32 %b)
   %umin3 = call i32 @llvm.umin.i32(i32 %umin, i32 %c)
@@ -46,23 +43,20 @@
 ; SI_VI-LABEL: test_min3_u32_commute:
 ; SI_VI:       ; %bb.0:
 ; SI_VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI_VI-NEXT:    v_min_u32_e32 v0, v0, v1
-; SI_VI-NEXT:    v_min_u32_e32 v0, v2, v0
+; SI_VI-NEXT:    v_min3_u32 v0, v0, v1, v2
 ; SI_VI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_min3_u32_commute:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_min_u32_e32 v0, v0, v1
-; GFX9-NEXT:    v_min_u32_e32 v0, v2, v0
+; GFX9-NEXT:    v_min3_u32 v0, v0, v1, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_min3_u32_commute:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    v_min_u32_e32 v0, v0, v1
-; GFX10-NEXT:    v_min_u32_e32 v0, v2, v0
+; GFX10-NEXT:    v_min3_u32 v0, v0, v1, v2
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %umin = call i32 @llvm.umin.i32(i32 %a, i32 %b)
   %umin3 = call i32 @llvm.umin.i32(i32 %c, i32 %umin)
@@ -76,9 +70,8 @@
 ; SI-NEXT:    s_mov_b32 s4, 0xffff
 ; SI-NEXT:    v_and_b32_e32 v0, s4, v0
 ; SI-NEXT:    v_and_b32_e32 v1, s4, v1
-; SI-NEXT:    v_min_u32_e32 v0, v0, v1
-; SI-NEXT:    v_and_b32_e32 v1, s4, v2
-; SI-NEXT:    v_min_u32_e32 v0, v0, v1
+; SI-NEXT:    v_and_b32_e32 v2, s4, v2
+; SI-NEXT:    v_min3_u32 v0, v0, v1, v2
 ; SI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-LABEL: test_min3_u16:
@@ -91,16 +84,14 @@
 ; GFX9-LABEL: test_min3_u16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_min_u16_e32 v0, v0, v1
-; GFX9-NEXT:    v_min_u16_e32 v0, v0, v2
+; GFX9-NEXT:    v_min3_u16 v0, v0, v1, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_min3_u16:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    v_min_u16 v0, v0, v1
-; GFX10-NEXT:    v_min_u16 v0, v0, v2
+; GFX10-NEXT:    v_min3_u16 v0, v0, v1, v2
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %umin = call i16 @llvm.umin.i16(i16 %a, i16 %b)
   %umin3 = call i16 @llvm.umin.i16(i16 %umin, i16 %c)
@@ -114,9 +105,8 @@
 ; SI-NEXT:    s_mov_b32 s4, 0xffff
 ; SI-NEXT:    v_and_b32_e32 v0, s4, v0
 ; SI-NEXT:    v_and_b32_e32 v1, s4, v1
-; SI-NEXT:    v_min_u32_e32 v0, v0, v1
-; SI-NEXT:    v_and_b32_e32 v1, s4, v2
-; SI-NEXT:    v_min_u32_e32 v0, v1, v0
+; SI-NEXT:    v_and_b32_e32 v2, s4, v2
+; SI-NEXT:    v_min3_u32 v0, v0, v1, v2
 ; SI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-LABEL: test_min3_u16_commute:
@@ -129,16 +119,14 @@
 ; GFX9-LABEL: test_min3_u16_commute:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_min_u16_e32 v0, v0, v1
-; GFX9-NEXT:    v_min_u16_e32 v0, v2, v0
+; GFX9-NEXT:    v_min3_u16 v0, v0, v1, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_min3_u16_commute:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    v_min_u16 v0, v0, v1
-; GFX10-NEXT:    v_min_u16 v0, v2, v0
+; GFX10-NEXT:    v_min3_u16 v0, v0, v1, v2
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %umin = call i16 @llvm.umin.i16(i16 %a, i16 %b)
   %umin3 = call i16 @llvm.umin.i16(i16 %c, i16 %umin)
@@ -149,23 +137,20 @@
 ; SI_VI-LABEL: test_min3_i32:
 ; SI_VI:       ; %bb.0:
 ; SI_VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI_VI-NEXT:    v_min_i32_e32 v0, v0, v1
-; SI_VI-NEXT:    v_min_i32_e32 v0, v0, v2
+; SI_VI-NEXT:    v_min3_i32 v0, v0, v1, v2
 ; SI_VI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_min3_i32:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_min_i32_e32 v0, v0, v1
-; GFX9-NEXT:    v_min_i32_e32 v0, v0, v2
+; GFX9-NEXT:    v_min3_i32 v0, v0, v1, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_min3_i32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    v_min_i32_e32 v0, v0, v1
-; GFX10-NEXT:    v_min_i32_e32 v0, v0, v2
+; GFX10-NEXT:    v_min3_i32 v0, v0, v1, v2
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %smin = call i32 @llvm.smin.i32(i32 %a, i32 %b)
   %smin3 = call i32 @llvm.smin.i32(i32 %smin, i32 %c)
@@ -187,23 +172,20 @@
 ; SI_VI-LABEL: test_min3_i32_commute:
 ; SI_VI:       ; %bb.0:
 ; SI_VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI_VI-NEXT:    v_min_i32_e32 v0, v0, v1
-; SI_VI-NEXT:    v_min_i32_e32 v0, v2, v0
+; SI_VI-NEXT:    v_min3_i32 v0, v0, v1, v2
 ; SI_VI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_min3_i32_commute:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_min_i32_e32 v0, v0, v1
-; GFX9-NEXT:    v_min_i32_e32 v0, v2, v0
+; GFX9-NEXT:    v_min3_i32 v0, v0, v1, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_min3_i32_commute:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    v_min_i32_e32 v0, v0, v1
-; GFX10-NEXT:    v_min_i32_e32 v0, v2, v0
+; GFX10-NEXT:    v_min3_i32 v0, v0, v1, v2
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %smin = call i32 @llvm.smin.i32(i32 %a, i32 %b)
   %smin3 = call i32 @llvm.smin.i32(i32 %c, i32 %smin)
@@ -232,16 +214,14 @@
 ; GFX9-LABEL: test_min3_i16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_min_i16_e32 v0, v0, v1
-; GFX9-NEXT:    v_min_i16_e32 v0, v0, v2
+; GFX9-NEXT:    v_min3_i16 v0, v0, v1, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_min3_i16:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    v_min_i16 v0, v0, v1
-; GFX10-NEXT:    v_min_i16 v0, v0, v2
+; GFX10-NEXT:    v_min3_i16 v0, v0, v1, v2
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %smin = call i16 @llvm.smin.i16(i16 %a, i16 %b)
   %smin3 = call i16 @llvm.smin.i16(i16 %smin, i16 %c)
@@ -270,16 +250,14 @@
 ; GFX9-LABEL: test_min3_i16_commute:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_min_i16_e32 v0, v0, v1
-; GFX9-NEXT:    v_min_i16_e32 v0, v2, v0
+; GFX9-NEXT:    v_min3_i16 v0, v0, v1, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_min3_i16_commute:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    v_min_i16 v0, v0, v1
-; GFX10-NEXT:    v_min_i16 v0, v2, v0
+; GFX10-NEXT:    v_min3_i16 v0, v0, v1, v2
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %smin = call i16 @llvm.smin.i16(i16 %a, i16 %b)
   %smin3 = call i16 @llvm.smin.i16(i16 %c, i16 %smin)
@@ -329,25 +307,22 @@
 define amdgpu_ps void @test_min3_u32_vvv(i32 %a, i32 %b, i32 %c, i32 addrspace(1)* %out) {
 ; SI-LABEL: test_min3_u32_vvv:
 ; SI:       ; %bb.0:
-; SI-NEXT:    v_min_u32_e32 v0, v0, v1
+; SI-NEXT:    v_min3_u32 v0, v0, v1, v2
 ; SI-NEXT:    s_mov_b32 s2, 0
 ; SI-NEXT:    s_mov_b32 s3, 0xf000
-; SI-NEXT:    v_min_u32_e32 v0, v0, v2
 ; SI-NEXT:    s_mov_b64 s[0:1], 0
 ; SI-NEXT:    buffer_store_dword v0, v[3:4], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: test_min3_u32_vvv:
 ; VI:       ; %bb.0:
-; VI-NEXT:    v_min_u32_e32 v0, v0, v1
-; VI-NEXT:    v_min_u32_e32 v0, v0, v2
+; VI-NEXT:    v_min3_u32 v0, v0, v1, v2
 ; VI-NEXT:    flat_store_dword v[3:4], v0
 ; VI-NEXT:    s_endpgm
 ;
 ; GFX9_10-LABEL: test_min3_u32_vvv:
 ; GFX9_10:       ; %bb.0:
-; GFX9_10-NEXT:    v_min_u32_e32 v0, v0, v1
-; GFX9_10-NEXT:    v_min_u32_e32 v0, v0, v2
+; GFX9_10-NEXT:    v_min3_u32 v0, v0, v1, v2
 ; GFX9_10-NEXT:    global_store_dword v[3:4], v0, off
 ; GFX9_10-NEXT:    s_endpgm
   %min = call i32 @llvm.umin.i32(i32 %a, i32 %b)
@@ -359,25 +334,22 @@
 define amdgpu_ps void @test_min3_u32_svv(i32 inreg %a, i32 %b, i32 %c, i32 addrspace(1)* %out) {
 ; SI-LABEL: test_min3_u32_svv:
 ; SI:       ; %bb.0:
-; SI-NEXT:    v_min_u32_e32 v0, s2, v0
+; SI-NEXT:    v_min3_u32 v0, s2, v0, v1
 ; SI-NEXT:    s_mov_b32 s2, 0
 ; SI-NEXT:    s_mov_b32 s3, 0xf000
-; SI-NEXT:    v_min_u32_e32 v0, v0, v1
 ; SI-NEXT:    s_mov_b64 s[0:1], 0
 ; SI-NEXT:    buffer_store_dword v0, v[2:3], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: test_min3_u32_svv:
 ; VI:       ; %bb.0:
-; VI-NEXT:    v_min_u32_e32 v0, s2, v0
-; VI-NEXT:    v_min_u32_e32 v0, v0, v1
+; VI-NEXT:    v_min3_u32 v0, s2, v0, v1
 ; VI-NEXT:    flat_store_dword v[2:3], v0
 ; VI-NEXT:    s_endpgm
 ;
 ; GFX9_10-LABEL: test_min3_u32_svv:
 ; GFX9_10:       ; %bb.0:
-; GFX9_10-NEXT:    v_min_u32_e32 v0, s2, v0
-; GFX9_10-NEXT:    v_min_u32_e32 v0, v0, v1
+; GFX9_10-NEXT:    v_min3_u32 v0, s2, v0, v1
 ; GFX9_10-NEXT:    global_store_dword v[2:3], v0, off
 ; GFX9_10-NEXT:    s_endpgm
   %min = call i32 @llvm.umin.i32(i32 %a, i32 %b)
@@ -389,25 +361,22 @@
 define amdgpu_ps void @test_min3_u32_vvs(i32 %a, i32 %b, i32 inreg %c, i32 addrspace(1)* %out) {
 ; SI-LABEL: test_min3_u32_vvs:
 ; SI:       ; %bb.0:
-; SI-NEXT:    v_min_u32_e32 v0, v0, v1
-; SI-NEXT:    s_mov_b32 s6, 0
-; SI-NEXT:    s_mov_b32 s7, 0xf000
-; SI-NEXT:    v_min_u32_e32 v0, s2, v0
-; SI-NEXT:    s_mov_b64 s[4:5], 0
-; SI-NEXT:    buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; SI-NEXT:    v_min3_u32 v0, v0, v1, s2
+; SI-NEXT:    s_mov_b32 s2, 0
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b64 s[0:1], 0
+; SI-NEXT:    buffer_store_dword v0, v[2:3], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: test_min3_u32_vvs:
 ; VI:       ; %bb.0:
-; VI-NEXT:    v_min_u32_e32 v0, v0, v1
-; VI-NEXT:    v_min_u32_e32 v0, s2, v0
+; VI-NEXT:    v_min3_u32 v0, v0, v1, s2
 ; VI-NEXT:    flat_store_dword v[2:3], v0
 ; VI-NEXT:    s_endpgm
 ;
 ; GFX9_10-LABEL: test_min3_u32_vvs:
 ; GFX9_10:       ; %bb.0:
-; GFX9_10-NEXT:    v_min_u32_e32 v0, v0, v1
-; GFX9_10-NEXT:    v_min_u32_e32 v0, s2, v0
+; GFX9_10-NEXT:    v_min3_u32 v0, v0, v1, s2
 ; GFX9_10-NEXT:    global_store_dword v[2:3], v0, off
 ; GFX9_10-NEXT:    s_endpgm
   %min = call i32 @llvm.umin.i32(i32 %a, i32 %b)
@@ -449,27 +418,33 @@
 define amdgpu_ps void @test_min3_i32_vss(i32 %a, i32 inreg %b, i32 inreg %c, i32 addrspace(1)* %out) {
 ; SI-LABEL: test_min3_i32_vss:
 ; SI:       ; %bb.0:
-; SI-NEXT:    v_min_i32_e32 v0, s2, v0
-; SI-NEXT:    s_mov_b32 s6, 0
-; SI-NEXT:    s_mov_b32 s7, 0xf000
-; SI-NEXT:    v_min_i32_e32 v0, s3, v0
-; SI-NEXT:    s_mov_b64 s[4:5], 0
-; SI-NEXT:    buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
+; SI-NEXT:    v_mov_b32_e32 v3, s3
+; SI-NEXT:    v_min3_i32 v0, v0, s2, v3
+; SI-NEXT:    s_mov_b32 s2, 0
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b64 s[0:1], 0
+; SI-NEXT:    buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: test_min3_i32_vss:
 ; VI:       ; %bb.0:
-; VI-NEXT:    v_min_i32_e32 v0, s2, v0
-; VI-NEXT:    v_min_i32_e32 v0, s3, v0
+; VI-NEXT:    v_mov_b32_e32 v3, s3
+; VI-NEXT:    v_min3_i32 v0, v0, s2, v3
 ; VI-NEXT:    flat_store_dword v[1:2], v0
 ; VI-NEXT:    s_endpgm
 ;
-; GFX9_10-LABEL: test_min3_i32_vss:
-; GFX9_10:       ; %bb.0:
-; GFX9_10-NEXT:    v_min_i32_e32 v0, s2, v0
-; GFX9_10-NEXT:    v_min_i32_e32 v0, s3, v0
-; GFX9_10-NEXT:    global_store_dword v[1:2], v0, off
-; GFX9_10-NEXT:    s_endpgm
+; GFX9-LABEL: test_min3_i32_vss:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    v_mov_b32_e32 v3, s3
+; GFX9-NEXT:    v_min3_i32 v0, v0, s2, v3
+; GFX9-NEXT:    global_store_dword v[1:2], v0, off
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: test_min3_i32_vss:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_min3_i32 v0, v0, s2, s3
+; GFX10-NEXT:    global_store_dword v[1:2], v0, off
+; GFX10-NEXT:    s_endpgm
   %min = call i32 @llvm.smin.i32(i32 %a, i32 %b)
   %min3 = call i32 @llvm.smin.i32(i32 %min, i32 %c)
   store i32 %min3, i32 addrspace(1)* %out, align 4
@@ -518,8 +493,7 @@
 ; SI-NEXT:    v_and_b32_e32 v0, s0, v0
 ; SI-NEXT:    v_and_b32_e32 v1, s0, v1
 ; SI-NEXT:    v_and_b32_e32 v2, s0, v2
-; SI-NEXT:    v_min_u32_e32 v0, v0, v1
-; SI-NEXT:    v_min_u32_e32 v0, v0, v2
+; SI-NEXT:    v_min3_u32 v0, v0, v1, v2
 ; SI-NEXT:    s_mov_b64 s[0:1], 0
 ; SI-NEXT:    buffer_store_short v0, v[3:4], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
@@ -531,19 +505,11 @@
 ; VI-NEXT:    flat_store_short v[3:4], v0
 ; VI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_min3_u16_vvv:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_min_u16_e32 v0, v0, v1
-; GFX9-NEXT:    v_min_u16_e32 v0, v0, v2
-; GFX9-NEXT:    global_store_short v[3:4], v0, off
-; GFX9-NEXT:    s_endpgm
-;
-; GFX10-LABEL: test_min3_u16_vvv:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_min_u16 v0, v0, v1
-; GFX10-NEXT:    v_min_u16 v0, v0, v2
-; GFX10-NEXT:    global_store_short v[3:4], v0, off
-; GFX10-NEXT:    s_endpgm
+; GFX9_10-LABEL: test_min3_u16_vvv:
+; GFX9_10:       ; %bb.0:
+; GFX9_10-NEXT:    v_min3_u16 v0, v0, v1, v2
+; GFX9_10-NEXT:    global_store_short v[3:4], v0, off
+; GFX9_10-NEXT:    s_endpgm
   %min = call i16 @llvm.umin.i16(i16 %a, i16 %b)
   %min3 = call i16 @llvm.umin.i16(i16 %min, i16 %c)
   store i16 %min3, i16 addrspace(1)* %out, align 4
@@ -559,8 +525,7 @@
 ; SI-NEXT:    s_and_b32 s1, s2, s0
 ; SI-NEXT:    v_and_b32_e32 v0, s0, v0
 ; SI-NEXT:    v_and_b32_e32 v1, s0, v1
-; SI-NEXT:    v_min_u32_e32 v0, s1, v0
-; SI-NEXT:    v_min_u32_e32 v0, v0, v1
+; SI-NEXT:    v_min3_u32 v0, s1, v0, v1
 ; SI-NEXT:    s_mov_b64 s[4:5], 0
 ; SI-NEXT:    buffer_store_short v0, v[2:3], s[4:7], 0 addr64
 ; SI-NEXT:    s_endpgm
@@ -572,19 +537,11 @@
 ; VI-NEXT:    flat_store_short v[2:3], v0
 ; VI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_min3_u16_svv:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_min_u16_e32 v0, s2, v0
-; GFX9-NEXT:    v_min_u16_e32 v0, v0, v1
-; GFX9-NEXT:    global_store_short v[2:3], v0, off
-; GFX9-NEXT:    s_endpgm
-;
-; GFX10-LABEL: test_min3_u16_svv:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_min_u16 v0, s2, v0
-; GFX10-NEXT:    v_min_u16 v0, v0, v1
-; GFX10-NEXT:    global_store_short v[2:3], v0, off
-; GFX10-NEXT:    s_endpgm
+; GFX9_10-LABEL: test_min3_u16_svv:
+; GFX9_10:       ; %bb.0:
+; GFX9_10-NEXT:    v_min3_u16 v0, s2, v0, v1
+; GFX9_10-NEXT:    global_store_short v[2:3], v0, off
+; GFX9_10-NEXT:    s_endpgm
   %min = call i16 @llvm.umin.i16(i16 %a, i16 %b)
   %min3 = call i16 @llvm.umin.i16(i16 %min, i16 %c)
   store i16 %min3, i16 addrspace(1)* %out, align 4
@@ -600,8 +557,7 @@
 ; SI-NEXT:    v_and_b32_e32 v0, s0, v0
 ; SI-NEXT:    v_and_b32_e32 v1, s0, v1
 ; SI-NEXT:    s_and_b32 s0, s2, s0
-; SI-NEXT:    v_min_u32_e32 v0, v0, v1
-; SI-NEXT:    v_min_u32_e32 v0, s0, v0
+; SI-NEXT:    v_min3_u32 v0, v0, v1, s0
 ; SI-NEXT:    s_mov_b64 s[4:5], 0
 ; SI-NEXT:    buffer_store_short v0, v[2:3], s[4:7], 0 addr64
 ; SI-NEXT:    s_endpgm
@@ -613,19 +569,11 @@
 ; VI-NEXT:    flat_store_short v[2:3], v0
 ; VI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_min3_u16_vvs:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_min_u16_e32 v0, v0, v1
-; GFX9-NEXT:    v_min_u16_e32 v0, s2, v0
-; GFX9-NEXT:    global_store_short v[2:3], v0, off
-; GFX9-NEXT:    s_endpgm
-;
-; GFX10-LABEL: test_min3_u16_vvs:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_min_u16 v0, v0, v1
-; GFX10-NEXT:    v_min_u16 v0, v0, s2
-; GFX10-NEXT:    global_store_short v[2:3], v0, off
-; GFX10-NEXT:    s_endpgm
+; GFX9_10-LABEL: test_min3_u16_vvs:
+; GFX9_10:       ; %bb.0:
+; GFX9_10-NEXT:    v_min3_u16 v0, v0, v1, s2
+; GFX9_10-NEXT:    global_store_short v[2:3], v0, off
+; GFX9_10-NEXT:    s_endpgm
   %min = call i16 @llvm.umin.i16(i16 %a, i16 %b)
   %min3 = call i16 @llvm.umin.i16(i16 %min, i16 %c)
   store i16 %min3, i16 addrspace(1)* %out, align 4
@@ -703,15 +651,14 @@
 ;
 ; GFX9-LABEL: test_min3_i16_vss:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_min_i16_e32 v0, s2, v0
-; GFX9-NEXT:    v_min_i16_e32 v0, s3, v0
+; GFX9-NEXT:    v_mov_b32_e32 v3, s3
+; GFX9-NEXT:    v_min3_i16 v0, v0, s2, v3
 ; GFX9-NEXT:    global_store_short v[1:2], v0, off
 ; GFX9-NEXT:    s_endpgm
 ;
 ; GFX10-LABEL: test_min3_i16_vss:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_min_i16 v0, v0, s2
-; GFX10-NEXT:    v_min_i16 v0, v0, s3
+; GFX10-NEXT:    v_min3_i16 v0, v0, s2, s3
 ; GFX10-NEXT:    global_store_short v[1:2], v0, off
 ; GFX10-NEXT:    s_endpgm
   %min = call i16 @llvm.smin.i16(i16 %a, i16 %b)
Index: llvm/test/CodeGen/AMDGPU/ctlz.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/ctlz.ll
+++ llvm/test/CodeGen/AMDGPU/ctlz.ll
@@ -524,10 +524,11 @@
 ; SI-NEXT:    s_flbit_i32_b32 s4, s4
 ; SI-NEXT:    s_flbit_i32_b32 s5, s5
 ; SI-NEXT:    s_min_u32 s4, s4, 0xffffffdf
-; SI-NEXT:    v_mov_b32_e32 v0, s5
 ; SI-NEXT:    s_add_i32 s4, s4, 32
-; SI-NEXT:    v_min3_u32 v0, s4, v0, 64
+; SI-NEXT:    s_min_u32 s4, s4, s5
+; SI-NEXT:    s_min_u32 s4, s4, 64
 ; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    v_mov_b32_e32 v0, s4
 ; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; SI-NEXT:    s_endpgm
 ;
@@ -542,7 +543,8 @@
 ; VI-NEXT:    s_flbit_i32_b32 s4, s4
 ; VI-NEXT:    v_add_u32_e64 v0, s[6:7], s4, 32 clamp
 ; VI-NEXT:    s_flbit_i32_b32 s4, s5
-; VI-NEXT:    v_min3_u32 v0, v0, s4, 64
+; VI-NEXT:    v_min_u32_e32 v0, s4, v0
+; VI-NEXT:    v_min_u32_e32 v0, 64, v0
 ; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; VI-NEXT:    s_endpgm
 ;
@@ -574,7 +576,8 @@
 ; GFX10-NEXT:    s_flbit_i32_b32 s0, s2
 ; GFX10-NEXT:    v_add_nc_u32_e64 v0, s0, 32 clamp
 ; GFX10-NEXT:    s_flbit_i32_b32 s0, s3
-; GFX10-NEXT:    v_min3_u32 v0, v0, s0, 64
+; GFX10-NEXT:    v_min_u32_e32 v0, s0, v0
+; GFX10-NEXT:    v_min_u32_e32 v0, 64, v0
 ; GFX10-NEXT:    global_store_dwordx2 v1, v[0:1], s[4:5]
 ; GFX10-NEXT:    s_endpgm
 ;
@@ -603,14 +606,15 @@
 ; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
 ; SI-NEXT:    s_mov_b32 s3, 0xf000
-; SI-NEXT:    s_mov_b32 s2, -1
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    s_flbit_i32_b32 s4, s4
-; SI-NEXT:    s_min_u32 s4, s4, 0xffffffdf
-; SI-NEXT:    s_flbit_i32_b32 s5, s5
-; SI-NEXT:    s_add_i32 s4, s4, 32
-; SI-NEXT:    v_mov_b32_e32 v0, s5
-; SI-NEXT:    v_min3_u32 v0, s4, v0, 64
+; SI-NEXT:    s_flbit_i32_b32 s2, s4
+; SI-NEXT:    s_flbit_i32_b32 s4, s5
+; SI-NEXT:    s_min_u32 s2, s2, 0xffffffdf
+; SI-NEXT:    s_add_i32 s2, s2, 32
+; SI-NEXT:    s_min_u32 s2, s2, s4
+; SI-NEXT:    s_min_u32 s4, s2, 64
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    v_mov_b32_e32 v0, s4
 ; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-NEXT:    s_endpgm
 ;
@@ -624,7 +628,8 @@
 ; VI-NEXT:    s_flbit_i32_b32 s4, s4
 ; VI-NEXT:    v_add_u32_e64 v0, s[6:7], s4, 32 clamp
 ; VI-NEXT:    s_flbit_i32_b32 s4, s5
-; VI-NEXT:    v_min3_u32 v0, v0, s4, 64
+; VI-NEXT:    v_min_u32_e32 v0, s4, v0
+; VI-NEXT:    v_min_u32_e32 v0, 64, v0
 ; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT:    s_endpgm
 ;
@@ -655,7 +660,8 @@
 ; GFX10-NEXT:    s_flbit_i32_b32 s0, s2
 ; GFX10-NEXT:    v_add_nc_u32_e64 v0, s0, 32 clamp
 ; GFX10-NEXT:    s_flbit_i32_b32 s0, s3
-; GFX10-NEXT:    v_min3_u32 v0, v0, s0, 64
+; GFX10-NEXT:    v_min_u32_e32 v0, s0, v0
+; GFX10-NEXT:    v_min_u32_e32 v0, 64, v0
 ; GFX10-NEXT:    global_store_dword v1, v0, s[4:5]
 ; GFX10-NEXT:    s_endpgm
 ;
@@ -775,9 +781,8 @@
 ; GFX10-GISEL-NEXT:    v_ffbh_u32_e32 v0, v0
 ; GFX10-GISEL-NEXT:    v_ffbh_u32_e32 v1, v1
 ; GFX10-GISEL-NEXT:    v_add_nc_u32_e64 v0, v0, 32 clamp
-; GFX10-GISEL-NEXT:    v_min_u32_e32 v0, v1, v0
+; GFX10-GISEL-NEXT:    v_min3_u32 v0, v1, v0, 64
 ; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX10-GISEL-NEXT:    v_min_u32_e32 v0, 64, v0
 ; GFX10-GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX10-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -888,8 +893,7 @@
 ; GFX10-GISEL-NEXT:    v_ffbh_u32_e32 v1, v1
 ; GFX10-GISEL-NEXT:    v_ffbh_u32_e32 v2, v2
 ; GFX10-GISEL-NEXT:    v_add_nc_u32_e64 v1, v1, 32 clamp
-; GFX10-GISEL-NEXT:    v_min_u32_e32 v1, v2, v1
-; GFX10-GISEL-NEXT:    v_min_u32_e32 v1, 64, v1
+; GFX10-GISEL-NEXT:    v_min3_u32 v1, v2, v1, 64
 ; GFX10-GISEL-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX10-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
Index: llvm/test/CodeGen/AMDGPU/cttz.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/cttz.ll
+++ llvm/test/CodeGen/AMDGPU/cttz.ll
@@ -515,12 +515,13 @@
 ; SI-NEXT:    s_mov_b32 s2, -1
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    s_ff1_i32_b32 s5, s5
+; SI-NEXT:    s_ff1_i32_b32 s4, s4
 ; SI-NEXT:    s_min_u32 s5, s5, 0xffffffdf
 ; SI-NEXT:    s_add_i32 s5, s5, 32
-; SI-NEXT:    s_ff1_i32_b32 s4, s4
-; SI-NEXT:    v_mov_b32_e32 v0, s5
-; SI-NEXT:    v_min3_u32 v0, s4, v0, 64
+; SI-NEXT:    s_min_u32 s4, s4, s5
+; SI-NEXT:    s_min_u32 s4, s4, 64
 ; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    v_mov_b32_e32 v0, s4
 ; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; SI-NEXT:    s_endpgm
 ;
@@ -535,7 +536,8 @@
 ; VI-NEXT:    s_ff1_i32_b32 s5, s5
 ; VI-NEXT:    v_add_u32_e64 v0, s[6:7], s5, 32 clamp
 ; VI-NEXT:    s_ff1_i32_b32 s4, s4
-; VI-NEXT:    v_min3_u32 v0, s4, v0, 64
+; VI-NEXT:    v_min_u32_e32 v0, s4, v0
+; VI-NEXT:    v_min_u32_e32 v0, 64, v0
 ; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; VI-NEXT:    s_endpgm
 ;
@@ -567,7 +569,8 @@
 ; GFX10-NEXT:    s_ff1_i32_b32 s0, s3
 ; GFX10-NEXT:    v_add_nc_u32_e64 v0, s0, 32 clamp
 ; GFX10-NEXT:    s_ff1_i32_b32 s0, s2
-; GFX10-NEXT:    v_min3_u32 v0, s0, v0, 64
+; GFX10-NEXT:    v_min_u32_e32 v0, s0, v0
+; GFX10-NEXT:    v_min_u32_e32 v0, 64, v0
 ; GFX10-NEXT:    global_store_dwordx2 v1, v[0:1], s[4:5]
 ; GFX10-NEXT:    s_endpgm
 ;
@@ -596,14 +599,15 @@
 ; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
 ; SI-NEXT:    s_mov_b32 s3, 0xf000
-; SI-NEXT:    s_mov_b32 s2, -1
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    s_ff1_i32_b32 s5, s5
-; SI-NEXT:    s_min_u32 s5, s5, 0xffffffdf
-; SI-NEXT:    s_add_i32 s5, s5, 32
+; SI-NEXT:    s_ff1_i32_b32 s2, s5
 ; SI-NEXT:    s_ff1_i32_b32 s4, s4
-; SI-NEXT:    v_mov_b32_e32 v0, s5
-; SI-NEXT:    v_min3_u32 v0, s4, v0, 64
+; SI-NEXT:    s_min_u32 s2, s2, 0xffffffdf
+; SI-NEXT:    s_add_i32 s2, s2, 32
+; SI-NEXT:    s_min_u32 s2, s4, s2
+; SI-NEXT:    s_min_u32 s4, s2, 64
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    v_mov_b32_e32 v0, s4
 ; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-NEXT:    s_endpgm
 ;
@@ -617,7 +621,8 @@
 ; VI-NEXT:    s_ff1_i32_b32 s5, s5
 ; VI-NEXT:    v_add_u32_e64 v0, s[6:7], s5, 32 clamp
 ; VI-NEXT:    s_ff1_i32_b32 s4, s4
-; VI-NEXT:    v_min3_u32 v0, s4, v0, 64
+; VI-NEXT:    v_min_u32_e32 v0, s4, v0
+; VI-NEXT:    v_min_u32_e32 v0, 64, v0
 ; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT:    s_endpgm
 ;
@@ -648,7 +653,8 @@
 ; GFX10-NEXT:    s_ff1_i32_b32 s0, s3
 ; GFX10-NEXT:    v_add_nc_u32_e64 v0, s0, 32 clamp
 ; GFX10-NEXT:    s_ff1_i32_b32 s0, s2
-; GFX10-NEXT:    v_min3_u32 v0, s0, v0, 64
+; GFX10-NEXT:    v_min_u32_e32 v0, s0, v0
+; GFX10-NEXT:    v_min_u32_e32 v0, 64, v0
 ; GFX10-NEXT:    global_store_dword v1, v0, s[4:5]
 ; GFX10-NEXT:    s_endpgm
 ;
@@ -768,9 +774,8 @@
 ; GFX10-GISEL-NEXT:    v_ffbl_b32_e32 v1, v1
 ; GFX10-GISEL-NEXT:    v_ffbl_b32_e32 v0, v0
 ; GFX10-GISEL-NEXT:    v_add_nc_u32_e64 v1, v1, 32 clamp
-; GFX10-GISEL-NEXT:    v_min_u32_e32 v0, v0, v1
+; GFX10-GISEL-NEXT:    v_min3_u32 v0, v0, v1, 64
 ; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX10-GISEL-NEXT:    v_min_u32_e32 v0, 64, v0
 ; GFX10-GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX10-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -881,8 +886,7 @@
 ; GFX10-GISEL-NEXT:    v_ffbl_b32_e32 v2, v2
 ; GFX10-GISEL-NEXT:    v_ffbl_b32_e32 v1, v1
 ; GFX10-GISEL-NEXT:    v_add_nc_u32_e64 v2, v2, 32 clamp
-; GFX10-GISEL-NEXT:    v_min_u32_e32 v1, v1, v2
-; GFX10-GISEL-NEXT:    v_min_u32_e32 v1, 64, v1
+; GFX10-GISEL-NEXT:    v_min3_u32 v1, v1, v2, 64
 ; GFX10-GISEL-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX10-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
Index: llvm/test/CodeGen/AMDGPU/fmax3.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/fmax3.ll
+++ llvm/test/CodeGen/AMDGPU/fmax3.ll
@@ -1,13 +1,20 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,CST_BUS_LIM_1 %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CST_BUS_LIM_1 %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SI_VI,CST_BUS_LIM_1 %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SI_VI,CST_BUS_LIM_1 %s
 ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9_10,CST_BUS_LIM_1 %s
 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10,GFX9_10,CST_BUS_LIM_2 %s
 
 ; GCN-LABEL: {{^}}test_fmax3_olt_0_f32:
-; GCN: buffer_load_dword [[REGC:v[0-9]+]]
-; GCN: buffer_load_dword [[REGB:v[0-9]+]]
 ; GCN: buffer_load_dword [[REGA:v[0-9]+]]
-; GCN: v_max3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; GCN: buffer_load_dword [[REGB:v[0-9]+]]
+; GCN: buffer_load_dword [[REGC:v[0-9]+]]
+; SI_VI: v_mul_f32_e32 [[QUIET_A:v[0-9]+]], 1.0, [[REGA]]
+; SI_VI: v_mul_f32_e32 [[QUIET_B:v[0-9]+]], 1.0, [[REGB]]
+; GFX9_10: v_max_f32_e32 [[QUIET_A:v[0-9]+]], [[REGA]], [[REGA]]
+; GFX9_10: v_max_f32_e32 [[QUIET_B:v[0-9]+]], [[REGB]], [[REGB]]
+; GCN: v_max_f32_e32 [[FMAX:v[0-9]+]], [[QUIET_A]], [[QUIET_B]]
+; SI_VI: v_mul_f32_e32 [[QUIET_C:v[0-9]+]], 1.0, [[REGC]]
+; GFX9_10: v_max_f32_e32 [[QUIET_C:v[0-9]+]], [[REGC]], [[REGC]]
+; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], [[FMAX]], [[QUIET_C]]
 ; GCN: buffer_store_dword [[RESULT]],
 ; GCN: s_endpgm
 define amdgpu_kernel void @test_fmax3_olt_0_f32(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #0 {
@@ -22,10 +29,17 @@
 
 ; Commute operand of second fmax
 ; GCN-LABEL: {{^}}test_fmax3_olt_1_f32:
-; GCN: buffer_load_dword [[REGB:v[0-9]+]]
 ; GCN: buffer_load_dword [[REGA:v[0-9]+]]
+; GCN: buffer_load_dword [[REGB:v[0-9]+]]
 ; GCN: buffer_load_dword [[REGC:v[0-9]+]]
-; GCN: v_max3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; SI_VI: v_mul_f32_e32 [[QUIET_A:v[0-9]+]], 1.0, [[REGA]]
+; SI_VI: v_mul_f32_e32 [[QUIET_B:v[0-9]+]], 1.0, [[REGB]]
+; GFX9_10: v_max_f32_e32 [[QUIET_A:v[0-9]+]], [[REGA]], [[REGA]]
+; GFX9_10: v_max_f32_e32 [[QUIET_B:v[0-9]+]], [[REGB]], [[REGB]]
+; GCN: v_max_f32_e32 [[FMAX:v[0-9]+]], [[QUIET_A]], [[QUIET_B]]
+; SI_VI: v_mul_f32_e32 [[QUIET_C:v[0-9]+]], 1.0, [[REGC]]
+; GFX9_10: v_max_f32_e32 [[QUIET_C:v[0-9]+]], [[REGC]], [[REGC]]
+; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], [[QUIET_C]], [[FMAX]]
 ; GCN: buffer_store_dword [[RESULT]],
 ; GCN: s_endpgm
 define amdgpu_kernel void @test_fmax3_olt_1_f32(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #0 {
@@ -46,7 +60,11 @@
 ; SI-DAG: v_cvt_f32_f16_e32 [[CVT_A:v[0-9]+]], [[REGA]]
 ; SI-DAG: v_cvt_f32_f16_e32 [[CVT_B:v[0-9]+]], [[REGB]]
 ; SI-DAG: v_cvt_f32_f16_e32 [[CVT_C:v[0-9]+]], [[REGC]]
-; SI: v_max3_f32 [[RESULT_F32:v[0-9]+]], [[CVT_A]], [[CVT_B]], [[CVT_C]]
+; SI-DAG: v_mul_f32_e32 [[QUIET_A:v[0-9]+]], 1.0, [[CVT_A]]
+; SI-DAG: v_mul_f32_e32 [[QUIET_B:v[0-9]+]], 1.0, [[CVT_B]]
+; SI-DAG: v_max_f32_e32 [[FMAX:v[0-9]+]], [[QUIET_A]], [[QUIET_B]]
+; SI-DAG: v_mul_f32_e32 [[QUIET_C:v[0-9]+]], 1.0, [[CVT_C]]
+; SI-DAG: v_max_f32_e32 [[RESULT_F32:v[0-9]+]], [[FMAX]], [[QUIET_C]]
 ; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[RESULT_F32]]
 
 ; VI-DAG: v_max_f16_e32 [[QUIET_A:v[0-9]+]], [[REGA]], [[REGA]]
@@ -55,7 +73,12 @@
 ; VI: v_max_f16_e32 [[QUIET_C:v[0-9]+]], [[REGC]], [[REGC]]
 ; VI: v_max_f16_e32 [[RESULT:v[0-9]+]], [[MAX0]], [[QUIET_C]]
 
-; GFX9_10: v_max3_f16 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], [[REGC]]
+
+; GFX9_10: v_max_f16_e32 [[QUIET_A:v[0-9]+]], [[REGA]], [[REGA]]
+; GFX9_10: v_max_f16_e32 [[QUIET_B:v[0-9]+]], [[REGB]], [[REGB]]
+; GFX9_10: v_max_f16_e32 [[FMAX:v[0-9]+]], [[QUIET_A]], [[QUIET_B]]
+; GFX9_10: v_max_f16_e32 [[QUIET_C:v[0-9]+]], [[REGC]], [[REGC]]
+; GFX9_10: v_max_f16_e32 [[RESULT:v[0-9]+]], [[FMAX]], [[QUIET_C]]
 ; GCN: buffer_store_short [[RESULT]],
 define amdgpu_kernel void @test_fmax3_olt_0_f16(half addrspace(1)* %out, half addrspace(1)* %aptr, half addrspace(1)* %bptr, half addrspace(1)* %cptr) #0 {
   %a = load volatile half, half addrspace(1)* %aptr, align 2
@@ -76,7 +99,11 @@
 ; SI-DAG: v_cvt_f32_f16_e32 [[CVT_A:v[0-9]+]], [[REGA]]
 ; SI-DAG: v_cvt_f32_f16_e32 [[CVT_B:v[0-9]+]], [[REGB]]
 ; SI-DAG: v_cvt_f32_f16_e32 [[CVT_C:v[0-9]+]], [[REGC]]
-; SI: v_max3_f32 [[RESULT_F32:v[0-9]+]], [[CVT_C]], [[CVT_A]], [[CVT_B]]
+; SI-DAG: v_mul_f32_e32 [[QUIET_A:v[0-9]+]], 1.0, [[CVT_A]]
+; SI-DAG: v_mul_f32_e32 [[QUIET_B:v[0-9]+]], 1.0, [[CVT_B]]
+; SI-DAG: v_max_f32_e32 [[FMAX:v[0-9]+]], [[QUIET_A]], [[QUIET_B]]
+; SI-DAG: v_mul_f32_e32 [[QUIET_C:v[0-9]+]], 1.0, [[CVT_C]]
+; SI-DAG: v_max_f32_e32 [[RESULT_F32:v[0-9]+]], [[QUIET_C]], [[FMAX]]
 ; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[RESULT_F32]]
 
 ; VI-DAG: v_max_f16_e32 [[QUIET_A:v[0-9]+]], [[REGA]], [[REGA]]
@@ -85,7 +112,11 @@
 ; VI: v_max_f16_e32 [[QUIET_C:v[0-9]+]], [[REGC]], [[REGC]]
 ; VI: v_max_f16_e32 [[RESULT:v[0-9]+]], [[QUIET_C]], [[MAX0]]
 
-; GFX9_10: v_max3_f16 [[RESULT:v[0-9]+]], [[REGC]], [[REGA]], [[REGB]]
+; GFX9_10: v_max_f16_e32 [[QUIET_A:v[0-9]+]], [[REGA]], [[REGA]]
+; GFX9_10: v_max_f16_e32 [[QUIET_B:v[0-9]+]], [[REGB]], [[REGB]]
+; GFX9_10: v_max_f16_e32 [[FMAX:v[0-9]+]], [[QUIET_A]], [[QUIET_B]]
+; GFX9_10: v_max_f16_e32 [[QUIET_C:v[0-9]+]], [[REGC]], [[REGC]]
+; GFX9_10: v_max_f16_e32 [[RESULT:v[0-9]+]], [[QUIET_C]], [[FMAX]]
 ; GCN: buffer_store_short [[RESULT]],
 define amdgpu_kernel void @test_fmax3_olt_1_f16(half addrspace(1)* %out, half addrspace(1)* %aptr, half addrspace(1)* %bptr, half addrspace(1)* %cptr) #0 {
   %a = load volatile half, half addrspace(1)* %aptr, align 2
@@ -206,11 +237,11 @@
 
 ; GCN-LABEL: {{^}}test_max3_f32_sss:
 ; CST_BUS_LIM_1: v_mov_b32_e32 v[[B:[0-9]+]], s1
-; CST_BUS_LIM_1: v_mov_b32_e32 v[[C:[0-9]+]], s2
-; CST_BUS_LIM_1: v_max3_f32 v{{[0-9]+}}, s0, v[[B]], v[[C]]
+; CST_BUS_LIM_1: v_max_f32_e32 v[[FMAX:[0-9]+]], s0, v[[B]]
+; CST_BUS_LIM_1: v_max_f32_e32 v{{[0-9]+}}, s2, v[[FMAX]]
 
-; CST_BUS_LIM_2: v_mov_b32_e32 v[[C:[0-9]+]], s2
-; CST_BUS_LIM_2: v_max3_f32 v{{[0-9]+}}, s0, s1, v[[C]]
+; CST_BUS_LIM_2: v_max_f32_e64 v[[FMAX:[0-9]+]], s0, s1
+; CST_BUS_LIM_2: v_max_f32_e32 v{{[0-9]+}}, s2, v[[FMAX]]
 define amdgpu_ps void @test_max3_f32_sss(float inreg %a, float inreg %b, float inreg %c, float addrspace(1)* %out) {
   %fmax = call float @llvm.maxnum.f32(float %a, float %b)
   %fmax3 = call float @llvm.maxnum.f32(float %fmax, float %c)
@@ -320,15 +351,16 @@
 ; SI: v_cvt_f32_f16_e32 v[[C:[0-9]+]], v[[C_F16]]
 ; SI: v_cvt_f32_f16_e32 v[[B:[0-9]+]], v[[B_F16]]
 ; SI: v_cvt_f32_f16_e32 v[[A:[0-9]+]], v[[A_F16]]
-; SI: v_max3_f32 v{{[0-9]+}}, v[[A]], v[[B]], v[[C]]
+; SI: v_max_f32_e32 v[[FMAX:[0-9]+]], v[[A]], v[[B]]
+; SI: v_max_f32_e32 v{{[0-9]+}}, v[[FMAX]], v[[C]]
 ; VI: v_mov_b32_e32 v[[B:[0-9]+]], s1
 ; VI: v_max_f16_e32 v[[MAX:[0-9]+]], s0, v[[B]]
 ; VI: v_max_f16_e32 v{{[0-9]+}}, s2, v[[MAX]]
 ; GFX9: v_mov_b32_e32 v[[B:[0-9]+]], s1
-; GFX9: v_mov_b32_e32 v[[C:[0-9]+]], s2
-; GFX9: v_max3_f16 v{{[0-9]+}}, s0, v[[B]], v[[C]]
-; GFX10: v_mov_b32_e32 v[[C:[0-9]+]], s2
-; GFX10: v_max3_f16 v{{[0-9]+}}, s0, s1, v[[C]]
+; GFX9: v_max_f16_e32 v[[FMAX:[0-9]+]], s0, v[[B]]
+; GFX9: v_max_f16_e32 v{{[0-9]+}}, s2, v[[FMAX]]
+; GFX10: v_max_f16_e64 v[[FMAX:[0-9]+]], s0, s1
+; GFX10: v_max_f16_e32 v{{[0-9]+}}, s2, v[[FMAX]]
 define amdgpu_ps void @test_max3_f16_sss(half inreg %a, half inreg %b, half inreg %c, half addrspace(1)* %out) {
   %fmax = call half @llvm.maxnum.f16(half %a, half %b)
   %fmax3 = call half @llvm.maxnum.f16(half %fmax, half %c)
Index: llvm/test/CodeGen/AMDGPU/fmin3.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/fmin3.ll
+++ llvm/test/CodeGen/AMDGPU/fmin3.ll
@@ -1,13 +1,20 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,CST_BUS_LIM_1 %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CST_BUS_LIM_1 %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SI_VI,CST_BUS_LIM_1 %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SI_VI,CST_BUS_LIM_1 %s
 ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9_10,CST_BUS_LIM_1 %s
 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10,GFX9_10,CST_BUS_LIM_2 %s
 
 ; GCN-LABEL: {{^}}test_fmin3_olt_0_f32:
-; GCN: buffer_load_dword [[REGC:v[0-9]+]]
-; GCN: buffer_load_dword [[REGB:v[0-9]+]]
 ; GCN: buffer_load_dword [[REGA:v[0-9]+]]
-; GCN: v_min3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; GCN: buffer_load_dword [[REGB:v[0-9]+]]
+; GCN: buffer_load_dword [[REGC:v[0-9]+]]
+; SI_VI: v_mul_f32_e32 [[QUIET_A:v[0-9]+]], 1.0, [[REGA]]
+; SI_VI: v_mul_f32_e32 [[QUIET_B:v[0-9]+]], 1.0, [[REGB]]
+; GFX9_10: v_max_f32_e32 [[QUIET_A:v[0-9]+]], [[REGA]], [[REGA]]
+; GFX9_10: v_max_f32_e32 [[QUIET_B:v[0-9]+]], [[REGB]], [[REGB]]
+; GCN: v_min_f32_e32 [[FMIN:v[0-9]+]], [[QUIET_A]], [[QUIET_B]]
+; SI_VI: v_mul_f32_e32 [[QUIET_C:v[0-9]+]], 1.0, [[REGC]]
+; GFX9_10: v_max_f32_e32 [[QUIET_C:v[0-9]+]], [[REGC]], [[REGC]]
+; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], [[FMIN]], [[QUIET_C]]
 ; GCN: buffer_store_dword [[RESULT]],
 define amdgpu_kernel void @test_fmin3_olt_0_f32(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #0 {
   %a = load volatile float, float addrspace(1)* %aptr, align 4
@@ -21,10 +28,17 @@
 
 ; Commute operand of second fmin
 ; GCN-LABEL: {{^}}test_fmin3_olt_1_f32:
-; GCN: buffer_load_dword [[REGB:v[0-9]+]]
 ; GCN: buffer_load_dword [[REGA:v[0-9]+]]
+; GCN: buffer_load_dword [[REGB:v[0-9]+]]
 ; GCN: buffer_load_dword [[REGC:v[0-9]+]]
-; GCN: v_min3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; SI_VI: v_mul_f32_e32 [[QUIET_A:v[0-9]+]], 1.0, [[REGA]]
+; SI_VI: v_mul_f32_e32 [[QUIET_B:v[0-9]+]], 1.0, [[REGB]]
+; GFX9_10: v_max_f32_e32 [[QUIET_A:v[0-9]+]], [[REGA]], [[REGA]]
+; GFX9_10: v_max_f32_e32 [[QUIET_B:v[0-9]+]], [[REGB]], [[REGB]]
+; GCN: v_min_f32_e32 [[FMIN:v[0-9]+]], [[QUIET_A]], [[QUIET_B]]
+; SI_VI: v_mul_f32_e32 [[QUIET_C:v[0-9]+]], 1.0, [[REGC]]
+; GFX9_10: v_max_f32_e32 [[QUIET_C:v[0-9]+]], [[REGC]], [[REGC]]
+; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], [[QUIET_C]], [[FMIN]]
 ; GCN: buffer_store_dword [[RESULT]],
 define amdgpu_kernel void @test_fmin3_olt_1_f32(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #0 {
   %a = load volatile float, float addrspace(1)* %aptr, align 4
@@ -37,17 +51,29 @@
 }
 
 ; GCN-LABEL: {{^}}test_fmin3_olt_0_f16:
-; GCN: buffer_load_ushort [[REGC:v[0-9]+]]
-; GCN: buffer_load_ushort [[REGB:v[0-9]+]]
 ; GCN: buffer_load_ushort [[REGA:v[0-9]+]]
+; GCN: buffer_load_ushort [[REGB:v[0-9]+]]
+; GCN: buffer_load_ushort [[REGC:v[0-9]+]]
 
-; SI: v_min3_f32 [[RESULT_F32:v[0-9]+]],
+; SI-DAG: v_cvt_f32_f16_e32 [[CVT_A:v[0-9]+]], [[REGA]]
+; SI-DAG: v_cvt_f32_f16_e32 [[CVT_B:v[0-9]+]], [[REGB]]
+; SI-DAG: v_cvt_f32_f16_e32 [[CVT_C:v[0-9]+]], [[REGC]]
+; SI-DAG: v_mul_f32_e32 [[QUIET_A:v[0-9]+]], 1.0, [[CVT_A]]
+; SI-DAG: v_mul_f32_e32 [[QUIET_B:v[0-9]+]], 1.0, [[CVT_B]]
+; SI-DAG: v_min_f32_e32 [[FMIN:v[0-9]+]], [[QUIET_A]], [[QUIET_B]]
+; SI-DAG: v_mul_f32_e32 [[QUIET_C:v[0-9]+]], 1.0, [[CVT_C]]
+; SI-DAG: v_min_f32_e32 [[RESULT_F32:v[0-9]+]], [[FMIN]], [[QUIET_C]]
 ; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[RESULT]]
 
 ; VI: v_min_f16_e32
 ; VI: v_min_f16_e32 [[RESULT:v[0-9]+]],
 
-; GFX9_10: v_min3_f16 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; GFX9_10: v_max_f16_e32 [[QUIET_A:v[0-9]+]], [[REGA]], [[REGA]]
+; GFX9_10: v_max_f16_e32 [[QUIET_B:v[0-9]+]], [[REGB]], [[REGB]]
+; GFX9_10: v_min_f16_e32 [[FMIN:v[0-9]+]], [[QUIET_A]], [[QUIET_B]]
+; GFX9_10: v_max_f16_e32 [[QUIET_C:v[0-9]+]], [[REGC]], [[REGC]]
+; GFX9_10: v_min_f16_e32 [[RESULT:v[0-9]+]], [[FMIN]], [[QUIET_C]]
+
 ; GCN: buffer_store_short [[RESULT]],
 define amdgpu_kernel void @test_fmin3_olt_0_f16(half addrspace(1)* %out, half addrspace(1)* %aptr, half addrspace(1)* %bptr, half addrspace(1)* %cptr) #0 {
   %a = load volatile half, half addrspace(1)* %aptr, align 2
@@ -68,13 +94,22 @@
 ; SI-DAG: v_cvt_f32_f16_e32 [[CVT_A:v[0-9]+]], [[REGA]]
 ; SI-DAG: v_cvt_f32_f16_e32 [[CVT_B:v[0-9]+]], [[REGB]]
 ; SI-DAG: v_cvt_f32_f16_e32 [[CVT_C:v[0-9]+]], [[REGC]]
-; SI: v_min3_f32 [[RESULT_F32:v[0-9]+]], [[CVT_C]], [[CVT_A]], [[CVT_B]]
+; SI-DAG: v_mul_f32_e32 [[QUIET_A:v[0-9]+]], 1.0, [[CVT_A]]
+; SI-DAG: v_mul_f32_e32 [[QUIET_B:v[0-9]+]], 1.0, [[CVT_B]]
+; SI-DAG: v_min_f32_e32 [[FMIN:v[0-9]+]], [[QUIET_A]], [[QUIET_B]]
+; SI-DAG: v_mul_f32_e32 [[QUIET_C:v[0-9]+]], 1.0, [[CVT_C]]
+; SI-DAG: v_min_f32_e32 [[RESULT_F32:v[0-9]+]], [[QUIET_C]], [[FMIN]]
 ; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[RESULT_F32]]
 
 ; VI: v_min_f16_e32
 ; VI: v_min_f16_e32 [[RESULT:v[0-9]+]],
 
-; GFX9_10: v_min3_f16 [[RESULT:v[0-9]+]], [[REGC]], [[REGA]], [[REGB]]
+; GFX9_10: v_max_f16_e32 [[QUIET_A:v[0-9]+]], [[REGA]], [[REGA]]
+; GFX9_10: v_max_f16_e32 [[QUIET_B:v[0-9]+]], [[REGB]], [[REGB]]
+; GFX9_10: v_min_f16_e32 [[FMIN:v[0-9]+]], [[QUIET_A]], [[QUIET_B]]
+; GFX9_10: v_max_f16_e32 [[QUIET_C:v[0-9]+]], [[REGC]], [[REGC]]
+; GFX9_10: v_min_f16_e32 [[RESULT:v[0-9]+]], [[QUIET_C]], [[FMIN]]
+
 ; GCN: buffer_store_short [[RESULT]],
 define amdgpu_kernel void @test_fmin3_olt_1_f16(half addrspace(1)* %out, half addrspace(1)* %aptr, half addrspace(1)* %bptr, half addrspace(1)* %cptr) #0 {
   %a = load volatile half, half addrspace(1)* %aptr, align 2
@@ -221,11 +256,11 @@
 
 ; GCN-LABEL: {{^}}test_fmin3_f32_sss:
 ; CST_BUS_LIM_1: v_mov_b32_e32 v[[B:[0-9]+]], s1
-; CST_BUS_LIM_1: v_mov_b32_e32 v[[C:[0-9]+]], s2
-; CST_BUS_LIM_1: v_min3_f32 v{{[0-9]+}}, s0, v[[B]], v[[C]]
+; CST_BUS_LIM_1: v_min_f32_e32 v[[FMIN:[0-9]+]], s0, v[[B]]
+; CST_BUS_LIM_1: v_min_f32_e32 v{{[0-9]+}}, s2, v[[FMIN]]
 
-; CST_BUS_LIM_2: v_mov_b32_e32 v[[C:[0-9]+]], s2
-; CST_BUS_LIM_2: v_min3_f32 v{{[0-9]+}}, s0, s1, v[[C]]
+; CST_BUS_LIM_2: v_min_f32_e64 v[[FMIN:[0-9]+]], s0, s1
+; CST_BUS_LIM_2: v_min_f32_e32 v{{[0-9]+}}, s2, v[[FMIN]]
 define amdgpu_ps void @test_fmin3_f32_sss(float inreg %a, float inreg %b, float inreg %c, float addrspace(1)* %out) {
   %fmin = call float @llvm.minnum.f32(float %a, float %b)
   %fmin3 = call float @llvm.minnum.f32(float %fmin, float %c)
@@ -335,15 +370,16 @@
 ; SI: v_cvt_f32_f16_e32 v[[C:[0-9]+]], v[[C_F16]]
 ; SI: v_cvt_f32_f16_e32 v[[B:[0-9]+]], v[[B_F16]]
 ; SI: v_cvt_f32_f16_e32 v[[A:[0-9]+]], v[[A_F16]]
-; SI: v_min3_f32 v{{[0-9]+}}, v[[A]], v[[B]], v[[C]]
+; SI: v_min_f32_e32 v[[FMIN:[0-9]+]], v[[A]], v[[B]]
+; SI: v_min_f32_e32 v{{[0-9]+}}, v[[FMIN]], v[[C]]
 ; VI: v_mov_b32_e32 v[[B:[0-9]+]], s1
 ; VI: v_min_f16_e32 v[[MIN:[0-9]+]], s0, v[[B]]
 ; VI: v_min_f16_e32 v{{[0-9]+}}, s2, v[[MIN]]
 ; GFX9: v_mov_b32_e32 v[[B:[0-9]+]], s1
-; GFX9: v_mov_b32_e32 v[[C:[0-9]+]], s2
-; GFX9: v_min3_f16 v{{[0-9]+}}, s0, v[[B]], v[[C]]
-; GFX10: v_mov_b32_e32 v[[C:[0-9]+]], s2
-; GFX10: v_min3_f16 v{{[0-9]+}}, s0, s1, v[[C]]
+; GFX9: v_min_f16_e32 v[[FMIN:[0-9]+]], s0, v[[B]]
+; GFX9: v_min_f16_e32 v{{[0-9]+}}, s2, v[[FMIN]]
+; GFX10: v_min_f16_e64 v[[FMIN:[0-9]+]], s0, s1
+; GFX10: v_min_f16_e32 v{{[0-9]+}}, s2, v[[FMIN]]
 define amdgpu_ps void @test_min3_f16_sss(half inreg %a, half inreg %b, half inreg %c, half addrspace(1)* %out) {
   %fmin = call half @llvm.minnum.f16(half %a, half %b)
   %fmin3 = call half @llvm.minnum.f16(half %fmin, half %c)
Index: llvm/test/CodeGen/AMDGPU/known-never-snan.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/known-never-snan.ll
+++ llvm/test/CodeGen/AMDGPU/known-never-snan.ll
@@ -540,6 +540,9 @@
 ; GCN-LABEL: v_test_known_not_snan_fmin3_input_fmed3_r_i_i_f32:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_mul_f32_e32 v1, 1.0, v1
+; GCN-NEXT:    v_mul_f32_e32 v0, 1.0, v0
+; GCN-NEXT:    v_mul_f32_e32 v2, 1.0, v2
 ; GCN-NEXT:    v_min3_f32 v0, v0, v1, v2
 ; GCN-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
Index: llvm/test/CodeGen/AMDGPU/max3.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/max3.ll
+++ llvm/test/CodeGen/AMDGPU/max3.ll
@@ -302,8 +302,8 @@
 }
 
 ; GCN-LABEL: {{^}}test_max3_i32_ssv:
-; CST_BUS_LIM_1: v_mov_b32_e32 v[[B:[0-9]+]], s1
-; CST_BUS_LIM_1: v_max3_i32 v{{[0-9]+}}, s0, v[[B]], v0
+; CST_BUS_LIM_1: s_max_i32 s[[S_MAX:[0-9]+]], s0, s1
+; CST_BUS_LIM_1: v_max_i32_e32 v{{[0-9]+}}, s[[S_MAX]], v0
 
 ; CST_BUS_LIM_2: v_max3_i32 v{{[0-9]+}}, s0, s1, v0
 define amdgpu_ps void @test_max3_i32_ssv(i32 inreg %a, i32 inreg %b, i32 %c, i32 addrspace(1)* %out) {
@@ -314,8 +314,8 @@
 }
 
 ; GCN-LABEL: {{^}}test_max3_i32_vss:
-; CST_BUS_LIM_1: v_mov_b32_e32 v[[C:[0-9]+]], s1
-; CST_BUS_LIM_1: v_max3_i32 v{{[0-9]+}}, v0, s0, v[[C]]
+; CST_BUS_LIM_1: v_max_i32_e32 v[[MAX:[0-9]+]], s0, v0
+; CST_BUS_LIM_1: v_max_i32_e32 v{{[0-9]+}}, s1, v[[MAX]]
 
 ; CST_BUS_LIM_2: v_max3_i32 v{{[0-9]+}}, v0, s0, s1
 define amdgpu_ps void @test_max3_i32_vss(i32 %a, i32 inreg %b, i32 inreg %c, i32 addrspace(1)* %out) {
@@ -326,12 +326,8 @@
 }
 
 ; GCN-LABEL: {{^}}test_max3_i32_sss:
-; CST_BUS_LIM_1: v_mov_b32_e32 v[[B:[0-9]+]], s1
-; CST_BUS_LIM_1: v_mov_b32_e32 v[[C:[0-9]+]], s2
-; CST_BUS_LIM_1: v_max3_i32 v{{[0-9]+}}, s0, v[[B]], v[[C]]
-
-; CST_BUS_LIM_2: v_mov_b32_e32 v[[C:[0-9]+]], s2
-; CST_BUS_LIM_2: v_max3_i32 v{{[0-9]+}}, s0, s1, v[[C]]
+; GCN: s_max_i32 s[[S_MAX:[0-9]+]], s0, s1
+; GCN: s_max_i32 s{{[0-9]+}}, s[[S_MAX]], s2
 define amdgpu_ps void @test_max3_i32_sss(i32 inreg %a, i32 inreg %b, i32 inreg %c, i32 addrspace(1)* %out) {
   %max = call i32 @llvm.smax.i32(i32 %a, i32 %b)
   %max3 = call i32 @llvm.smax.i32(i32 %max, i32 %c)
@@ -389,15 +385,16 @@
 
 ; GCN-LABEL: {{^}}test_max3_i16_ssv:
 ; SI: v_bfe_i32 v[[C:[0-9]+]], v0, 0, 16
-; SI: s_sext_i32_i16 s[[B_TMP:[0-9]+]], s1
+; SI: s_sext_i32_i16 s[[B:[0-9]+]], s1
 ; SI: s_sext_i32_i16 s[[A:[0-9]+]], s0
-; SI: v_mov_b32_e32 v[[B:[0-9]+]], s[[B_TMP]]
-; SI: v_max3_i32 v{{[0-9]+}}, s[[A]], v[[B]], v[[C]]
+; SI: s_max_i32 s[[MAX:[0-9]+]], s[[A]], s[[B]]
+; SI: v_max_i32_e32 v{{[0-9]+}}, s[[MAX]], v[[C]]
 ; VI: v_mov_b32_e32 v[[B:[0-9]+]], s1
 ; VI: v_max_i16_e32 v[[MAX:[0-9]+]], s0, v[[B]]
 ; VI: v_max_i16_e32 v{{[0-9]+}}, v[[MAX]], v0
 ; GFX9: v_mov_b32_e32 v[[B:[0-9]+]], s1
-; GFX9: v_max3_i16 v{{[0-9]+}}, s0, v[[B]], v0
+; GFX9: v_max_i16_e32 v[[MAX:[0-9]+]], s0, v[[B]]
+; GFX9: v_max_i16_e32 v{{[0-9]+}}, v[[MAX]], v0
 ; GFX10: v_max3_i16 v{{[0-9]+}}, s0, s1, v0
 define amdgpu_ps void @test_max3_i16_ssv(i16 inreg %a, i16 inreg %b, i16 %c, i16 addrspace(1)* %out) {
   %max = call i16 @llvm.smax.i16(i16 %a, i16 %b)
@@ -407,15 +404,15 @@
 }
 
 ; GCN-LABEL: {{^}}test_max3_i16_vss:
-; SI: s_sext_i32_i16 s[[C_TMP:[0-9]+]], s1
+; SI: s_sext_i32_i16 s[[C:[0-9]+]], s1
 ; SI: s_sext_i32_i16 s[[B:[0-9]+]], s0
 ; SI: v_bfe_i32 v[[A:[0-9]+]], v0, 0, 16
-; SI: v_mov_b32_e32 v[[C:[0-9]+]], s[[C_TMP]]
-; SI: v_max3_i32 v{{[0-9]+}}, v[[A]], s[[B]], v[[C]]
+; SI: v_max_i32_e32 v[[MAX:[0-9]+]], s[[B]], v[[A]]
+; SI: v_max_i32_e32 v{{[0-9]+}}, s[[C]], v[[MAX]]
 ; VI: v_max_i16_e32 v[[MAX:[0-9]+]], s0, v0
 ; VI: v_max_i16_e32 v{{[0-9]+}}, s1, v[[MAX]]
-; GFX9: v_mov_b32_e32 v[[C:[0-9]+]], s1
-; GFX9: v_max3_i16 v{{[0-9]+}}, v0, s0, v[[C]]
+; GFX9: v_max_i16_e32 v[[MAX:[0-9]+]], s0, v0
+; GFX9: v_max_i16_e32 v{{[0-9]+}}, s1, v[[MAX]]
 ; GFX10: v_max3_i16 v{{[0-9]+}}, v0, s0, s1
 define amdgpu_ps void @test_max3_i16_vss(i16 %a, i16 inreg %b, i16 inreg %c, i16 addrspace(1)* %out) {
   %max = call i16 @llvm.smax.i16(i16 %a, i16 %b)
@@ -425,20 +422,19 @@
 }
 
 ; GCN-LABEL: {{^}}test_max3_i16_sss:
-; SI: s_sext_i32_i16 s[[C_TMP:[0-9]+]], s2
-; SI: s_sext_i32_i16 s[[B_TMP:[0-9]+]], s1
+; SI: s_sext_i32_i16 s[[C:[0-9]+]], s2
+; SI: s_sext_i32_i16 s[[B:[0-9]+]], s1
 ; SI: s_sext_i32_i16 s[[A:[0-9]+]], s0
-; SI: v_mov_b32_e32 v[[B:[0-9]+]], s[[B_TMP]]
-; SI: v_mov_b32_e32 v[[C:[0-9]+]], s[[C_TMP]]
-; SI: v_max3_i32 v{{[0-9]+}}, s[[A]], v[[B]], v[[C]]
+; SI: s_max_i32 s[[S_MAX:[0-9]+]], s[[A]], s[[B]]
+; SI: s_max_i32 s{{[0-9]+}}, s[[S_MAX]], s[[C]]
 ; VI: v_mov_b32_e32 v[[B:[0-9]+]], s1
 ; VI: v_max_i16_e32 v[[MAX:[0-9]+]], s0, v[[B]]
 ; VI: v_max_i16_e32 v{{[0-9]+}}, s2, v[[MAX]]
 ; GFX9: v_mov_b32_e32 v[[B:[0-9]+]], s1
-; GFX9: v_mov_b32_e32 v[[C:[0-9]+]], s2
-; GFX9: v_max3_i16 v{{[0-9]+}}, s0, v[[B]], v[[C]]
-; GFX10: v_mov_b32_e32 v[[C:[0-9]+]], s2
-; GFX10: v_max3_i16 v{{[0-9]+}}, s0, s1, v[[C]]
+; GFX9: v_max_i16_e32 v[[MAX:[0-9]+]], s0, v[[B]]
+; GFX9: v_max_i16_e32 v{{[0-9]+}}, s2, v[[MAX]]
+; GFX10: v_max_i16 v[[MAX:[0-9]+]], s0, s1
+; GFX10: v_max_i16 v{{[0-9]+}}, v[[MAX]], s2
 define amdgpu_ps void @test_max3_i16_sss(i16 inreg %a, i16 inreg %b, i16 inreg %c, i16 addrspace(1)* %out) {
   %max = call i16 @llvm.smax.i16(i16 %a, i16 %b)
   %max3 = call i16 @llvm.smax.i16(i16 %max, i16 %c)
Index: llvm/test/CodeGen/AMDGPU/min3.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/min3.ll
+++ llvm/test/CodeGen/AMDGPU/min3.ll
@@ -361,8 +361,8 @@
 }
 
 ; GCN-LABEL: {{^}}test_min3_i32_ssv:
-; CST_BUS_LIM_1: v_mov_b32_e32 v[[B:[0-9]+]], s1
-; CST_BUS_LIM_1: v_min3_i32 v{{[0-9]+}}, s0, v[[B]], v0
+; CST_BUS_LIM_1: s_min_i32 s[[S_MIN:[0-9]+]], s0, s1
+; CST_BUS_LIM_1: v_min_i32_e32 v{{[0-9]+}}, s[[S_MIN]], v0
 
 ; CST_BUS_LIM_2: v_min3_i32 v{{[0-9]+}}, s0, s1, v0
 define amdgpu_ps void @test_min3_i32_ssv(i32 inreg %a, i32 inreg %b, i32 %c, i32 addrspace(1)* %out) {
@@ -373,8 +373,8 @@
 }
 
 ; GCN-LABEL: {{^}}test_min3_i32_vss:
-; CST_BUS_LIM_1: v_mov_b32_e32 v[[C:[0-9]+]], s1
-; CST_BUS_LIM_1: v_min3_i32 v{{[0-9]+}}, v0, s0, v[[C]]
+; CST_BUS_LIM_1: v_min_i32_e32 v[[MIN:[0-9]+]], s0, v0
+; CST_BUS_LIM_1: v_min_i32_e32 v{{[0-9]+}}, s1, v[[MIN]]
 
 ; CST_BUS_LIM_2: v_min3_i32 v{{[0-9]+}}, v0, s0, s1
 define amdgpu_ps void @test_min3_i32_vss(i32 %a, i32 inreg %b, i32 inreg %c, i32 addrspace(1)* %out) {
@@ -385,12 +385,8 @@
 }
 
 ; GCN-LABEL: {{^}}test_min3_i32_sss:
-; CST_BUS_LIM_1: v_mov_b32_e32 v[[B:[0-9]+]], s1
-; CST_BUS_LIM_1: v_mov_b32_e32 v[[C:[0-9]+]], s2
-; CST_BUS_LIM_1: v_min3_i32 v{{[0-9]+}}, s0, v[[B]], v[[C]]
-
-; CST_BUS_LIM_2: v_mov_b32_e32 v[[C:[0-9]+]], s2
-; CST_BUS_LIM_2: v_min3_i32 v{{[0-9]+}}, s0, s1, v[[C]]
+; GCN: s_min_i32 s[[S_MIN:[0-9]+]], s0, s1
+; GCN: s_min_i32 s{{[0-9]+}}, s[[S_MIN]], s2
 define amdgpu_ps void @test_min3_i32_sss(i32 inreg %a, i32 inreg %b, i32 inreg %c, i32 addrspace(1)* %out) {
   %min = call i32 @llvm.smin.i32(i32 %a, i32 %b)
   %min3 = call i32 @llvm.smin.i32(i32 %min, i32 %c)
@@ -448,15 +444,16 @@
 
 ; GCN-LABEL: {{^}}test_min3_i16_ssv:
 ; SI: v_bfe_i32 v[[C:[0-9]+]], v0, 0, 16
-; SI: s_sext_i32_i16 s[[B_TMP:[0-9]+]], s1
+; SI: s_sext_i32_i16 s[[B:[0-9]+]], s1
 ; SI: s_sext_i32_i16 s[[A:[0-9]+]], s0
-; SI: v_mov_b32_e32 v[[B:[0-9]+]], s[[B_TMP]]
-; SI: v_min3_i32 v{{[0-9]+}}, s[[A]], v[[B]], v[[C]]
+; SI: s_min_i32 s[[MIN:[0-9]+]], s[[A]], s[[B]]
+; SI: v_min_i32_e32 v{{[0-9]+}}, s[[MIN]], v[[C]]
 ; VI: v_mov_b32_e32 v[[B:[0-9]+]], s1
 ; VI: v_min_i16_e32 v[[MIN:[0-9]+]], s0, v[[B]]
 ; VI: v_min_i16_e32 v{{[0-9]+}}, v[[MIN]], v0
 ; GFX9: v_mov_b32_e32 v[[B:[0-9]+]], s1
-; GFX9: v_min3_i16 v{{[0-9]+}}, s0, v[[B]], v0
+; GFX9: v_min_i16_e32 v[[MIN:[0-9]+]], s0, v[[B]]
+; GFX9: v_min_i16_e32 v{{[0-9]+}}, v[[MIN]], v0
 ; GFX10: v_min3_i16 v{{[0-9]+}}, s0, s1, v0
 define amdgpu_ps void @test_min3_i16_ssv(i16 inreg %a, i16 inreg %b, i16 %c, i16 addrspace(1)* %out) {
   %min = call i16 @llvm.smin.i16(i16 %a, i16 %b)
@@ -466,15 +463,15 @@
 }
 
 ; GCN-LABEL: {{^}}test_min3_i16_vss:
-; SI: s_sext_i32_i16 s[[C_TMP:[0-9]+]], s1
+; SI: s_sext_i32_i16 s[[C:[0-9]+]], s1
 ; SI: s_sext_i32_i16 s[[B:[0-9]+]], s0
 ; SI: v_bfe_i32 v[[A:[0-9]+]], v0, 0, 16
-; SI: v_mov_b32_e32 v[[C:[0-9]+]], s[[C_TMP]]
-; SI: v_min3_i32 v{{[0-9]+}}, v[[A]], s[[B]], v[[C]]
+; SI: v_min_i32_e32 v[[MIN:[0-9]+]], s[[B]], v[[A]]
+; SI: v_min_i32_e32 v{{[0-9]+}}, s[[C]], v[[MIN]]
 ; VI: v_min_i16_e32 v[[MIN:[0-9]+]], s0, v0
 ; VI: v_min_i16_e32 v{{[0-9]+}}, s1, v[[MIN]]
-; GFX9: v_mov_b32_e32 v[[C:[0-9]+]], s1
-; GFX9: v_min3_i16 v{{[0-9]+}}, v0, s0, v[[C]]
+; GFX9: v_min_i16_e32 v[[MIN:[0-9]+]], s0, v0
+; GFX9: v_min_i16_e32 v{{[0-9]+}}, s1, v[[MIN]]
 ; GFX10: v_min3_i16 v{{[0-9]+}}, v0, s0, s1
 define amdgpu_ps void @test_min3_i16_vss(i16 %a, i16 inreg %b, i16 inreg %c, i16 addrspace(1)* %out) {
   %min = call i16 @llvm.smin.i16(i16 %a, i16 %b)
@@ -484,20 +481,19 @@
 }
 
 ; GCN-LABEL: {{^}}test_min3_i16_sss:
-; SI: s_sext_i32_i16 s[[C_TMP:[0-9]+]], s2
-; SI: s_sext_i32_i16 s[[B_TMP:[0-9]+]], s1
+; SI: s_sext_i32_i16 s[[C:[0-9]+]], s2
+; SI: s_sext_i32_i16 s[[B:[0-9]+]], s1
 ; SI: s_sext_i32_i16 s[[A:[0-9]+]], s0
-; SI: v_mov_b32_e32 v[[B:[0-9]+]], s[[B_TMP]]
-; SI: v_mov_b32_e32 v[[C:[0-9]+]], s[[C_TMP]]
-; SI: v_min3_i32 v{{[0-9]+}}, s[[A]], v[[B]], v[[C]]
+; SI: s_min_i32 s[[S_MIN:[0-9]+]], s[[A]], s[[B]]
+; SI: s_min_i32 s{{[0-9]+}}, s[[S_MIN]], s[[C]]
 ; VI: v_mov_b32_e32 v[[B:[0-9]+]], s1
 ; VI: v_min_i16_e32 v[[MIN:[0-9]+]], s0, v[[B]]
 ; VI: v_min_i16_e32 v{{[0-9]+}}, s2, v[[MIN]]
 ; GFX9: v_mov_b32_e32 v[[B:[0-9]+]], s1
-; GFX9: v_mov_b32_e32 v[[C:[0-9]+]], s2
-; GFX9: v_min3_i16 v{{[0-9]+}}, s0, v[[B]], v[[C]]
-; GFX10: v_mov_b32_e32 v[[C:[0-9]+]], s2
-; GFX10: v_min3_i16 v{{[0-9]+}}, s0, s1, v[[C]]
+; GFX9: v_min_i16_e32 v[[MIN:[0-9]+]], s0, v[[B]]
+; GFX9: v_min_i16_e32 v{{[0-9]+}}, s2, v[[MIN]]
+; GFX10: v_min_i16 v[[MIN:[0-9]+]], s0, s1
+; GFX10: v_min_i16 v{{[0-9]+}}, v[[MIN]], s2
 define amdgpu_ps void @test_min3_i16_sss(i16 inreg %a, i16 inreg %b, i16 inreg %c, i16 addrspace(1)* %out) {
   %min = call i16 @llvm.smin.i16(i16 %a, i16 %b)
   %min3 = call i16 @llvm.smin.i16(i16 %min, i16 %c)