diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2950,71 +2950,67 @@
 multiclass IntMed3Pat<Instruction med3Inst,
                       SDPatternOperator min,
-                      SDPatternOperator max,
-                      SDPatternOperator min_oneuse,
-                      SDPatternOperator max_oneuse> {
+                      SDPatternOperator max> {
 
   // This matches 16 permutations of
   // min(max(a, b), max(min(a, b), c))
   def : AMDGPUPat <
-  (min (max_oneuse i32:$src0, i32:$src1),
-       (max_oneuse (min_oneuse i32:$src0, i32:$src1), i32:$src2)),
+  (min (max i32:$src0, i32:$src1),
+       (max (min i32:$src0, i32:$src1), i32:$src2)),
   (med3Inst VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2)
 >;
 
   // This matches 16 permutations of
   // max(min(x, y), min(max(x, y), z))
   def : AMDGPUPat <
-  (max (min_oneuse i32:$src0, i32:$src1),
-       (min_oneuse (max_oneuse i32:$src0, i32:$src1), i32:$src2)),
+  (max (min i32:$src0, i32:$src1),
+       (min (max i32:$src0, i32:$src1), i32:$src2)),
   (med3Inst VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2)
 >;
 }
 
-defm : IntMed3Pat<V_MED3_I32_e64, smin, smax, smin_oneuse, smax_oneuse>;
-defm : IntMed3Pat<V_MED3_U32_e64, umin, umax, umin_oneuse, umax_oneuse>;
+defm : IntMed3Pat<V_MED3_I32_e64, smin, smax>;
+defm : IntMed3Pat<V_MED3_U32_e64, umin, umax>;
 
 // This matches 16 permutations of
 // max(min(x, y), min(max(x, y), z))
 class FPMed3Pat<ValueType vt,
                 Instruction med3Inst> : GCNPat<
-  (fmaxnum_like (fminnum_like_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
-                                     (VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
-           (fminnum_like_oneuse (fmaxnum_like_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
-                                                     (VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
-                                (vt (VOP3Mods_nnan vt:$src2, i32:$src2_mods)))),
+  (fmaxnum_like (fminnum_like (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
+                              (VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
+           (fminnum_like (fmaxnum_like (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
+                                       (VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
+                         (vt (VOP3Mods_nnan vt:$src2, i32:$src2_mods)))),
  (med3Inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
 >;
 
 class FP16Med3Pat<ValueType vt,
                   Instruction med3Inst> : GCNPat<
-  (fmaxnum_like (fminnum_like_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
+  (fmaxnum_like (fminnum_like (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
                               (VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
-           (fminnum_like_oneuse (fmaxnum_like_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
-                                (VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
-           (vt (VOP3Mods_nnan vt:$src2, i32:$src2_mods)))),
+           (fminnum_like (fmaxnum_like (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
+                         (VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
+           (vt (VOP3Mods_nnan vt:$src2, i32:$src2_mods)))),
  (med3Inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2, DSTCLAMP.NONE)
 >;
 
 multiclass Int16Med3Pat<Instruction med3Inst,
-                        SDPatternOperator min,
-                        SDPatternOperator max,
-                        SDPatternOperator max_oneuse,
-                        SDPatternOperator min_oneuse> {
+                        SDPatternOperator min,
+                        SDPatternOperator max> {
 
   // This matches 16 permutations of
   // max(min(x, y), min(max(x, y), z))
   def : GCNPat <
-  (max (min_oneuse i16:$src0, i16:$src1),
-       (min_oneuse (max_oneuse i16:$src0, i16:$src1), i16:$src2)),
+  (max (min i16:$src0, i16:$src1),
+       (min (max i16:$src0, i16:$src1), i16:$src2)),
   (med3Inst SRCMODS.NONE, VSrc_b16:$src0, SRCMODS.NONE, VSrc_b16:$src1, SRCMODS.NONE, VSrc_b16:$src2, DSTCLAMP.NONE)
 >;
 
   // This matches 16 permutations of
   // min(max(a, b), max(min(a, b), c))
   def : GCNPat <
-  (min (max_oneuse i16:$src0, i16:$src1),
-       (max_oneuse (min_oneuse i16:$src0, i16:$src1), i16:$src2)),
+  (min (max i16:$src0, i16:$src1),
+       (max (min i16:$src0, i16:$src1), i16:$src2)),
   (med3Inst SRCMODS.NONE, VSrc_b16:$src0, SRCMODS.NONE, VSrc_b16:$src1, SRCMODS.NONE, VSrc_b16:$src2, DSTCLAMP.NONE)
 >;
 }
@@ -3052,8 +3048,8 @@
 let OtherPredicates = [isGFX9Plus] in {
 def : FP16Med3Pat<f16, V_MED3_F16_e64>;
-defm : Int16Med3Pat<V_MED3_I16_e64, smin, smax, smax_oneuse, smin_oneuse>;
-defm : Int16Med3Pat<V_MED3_U16_e64, umin, umax, umax_oneuse, umin_oneuse>;
+defm : Int16Med3Pat<V_MED3_I16_e64, smin, smax>;
+defm : Int16Med3Pat<V_MED3_U16_e64, umin, umax>;
 } // End Predicates = [isGFX9Plus]
 
 class AMDGPUGenericInstruction : GenericInstruction {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.mir
@@ -75,10 +75,8 @@
     ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
     ; GFX6-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[COPY]], [[COPY1]], implicit $exec
-    ; GFX6-NEXT: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[COPY]], [[COPY1]], implicit $exec
-    ; GFX6-NEXT: [[V_MAX_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[V_MIN_I32_e64_]], [[COPY2]], implicit $exec
-    ; GFX6-NEXT: [[V_MIN_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[V_MAX_I32_e64_]], [[V_MAX_I32_e64_1]], implicit $exec
-    ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MIN_I32_e64_1]], implicit [[V_MAX_I32_e64_]]
+    ; GFX6-NEXT: [[V_MED3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec
+    ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_I32_e64_]], implicit [[V_MAX_I32_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
     %2:vgpr(s32) = COPY $vgpr2
@@ -104,11 +102,9 @@
     ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
    ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-    ; GFX6-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[COPY]], [[COPY1]], implicit $exec
     ; GFX6-NEXT: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[COPY]], [[COPY1]], implicit $exec
-    ; GFX6-NEXT: [[V_MAX_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[V_MIN_I32_e64_]], [[COPY2]], implicit $exec
-    ; GFX6-NEXT: [[V_MIN_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[V_MAX_I32_e64_]], [[V_MAX_I32_e64_1]], implicit $exec
-    ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MIN_I32_e64_1]], implicit [[V_MIN_I32_e64_]]
+    ; GFX6-NEXT: [[V_MED3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec
+    ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_I32_e64_]], implicit [[V_MIN_I32_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
     %2:vgpr(s32) = COPY $vgpr2
@@ -134,11 +130,10 @@
     ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-    ; GFX6-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[COPY]], [[COPY1]], implicit $exec
     ; GFX6-NEXT: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[COPY]], [[COPY1]], implicit $exec
-    ; GFX6-NEXT: [[V_MAX_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[V_MIN_I32_e64_]], [[COPY2]], implicit $exec
-    ; GFX6-NEXT: [[V_MIN_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[V_MAX_I32_e64_]], [[V_MAX_I32_e64_1]], implicit $exec
-    ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MIN_I32_e64_1]], implicit [[V_MAX_I32_e64_1]]
+    ; GFX6-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[V_MIN_I32_e64_]], [[COPY2]], implicit $exec
+    ; GFX6-NEXT: [[V_MED3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec
+    ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_I32_e64_]], implicit [[V_MAX_I32_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
     %2:vgpr(s32) = COPY $vgpr2
@@ -165,13 +160,9 @@
     ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
     ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
-    ; GFX6-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[COPY]], [[COPY1]], implicit $exec
-    ; GFX6-NEXT: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[COPY]], [[COPY1]], implicit $exec
-    ; GFX6-NEXT: [[V_MIN_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[COPY2]], [[V_MAX_I32_e64_]], implicit $exec
-    ; GFX6-NEXT: [[V_MAX_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[V_MIN_I32_e64_1]], [[V_MIN_I32_e64_]], implicit $exec
-    ; GFX6-NEXT: [[V_MIN_I32_e64_2:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[COPY3]], [[V_MAX_I32_e64_]], implicit $exec
-    ; GFX6-NEXT: [[V_MAX_I32_e64_2:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[V_MIN_I32_e64_2]], [[V_MIN_I32_e64_]], implicit $exec
-    ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MAX_I32_e64_1]], implicit [[V_MAX_I32_e64_2]]
+    ; GFX6-NEXT: [[V_MED3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec
+    ; GFX6-NEXT: [[V_MED3_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 [[COPY]], [[COPY1]], [[COPY3]], implicit $exec
+    ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_I32_e64_]], implicit [[V_MED3_I32_e64_1]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
     %2:vgpr(s32) = COPY $vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir
@@ -74,10 +74,8 @@
    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
    ; GFX9-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[COPY]], [[COPY1]], implicit $exec
-    ; GFX9-NEXT: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $exec
-    ; GFX9-NEXT: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec
-    ; GFX9-NEXT: [[V_MIN_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[V_MAX_I16_e64_]], [[V_MAX_I16_e64_1]], implicit $exec
-    ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MIN_I16_e64_1]], implicit [[V_MAX_I16_e64_]]
+    ; GFX9-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
+    ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_e64_]]
    %0:vgpr(s32) = COPY $vgpr0
    %1:vgpr(s32) = COPY $vgpr1
    %2:vgpr(s32) = COPY $vgpr2
@@ -119,11 +117,9 @@
    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-    ; GFX9-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[COPY]], [[COPY1]], implicit $exec
    ; GFX9-NEXT: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $exec
-    ; GFX9-NEXT: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec
-    ; GFX9-NEXT: [[V_MIN_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[V_MAX_I16_e64_]], [[V_MAX_I16_e64_1]], implicit $exec
-    ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MIN_I16_e64_1]], implicit [[V_MIN_I16_e64_]]
+    ; GFX9-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
+    ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MIN_I16_e64_]]
    %0:vgpr(s32) = COPY $vgpr0
    %1:vgpr(s32) = COPY $vgpr1
    %2:vgpr(s32) = COPY $vgpr2
@@ -165,11 +161,10 @@
    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-    ; GFX9-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[COPY]], [[COPY1]], implicit $exec
    ; GFX9-NEXT: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $exec
-    ; GFX9-NEXT: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec
-    ; GFX9-NEXT: [[V_MIN_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[V_MAX_I16_e64_]], [[V_MAX_I16_e64_1]], implicit $exec
-    ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MIN_I16_e64_1]], implicit [[V_MAX_I16_e64_1]]
+    ; GFX9-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec
+    ; GFX9-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
+    ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_e64_]]
    %0:vgpr(s32) = COPY $vgpr0
    %1:vgpr(s32) = COPY $vgpr1
    %2:vgpr(s32) = COPY $vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.mir
@@ -75,10 +75,8 @@
    ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
    ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
    ; GFX6-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[COPY]], [[COPY1]], implicit $exec
-    ; GFX6-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[COPY]], [[COPY1]], implicit $exec
-    ; GFX6-NEXT: [[V_MAX_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[V_MIN_U32_e64_]], [[COPY2]], implicit $exec
-    ; GFX6-NEXT: [[V_MIN_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[V_MAX_U32_e64_]], [[V_MAX_U32_e64_1]], implicit $exec
-    ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MIN_U32_e64_1]], implicit [[V_MAX_U32_e64_]]
+    ; GFX6-NEXT: [[V_MED3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec
+    ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_U32_e64_]], implicit [[V_MAX_U32_e64_]]
    %0:vgpr(s32) = COPY $vgpr0
    %1:vgpr(s32) = COPY $vgpr1
    %2:vgpr(s32) = COPY $vgpr2
@@ -104,11 +102,9 @@
    ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
    ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
    ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-    ; GFX6-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[COPY]], [[COPY1]], implicit $exec
    ; GFX6-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[COPY]], [[COPY1]], implicit $exec
-    ; GFX6-NEXT: [[V_MAX_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[V_MIN_U32_e64_]], [[COPY2]], implicit $exec
-    ; GFX6-NEXT: [[V_MIN_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[V_MAX_U32_e64_]], [[V_MAX_U32_e64_1]], implicit $exec
-    ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MIN_U32_e64_1]], implicit [[V_MIN_U32_e64_]]
+    ; GFX6-NEXT: [[V_MED3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec
+    ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_U32_e64_]], implicit [[V_MIN_U32_e64_]]
    %0:vgpr(s32) = COPY $vgpr0
    %1:vgpr(s32) = COPY $vgpr1
    %2:vgpr(s32) = COPY $vgpr2
@@ -134,11 +130,10 @@
    ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
    ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
    ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-    ; GFX6-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[COPY]], [[COPY1]], implicit $exec
    ; GFX6-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[COPY]], [[COPY1]], implicit $exec
-    ; GFX6-NEXT: [[V_MAX_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[V_MIN_U32_e64_]], [[COPY2]], implicit $exec
-    ; GFX6-NEXT: [[V_MIN_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[V_MAX_U32_e64_]], [[V_MAX_U32_e64_1]], implicit $exec
-    ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MIN_U32_e64_1]], implicit [[V_MAX_U32_e64_1]]
+    ; GFX6-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[V_MIN_U32_e64_]], [[COPY2]], implicit $exec
+    ; GFX6-NEXT: [[V_MED3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec
+    ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_U32_e64_]], implicit [[V_MAX_U32_e64_]]
    %0:vgpr(s32) = COPY $vgpr0
    %1:vgpr(s32) = COPY $vgpr1
    %2:vgpr(s32) = COPY $vgpr2
@@ -165,13 +160,9 @@
    ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
    ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
    ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
-    ; GFX6-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[COPY]], [[COPY1]], implicit $exec
-    ; GFX6-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[COPY]], [[COPY1]], implicit $exec
-    ; GFX6-NEXT: [[V_MIN_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[COPY2]], [[V_MAX_U32_e64_]], implicit $exec
-    ; GFX6-NEXT: [[V_MAX_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[V_MIN_U32_e64_1]], [[V_MIN_U32_e64_]], implicit $exec
-    ; GFX6-NEXT: [[V_MIN_U32_e64_2:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[COPY3]], [[V_MAX_U32_e64_]], implicit $exec
-    ; GFX6-NEXT: [[V_MAX_U32_e64_2:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[V_MIN_U32_e64_2]], [[V_MIN_U32_e64_]], implicit $exec
-    ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MAX_U32_e64_1]], implicit [[V_MAX_U32_e64_2]]
+    ; GFX6-NEXT: [[V_MED3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec
+    ; GFX6-NEXT: [[V_MED3_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[COPY]], [[COPY1]], [[COPY3]], implicit $exec
+    ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_U32_e64_]], implicit [[V_MED3_U32_e64_1]]
    %0:vgpr(s32) = COPY $vgpr0
    %1:vgpr(s32) = COPY $vgpr1
    %2:vgpr(s32) = COPY $vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir
@@ -74,10 +74,8 @@
    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
    ; GFX9-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[COPY]], [[COPY1]], implicit $exec
-    ; GFX9-NEXT: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $exec
-    ; GFX9-NEXT: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec
-    ; GFX9-NEXT: [[V_MIN_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[V_MAX_U16_e64_]], [[V_MAX_U16_e64_1]], implicit $exec
-    ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MIN_U16_e64_1]], implicit [[V_MAX_U16_e64_]]
+    ; GFX9-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
+    ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MAX_U16_e64_]]
    %0:vgpr(s32) = COPY $vgpr0
    %1:vgpr(s32) = COPY $vgpr1
    %2:vgpr(s32) = COPY $vgpr2
@@ -119,11 +117,9 @@
    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-    ; GFX9-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[COPY]], [[COPY1]], implicit $exec
    ; GFX9-NEXT: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $exec
-    ; GFX9-NEXT: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec
-    ; GFX9-NEXT: [[V_MIN_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[V_MAX_U16_e64_]], [[V_MAX_U16_e64_1]], implicit $exec
-    ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MIN_U16_e64_1]], implicit [[V_MIN_U16_e64_]]
+    ; GFX9-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
+    ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MIN_U16_e64_]]
    %0:vgpr(s32) = COPY $vgpr0
    %1:vgpr(s32) = COPY $vgpr1
    %2:vgpr(s32) = COPY $vgpr2
@@ -165,11 +161,10 @@
    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-    ; GFX9-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[COPY]], [[COPY1]], implicit $exec
    ; GFX9-NEXT: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $exec
-    ; GFX9-NEXT: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec
-    ; GFX9-NEXT: [[V_MIN_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[V_MAX_U16_e64_]], [[V_MAX_U16_e64_1]], implicit $exec
-    ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MIN_U16_e64_1]], implicit [[V_MAX_U16_e64_1]]
+    ; GFX9-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec
+    ; GFX9-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
+    ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MAX_U16_e64_]]
    %0:vgpr(s32) = COPY $vgpr0
    %1:vgpr(s32) = COPY $vgpr1
    %2:vgpr(s32) = COPY $vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/smed3.ll b/llvm/test/CodeGen/AMDGPU/smed3.ll
--- a/llvm/test/CodeGen/AMDGPU/smed3.ll
+++ b/llvm/test/CodeGen/AMDGPU/smed3.ll
@@ -595,7 +595,9 @@
 }
 
 ; GCN-LABEL: {{^}}s_test_smed3_i32_pat_0_multi_use_0:
-; GCN-NOT: v_med3_i32
+; GCN: s_min_i32
+; GCN-NOT: {{s_min_i32|s_max_i32}}
+; GCN: v_med3_i32
 define amdgpu_kernel void @s_test_smed3_i32_pat_0_multi_use_0(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
 bb:
   %tmp0 = call i32 @smin(i32 %x, i32 %y)
@@ -608,7 +610,9 @@
 }
 
 ; GCN-LABEL: {{^}}s_test_smed3_i32_pat_0_multi_use_1:
-; GCN-NOT: v_med3_i32
+; GCN: s_max_i32
+; GCN-NOT: {{s_min_i32|s_max_i32}}
+; GCN: v_med3_i32
 define amdgpu_kernel void @s_test_smed3_i32_pat_0_multi_use_1(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
 bb:
   %tmp0 = call i32 @smin(i32 %x, i32 %y)
@@ -621,7 +625,10 @@
 }
 
 ; GCN-LABEL: {{^}}s_test_smed3_i32_pat_0_multi_use_2:
-; GCN-NOT: v_med3_i32
+; GCN: s_max_i32
+; GCN: s_min_i32
+; GCN-NOT: {{s_min_i32|s_max_i32}}
+; GCN: v_med3_i32
 define amdgpu_kernel void @s_test_smed3_i32_pat_0_multi_use_2(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
 bb:
   %tmp0 = call i32 @smin(i32 %x, i32 %y)
@@ -634,6 +641,7 @@
 }
 
 ; GCN-LABEL: {{^}}s_test_smed3_i32_pat_0_multi_use_result:
+; GCN-NOT: {{s_min_i32|s_max_i32}}
 ; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 define amdgpu_kernel void @s_test_smed3_i32_pat_0_multi_use_result(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
 bb:
@@ -646,6 +654,26 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}s_test_smed3_reuse_bounds:
+; GCN-NOT: {{s_min_i32|s_max_i32}}
+; GCN: v_med3_i32 v{{[0-9]+}}, [[B0:s[0-9]+]], [[B1:v[0-9]+]], v{{[0-9]+}}
+; GCN: v_med3_i32 v{{[0-9]+}}, [[B0]], [[B1]], v{{[0-9]+}}
+define amdgpu_kernel void @s_test_smed3_reuse_bounds(i32 addrspace(1)* %arg, i32 %b0, i32 %b1, i32 %x, i32 %y) #1 {
+bb:
+  %lo = call i32 @smin(i32 %b0, i32 %b1)
+  %hi = call i32 @smax(i32 %b0, i32 %b1)
+
+  %tmp0 = call i32 @smin(i32 %x, i32 %hi)
+  %z0 = call i32 @smax(i32 %tmp0, i32 %lo)
+
+  %tmp1 = call i32 @smin(i32 %y, i32 %hi)
+  %z1 = call i32 @smax(i32 %tmp1, i32 %lo)
+
+  store volatile i32 %z0, i32 addrspace(1)* %arg
+  store volatile i32 %z1, i32 addrspace(1)* %arg
+  ret void
+}
+
 ; GCN-LABEL: {{^}}v_test_smed3_i16_pat_0:
 ; SI: v_med3_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
diff --git a/llvm/test/CodeGen/AMDGPU/umed3.ll b/llvm/test/CodeGen/AMDGPU/umed3.ll
--- a/llvm/test/CodeGen/AMDGPU/umed3.ll
+++ b/llvm/test/CodeGen/AMDGPU/umed3.ll
@@ -581,7 +581,9 @@
 }
 
 ; GCN-LABEL: {{^}}s_test_umed3_i32_pat_0_multi_use_0:
-; GCN-NOT: v_med3_u32
+; GCN: s_min_u32
+; GCN-NOT: {{s_min_u32|s_max_u32}}
+; GCN: v_med3_u32
 define amdgpu_kernel void @s_test_umed3_i32_pat_0_multi_use_0(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
 bb:
   %tmp0 = call i32 @umin(i32 %x, i32 %y)
@@ -594,7 +596,9 @@
 }
 
 ; GCN-LABEL: {{^}}s_test_umed3_i32_pat_0_multi_use_1:
-; GCN-NOT: v_med3_u32
+; GCN: s_max_u32
+; GCN-NOT: {{s_min_u32|s_max_u32}}
+; GCN: v_med3_u32
 define amdgpu_kernel void @s_test_umed3_i32_pat_0_multi_use_1(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
 bb:
   %tmp0 = call i32 @umin(i32 %x, i32 %y)
@@ -607,7 +611,10 @@
 }
 
 ; GCN-LABEL: {{^}}s_test_umed3_i32_pat_0_multi_use_2:
-; GCN-NOT: v_med3_u32
+; GCN: s_max_u32
+; GCN: s_min_u32
+; GCN-NOT: {{s_min_u32|s_max_u32}}
+; GCN: v_med3_u32
 define amdgpu_kernel void @s_test_umed3_i32_pat_0_multi_use_2(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
 bb:
   %tmp0 = call i32 @umin(i32 %x, i32 %y)
@@ -620,6 +627,7 @@
 }
 
 ; GCN-LABEL: {{^}}s_test_umed3_i32_pat_0_multi_use_result:
+; GCN-NOT: {{s_min_u32|s_max_u32}}
 ; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 define amdgpu_kernel void @s_test_umed3_i32_pat_0_multi_use_result(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
 bb:
@@ -632,6 +640,26 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}s_test_umed3_reuse_bounds:
+; GCN-NOT: {{s_min_u32|s_max_u32}}
+; GCN: v_med3_u32 v{{[0-9]+}}, [[B0:s[0-9]+]], [[B1:v[0-9]+]], v{{[0-9]+}}
+; GCN: v_med3_u32 v{{[0-9]+}}, [[B0]], [[B1]], v{{[0-9]+}}
+define amdgpu_kernel void @s_test_umed3_reuse_bounds(i32 addrspace(1)* %arg, i32 %b0, i32 %b1, i32 %x, i32 %y) #1 {
+bb:
+  %lo = call i32 @umin(i32 %b0, i32 %b1)
+  %hi = call i32 @umax(i32 %b0, i32 %b1)
+
+  %tmp0 = call i32 @umin(i32 %x, i32 %hi)
+  %z0 = call i32 @umax(i32 %tmp0, i32 %lo)
+
+  %tmp1 = call i32 @umin(i32 %y, i32 %hi)
+  %z1 = call i32 @umax(i32 %tmp1, i32 %lo)
+
+  store volatile i32 %z0, i32 addrspace(1)* %arg
+  store volatile i32 %z1, i32 addrspace(1)* %arg
+  ret void
+}
+
 ; GCN-LABEL: {{^}}s_test_umed3_i32_pat_0_imm_src0:
 ; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, 1, v{{[0-9]+}}
 define amdgpu_kernel void @s_test_umed3_i32_pat_0_imm_src0(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
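
Note (editorial, not part of the patch): a minimal LLVM IR sketch of the shape this change unlocks. The function name is illustrative, and it uses the generic llvm.smin/llvm.smax intrinsics rather than the local @smin/@smax helpers the in-tree tests declare. Both clamps share the bounds %lo and %hi, so each min/max result has two uses; the removed *_oneuse pattern operators rejected such a match, while the plain min/max patterns select one v_med3_i32 per clamp, after which the shared s_min/s_max computations are dead and deleted (this is what the GCN-NOT checks in the new tests verify).

define amdgpu_kernel void @clamp_two_values(i32 addrspace(1)* %out, i32 %b0, i32 %b1, i32 %x, i32 %y) {
bb:
  ; Shared clamp bounds: each of %lo and %hi is used by both clamps below,
  ; so neither min nor max is a single-use node.
  %lo = call i32 @llvm.smin.i32(i32 %b0, i32 %b1)
  %hi = call i32 @llvm.smax.i32(i32 %b0, i32 %b1)
  ; %z0 = max(min(%x, max(%b0, %b1)), min(%b0, %b1)) is one of the 16 matched
  ; permutations of max(min(a, b), min(max(a, b), c)), with a = %b0, b = %b1,
  ; c = %x; likewise %z1 with c = %y.
  %t0 = call i32 @llvm.smin.i32(i32 %x, i32 %hi)
  %z0 = call i32 @llvm.smax.i32(i32 %t0, i32 %lo)
  %t1 = call i32 @llvm.smin.i32(i32 %y, i32 %hi)
  %z1 = call i32 @llvm.smax.i32(i32 %t1, i32 %lo)
  store volatile i32 %z0, i32 addrspace(1)* %out
  store volatile i32 %z1, i32 addrspace(1)* %out
  ret void
}

declare i32 @llvm.smin.i32(i32, i32)
declare i32 @llvm.smax.i32(i32, i32)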