Index: llvm/lib/Target/AMDGPU/AMDGPUInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -737,30 +737,6 @@ (BIT_ALIGN $src0, $src0, $src1) >; -multiclass IntMed3Pat { - - // This matches 16 permutations of - // min(max(a, b), max(min(a, b), c)) - def : AMDGPUPat < - (min (max_oneuse vt:$src0, vt:$src1), - (max_oneuse (min_oneuse vt:$src0, vt:$src1), vt:$src2)), - (med3Inst vt:$src0, vt:$src1, vt:$src2) ->; - - // This matches 16 permutations of - // max(min(x, y), min(max(x, y), z)) - def : AMDGPUPat < - (max (min_oneuse vt:$src0, vt:$src1), - (min_oneuse (max_oneuse vt:$src0, vt:$src1), vt:$src2)), - (med3Inst $src0, $src1, $src2) ->; -} - // Special conversion patterns def cvt_rpi_i32_f32 : PatFrag < Index: llvm/lib/Target/AMDGPU/SIInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/SIInstructions.td +++ llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1972,6 +1972,29 @@ defm : BFEPattern ; defm : SHA256MaPattern ; +multiclass IntMed3Pat { + + // This matches 16 permutations of + // min(max(a, b), max(min(a, b), c)) + def : AMDGPUPat < + (min (max_oneuse i32:$src0, i32:$src1), + (max_oneuse (min_oneuse i32:$src0, i32:$src1), i32:$src2)), + (med3Inst VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2) +>; + + // This matches 16 permutations of + // max(min(x, y), min(max(x, y), z)) + def : AMDGPUPat < + (max (min_oneuse i32:$src0, i32:$src1), + (min_oneuse (max_oneuse i32:$src0, i32:$src1), i32:$src2)), + (med3Inst VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2) +>; +} + defm : IntMed3Pat; defm : IntMed3Pat; @@ -2002,22 +2025,21 @@ SDPatternOperator min, SDPatternOperator max, SDPatternOperator max_oneuse, - SDPatternOperator min_oneuse, - ValueType vt = i16> { + SDPatternOperator min_oneuse> { // This matches 16 permutations of // max(min(x, y), min(max(x, y), z)) def : GCNPat < - (max (min_oneuse vt:$src0, vt:$src1), - (min_oneuse (max_oneuse vt:$src0, vt:$src1), vt:$src2)), - (med3Inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE) + (max (min_oneuse i16:$src0, i16:$src1), + (min_oneuse (max_oneuse i16:$src0, i16:$src1), i16:$src2)), + (med3Inst SRCMODS.NONE, VSrc_b16:$src0, SRCMODS.NONE, VSrc_b16:$src1, SRCMODS.NONE, VSrc_b16:$src2, DSTCLAMP.NONE) >; // This matches 16 permutations of // min(max(a, b), max(min(a, b), c)) def : GCNPat < - (min (max_oneuse vt:$src0, vt:$src1), - (max_oneuse (min_oneuse vt:$src0, vt:$src1), vt:$src2)), - (med3Inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE) + (min (max_oneuse i16:$src0, i16:$src1), + (max_oneuse (min_oneuse i16:$src0, i16:$src1), i16:$src2)), + (med3Inst SRCMODS.NONE, VSrc_b16:$src0, SRCMODS.NONE, VSrc_b16:$src1, SRCMODS.NONE, VSrc_b16:$src2, DSTCLAMP.NONE) >; } Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.mir @@ -0,0 +1,140 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX6 %s + +--- +name: smed3_s32_vvv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX6-LABEL: name: smed3_s32_vvv + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6: [[V_MED3_I32_:%[0-9]+]]:vgpr_32 = V_MED3_I32 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_MED3_I32_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s32) = G_SMAX %0, %1 + %4:vgpr(s32) = G_SMIN %0, %1 + %5:vgpr(s32) = G_SMAX %4, %2 + %6:vgpr(s32) = G_SMIN %3, %5 + S_ENDPGM 0, implicit %6 +... + +--- + +name: smed3_s32_sss +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2 + + ; GFX6-LABEL: name: smed3_s32_sss + ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6: [[S_MAX_I32_:%[0-9]+]]:sreg_32 = S_MAX_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6: [[S_MIN_I32_:%[0-9]+]]:sreg_32 = S_MIN_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6: [[S_MAX_I32_1:%[0-9]+]]:sreg_32 = S_MAX_I32 [[S_MIN_I32_]], [[COPY2]], implicit-def $scc + ; GFX6: [[S_MIN_I32_1:%[0-9]+]]:sreg_32 = S_MIN_I32 [[S_MAX_I32_]], [[S_MAX_I32_1]], implicit-def $scc + ; GFX6: S_ENDPGM 0, implicit [[S_MIN_I32_1]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = COPY $sgpr1 + %2:sgpr(s32) = COPY $sgpr2 + %3:sgpr(s32) = G_SMAX %0, %1 + %4:sgpr(s32) = G_SMIN %0, %1 + %5:sgpr(s32) = G_SMAX %4, %2 + %6:sgpr(s32) = G_SMIN %3, %5 + S_ENDPGM 0, implicit %6 +... + +--- +name: smed3_s32_vvv_multiuse0 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX6-LABEL: name: smed3_s32_vvv_multiuse0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6: [[V_MAX_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[V_MIN_I32_e64_]], [[COPY2]], implicit $exec + ; GFX6: [[V_MIN_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[V_MAX_I32_e64_]], [[V_MAX_I32_e64_1]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_MIN_I32_e64_1]], implicit [[V_MAX_I32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s32) = G_SMAX %0, %1 + %4:vgpr(s32) = G_SMIN %0, %1 + %5:vgpr(s32) = G_SMAX %4, %2 + %6:vgpr(s32) = G_SMIN %3, %5 + S_ENDPGM 0, implicit %6, implicit %3 +... + +--- +name: smed3_s32_vvv_multiuse1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX6-LABEL: name: smed3_s32_vvv_multiuse1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6: [[V_MAX_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[V_MIN_I32_e64_]], [[COPY2]], implicit $exec + ; GFX6: [[V_MIN_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[V_MAX_I32_e64_]], [[V_MAX_I32_e64_1]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_MIN_I32_e64_1]], implicit [[V_MIN_I32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s32) = G_SMAX %0, %1 + %4:vgpr(s32) = G_SMIN %0, %1 + %5:vgpr(s32) = G_SMAX %4, %2 + %6:vgpr(s32) = G_SMIN %3, %5 + S_ENDPGM 0, implicit %6, implicit %4 +... + +--- +name: smed3_s32_vvv_multiuse2 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX6-LABEL: name: smed3_s32_vvv_multiuse2 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6: [[V_MAX_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[V_MIN_I32_e64_]], [[COPY2]], implicit $exec + ; GFX6: [[V_MIN_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[V_MAX_I32_e64_]], [[V_MAX_I32_e64_1]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_MIN_I32_e64_1]], implicit [[V_MAX_I32_e64_1]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s32) = G_SMAX %0, %1 + %4:vgpr(s32) = G_SMIN %0, %1 + %5:vgpr(s32) = G_SMAX %4, %2 + %6:vgpr(s32) = G_SMIN %3, %5 + S_ENDPGM 0, implicit %6, implicit %5 +... Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir @@ -0,0 +1,168 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX8 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s + +--- +name: smed3_s16_vvv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX8-LABEL: name: smed3_s16_vvv + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX8: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX8: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec + ; GFX8: [[V_MIN_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[V_MAX_I16_e64_]], [[V_MAX_I16_e64_1]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_MIN_I16_e64_1]] + ; GFX9-LABEL: name: smed3_s16_vvv + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: [[V_MED3_I16_:%[0-9]+]]:vgpr_32 = V_MED3_I16 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_MED3_I16_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s16) = G_TRUNC %0 + %4:vgpr(s16) = G_TRUNC %1 + %5:vgpr(s16) = G_TRUNC %2 + + %6:vgpr(s16) = G_SMAX %3, %4 + %7:vgpr(s16) = G_SMIN %3, %4 + %8:vgpr(s16) = G_SMAX %7, %5 + %9:vgpr(s16) = G_SMIN %6, %8 + + S_ENDPGM 0, implicit %9 +... + +--- +name: smed3_s16_vvv_multiuse0 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX8-LABEL: name: smed3_s16_vvv_multiuse0 + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX8: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX8: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec + ; GFX8: [[V_MIN_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[V_MAX_I16_e64_]], [[V_MAX_I16_e64_1]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_MIN_I16_e64_1]], implicit [[V_MAX_I16_e64_]] + ; GFX9-LABEL: name: smed3_s16_vvv_multiuse0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX9: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX9: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec + ; GFX9: [[V_MIN_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[V_MAX_I16_e64_]], [[V_MAX_I16_e64_1]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_MIN_I16_e64_1]], implicit [[V_MAX_I16_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s16) = G_TRUNC %0 + %4:vgpr(s16) = G_TRUNC %1 + %5:vgpr(s16) = G_TRUNC %2 + + %6:vgpr(s16) = G_SMAX %3, %4 + %7:vgpr(s16) = G_SMIN %3, %4 + %8:vgpr(s16) = G_SMAX %7, %5 + %9:vgpr(s16) = G_SMIN %6, %8 + + S_ENDPGM 0, implicit %9, implicit %6 +... + +--- +name: smed3_s16_vvv_multiuse1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX8-LABEL: name: smed3_s16_vvv_multiuse1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX8: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX8: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec + ; GFX8: [[V_MIN_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[V_MAX_I16_e64_]], [[V_MAX_I16_e64_1]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_MIN_I16_e64_1]], implicit [[V_MIN_I16_e64_]] + ; GFX9-LABEL: name: smed3_s16_vvv_multiuse1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX9: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX9: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec + ; GFX9: [[V_MIN_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[V_MAX_I16_e64_]], [[V_MAX_I16_e64_1]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_MIN_I16_e64_1]], implicit [[V_MIN_I16_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s16) = G_TRUNC %0 + %4:vgpr(s16) = G_TRUNC %1 + %5:vgpr(s16) = G_TRUNC %2 + + %6:vgpr(s16) = G_SMAX %3, %4 + %7:vgpr(s16) = G_SMIN %3, %4 + %8:vgpr(s16) = G_SMAX %7, %5 + %9:vgpr(s16) = G_SMIN %6, %8 + + S_ENDPGM 0, implicit %9, implicit %7 +... + +--- +name: smed3_s16_vvv_multiuse2 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX8-LABEL: name: smed3_s16_vvv_multiuse2 + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX8: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX8: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec + ; GFX8: [[V_MIN_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[V_MAX_I16_e64_]], [[V_MAX_I16_e64_1]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_MIN_I16_e64_1]], implicit [[V_MAX_I16_e64_1]] + ; GFX9-LABEL: name: smed3_s16_vvv_multiuse2 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX9: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX9: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec + ; GFX9: [[V_MIN_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[V_MAX_I16_e64_]], [[V_MAX_I16_e64_1]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_MIN_I16_e64_1]], implicit [[V_MAX_I16_e64_1]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s16) = G_TRUNC %0 + %4:vgpr(s16) = G_TRUNC %1 + %5:vgpr(s16) = G_TRUNC %2 + + %6:vgpr(s16) = G_SMAX %3, %4 + %7:vgpr(s16) = G_SMIN %3, %4 + %8:vgpr(s16) = G_SMAX %7, %5 + %9:vgpr(s16) = G_SMIN %6, %8 + + S_ENDPGM 0, implicit %9, implicit %8 +... Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.mir @@ -0,0 +1,140 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX6 %s + +--- +name: umed3_s32_vvv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX6-LABEL: name: umed3_s32_vvv + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6: [[V_MED3_U32_:%[0-9]+]]:vgpr_32 = V_MED3_U32 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_MED3_U32_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s32) = G_UMAX %0, %1 + %4:vgpr(s32) = G_UMIN %0, %1 + %5:vgpr(s32) = G_UMAX %4, %2 + %6:vgpr(s32) = G_UMIN %3, %5 + S_ENDPGM 0, implicit %6 +... + +--- + +name: umed3_s32_sss +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2 + + ; GFX6-LABEL: name: umed3_s32_sss + ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6: [[S_MAX_U32_:%[0-9]+]]:sreg_32 = S_MAX_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6: [[S_MIN_U32_:%[0-9]+]]:sreg_32 = S_MIN_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6: [[S_MAX_U32_1:%[0-9]+]]:sreg_32 = S_MAX_U32 [[S_MIN_U32_]], [[COPY2]], implicit-def $scc + ; GFX6: [[S_MIN_U32_1:%[0-9]+]]:sreg_32 = S_MIN_U32 [[S_MAX_U32_]], [[S_MAX_U32_1]], implicit-def $scc + ; GFX6: S_ENDPGM 0, implicit [[S_MIN_U32_1]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = COPY $sgpr1 + %2:sgpr(s32) = COPY $sgpr2 + %3:sgpr(s32) = G_UMAX %0, %1 + %4:sgpr(s32) = G_UMIN %0, %1 + %5:sgpr(s32) = G_UMAX %4, %2 + %6:sgpr(s32) = G_UMIN %3, %5 + S_ENDPGM 0, implicit %6 +... + +--- +name: umed3_s32_vvv_multiuse0 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX6-LABEL: name: umed3_s32_vvv_multiuse0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6: [[V_MAX_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[V_MIN_U32_e64_]], [[COPY2]], implicit $exec + ; GFX6: [[V_MIN_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[V_MAX_U32_e64_]], [[V_MAX_U32_e64_1]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_MIN_U32_e64_1]], implicit [[V_MAX_U32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s32) = G_UMAX %0, %1 + %4:vgpr(s32) = G_UMIN %0, %1 + %5:vgpr(s32) = G_UMAX %4, %2 + %6:vgpr(s32) = G_UMIN %3, %5 + S_ENDPGM 0, implicit %6, implicit %3 +... + +--- +name: umed3_s32_vvv_multiuse1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX6-LABEL: name: umed3_s32_vvv_multiuse1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6: [[V_MAX_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[V_MIN_U32_e64_]], [[COPY2]], implicit $exec + ; GFX6: [[V_MIN_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[V_MAX_U32_e64_]], [[V_MAX_U32_e64_1]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_MIN_U32_e64_1]], implicit [[V_MIN_U32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s32) = G_UMAX %0, %1 + %4:vgpr(s32) = G_UMIN %0, %1 + %5:vgpr(s32) = G_UMAX %4, %2 + %6:vgpr(s32) = G_UMIN %3, %5 + S_ENDPGM 0, implicit %6, implicit %4 +... + +--- +name: umed3_s32_vvv_multiuse2 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX6-LABEL: name: umed3_s32_vvv_multiuse2 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6: [[V_MAX_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[V_MIN_U32_e64_]], [[COPY2]], implicit $exec + ; GFX6: [[V_MIN_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[V_MAX_U32_e64_]], [[V_MAX_U32_e64_1]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_MIN_U32_e64_1]], implicit [[V_MAX_U32_e64_1]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s32) = G_UMAX %0, %1 + %4:vgpr(s32) = G_UMIN %0, %1 + %5:vgpr(s32) = G_UMAX %4, %2 + %6:vgpr(s32) = G_UMIN %3, %5 + S_ENDPGM 0, implicit %6, implicit %5 +... Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir @@ -0,0 +1,168 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX8 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s + +--- +name: umed3_s16_vvv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX8-LABEL: name: umed3_s16_vvv + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX8: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX8: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec + ; GFX8: [[V_MIN_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[V_MAX_U16_e64_]], [[V_MAX_U16_e64_1]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_MIN_U16_e64_1]] + ; GFX9-LABEL: name: umed3_s16_vvv + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: [[V_MED3_U16_:%[0-9]+]]:vgpr_32 = V_MED3_U16 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_MED3_U16_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s16) = G_TRUNC %0 + %4:vgpr(s16) = G_TRUNC %1 + %5:vgpr(s16) = G_TRUNC %2 + + %6:vgpr(s16) = G_UMAX %3, %4 + %7:vgpr(s16) = G_UMIN %3, %4 + %8:vgpr(s16) = G_UMAX %7, %5 + %9:vgpr(s16) = G_UMIN %6, %8 + + S_ENDPGM 0, implicit %9 +... + +--- +name: umed3_s16_vvv_multiuse0 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX8-LABEL: name: umed3_s16_vvv_multiuse0 + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX8: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX8: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec + ; GFX8: [[V_MIN_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[V_MAX_U16_e64_]], [[V_MAX_U16_e64_1]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_MIN_U16_e64_1]], implicit [[V_MAX_U16_e64_]] + ; GFX9-LABEL: name: umed3_s16_vvv_multiuse0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX9: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX9: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec + ; GFX9: [[V_MIN_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[V_MAX_U16_e64_]], [[V_MAX_U16_e64_1]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_MIN_U16_e64_1]], implicit [[V_MAX_U16_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s16) = G_TRUNC %0 + %4:vgpr(s16) = G_TRUNC %1 + %5:vgpr(s16) = G_TRUNC %2 + + %6:vgpr(s16) = G_UMAX %3, %4 + %7:vgpr(s16) = G_UMIN %3, %4 + %8:vgpr(s16) = G_UMAX %7, %5 + %9:vgpr(s16) = G_UMIN %6, %8 + + S_ENDPGM 0, implicit %9, implicit %6 +... + +--- +name: umed3_s16_vvv_multiuse1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX8-LABEL: name: umed3_s16_vvv_multiuse1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX8: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX8: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec + ; GFX8: [[V_MIN_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[V_MAX_U16_e64_]], [[V_MAX_U16_e64_1]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_MIN_U16_e64_1]], implicit [[V_MIN_U16_e64_]] + ; GFX9-LABEL: name: umed3_s16_vvv_multiuse1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX9: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX9: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec + ; GFX9: [[V_MIN_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[V_MAX_U16_e64_]], [[V_MAX_U16_e64_1]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_MIN_U16_e64_1]], implicit [[V_MIN_U16_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s16) = G_TRUNC %0 + %4:vgpr(s16) = G_TRUNC %1 + %5:vgpr(s16) = G_TRUNC %2 + + %6:vgpr(s16) = G_UMAX %3, %4 + %7:vgpr(s16) = G_UMIN %3, %4 + %8:vgpr(s16) = G_UMAX %7, %5 + %9:vgpr(s16) = G_UMIN %6, %8 + + S_ENDPGM 0, implicit %9, implicit %7 +... + +--- +name: umed3_s16_vvv_multiuse2 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX8-LABEL: name: umed3_s16_vvv_multiuse2 + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX8: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX8: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec + ; GFX8: [[V_MIN_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[V_MAX_U16_e64_]], [[V_MAX_U16_e64_1]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_MIN_U16_e64_1]], implicit [[V_MAX_U16_e64_1]] + ; GFX9-LABEL: name: umed3_s16_vvv_multiuse2 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX9: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX9: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec + ; GFX9: [[V_MIN_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[V_MAX_U16_e64_]], [[V_MAX_U16_e64_1]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_MIN_U16_e64_1]], implicit [[V_MAX_U16_e64_1]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s16) = G_TRUNC %0 + %4:vgpr(s16) = G_TRUNC %1 + %5:vgpr(s16) = G_TRUNC %2 + + %6:vgpr(s16) = G_UMAX %3, %4 + %7:vgpr(s16) = G_UMIN %3, %4 + %8:vgpr(s16) = G_UMAX %7, %5 + %9:vgpr(s16) = G_UMIN %6, %8 + + S_ENDPGM 0, implicit %9, implicit %8 +...