diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h @@ -84,8 +84,6 @@ bool applyMappingBFEIntrinsic(const OperandsMapper &OpdMapper, bool Signed) const; - void lowerScalarMinMax(MachineIRBuilder &B, MachineInstr &MI) const; - Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI, Register Reg) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -591,21 +591,6 @@ return AltMappings; } - case TargetOpcode::G_SMIN: - case TargetOpcode::G_SMAX: - case TargetOpcode::G_UMIN: - case TargetOpcode::G_UMAX: { - static const OpRegBankEntry<3> Table[2] = { - { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 }, - - // Scalar requires cmp+select, and extends if 16-bit. - // FIXME: Should there be separate costs for 32 and 16-bit - { { AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID }, 3 } - }; - - const std::array RegSrcOpIdx = { { 0, 1, 2 } }; - return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table)); - } case TargetOpcode::G_UADDE: case TargetOpcode::G_USUBE: case TargetOpcode::G_SADDE: @@ -1576,23 +1561,8 @@ return true; } -// FIXME: Duplicated from LegalizerHelper -static CmpInst::Predicate minMaxToCompare(unsigned Opc) { - switch (Opc) { - case TargetOpcode::G_SMIN: - return CmpInst::ICMP_SLT; - case TargetOpcode::G_SMAX: - return CmpInst::ICMP_SGT; - case TargetOpcode::G_UMIN: - return CmpInst::ICMP_ULT; - case TargetOpcode::G_UMAX: - return CmpInst::ICMP_UGT; - default: - llvm_unreachable("not in integer min/max"); - } -} - -static unsigned minMaxToExtend(unsigned Opc) { +// Return a suitable opcode for extending the operands of Opc when widening. +static unsigned getExtendOp(unsigned Opc) { switch (Opc) { case TargetOpcode::G_SMIN: case TargetOpcode::G_SMAX: @@ -1601,7 +1571,7 @@ case TargetOpcode::G_UMAX: return TargetOpcode::G_ZEXT; default: - llvm_unreachable("not in integer min/max"); + return TargetOpcode::G_ANYEXT; } } @@ -1628,30 +1598,6 @@ return std::make_pair(Bitcast.getReg(0), ShiftHi.getReg(0)); } -static MachineInstr *buildExpandedScalarMinMax(MachineIRBuilder &B, - CmpInst::Predicate Pred, - Register Dst, Register Src0, - Register Src1) { - const LLT CmpType = LLT::scalar(32); - auto Cmp = B.buildICmp(Pred, CmpType, Src0, Src1); - return B.buildSelect(Dst, Cmp, Src0, Src1); -} - -// FIXME: Duplicated from LegalizerHelper, except changing the boolean type. -void AMDGPURegisterBankInfo::lowerScalarMinMax(MachineIRBuilder &B, - MachineInstr &MI) const { - Register Dst = MI.getOperand(0).getReg(); - Register Src0 = MI.getOperand(1).getReg(); - Register Src1 = MI.getOperand(2).getReg(); - - const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode()); - MachineInstr *Sel = buildExpandedScalarMinMax(B, Pred, Dst, Src0, Src1); - - Register CmpReg = Sel->getOperand(1).getReg(); - B.getMRI()->setRegBank(CmpReg, AMDGPU::SGPRRegBank); - MI.eraseFromParent(); -} - // For cases where only a single copy is inserted for matching register banks. // Replace the register in the instruction operand static bool substituteSimpleCopyRegs( @@ -2341,7 +2287,11 @@ case AMDGPU::G_MUL: case AMDGPU::G_SHL: case AMDGPU::G_LSHR: - case AMDGPU::G_ASHR: { + case AMDGPU::G_ASHR: + case AMDGPU::G_SMIN: + case AMDGPU::G_SMAX: + case AMDGPU::G_UMIN: + case AMDGPU::G_UMAX: { Register DstReg = MI.getOperand(0).getReg(); LLT DstTy = MRI.getType(DstReg); @@ -2365,10 +2315,11 @@ Register WideSrc0Lo, WideSrc0Hi; Register WideSrc1Lo, WideSrc1Hi; + unsigned ExtendOp = getExtendOp(MI.getOpcode()); std::tie(WideSrc0Lo, WideSrc0Hi) - = unpackV2S16ToS32(B, MI.getOperand(1).getReg(), AMDGPU::G_ANYEXT); + = unpackV2S16ToS32(B, MI.getOperand(1).getReg(), ExtendOp); std::tie(WideSrc1Lo, WideSrc1Hi) - = unpackV2S16ToS32(B, MI.getOperand(2).getReg(), AMDGPU::G_ANYEXT); + = unpackV2S16ToS32(B, MI.getOperand(2).getReg(), ExtendOp); auto Lo = B.buildInstr(MI.getOpcode(), {S32}, {WideSrc0Lo, WideSrc1Lo}); auto Hi = B.buildInstr(MI.getOpcode(), {S32}, {WideSrc0Hi, WideSrc1Hi}); B.buildBuildVectorTrunc(DstReg, {Lo.getReg(0), Hi.getReg(0)}); @@ -2390,73 +2341,6 @@ return; } - case AMDGPU::G_SMIN: - case AMDGPU::G_SMAX: - case AMDGPU::G_UMIN: - case AMDGPU::G_UMAX: { - Register DstReg = MI.getOperand(0).getReg(); - const RegisterBank *DstBank = - OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank; - if (DstBank == &AMDGPU::VGPRRegBank) - break; - - MachineFunction *MF = MI.getParent()->getParent(); - MachineIRBuilder B(MI); - - // Turn scalar min/max into a compare and select. - LLT Ty = MRI.getType(DstReg); - const LLT S32 = LLT::scalar(32); - const LLT S16 = LLT::scalar(16); - const LLT V2S16 = LLT::vector(2, 16); - - if (Ty == V2S16) { - ApplyRegBankMapping ApplySALU(*this, MRI, &AMDGPU::SGPRRegBank); - B.setChangeObserver(ApplySALU); - - // Need to widen to s32, and expand as cmp + select, and avoid producing - // illegal vector extends or unmerges that would need further - // legalization. - // - // TODO: Should we just readfirstlane? That should probably be handled - // with a UniformVGPR register bank that wouldn't need special - // consideration here. - - Register Dst = MI.getOperand(0).getReg(); - Register Src0 = MI.getOperand(1).getReg(); - Register Src1 = MI.getOperand(2).getReg(); - - Register WideSrc0Lo, WideSrc0Hi; - Register WideSrc1Lo, WideSrc1Hi; - - unsigned ExtendOp = minMaxToExtend(MI.getOpcode()); - - std::tie(WideSrc0Lo, WideSrc0Hi) = unpackV2S16ToS32(B, Src0, ExtendOp); - std::tie(WideSrc1Lo, WideSrc1Hi) = unpackV2S16ToS32(B, Src1, ExtendOp); - - Register Lo = MRI.createGenericVirtualRegister(S32); - Register Hi = MRI.createGenericVirtualRegister(S32); - const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode()); - buildExpandedScalarMinMax(B, Pred, Lo, WideSrc0Lo, WideSrc1Lo); - buildExpandedScalarMinMax(B, Pred, Hi, WideSrc0Hi, WideSrc1Hi); - - B.buildBuildVectorTrunc(Dst, {Lo, Hi}); - MI.eraseFromParent(); - } else if (Ty == S16) { - ApplyRegBankMapping ApplySALU(*this, MRI, &AMDGPU::SGPRRegBank); - B.setChangeObserver(ApplySALU); - LegalizerHelper Helper(*MF, ApplySALU, B); - - // Need to widen to s32, and expand as cmp + select. - if (Helper.widenScalar(MI, 0, S32) != LegalizerHelper::Legalized) - llvm_unreachable("widenScalar should have succeeded"); - - // FIXME: This is relying on widenScalar leaving MI in place. - lowerScalarMinMax(B, MI); - } else - lowerScalarMinMax(B, MI); - - return; - } case AMDGPU::G_SEXT_INREG: { SmallVector SrcRegs(OpdMapper.getVRegs(1)); if (SrcRegs.empty()) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir @@ -13,8 +13,7 @@ ; CHECK-LABEL: name: smax_s32_ss ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(sgt), [[COPY]](s32), [[COPY1]] - ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]] + ; CHECK: [[SMAX:%[0-9]+]]:sgpr(s32) = G_SMAX [[COPY]], [[COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_SMAX %0, %1 @@ -90,9 +89,8 @@ ; CHECK-LABEL: name: smax_s32_ss_vgpr_use ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(sgt), [[COPY]](s32), [[COPY1]] - ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]] - ; CHECK: $vgpr0 = COPY [[SELECT]](s32) + ; CHECK: [[SMAX:%[0-9]+]]:sgpr(s32) = G_SMAX [[COPY]], [[COPY1]] + ; CHECK: $vgpr0 = COPY [[SMAX]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_SMAX %0, %1 @@ -114,9 +112,8 @@ ; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) ; CHECK: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s16) ; CHECK: [[SEXT1:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC1]](s16) - ; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC %8(s32) - ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(sgt), [[SEXT]](s32), [[SEXT1]] - ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[SEXT]], [[SEXT1]] + ; CHECK: [[SMAX:%[0-9]+]]:sgpr(s32) = G_SMAX [[SEXT]], [[SEXT1]] + ; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SMAX]](s32) ; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s16) ; CHECK: $sgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $sgpr0 @@ -144,9 +141,8 @@ ; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) ; CHECK: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s16) ; CHECK: [[SEXT1:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC1]](s16) - ; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC %8(s32) - ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(sgt), [[SEXT]](s32), [[SEXT1]] - ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[SEXT]], [[SEXT1]] + ; CHECK: [[SMAX:%[0-9]+]]:sgpr(s32) = G_SMAX [[SEXT]], [[SEXT1]] + ; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SMAX]](s32) ; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s16) ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $sgpr0 @@ -178,11 +174,9 @@ ; CHECK: [[SEXT_INREG1:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[BITCAST1]], 16 ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 ; CHECK: [[ASHR1:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST1]], [[C1]](s32) - ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(sgt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] - ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[SEXT_INREG]], [[SEXT_INREG1]] - ; CHECK: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(sgt), [[ASHR]](s32), [[ASHR1]] - ; CHECK: [[SELECT1:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP1]](s32), [[ASHR]], [[ASHR1]] - ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SELECT]](s32), [[SELECT1]](s32) + ; CHECK: [[SMAX:%[0-9]+]]:sgpr(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] + ; CHECK: [[SMAX1:%[0-9]+]]:sgpr(s32) = G_SMAX [[ASHR]], [[ASHR1]] + ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SMAX]](s32), [[SMAX1]](s32) ; CHECK: $sgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $sgpr0 %1:_(<2 x s16>) = COPY $sgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-fast -o - %s | FileCheck %s -# XUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s | FileCheck %s --- name: smin_s32_ss @@ -13,9 +13,8 @@ ; CHECK-LABEL: name: smin_s32_ss ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[COPY]](s32), [[COPY1]] - ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]] - ; CHECK: $sgpr0 = COPY [[SELECT]](s32) + ; CHECK: [[SMIN:%[0-9]+]]:sgpr(s32) = G_SMIN [[COPY]], [[COPY1]] + ; CHECK: $sgpr0 = COPY [[SMIN]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_SMIN %0, %1 @@ -93,9 +92,8 @@ ; CHECK-LABEL: name: smin_s32_ss_vgpr_use ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[COPY]](s32), [[COPY1]] - ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]] - ; CHECK: $vgpr0 = COPY [[SELECT]](s32) + ; CHECK: [[SMIN:%[0-9]+]]:sgpr(s32) = G_SMIN [[COPY]], [[COPY1]] + ; CHECK: $vgpr0 = COPY [[SMIN]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_SMIN %0, %1 @@ -117,9 +115,8 @@ ; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) ; CHECK: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s16) ; CHECK: [[SEXT1:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC1]](s16) - ; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC %8(s32) - ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SEXT]](s32), [[SEXT1]] - ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[SEXT]], [[SEXT1]] + ; CHECK: [[SMIN:%[0-9]+]]:sgpr(s32) = G_SMIN [[SEXT]], [[SEXT1]] + ; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SMIN]](s32) ; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s16) ; CHECK: $sgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $sgpr0 @@ -147,9 +144,8 @@ ; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) ; CHECK: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s16) ; CHECK: [[SEXT1:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC1]](s16) - ; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC %8(s32) - ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SEXT]](s32), [[SEXT1]] - ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[SEXT]], [[SEXT1]] + ; CHECK: [[SMIN:%[0-9]+]]:sgpr(s32) = G_SMIN [[SEXT]], [[SEXT1]] + ; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SMIN]](s32) ; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s16) ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $sgpr0 @@ -181,11 +177,9 @@ ; CHECK: [[SEXT_INREG1:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[BITCAST1]], 16 ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 ; CHECK: [[ASHR1:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST1]], [[C1]](s32) - ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] - ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[SEXT_INREG]], [[SEXT_INREG1]] - ; CHECK: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[ASHR]](s32), [[ASHR1]] - ; CHECK: [[SELECT1:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP1]](s32), [[ASHR]], [[ASHR1]] - ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SELECT]](s32), [[SELECT1]](s32) + ; CHECK: [[SMIN:%[0-9]+]]:sgpr(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] + ; CHECK: [[SMIN1:%[0-9]+]]:sgpr(s32) = G_SMIN [[ASHR]], [[ASHR1]] + ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SMIN]](s32), [[SMIN1]](s32) ; CHECK: $sgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $sgpr0 %1:_(<2 x s16>) = COPY $sgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir @@ -13,9 +13,8 @@ ; CHECK-LABEL: name: umax_s32_ss ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[COPY1]] - ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]] - ; CHECK: $sgpr0 = COPY [[SELECT]](s32) + ; CHECK: [[UMAX:%[0-9]+]]:sgpr(s32) = G_UMAX [[COPY]], [[COPY1]] + ; CHECK: $sgpr0 = COPY [[UMAX]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_UMAX %0, %1 @@ -93,9 +92,8 @@ ; CHECK-LABEL: name: umax_s32_ss_vgpr_use ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[COPY1]] - ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]] - ; CHECK: $vgpr0 = COPY [[SELECT]](s32) + ; CHECK: [[UMAX:%[0-9]+]]:sgpr(s32) = G_UMAX [[COPY]], [[COPY1]] + ; CHECK: $vgpr0 = COPY [[UMAX]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_UMAX %0, %1 @@ -117,9 +115,8 @@ ; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) ; CHECK: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s16) ; CHECK: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s16) - ; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC %8(s32) - ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ugt), [[ZEXT]](s32), [[ZEXT1]] - ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[ZEXT]], [[ZEXT1]] + ; CHECK: [[UMAX:%[0-9]+]]:sgpr(s32) = G_UMAX [[ZEXT]], [[ZEXT1]] + ; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[UMAX]](s32) ; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s16) ; CHECK: $sgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $sgpr0 @@ -147,9 +144,8 @@ ; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) ; CHECK: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s16) ; CHECK: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s16) - ; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC %8(s32) - ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ugt), [[ZEXT]](s32), [[ZEXT1]] - ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[ZEXT]], [[ZEXT1]] + ; CHECK: [[UMAX:%[0-9]+]]:sgpr(s32) = G_UMAX [[ZEXT]], [[ZEXT1]] + ; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[UMAX]](s32) ; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s16) ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $sgpr0 @@ -183,11 +179,9 @@ ; CHECK: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C2]](s32) ; CHECK: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535 ; CHECK: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST1]], [[C3]] - ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ugt), [[AND]](s32), [[AND1]] - ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[AND]], [[AND1]] - ; CHECK: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ugt), [[LSHR]](s32), [[LSHR1]] - ; CHECK: [[SELECT1:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP1]](s32), [[LSHR]], [[LSHR1]] - ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SELECT]](s32), [[SELECT1]](s32) + ; CHECK: [[UMAX:%[0-9]+]]:sgpr(s32) = G_UMAX [[AND]], [[AND1]] + ; CHECK: [[UMAX1:%[0-9]+]]:sgpr(s32) = G_UMAX [[LSHR]], [[LSHR1]] + ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UMAX]](s32), [[UMAX1]](s32) ; CHECK: $sgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $sgpr0 %1:_(<2 x s16>) = COPY $sgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir @@ -13,9 +13,8 @@ ; CHECK-LABEL: name: umin_s32_ss ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY1]] - ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]] - ; CHECK: $sgpr0 = COPY [[SELECT]](s32) + ; CHECK: [[UMIN:%[0-9]+]]:sgpr(s32) = G_UMIN [[COPY]], [[COPY1]] + ; CHECK: $sgpr0 = COPY [[UMIN]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_UMIN %0, %1 @@ -97,9 +96,8 @@ ; CHECK-LABEL: name: umin_s32_ss_vgpr_use ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY1]] - ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]] - ; CHECK: $vgpr0 = COPY [[SELECT]](s32) + ; CHECK: [[UMIN:%[0-9]+]]:sgpr(s32) = G_UMIN [[COPY]], [[COPY1]] + ; CHECK: $vgpr0 = COPY [[UMIN]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_UMIN %0, %1 @@ -121,9 +119,8 @@ ; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) ; CHECK: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s16) ; CHECK: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s16) - ; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC %8(s32) - ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ult), [[ZEXT]](s32), [[ZEXT1]] - ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[ZEXT]], [[ZEXT1]] + ; CHECK: [[UMIN:%[0-9]+]]:sgpr(s32) = G_UMIN [[ZEXT]], [[ZEXT1]] + ; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[UMIN]](s32) ; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s16) ; CHECK: $sgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $sgpr0 @@ -151,9 +148,8 @@ ; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) ; CHECK: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s16) ; CHECK: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s16) - ; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC %8(s32) - ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ult), [[ZEXT]](s32), [[ZEXT1]] - ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[ZEXT]], [[ZEXT1]] + ; CHECK: [[UMIN:%[0-9]+]]:sgpr(s32) = G_UMIN [[ZEXT]], [[ZEXT1]] + ; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[UMIN]](s32) ; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s16) ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $sgpr0 @@ -187,11 +183,9 @@ ; CHECK: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C2]](s32) ; CHECK: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535 ; CHECK: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST1]], [[C3]] - ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ult), [[AND]](s32), [[AND1]] - ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[AND]], [[AND1]] - ; CHECK: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ult), [[LSHR]](s32), [[LSHR1]] - ; CHECK: [[SELECT1:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP1]](s32), [[LSHR]], [[LSHR1]] - ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SELECT]](s32), [[SELECT1]](s32) + ; CHECK: [[UMIN:%[0-9]+]]:sgpr(s32) = G_UMIN [[AND]], [[AND1]] + ; CHECK: [[UMIN1:%[0-9]+]]:sgpr(s32) = G_UMIN [[LSHR]], [[LSHR1]] + ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UMIN]](s32), [[UMIN1]](s32) ; CHECK: $sgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $sgpr0 %1:_(<2 x s16>) = COPY $sgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll @@ -61,17 +61,13 @@ ; GFX6-LABEL: s_saddsat_i7: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_lshl_b32 s0, s0, 25 +; GFX6-NEXT: s_min_i32 s3, s0, 0 +; GFX6-NEXT: s_max_i32 s2, s0, 0 ; GFX6-NEXT: s_lshl_b32 s1, s1, 25 -; GFX6-NEXT: s_cmp_gt_i32 s0, 0 -; GFX6-NEXT: s_cselect_b32 s2, s0, 0 -; GFX6-NEXT: s_sub_i32 s2, 0x7fffffff, s2 -; GFX6-NEXT: s_cmp_lt_i32 s0, 0 -; GFX6-NEXT: s_cselect_b32 s3, s0, 0 ; GFX6-NEXT: s_sub_i32 s3, 0x80000000, s3 -; GFX6-NEXT: s_cmp_gt_i32 s3, s1 -; GFX6-NEXT: s_cselect_b32 s1, s3, s1 -; GFX6-NEXT: s_cmp_lt_i32 s1, s2 -; GFX6-NEXT: s_cselect_b32 s1, s1, s2 +; GFX6-NEXT: s_sub_i32 s2, 0x7fffffff, s2 +; GFX6-NEXT: s_max_i32 s1, s3, s1 +; GFX6-NEXT: s_min_i32 s1, s1, s2 ; GFX6-NEXT: s_add_i32 s0, s0, s1 ; GFX6-NEXT: s_ashr_i32 s0, s0, 25 ; GFX6-NEXT: ; return to shader part epilog @@ -80,23 +76,19 @@ ; GFX8: ; %bb.0: ; GFX8-NEXT: s_bfe_u32 s2, 9, 0x100000 ; GFX8-NEXT: s_lshl_b32 s0, s0, s2 -; GFX8-NEXT: s_lshl_b32 s1, s1, s2 ; GFX8-NEXT: s_sext_i32_i16 s3, s0 ; GFX8-NEXT: s_sext_i32_i16 s4, 0 -; GFX8-NEXT: s_cmp_gt_i32 s3, s4 -; GFX8-NEXT: s_cselect_b32 s5, s3, s4 -; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5 -; GFX8-NEXT: s_cmp_lt_i32 s3, s4 -; GFX8-NEXT: s_cselect_b32 s3, s3, s4 +; GFX8-NEXT: s_max_i32 s5, s3, s4 +; GFX8-NEXT: s_min_i32 s3, s3, s4 +; GFX8-NEXT: s_lshl_b32 s1, s1, s2 ; GFX8-NEXT: s_sub_i32 s3, 0xffff8000, s3 ; GFX8-NEXT: s_sext_i32_i16 s3, s3 ; GFX8-NEXT: s_sext_i32_i16 s1, s1 -; GFX8-NEXT: s_cmp_gt_i32 s3, s1 -; GFX8-NEXT: s_cselect_b32 s1, s3, s1 +; GFX8-NEXT: s_max_i32 s1, s3, s1 +; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5 ; GFX8-NEXT: s_sext_i32_i16 s1, s1 ; GFX8-NEXT: s_sext_i32_i16 s3, s5 -; GFX8-NEXT: s_cmp_lt_i32 s1, s3 -; GFX8-NEXT: s_cselect_b32 s1, s1, s3 +; GFX8-NEXT: s_min_i32 s1, s1, s3 ; GFX8-NEXT: s_add_i32 s0, s0, s1 ; GFX8-NEXT: s_sext_i32_i16 s0, s0 ; GFX8-NEXT: s_ashr_i32 s0, s0, s2 @@ -183,17 +175,13 @@ ; GFX6-LABEL: s_saddsat_i8: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_lshl_b32 s0, s0, 24 +; GFX6-NEXT: s_min_i32 s3, s0, 0 +; GFX6-NEXT: s_max_i32 s2, s0, 0 ; GFX6-NEXT: s_lshl_b32 s1, s1, 24 -; GFX6-NEXT: s_cmp_gt_i32 s0, 0 -; GFX6-NEXT: s_cselect_b32 s2, s0, 0 -; GFX6-NEXT: s_sub_i32 s2, 0x7fffffff, s2 -; GFX6-NEXT: s_cmp_lt_i32 s0, 0 -; GFX6-NEXT: s_cselect_b32 s3, s0, 0 ; GFX6-NEXT: s_sub_i32 s3, 0x80000000, s3 -; GFX6-NEXT: s_cmp_gt_i32 s3, s1 -; GFX6-NEXT: s_cselect_b32 s1, s3, s1 -; GFX6-NEXT: s_cmp_lt_i32 s1, s2 -; GFX6-NEXT: s_cselect_b32 s1, s1, s2 +; GFX6-NEXT: s_sub_i32 s2, 0x7fffffff, s2 +; GFX6-NEXT: s_max_i32 s1, s3, s1 +; GFX6-NEXT: s_min_i32 s1, s1, s2 ; GFX6-NEXT: s_add_i32 s0, s0, s1 ; GFX6-NEXT: s_ashr_i32 s0, s0, 24 ; GFX6-NEXT: ; return to shader part epilog @@ -202,23 +190,19 @@ ; GFX8: ; %bb.0: ; GFX8-NEXT: s_bfe_u32 s2, 8, 0x100000 ; GFX8-NEXT: s_lshl_b32 s0, s0, s2 -; GFX8-NEXT: s_lshl_b32 s1, s1, s2 ; GFX8-NEXT: s_sext_i32_i16 s3, s0 ; GFX8-NEXT: s_sext_i32_i16 s4, 0 -; GFX8-NEXT: s_cmp_gt_i32 s3, s4 -; GFX8-NEXT: s_cselect_b32 s5, s3, s4 -; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5 -; GFX8-NEXT: s_cmp_lt_i32 s3, s4 -; GFX8-NEXT: s_cselect_b32 s3, s3, s4 +; GFX8-NEXT: s_max_i32 s5, s3, s4 +; GFX8-NEXT: s_min_i32 s3, s3, s4 +; GFX8-NEXT: s_lshl_b32 s1, s1, s2 ; GFX8-NEXT: s_sub_i32 s3, 0xffff8000, s3 ; GFX8-NEXT: s_sext_i32_i16 s3, s3 ; GFX8-NEXT: s_sext_i32_i16 s1, s1 -; GFX8-NEXT: s_cmp_gt_i32 s3, s1 -; GFX8-NEXT: s_cselect_b32 s1, s3, s1 +; GFX8-NEXT: s_max_i32 s1, s3, s1 +; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5 ; GFX8-NEXT: s_sext_i32_i16 s1, s1 ; GFX8-NEXT: s_sext_i32_i16 s3, s5 -; GFX8-NEXT: s_cmp_lt_i32 s1, s3 -; GFX8-NEXT: s_cselect_b32 s1, s1, s3 +; GFX8-NEXT: s_min_i32 s1, s1, s3 ; GFX8-NEXT: s_add_i32 s0, s0, s1 ; GFX8-NEXT: s_sext_i32_i16 s0, s0 ; GFX8-NEXT: s_ashr_i32 s0, s0, s2 @@ -360,38 +344,30 @@ ; GFX6-LABEL: s_saddsat_v2i8: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_lshr_b32 s2, s0, 8 -; GFX6-NEXT: s_lshr_b32 s3, s1, 8 ; GFX6-NEXT: s_lshl_b32 s0, s0, 24 +; GFX6-NEXT: s_brev_b32 s5, 1 +; GFX6-NEXT: s_min_i32 s7, s0, 0 +; GFX6-NEXT: s_lshr_b32 s3, s1, 8 ; GFX6-NEXT: s_lshl_b32 s1, s1, 24 -; GFX6-NEXT: s_cmp_gt_i32 s0, 0 +; GFX6-NEXT: s_sub_i32 s7, s5, s7 ; GFX6-NEXT: s_brev_b32 s4, -2 -; GFX6-NEXT: s_cselect_b32 s6, s0, 0 +; GFX6-NEXT: s_max_i32 s6, s0, 0 ; GFX6-NEXT: s_sub_i32 s6, s4, s6 -; GFX6-NEXT: s_cmp_lt_i32 s0, 0 -; GFX6-NEXT: s_brev_b32 s5, 1 -; GFX6-NEXT: s_cselect_b32 s7, s0, 0 -; GFX6-NEXT: s_sub_i32 s7, s5, s7 -; GFX6-NEXT: s_cmp_gt_i32 s7, s1 -; GFX6-NEXT: s_cselect_b32 s1, s7, s1 -; GFX6-NEXT: s_cmp_lt_i32 s1, s6 -; GFX6-NEXT: s_cselect_b32 s1, s1, s6 +; GFX6-NEXT: s_max_i32 s1, s7, s1 +; GFX6-NEXT: s_min_i32 s1, s1, s6 ; GFX6-NEXT: s_add_i32 s0, s0, s1 ; GFX6-NEXT: s_lshl_b32 s1, s2, 24 -; GFX6-NEXT: s_ashr_i32 s0, s0, 24 ; GFX6-NEXT: s_lshl_b32 s2, s3, 24 -; GFX6-NEXT: s_cmp_gt_i32 s1, 0 -; GFX6-NEXT: s_cselect_b32 s3, s1, 0 +; GFX6-NEXT: s_max_i32 s3, s1, 0 ; GFX6-NEXT: s_sub_i32 s3, s4, s3 -; GFX6-NEXT: s_cmp_lt_i32 s1, 0 -; GFX6-NEXT: s_cselect_b32 s4, s1, 0 +; GFX6-NEXT: s_min_i32 s4, s1, 0 ; GFX6-NEXT: s_sub_i32 s4, s5, s4 -; GFX6-NEXT: s_cmp_gt_i32 s4, s2 -; GFX6-NEXT: s_cselect_b32 s2, s4, s2 -; GFX6-NEXT: s_cmp_lt_i32 s2, s3 -; GFX6-NEXT: s_cselect_b32 s2, s2, s3 +; GFX6-NEXT: s_max_i32 s2, s4, s2 +; GFX6-NEXT: s_min_i32 s2, s2, s3 ; GFX6-NEXT: s_add_i32 s1, s1, s2 -; GFX6-NEXT: s_movk_i32 s2, 0xff ; GFX6-NEXT: s_ashr_i32 s1, s1, 24 +; GFX6-NEXT: s_movk_i32 s2, 0xff +; GFX6-NEXT: s_ashr_i32 s0, s0, 24 ; GFX6-NEXT: s_and_b32 s1, s1, s2 ; GFX6-NEXT: s_and_b32 s0, s0, s2 ; GFX6-NEXT: s_lshl_b32 s1, s1, 8 @@ -403,50 +379,42 @@ ; GFX8-NEXT: s_bfe_u32 s4, 8, 0x100000 ; GFX8-NEXT: s_lshr_b32 s2, s0, 8 ; GFX8-NEXT: s_lshl_b32 s0, s0, s4 -; GFX8-NEXT: s_lshr_b32 s3, s1, 8 -; GFX8-NEXT: s_lshl_b32 s1, s1, s4 ; GFX8-NEXT: s_sext_i32_i16 s7, s0 ; GFX8-NEXT: s_sext_i32_i16 s8, 0 -; GFX8-NEXT: s_cmp_gt_i32 s7, s8 -; GFX8-NEXT: s_movk_i32 s5, 0x7fff -; GFX8-NEXT: s_cselect_b32 s9, s7, s8 -; GFX8-NEXT: s_sub_i32 s9, s5, s9 -; GFX8-NEXT: s_cmp_lt_i32 s7, s8 +; GFX8-NEXT: s_max_i32 s9, s7, s8 ; GFX8-NEXT: s_movk_i32 s6, 0x8000 -; GFX8-NEXT: s_cselect_b32 s7, s7, s8 +; GFX8-NEXT: s_min_i32 s7, s7, s8 ; GFX8-NEXT: s_sub_i32 s7, s6, s7 +; GFX8-NEXT: s_lshr_b32 s3, s1, 8 +; GFX8-NEXT: s_lshl_b32 s1, s1, s4 +; GFX8-NEXT: s_movk_i32 s5, 0x7fff ; GFX8-NEXT: s_sext_i32_i16 s7, s7 ; GFX8-NEXT: s_sext_i32_i16 s1, s1 -; GFX8-NEXT: s_cmp_gt_i32 s7, s1 -; GFX8-NEXT: s_cselect_b32 s1, s7, s1 +; GFX8-NEXT: s_max_i32 s1, s7, s1 +; GFX8-NEXT: s_sub_i32 s9, s5, s9 ; GFX8-NEXT: s_sext_i32_i16 s1, s1 ; GFX8-NEXT: s_sext_i32_i16 s7, s9 -; GFX8-NEXT: s_cmp_lt_i32 s1, s7 -; GFX8-NEXT: s_cselect_b32 s1, s1, s7 +; GFX8-NEXT: s_min_i32 s1, s1, s7 ; GFX8-NEXT: s_add_i32 s0, s0, s1 -; GFX8-NEXT: s_sext_i32_i16 s0, s0 ; GFX8-NEXT: s_lshl_b32 s1, s2, s4 ; GFX8-NEXT: s_lshl_b32 s2, s3, s4 -; GFX8-NEXT: s_ashr_i32 s0, s0, s4 ; GFX8-NEXT: s_sext_i32_i16 s3, s1 -; GFX8-NEXT: s_cmp_gt_i32 s3, s8 -; GFX8-NEXT: s_cselect_b32 s7, s3, s8 -; GFX8-NEXT: s_sub_i32 s5, s5, s7 -; GFX8-NEXT: s_cmp_lt_i32 s3, s8 -; GFX8-NEXT: s_cselect_b32 s3, s3, s8 +; GFX8-NEXT: s_max_i32 s7, s3, s8 +; GFX8-NEXT: s_min_i32 s3, s3, s8 ; GFX8-NEXT: s_sub_i32 s3, s6, s3 ; GFX8-NEXT: s_sext_i32_i16 s3, s3 ; GFX8-NEXT: s_sext_i32_i16 s2, s2 -; GFX8-NEXT: s_cmp_gt_i32 s3, s2 -; GFX8-NEXT: s_cselect_b32 s2, s3, s2 +; GFX8-NEXT: s_max_i32 s2, s3, s2 +; GFX8-NEXT: s_sub_i32 s5, s5, s7 ; GFX8-NEXT: s_sext_i32_i16 s2, s2 ; GFX8-NEXT: s_sext_i32_i16 s3, s5 -; GFX8-NEXT: s_cmp_lt_i32 s2, s3 -; GFX8-NEXT: s_cselect_b32 s2, s2, s3 +; GFX8-NEXT: s_min_i32 s2, s2, s3 ; GFX8-NEXT: s_add_i32 s1, s1, s2 ; GFX8-NEXT: s_sext_i32_i16 s1, s1 -; GFX8-NEXT: s_movk_i32 s2, 0xff +; GFX8-NEXT: s_sext_i32_i16 s0, s0 ; GFX8-NEXT: s_ashr_i32 s1, s1, s4 +; GFX8-NEXT: s_movk_i32 s2, 0xff +; GFX8-NEXT: s_ashr_i32 s0, s0, s4 ; GFX8-NEXT: s_and_b32 s1, s1, s2 ; GFX8-NEXT: s_and_b32 s0, s0, s2 ; GFX8-NEXT: s_lshl_b32 s1, s1, s4 @@ -714,68 +682,52 @@ ; GFX6-NEXT: s_lshr_b32 s2, s0, 8 ; GFX6-NEXT: s_lshr_b32 s3, s0, 16 ; GFX6-NEXT: s_lshr_b32 s4, s0, 24 +; GFX6-NEXT: s_lshl_b32 s0, s0, 24 +; GFX6-NEXT: s_brev_b32 s9, 1 +; GFX6-NEXT: s_min_i32 s11, s0, 0 ; GFX6-NEXT: s_lshr_b32 s5, s1, 8 ; GFX6-NEXT: s_lshr_b32 s6, s1, 16 ; GFX6-NEXT: s_lshr_b32 s7, s1, 24 -; GFX6-NEXT: s_lshl_b32 s0, s0, 24 ; GFX6-NEXT: s_lshl_b32 s1, s1, 24 -; GFX6-NEXT: s_cmp_gt_i32 s0, 0 +; GFX6-NEXT: s_sub_i32 s11, s9, s11 ; GFX6-NEXT: s_brev_b32 s8, -2 -; GFX6-NEXT: s_cselect_b32 s10, s0, 0 +; GFX6-NEXT: s_max_i32 s10, s0, 0 ; GFX6-NEXT: s_sub_i32 s10, s8, s10 -; GFX6-NEXT: s_cmp_lt_i32 s0, 0 -; GFX6-NEXT: s_brev_b32 s9, 1 -; GFX6-NEXT: s_cselect_b32 s11, s0, 0 -; GFX6-NEXT: s_sub_i32 s11, s9, s11 -; GFX6-NEXT: s_cmp_gt_i32 s11, s1 -; GFX6-NEXT: s_cselect_b32 s1, s11, s1 -; GFX6-NEXT: s_cmp_lt_i32 s1, s10 -; GFX6-NEXT: s_cselect_b32 s1, s1, s10 +; GFX6-NEXT: s_max_i32 s1, s11, s1 +; GFX6-NEXT: s_min_i32 s1, s1, s10 ; GFX6-NEXT: s_add_i32 s0, s0, s1 ; GFX6-NEXT: s_lshl_b32 s1, s2, 24 -; GFX6-NEXT: s_ashr_i32 s0, s0, 24 +; GFX6-NEXT: s_min_i32 s10, s1, 0 ; GFX6-NEXT: s_lshl_b32 s2, s5, 24 -; GFX6-NEXT: s_cmp_gt_i32 s1, 0 -; GFX6-NEXT: s_cselect_b32 s5, s1, 0 -; GFX6-NEXT: s_sub_i32 s5, s8, s5 -; GFX6-NEXT: s_cmp_lt_i32 s1, 0 -; GFX6-NEXT: s_cselect_b32 s10, s1, 0 +; GFX6-NEXT: s_max_i32 s5, s1, 0 ; GFX6-NEXT: s_sub_i32 s10, s9, s10 -; GFX6-NEXT: s_cmp_gt_i32 s10, s2 -; GFX6-NEXT: s_cselect_b32 s2, s10, s2 -; GFX6-NEXT: s_cmp_lt_i32 s2, s5 -; GFX6-NEXT: s_cselect_b32 s2, s2, s5 +; GFX6-NEXT: s_sub_i32 s5, s8, s5 +; GFX6-NEXT: s_max_i32 s2, s10, s2 +; GFX6-NEXT: s_min_i32 s2, s2, s5 ; GFX6-NEXT: s_add_i32 s1, s1, s2 ; GFX6-NEXT: s_lshl_b32 s2, s3, 24 -; GFX6-NEXT: s_ashr_i32 s1, s1, 24 ; GFX6-NEXT: s_lshl_b32 s3, s6, 24 -; GFX6-NEXT: s_cmp_gt_i32 s2, 0 -; GFX6-NEXT: s_cselect_b32 s5, s2, 0 -; GFX6-NEXT: s_sub_i32 s5, s8, s5 -; GFX6-NEXT: s_cmp_lt_i32 s2, 0 -; GFX6-NEXT: s_cselect_b32 s6, s2, 0 +; GFX6-NEXT: s_min_i32 s6, s2, 0 +; GFX6-NEXT: s_max_i32 s5, s2, 0 ; GFX6-NEXT: s_sub_i32 s6, s9, s6 -; GFX6-NEXT: s_cmp_gt_i32 s6, s3 -; GFX6-NEXT: s_cselect_b32 s3, s6, s3 -; GFX6-NEXT: s_cmp_lt_i32 s3, s5 -; GFX6-NEXT: s_cselect_b32 s3, s3, s5 +; GFX6-NEXT: s_sub_i32 s5, s8, s5 +; GFX6-NEXT: s_max_i32 s3, s6, s3 +; GFX6-NEXT: s_min_i32 s3, s3, s5 ; GFX6-NEXT: s_add_i32 s2, s2, s3 ; GFX6-NEXT: s_lshl_b32 s3, s4, 24 -; GFX6-NEXT: s_ashr_i32 s2, s2, 24 +; GFX6-NEXT: s_min_i32 s6, s3, 0 +; GFX6-NEXT: s_max_i32 s5, s3, 0 ; GFX6-NEXT: s_lshl_b32 s4, s7, 24 -; GFX6-NEXT: s_cmp_gt_i32 s3, 0 -; GFX6-NEXT: s_cselect_b32 s5, s3, 0 -; GFX6-NEXT: s_sub_i32 s5, s8, s5 -; GFX6-NEXT: s_cmp_lt_i32 s3, 0 -; GFX6-NEXT: s_cselect_b32 s6, s3, 0 ; GFX6-NEXT: s_sub_i32 s6, s9, s6 -; GFX6-NEXT: s_cmp_gt_i32 s6, s4 -; GFX6-NEXT: s_cselect_b32 s4, s6, s4 -; GFX6-NEXT: s_cmp_lt_i32 s4, s5 -; GFX6-NEXT: s_cselect_b32 s4, s4, s5 +; GFX6-NEXT: s_sub_i32 s5, s8, s5 +; GFX6-NEXT: s_max_i32 s4, s6, s4 +; GFX6-NEXT: s_min_i32 s4, s4, s5 ; GFX6-NEXT: s_add_i32 s3, s3, s4 +; GFX6-NEXT: s_ashr_i32 s1, s1, 24 ; GFX6-NEXT: s_movk_i32 s4, 0xff +; GFX6-NEXT: s_ashr_i32 s0, s0, 24 ; GFX6-NEXT: s_and_b32 s1, s1, s4 +; GFX6-NEXT: s_ashr_i32 s2, s2, 24 ; GFX6-NEXT: s_and_b32 s0, s0, s4 ; GFX6-NEXT: s_lshl_b32 s1, s1, 8 ; GFX6-NEXT: s_or_b32 s0, s0, s1 @@ -795,91 +747,75 @@ ; GFX8-NEXT: s_lshr_b32 s3, s0, 16 ; GFX8-NEXT: s_lshr_b32 s4, s0, 24 ; GFX8-NEXT: s_lshl_b32 s0, s0, s8 +; GFX8-NEXT: s_sext_i32_i16 s11, s0 +; GFX8-NEXT: s_sext_i32_i16 s12, 0 +; GFX8-NEXT: s_max_i32 s13, s11, s12 +; GFX8-NEXT: s_movk_i32 s10, 0x8000 +; GFX8-NEXT: s_min_i32 s11, s11, s12 +; GFX8-NEXT: s_sub_i32 s11, s10, s11 ; GFX8-NEXT: s_lshr_b32 s5, s1, 8 ; GFX8-NEXT: s_lshr_b32 s6, s1, 16 ; GFX8-NEXT: s_lshr_b32 s7, s1, 24 ; GFX8-NEXT: s_lshl_b32 s1, s1, s8 -; GFX8-NEXT: s_sext_i32_i16 s11, s0 -; GFX8-NEXT: s_sext_i32_i16 s12, 0 -; GFX8-NEXT: s_cmp_gt_i32 s11, s12 ; GFX8-NEXT: s_movk_i32 s9, 0x7fff -; GFX8-NEXT: s_cselect_b32 s13, s11, s12 -; GFX8-NEXT: s_sub_i32 s13, s9, s13 -; GFX8-NEXT: s_cmp_lt_i32 s11, s12 -; GFX8-NEXT: s_movk_i32 s10, 0x8000 -; GFX8-NEXT: s_cselect_b32 s11, s11, s12 -; GFX8-NEXT: s_sub_i32 s11, s10, s11 ; GFX8-NEXT: s_sext_i32_i16 s11, s11 ; GFX8-NEXT: s_sext_i32_i16 s1, s1 -; GFX8-NEXT: s_cmp_gt_i32 s11, s1 -; GFX8-NEXT: s_cselect_b32 s1, s11, s1 +; GFX8-NEXT: s_max_i32 s1, s11, s1 +; GFX8-NEXT: s_sub_i32 s13, s9, s13 ; GFX8-NEXT: s_sext_i32_i16 s1, s1 ; GFX8-NEXT: s_sext_i32_i16 s11, s13 -; GFX8-NEXT: s_cmp_lt_i32 s1, s11 -; GFX8-NEXT: s_cselect_b32 s1, s1, s11 +; GFX8-NEXT: s_min_i32 s1, s1, s11 ; GFX8-NEXT: s_add_i32 s0, s0, s1 -; GFX8-NEXT: s_sext_i32_i16 s0, s0 ; GFX8-NEXT: s_lshl_b32 s1, s2, s8 ; GFX8-NEXT: s_lshl_b32 s2, s5, s8 -; GFX8-NEXT: s_ashr_i32 s0, s0, s8 ; GFX8-NEXT: s_sext_i32_i16 s5, s1 -; GFX8-NEXT: s_cmp_gt_i32 s5, s12 -; GFX8-NEXT: s_cselect_b32 s11, s5, s12 -; GFX8-NEXT: s_sub_i32 s11, s9, s11 -; GFX8-NEXT: s_cmp_lt_i32 s5, s12 -; GFX8-NEXT: s_cselect_b32 s5, s5, s12 +; GFX8-NEXT: s_max_i32 s11, s5, s12 +; GFX8-NEXT: s_min_i32 s5, s5, s12 ; GFX8-NEXT: s_sub_i32 s5, s10, s5 ; GFX8-NEXT: s_sext_i32_i16 s5, s5 ; GFX8-NEXT: s_sext_i32_i16 s2, s2 -; GFX8-NEXT: s_cmp_gt_i32 s5, s2 -; GFX8-NEXT: s_cselect_b32 s2, s5, s2 +; GFX8-NEXT: s_max_i32 s2, s5, s2 +; GFX8-NEXT: s_sub_i32 s11, s9, s11 ; GFX8-NEXT: s_sext_i32_i16 s2, s2 ; GFX8-NEXT: s_sext_i32_i16 s5, s11 -; GFX8-NEXT: s_cmp_lt_i32 s2, s5 -; GFX8-NEXT: s_cselect_b32 s2, s2, s5 +; GFX8-NEXT: s_min_i32 s2, s2, s5 ; GFX8-NEXT: s_add_i32 s1, s1, s2 -; GFX8-NEXT: s_sext_i32_i16 s1, s1 ; GFX8-NEXT: s_lshl_b32 s2, s3, s8 -; GFX8-NEXT: s_lshl_b32 s3, s6, s8 -; GFX8-NEXT: s_ashr_i32 s1, s1, s8 ; GFX8-NEXT: s_sext_i32_i16 s5, s2 -; GFX8-NEXT: s_cmp_gt_i32 s5, s12 -; GFX8-NEXT: s_cselect_b32 s6, s5, s12 -; GFX8-NEXT: s_sub_i32 s6, s9, s6 -; GFX8-NEXT: s_cmp_lt_i32 s5, s12 -; GFX8-NEXT: s_cselect_b32 s5, s5, s12 +; GFX8-NEXT: s_lshl_b32 s3, s6, s8 +; GFX8-NEXT: s_max_i32 s6, s5, s12 +; GFX8-NEXT: s_min_i32 s5, s5, s12 ; GFX8-NEXT: s_sub_i32 s5, s10, s5 ; GFX8-NEXT: s_sext_i32_i16 s5, s5 ; GFX8-NEXT: s_sext_i32_i16 s3, s3 -; GFX8-NEXT: s_cmp_gt_i32 s5, s3 -; GFX8-NEXT: s_cselect_b32 s3, s5, s3 +; GFX8-NEXT: s_max_i32 s3, s5, s3 +; GFX8-NEXT: s_sub_i32 s6, s9, s6 ; GFX8-NEXT: s_sext_i32_i16 s3, s3 ; GFX8-NEXT: s_sext_i32_i16 s5, s6 -; GFX8-NEXT: s_cmp_lt_i32 s3, s5 -; GFX8-NEXT: s_cselect_b32 s3, s3, s5 +; GFX8-NEXT: s_min_i32 s3, s3, s5 ; GFX8-NEXT: s_add_i32 s2, s2, s3 -; GFX8-NEXT: s_sext_i32_i16 s2, s2 ; GFX8-NEXT: s_lshl_b32 s3, s4, s8 -; GFX8-NEXT: s_lshl_b32 s4, s7, s8 -; GFX8-NEXT: s_ashr_i32 s2, s2, s8 ; GFX8-NEXT: s_sext_i32_i16 s5, s3 -; GFX8-NEXT: s_cmp_gt_i32 s5, s12 -; GFX8-NEXT: s_cselect_b32 s6, s5, s12 -; GFX8-NEXT: s_sub_i32 s6, s9, s6 -; GFX8-NEXT: s_cmp_lt_i32 s5, s12 -; GFX8-NEXT: s_cselect_b32 s5, s5, s12 +; GFX8-NEXT: s_max_i32 s6, s5, s12 +; GFX8-NEXT: s_min_i32 s5, s5, s12 +; GFX8-NEXT: s_lshl_b32 s4, s7, s8 ; GFX8-NEXT: s_sub_i32 s5, s10, s5 ; GFX8-NEXT: s_sext_i32_i16 s5, s5 ; GFX8-NEXT: s_sext_i32_i16 s4, s4 -; GFX8-NEXT: s_cmp_gt_i32 s5, s4 -; GFX8-NEXT: s_cselect_b32 s4, s5, s4 +; GFX8-NEXT: s_max_i32 s4, s5, s4 +; GFX8-NEXT: s_sub_i32 s6, s9, s6 ; GFX8-NEXT: s_sext_i32_i16 s4, s4 ; GFX8-NEXT: s_sext_i32_i16 s5, s6 -; GFX8-NEXT: s_cmp_lt_i32 s4, s5 -; GFX8-NEXT: s_cselect_b32 s4, s4, s5 +; GFX8-NEXT: s_sext_i32_i16 s1, s1 +; GFX8-NEXT: s_min_i32 s4, s4, s5 ; GFX8-NEXT: s_add_i32 s3, s3, s4 +; GFX8-NEXT: s_sext_i32_i16 s0, s0 +; GFX8-NEXT: s_ashr_i32 s1, s1, s8 ; GFX8-NEXT: s_movk_i32 s4, 0xff +; GFX8-NEXT: s_ashr_i32 s0, s0, s8 +; GFX8-NEXT: s_sext_i32_i16 s2, s2 ; GFX8-NEXT: s_and_b32 s1, s1, s4 +; GFX8-NEXT: s_ashr_i32 s2, s2, s8 ; GFX8-NEXT: s_and_b32 s0, s0, s4 ; GFX8-NEXT: s_lshl_b32 s1, s1, 8 ; GFX8-NEXT: s_sext_i32_i16 s3, s3 @@ -1046,17 +982,13 @@ ; GFX6-LABEL: s_saddsat_i24: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_lshl_b32 s0, s0, 8 +; GFX6-NEXT: s_min_i32 s3, s0, 0 +; GFX6-NEXT: s_max_i32 s2, s0, 0 ; GFX6-NEXT: s_lshl_b32 s1, s1, 8 -; GFX6-NEXT: s_cmp_gt_i32 s0, 0 -; GFX6-NEXT: s_cselect_b32 s2, s0, 0 -; GFX6-NEXT: s_sub_i32 s2, 0x7fffffff, s2 -; GFX6-NEXT: s_cmp_lt_i32 s0, 0 -; GFX6-NEXT: s_cselect_b32 s3, s0, 0 ; GFX6-NEXT: s_sub_i32 s3, 0x80000000, s3 -; GFX6-NEXT: s_cmp_gt_i32 s3, s1 -; GFX6-NEXT: s_cselect_b32 s1, s3, s1 -; GFX6-NEXT: s_cmp_lt_i32 s1, s2 -; GFX6-NEXT: s_cselect_b32 s1, s1, s2 +; GFX6-NEXT: s_sub_i32 s2, 0x7fffffff, s2 +; GFX6-NEXT: s_max_i32 s1, s3, s1 +; GFX6-NEXT: s_min_i32 s1, s1, s2 ; GFX6-NEXT: s_add_i32 s0, s0, s1 ; GFX6-NEXT: s_ashr_i32 s0, s0, 8 ; GFX6-NEXT: ; return to shader part epilog @@ -1159,31 +1091,23 @@ ; GCN-NEXT: ; return to shader part epilog ; GFX6-LABEL: s_saddsat_i32: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_cmp_gt_i32 s0, 0 -; GFX6-NEXT: s_cselect_b32 s2, s0, 0 -; GFX6-NEXT: s_sub_i32 s2, 0x7fffffff, s2 -; GFX6-NEXT: s_cmp_lt_i32 s0, 0 -; GFX6-NEXT: s_cselect_b32 s3, s0, 0 +; GFX6-NEXT: s_min_i32 s3, s0, 0 +; GFX6-NEXT: s_max_i32 s2, s0, 0 ; GFX6-NEXT: s_sub_i32 s3, 0x80000000, s3 -; GFX6-NEXT: s_cmp_gt_i32 s3, s1 -; GFX6-NEXT: s_cselect_b32 s1, s3, s1 -; GFX6-NEXT: s_cmp_lt_i32 s1, s2 -; GFX6-NEXT: s_cselect_b32 s1, s1, s2 +; GFX6-NEXT: s_sub_i32 s2, 0x7fffffff, s2 +; GFX6-NEXT: s_max_i32 s1, s3, s1 +; GFX6-NEXT: s_min_i32 s1, s1, s2 ; GFX6-NEXT: s_add_i32 s0, s0, s1 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_saddsat_i32: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_cmp_gt_i32 s0, 0 -; GFX8-NEXT: s_cselect_b32 s2, s0, 0 -; GFX8-NEXT: s_sub_i32 s2, 0x7fffffff, s2 -; GFX8-NEXT: s_cmp_lt_i32 s0, 0 -; GFX8-NEXT: s_cselect_b32 s3, s0, 0 +; GFX8-NEXT: s_min_i32 s3, s0, 0 +; GFX8-NEXT: s_max_i32 s2, s0, 0 ; GFX8-NEXT: s_sub_i32 s3, 0x80000000, s3 -; GFX8-NEXT: s_cmp_gt_i32 s3, s1 -; GFX8-NEXT: s_cselect_b32 s1, s3, s1 -; GFX8-NEXT: s_cmp_lt_i32 s1, s2 -; GFX8-NEXT: s_cselect_b32 s1, s1, s2 +; GFX8-NEXT: s_sub_i32 s2, 0x7fffffff, s2 +; GFX8-NEXT: s_max_i32 s1, s3, s1 +; GFX8-NEXT: s_min_i32 s1, s1, s2 ; GFX8-NEXT: s_add_i32 s0, s0, s1 ; GFX8-NEXT: ; return to shader part epilog ; @@ -1206,12 +1130,10 @@ define amdgpu_ps float @saddsat_i32_sv(i32 inreg %lhs, i32 %rhs) { ; GFX6-LABEL: saddsat_i32_sv: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_cmp_gt_i32 s0, 0 -; GFX6-NEXT: s_cselect_b32 s1, s0, 0 -; GFX6-NEXT: s_sub_i32 s1, 0x7fffffff, s1 -; GFX6-NEXT: s_cmp_lt_i32 s0, 0 -; GFX6-NEXT: s_cselect_b32 s2, s0, 0 +; GFX6-NEXT: s_min_i32 s2, s0, 0 +; GFX6-NEXT: s_max_i32 s1, s0, 0 ; GFX6-NEXT: s_sub_i32 s2, 0x80000000, s2 +; GFX6-NEXT: s_sub_i32 s1, 0x7fffffff, s1 ; GFX6-NEXT: v_max_i32_e32 v0, s2, v0 ; GFX6-NEXT: v_min_i32_e32 v0, s1, v0 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v0 @@ -1219,12 +1141,10 @@ ; ; GFX8-LABEL: saddsat_i32_sv: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_cmp_gt_i32 s0, 0 -; GFX8-NEXT: s_cselect_b32 s1, s0, 0 -; GFX8-NEXT: s_sub_i32 s1, 0x7fffffff, s1 -; GFX8-NEXT: s_cmp_lt_i32 s0, 0 -; GFX8-NEXT: s_cselect_b32 s2, s0, 0 +; GFX8-NEXT: s_min_i32 s2, s0, 0 +; GFX8-NEXT: s_max_i32 s1, s0, 0 ; GFX8-NEXT: s_sub_i32 s2, 0x80000000, s2 +; GFX8-NEXT: s_sub_i32 s1, 0x7fffffff, s1 ; GFX8-NEXT: v_max_i32_e32 v0, s2, v0 ; GFX8-NEXT: v_min_i32_e32 v0, s1, v0 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v0 @@ -1345,57 +1265,41 @@ define amdgpu_ps <2 x i32> @s_saddsat_v2i32(<2 x i32> inreg %lhs, <2 x i32> inreg %rhs) { ; GFX6-LABEL: s_saddsat_v2i32: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_cmp_gt_i32 s0, 0 -; GFX6-NEXT: s_brev_b32 s4, -2 -; GFX6-NEXT: s_cselect_b32 s6, s0, 0 -; GFX6-NEXT: s_sub_i32 s6, s4, s6 -; GFX6-NEXT: s_cmp_lt_i32 s0, 0 ; GFX6-NEXT: s_brev_b32 s5, 1 -; GFX6-NEXT: s_cselect_b32 s7, s0, 0 +; GFX6-NEXT: s_min_i32 s7, s0, 0 ; GFX6-NEXT: s_sub_i32 s7, s5, s7 -; GFX6-NEXT: s_cmp_gt_i32 s7, s2 -; GFX6-NEXT: s_cselect_b32 s2, s7, s2 -; GFX6-NEXT: s_cmp_lt_i32 s2, s6 -; GFX6-NEXT: s_cselect_b32 s2, s2, s6 +; GFX6-NEXT: s_brev_b32 s4, -2 +; GFX6-NEXT: s_max_i32 s6, s0, 0 +; GFX6-NEXT: s_sub_i32 s6, s4, s6 +; GFX6-NEXT: s_max_i32 s2, s7, s2 +; GFX6-NEXT: s_min_i32 s2, s2, s6 ; GFX6-NEXT: s_add_i32 s0, s0, s2 -; GFX6-NEXT: s_cmp_gt_i32 s1, 0 -; GFX6-NEXT: s_cselect_b32 s2, s1, 0 +; GFX6-NEXT: s_max_i32 s2, s1, 0 ; GFX6-NEXT: s_sub_i32 s2, s4, s2 -; GFX6-NEXT: s_cmp_lt_i32 s1, 0 -; GFX6-NEXT: s_cselect_b32 s4, s1, 0 +; GFX6-NEXT: s_min_i32 s4, s1, 0 ; GFX6-NEXT: s_sub_i32 s4, s5, s4 -; GFX6-NEXT: s_cmp_gt_i32 s4, s3 -; GFX6-NEXT: s_cselect_b32 s3, s4, s3 -; GFX6-NEXT: s_cmp_lt_i32 s3, s2 -; GFX6-NEXT: s_cselect_b32 s2, s3, s2 +; GFX6-NEXT: s_max_i32 s3, s4, s3 +; GFX6-NEXT: s_min_i32 s2, s3, s2 ; GFX6-NEXT: s_add_i32 s1, s1, s2 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_saddsat_v2i32: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_cmp_gt_i32 s0, 0 -; GFX8-NEXT: s_brev_b32 s4, -2 -; GFX8-NEXT: s_cselect_b32 s6, s0, 0 -; GFX8-NEXT: s_sub_i32 s6, s4, s6 -; GFX8-NEXT: s_cmp_lt_i32 s0, 0 ; GFX8-NEXT: s_brev_b32 s5, 1 -; GFX8-NEXT: s_cselect_b32 s7, s0, 0 +; GFX8-NEXT: s_min_i32 s7, s0, 0 ; GFX8-NEXT: s_sub_i32 s7, s5, s7 -; GFX8-NEXT: s_cmp_gt_i32 s7, s2 -; GFX8-NEXT: s_cselect_b32 s2, s7, s2 -; GFX8-NEXT: s_cmp_lt_i32 s2, s6 -; GFX8-NEXT: s_cselect_b32 s2, s2, s6 +; GFX8-NEXT: s_brev_b32 s4, -2 +; GFX8-NEXT: s_max_i32 s6, s0, 0 +; GFX8-NEXT: s_sub_i32 s6, s4, s6 +; GFX8-NEXT: s_max_i32 s2, s7, s2 +; GFX8-NEXT: s_min_i32 s2, s2, s6 ; GFX8-NEXT: s_add_i32 s0, s0, s2 -; GFX8-NEXT: s_cmp_gt_i32 s1, 0 -; GFX8-NEXT: s_cselect_b32 s2, s1, 0 +; GFX8-NEXT: s_max_i32 s2, s1, 0 ; GFX8-NEXT: s_sub_i32 s2, s4, s2 -; GFX8-NEXT: s_cmp_lt_i32 s1, 0 -; GFX8-NEXT: s_cselect_b32 s4, s1, 0 +; GFX8-NEXT: s_min_i32 s4, s1, 0 ; GFX8-NEXT: s_sub_i32 s4, s5, s4 -; GFX8-NEXT: s_cmp_gt_i32 s4, s3 -; GFX8-NEXT: s_cselect_b32 s3, s4, s3 -; GFX8-NEXT: s_cmp_lt_i32 s3, s2 -; GFX8-NEXT: s_cselect_b32 s2, s3, s2 +; GFX8-NEXT: s_max_i32 s3, s4, s3 +; GFX8-NEXT: s_min_i32 s2, s3, s2 ; GFX8-NEXT: s_add_i32 s1, s1, s2 ; GFX8-NEXT: ; return to shader part epilog ; @@ -1500,79 +1404,55 @@ define amdgpu_ps <3 x i32> @s_saddsat_v3i32(<3 x i32> inreg %lhs, <3 x i32> inreg %rhs) { ; GFX6-LABEL: s_saddsat_v3i32: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_cmp_gt_i32 s0, 0 -; GFX6-NEXT: s_brev_b32 s6, -2 -; GFX6-NEXT: s_cselect_b32 s8, s0, 0 -; GFX6-NEXT: s_sub_i32 s8, s6, s8 -; GFX6-NEXT: s_cmp_lt_i32 s0, 0 ; GFX6-NEXT: s_brev_b32 s7, 1 -; GFX6-NEXT: s_cselect_b32 s9, s0, 0 +; GFX6-NEXT: s_min_i32 s9, s0, 0 ; GFX6-NEXT: s_sub_i32 s9, s7, s9 -; GFX6-NEXT: s_cmp_gt_i32 s9, s3 -; GFX6-NEXT: s_cselect_b32 s3, s9, s3 -; GFX6-NEXT: s_cmp_lt_i32 s3, s8 -; GFX6-NEXT: s_cselect_b32 s3, s3, s8 +; GFX6-NEXT: s_brev_b32 s6, -2 +; GFX6-NEXT: s_max_i32 s8, s0, 0 +; GFX6-NEXT: s_sub_i32 s8, s6, s8 +; GFX6-NEXT: s_max_i32 s3, s9, s3 +; GFX6-NEXT: s_min_i32 s3, s3, s8 +; GFX6-NEXT: s_min_i32 s8, s1, 0 ; GFX6-NEXT: s_add_i32 s0, s0, s3 -; GFX6-NEXT: s_cmp_gt_i32 s1, 0 -; GFX6-NEXT: s_cselect_b32 s3, s1, 0 -; GFX6-NEXT: s_sub_i32 s3, s6, s3 -; GFX6-NEXT: s_cmp_lt_i32 s1, 0 -; GFX6-NEXT: s_cselect_b32 s8, s1, 0 +; GFX6-NEXT: s_max_i32 s3, s1, 0 ; GFX6-NEXT: s_sub_i32 s8, s7, s8 -; GFX6-NEXT: s_cmp_gt_i32 s8, s4 -; GFX6-NEXT: s_cselect_b32 s4, s8, s4 -; GFX6-NEXT: s_cmp_lt_i32 s4, s3 -; GFX6-NEXT: s_cselect_b32 s3, s4, s3 -; GFX6-NEXT: s_add_i32 s1, s1, s3 -; GFX6-NEXT: s_cmp_gt_i32 s2, 0 -; GFX6-NEXT: s_cselect_b32 s3, s2, 0 ; GFX6-NEXT: s_sub_i32 s3, s6, s3 -; GFX6-NEXT: s_cmp_lt_i32 s2, 0 -; GFX6-NEXT: s_cselect_b32 s4, s2, 0 +; GFX6-NEXT: s_max_i32 s4, s8, s4 +; GFX6-NEXT: s_min_i32 s3, s4, s3 +; GFX6-NEXT: s_min_i32 s4, s2, 0 ; GFX6-NEXT: s_sub_i32 s4, s7, s4 -; GFX6-NEXT: s_cmp_gt_i32 s4, s5 -; GFX6-NEXT: s_cselect_b32 s4, s4, s5 -; GFX6-NEXT: s_cmp_lt_i32 s4, s3 -; GFX6-NEXT: s_cselect_b32 s3, s4, s3 +; GFX6-NEXT: s_add_i32 s1, s1, s3 +; GFX6-NEXT: s_max_i32 s3, s2, 0 +; GFX6-NEXT: s_sub_i32 s3, s6, s3 +; GFX6-NEXT: s_max_i32 s4, s4, s5 +; GFX6-NEXT: s_min_i32 s3, s4, s3 ; GFX6-NEXT: s_add_i32 s2, s2, s3 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_saddsat_v3i32: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_cmp_gt_i32 s0, 0 -; GFX8-NEXT: s_brev_b32 s6, -2 -; GFX8-NEXT: s_cselect_b32 s8, s0, 0 -; GFX8-NEXT: s_sub_i32 s8, s6, s8 -; GFX8-NEXT: s_cmp_lt_i32 s0, 0 ; GFX8-NEXT: s_brev_b32 s7, 1 -; GFX8-NEXT: s_cselect_b32 s9, s0, 0 +; GFX8-NEXT: s_min_i32 s9, s0, 0 ; GFX8-NEXT: s_sub_i32 s9, s7, s9 -; GFX8-NEXT: s_cmp_gt_i32 s9, s3 -; GFX8-NEXT: s_cselect_b32 s3, s9, s3 -; GFX8-NEXT: s_cmp_lt_i32 s3, s8 -; GFX8-NEXT: s_cselect_b32 s3, s3, s8 +; GFX8-NEXT: s_brev_b32 s6, -2 +; GFX8-NEXT: s_max_i32 s8, s0, 0 +; GFX8-NEXT: s_sub_i32 s8, s6, s8 +; GFX8-NEXT: s_max_i32 s3, s9, s3 +; GFX8-NEXT: s_min_i32 s3, s3, s8 +; GFX8-NEXT: s_min_i32 s8, s1, 0 ; GFX8-NEXT: s_add_i32 s0, s0, s3 -; GFX8-NEXT: s_cmp_gt_i32 s1, 0 -; GFX8-NEXT: s_cselect_b32 s3, s1, 0 -; GFX8-NEXT: s_sub_i32 s3, s6, s3 -; GFX8-NEXT: s_cmp_lt_i32 s1, 0 -; GFX8-NEXT: s_cselect_b32 s8, s1, 0 +; GFX8-NEXT: s_max_i32 s3, s1, 0 ; GFX8-NEXT: s_sub_i32 s8, s7, s8 -; GFX8-NEXT: s_cmp_gt_i32 s8, s4 -; GFX8-NEXT: s_cselect_b32 s4, s8, s4 -; GFX8-NEXT: s_cmp_lt_i32 s4, s3 -; GFX8-NEXT: s_cselect_b32 s3, s4, s3 -; GFX8-NEXT: s_add_i32 s1, s1, s3 -; GFX8-NEXT: s_cmp_gt_i32 s2, 0 -; GFX8-NEXT: s_cselect_b32 s3, s2, 0 ; GFX8-NEXT: s_sub_i32 s3, s6, s3 -; GFX8-NEXT: s_cmp_lt_i32 s2, 0 -; GFX8-NEXT: s_cselect_b32 s4, s2, 0 +; GFX8-NEXT: s_max_i32 s4, s8, s4 +; GFX8-NEXT: s_min_i32 s3, s4, s3 +; GFX8-NEXT: s_min_i32 s4, s2, 0 ; GFX8-NEXT: s_sub_i32 s4, s7, s4 -; GFX8-NEXT: s_cmp_gt_i32 s4, s5 -; GFX8-NEXT: s_cselect_b32 s4, s4, s5 -; GFX8-NEXT: s_cmp_lt_i32 s4, s3 -; GFX8-NEXT: s_cselect_b32 s3, s4, s3 +; GFX8-NEXT: s_add_i32 s1, s1, s3 +; GFX8-NEXT: s_max_i32 s3, s2, 0 +; GFX8-NEXT: s_sub_i32 s3, s6, s3 +; GFX8-NEXT: s_max_i32 s4, s4, s5 +; GFX8-NEXT: s_min_i32 s3, s4, s3 ; GFX8-NEXT: s_add_i32 s2, s2, s3 ; GFX8-NEXT: ; return to shader part epilog ; @@ -1698,101 +1578,69 @@ define amdgpu_ps <4 x i32> @s_saddsat_v4i32(<4 x i32> inreg %lhs, <4 x i32> inreg %rhs) { ; GFX6-LABEL: s_saddsat_v4i32: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_cmp_gt_i32 s0, 0 -; GFX6-NEXT: s_brev_b32 s8, -2 -; GFX6-NEXT: s_cselect_b32 s10, s0, 0 -; GFX6-NEXT: s_sub_i32 s10, s8, s10 -; GFX6-NEXT: s_cmp_lt_i32 s0, 0 ; GFX6-NEXT: s_brev_b32 s9, 1 -; GFX6-NEXT: s_cselect_b32 s11, s0, 0 +; GFX6-NEXT: s_min_i32 s11, s0, 0 ; GFX6-NEXT: s_sub_i32 s11, s9, s11 -; GFX6-NEXT: s_cmp_gt_i32 s11, s4 -; GFX6-NEXT: s_cselect_b32 s4, s11, s4 -; GFX6-NEXT: s_cmp_lt_i32 s4, s10 -; GFX6-NEXT: s_cselect_b32 s4, s4, s10 +; GFX6-NEXT: s_brev_b32 s8, -2 +; GFX6-NEXT: s_max_i32 s10, s0, 0 +; GFX6-NEXT: s_sub_i32 s10, s8, s10 +; GFX6-NEXT: s_max_i32 s4, s11, s4 +; GFX6-NEXT: s_min_i32 s4, s4, s10 +; GFX6-NEXT: s_min_i32 s10, s1, 0 ; GFX6-NEXT: s_add_i32 s0, s0, s4 -; GFX6-NEXT: s_cmp_gt_i32 s1, 0 -; GFX6-NEXT: s_cselect_b32 s4, s1, 0 -; GFX6-NEXT: s_sub_i32 s4, s8, s4 -; GFX6-NEXT: s_cmp_lt_i32 s1, 0 -; GFX6-NEXT: s_cselect_b32 s10, s1, 0 +; GFX6-NEXT: s_max_i32 s4, s1, 0 ; GFX6-NEXT: s_sub_i32 s10, s9, s10 -; GFX6-NEXT: s_cmp_gt_i32 s10, s5 -; GFX6-NEXT: s_cselect_b32 s5, s10, s5 -; GFX6-NEXT: s_cmp_lt_i32 s5, s4 -; GFX6-NEXT: s_cselect_b32 s4, s5, s4 +; GFX6-NEXT: s_sub_i32 s4, s8, s4 +; GFX6-NEXT: s_max_i32 s5, s10, s5 +; GFX6-NEXT: s_min_i32 s4, s5, s4 +; GFX6-NEXT: s_min_i32 s5, s2, 0 +; GFX6-NEXT: s_sub_i32 s5, s9, s5 ; GFX6-NEXT: s_add_i32 s1, s1, s4 -; GFX6-NEXT: s_cmp_gt_i32 s2, 0 -; GFX6-NEXT: s_cselect_b32 s4, s2, 0 +; GFX6-NEXT: s_max_i32 s4, s2, 0 ; GFX6-NEXT: s_sub_i32 s4, s8, s4 -; GFX6-NEXT: s_cmp_lt_i32 s2, 0 -; GFX6-NEXT: s_cselect_b32 s5, s2, 0 +; GFX6-NEXT: s_max_i32 s5, s5, s6 +; GFX6-NEXT: s_min_i32 s4, s5, s4 +; GFX6-NEXT: s_min_i32 s5, s3, 0 ; GFX6-NEXT: s_sub_i32 s5, s9, s5 -; GFX6-NEXT: s_cmp_gt_i32 s5, s6 -; GFX6-NEXT: s_cselect_b32 s5, s5, s6 -; GFX6-NEXT: s_cmp_lt_i32 s5, s4 -; GFX6-NEXT: s_cselect_b32 s4, s5, s4 ; GFX6-NEXT: s_add_i32 s2, s2, s4 -; GFX6-NEXT: s_cmp_gt_i32 s3, 0 -; GFX6-NEXT: s_cselect_b32 s4, s3, 0 +; GFX6-NEXT: s_max_i32 s4, s3, 0 ; GFX6-NEXT: s_sub_i32 s4, s8, s4 -; GFX6-NEXT: s_cmp_lt_i32 s3, 0 -; GFX6-NEXT: s_cselect_b32 s5, s3, 0 -; GFX6-NEXT: s_sub_i32 s5, s9, s5 -; GFX6-NEXT: s_cmp_gt_i32 s5, s7 -; GFX6-NEXT: s_cselect_b32 s5, s5, s7 -; GFX6-NEXT: s_cmp_lt_i32 s5, s4 -; GFX6-NEXT: s_cselect_b32 s4, s5, s4 +; GFX6-NEXT: s_max_i32 s5, s5, s7 +; GFX6-NEXT: s_min_i32 s4, s5, s4 ; GFX6-NEXT: s_add_i32 s3, s3, s4 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_saddsat_v4i32: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_cmp_gt_i32 s0, 0 -; GFX8-NEXT: s_brev_b32 s8, -2 -; GFX8-NEXT: s_cselect_b32 s10, s0, 0 -; GFX8-NEXT: s_sub_i32 s10, s8, s10 -; GFX8-NEXT: s_cmp_lt_i32 s0, 0 ; GFX8-NEXT: s_brev_b32 s9, 1 -; GFX8-NEXT: s_cselect_b32 s11, s0, 0 +; GFX8-NEXT: s_min_i32 s11, s0, 0 ; GFX8-NEXT: s_sub_i32 s11, s9, s11 -; GFX8-NEXT: s_cmp_gt_i32 s11, s4 -; GFX8-NEXT: s_cselect_b32 s4, s11, s4 -; GFX8-NEXT: s_cmp_lt_i32 s4, s10 -; GFX8-NEXT: s_cselect_b32 s4, s4, s10 +; GFX8-NEXT: s_brev_b32 s8, -2 +; GFX8-NEXT: s_max_i32 s10, s0, 0 +; GFX8-NEXT: s_sub_i32 s10, s8, s10 +; GFX8-NEXT: s_max_i32 s4, s11, s4 +; GFX8-NEXT: s_min_i32 s4, s4, s10 +; GFX8-NEXT: s_min_i32 s10, s1, 0 ; GFX8-NEXT: s_add_i32 s0, s0, s4 -; GFX8-NEXT: s_cmp_gt_i32 s1, 0 -; GFX8-NEXT: s_cselect_b32 s4, s1, 0 -; GFX8-NEXT: s_sub_i32 s4, s8, s4 -; GFX8-NEXT: s_cmp_lt_i32 s1, 0 -; GFX8-NEXT: s_cselect_b32 s10, s1, 0 +; GFX8-NEXT: s_max_i32 s4, s1, 0 ; GFX8-NEXT: s_sub_i32 s10, s9, s10 -; GFX8-NEXT: s_cmp_gt_i32 s10, s5 -; GFX8-NEXT: s_cselect_b32 s5, s10, s5 -; GFX8-NEXT: s_cmp_lt_i32 s5, s4 -; GFX8-NEXT: s_cselect_b32 s4, s5, s4 +; GFX8-NEXT: s_sub_i32 s4, s8, s4 +; GFX8-NEXT: s_max_i32 s5, s10, s5 +; GFX8-NEXT: s_min_i32 s4, s5, s4 +; GFX8-NEXT: s_min_i32 s5, s2, 0 +; GFX8-NEXT: s_sub_i32 s5, s9, s5 ; GFX8-NEXT: s_add_i32 s1, s1, s4 -; GFX8-NEXT: s_cmp_gt_i32 s2, 0 -; GFX8-NEXT: s_cselect_b32 s4, s2, 0 +; GFX8-NEXT: s_max_i32 s4, s2, 0 ; GFX8-NEXT: s_sub_i32 s4, s8, s4 -; GFX8-NEXT: s_cmp_lt_i32 s2, 0 -; GFX8-NEXT: s_cselect_b32 s5, s2, 0 +; GFX8-NEXT: s_max_i32 s5, s5, s6 +; GFX8-NEXT: s_min_i32 s4, s5, s4 +; GFX8-NEXT: s_min_i32 s5, s3, 0 ; GFX8-NEXT: s_sub_i32 s5, s9, s5 -; GFX8-NEXT: s_cmp_gt_i32 s5, s6 -; GFX8-NEXT: s_cselect_b32 s5, s5, s6 -; GFX8-NEXT: s_cmp_lt_i32 s5, s4 -; GFX8-NEXT: s_cselect_b32 s4, s5, s4 ; GFX8-NEXT: s_add_i32 s2, s2, s4 -; GFX8-NEXT: s_cmp_gt_i32 s3, 0 -; GFX8-NEXT: s_cselect_b32 s4, s3, 0 +; GFX8-NEXT: s_max_i32 s4, s3, 0 ; GFX8-NEXT: s_sub_i32 s4, s8, s4 -; GFX8-NEXT: s_cmp_lt_i32 s3, 0 -; GFX8-NEXT: s_cselect_b32 s5, s3, 0 -; GFX8-NEXT: s_sub_i32 s5, s9, s5 -; GFX8-NEXT: s_cmp_gt_i32 s5, s7 -; GFX8-NEXT: s_cselect_b32 s5, s5, s7 -; GFX8-NEXT: s_cmp_lt_i32 s5, s4 -; GFX8-NEXT: s_cselect_b32 s4, s5, s4 +; GFX8-NEXT: s_max_i32 s5, s5, s7 +; GFX8-NEXT: s_min_i32 s4, s5, s4 ; GFX8-NEXT: s_add_i32 s3, s3, s4 ; GFX8-NEXT: ; return to shader part epilog ; @@ -1943,123 +1791,83 @@ define amdgpu_ps <5 x i32> @s_saddsat_v5i32(<5 x i32> inreg %lhs, <5 x i32> inreg %rhs) { ; GFX6-LABEL: s_saddsat_v5i32: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_cmp_gt_i32 s0, 0 -; GFX6-NEXT: s_brev_b32 s10, -2 -; GFX6-NEXT: s_cselect_b32 s12, s0, 0 -; GFX6-NEXT: s_sub_i32 s12, s10, s12 -; GFX6-NEXT: s_cmp_lt_i32 s0, 0 ; GFX6-NEXT: s_brev_b32 s11, 1 -; GFX6-NEXT: s_cselect_b32 s13, s0, 0 +; GFX6-NEXT: s_min_i32 s13, s0, 0 ; GFX6-NEXT: s_sub_i32 s13, s11, s13 -; GFX6-NEXT: s_cmp_gt_i32 s13, s5 -; GFX6-NEXT: s_cselect_b32 s5, s13, s5 -; GFX6-NEXT: s_cmp_lt_i32 s5, s12 -; GFX6-NEXT: s_cselect_b32 s5, s5, s12 +; GFX6-NEXT: s_brev_b32 s10, -2 +; GFX6-NEXT: s_max_i32 s12, s0, 0 +; GFX6-NEXT: s_sub_i32 s12, s10, s12 +; GFX6-NEXT: s_max_i32 s5, s13, s5 +; GFX6-NEXT: s_min_i32 s5, s5, s12 +; GFX6-NEXT: s_min_i32 s12, s1, 0 ; GFX6-NEXT: s_add_i32 s0, s0, s5 -; GFX6-NEXT: s_cmp_gt_i32 s1, 0 -; GFX6-NEXT: s_cselect_b32 s5, s1, 0 -; GFX6-NEXT: s_sub_i32 s5, s10, s5 -; GFX6-NEXT: s_cmp_lt_i32 s1, 0 -; GFX6-NEXT: s_cselect_b32 s12, s1, 0 +; GFX6-NEXT: s_max_i32 s5, s1, 0 ; GFX6-NEXT: s_sub_i32 s12, s11, s12 -; GFX6-NEXT: s_cmp_gt_i32 s12, s6 -; GFX6-NEXT: s_cselect_b32 s6, s12, s6 -; GFX6-NEXT: s_cmp_lt_i32 s6, s5 -; GFX6-NEXT: s_cselect_b32 s5, s6, s5 +; GFX6-NEXT: s_sub_i32 s5, s10, s5 +; GFX6-NEXT: s_max_i32 s6, s12, s6 +; GFX6-NEXT: s_min_i32 s5, s6, s5 +; GFX6-NEXT: s_min_i32 s6, s2, 0 +; GFX6-NEXT: s_sub_i32 s6, s11, s6 ; GFX6-NEXT: s_add_i32 s1, s1, s5 -; GFX6-NEXT: s_cmp_gt_i32 s2, 0 -; GFX6-NEXT: s_cselect_b32 s5, s2, 0 +; GFX6-NEXT: s_max_i32 s5, s2, 0 ; GFX6-NEXT: s_sub_i32 s5, s10, s5 -; GFX6-NEXT: s_cmp_lt_i32 s2, 0 -; GFX6-NEXT: s_cselect_b32 s6, s2, 0 +; GFX6-NEXT: s_max_i32 s6, s6, s7 +; GFX6-NEXT: s_min_i32 s5, s6, s5 +; GFX6-NEXT: s_min_i32 s6, s3, 0 ; GFX6-NEXT: s_sub_i32 s6, s11, s6 -; GFX6-NEXT: s_cmp_gt_i32 s6, s7 -; GFX6-NEXT: s_cselect_b32 s6, s6, s7 -; GFX6-NEXT: s_cmp_lt_i32 s6, s5 -; GFX6-NEXT: s_cselect_b32 s5, s6, s5 ; GFX6-NEXT: s_add_i32 s2, s2, s5 -; GFX6-NEXT: s_cmp_gt_i32 s3, 0 -; GFX6-NEXT: s_cselect_b32 s5, s3, 0 +; GFX6-NEXT: s_max_i32 s5, s3, 0 ; GFX6-NEXT: s_sub_i32 s5, s10, s5 -; GFX6-NEXT: s_cmp_lt_i32 s3, 0 -; GFX6-NEXT: s_cselect_b32 s6, s3, 0 +; GFX6-NEXT: s_max_i32 s6, s6, s8 +; GFX6-NEXT: s_min_i32 s5, s6, s5 +; GFX6-NEXT: s_min_i32 s6, s4, 0 ; GFX6-NEXT: s_sub_i32 s6, s11, s6 -; GFX6-NEXT: s_cmp_gt_i32 s6, s8 -; GFX6-NEXT: s_cselect_b32 s6, s6, s8 -; GFX6-NEXT: s_cmp_lt_i32 s6, s5 -; GFX6-NEXT: s_cselect_b32 s5, s6, s5 ; GFX6-NEXT: s_add_i32 s3, s3, s5 -; GFX6-NEXT: s_cmp_gt_i32 s4, 0 -; GFX6-NEXT: s_cselect_b32 s5, s4, 0 +; GFX6-NEXT: s_max_i32 s5, s4, 0 ; GFX6-NEXT: s_sub_i32 s5, s10, s5 -; GFX6-NEXT: s_cmp_lt_i32 s4, 0 -; GFX6-NEXT: s_cselect_b32 s6, s4, 0 -; GFX6-NEXT: s_sub_i32 s6, s11, s6 -; GFX6-NEXT: s_cmp_gt_i32 s6, s9 -; GFX6-NEXT: s_cselect_b32 s6, s6, s9 -; GFX6-NEXT: s_cmp_lt_i32 s6, s5 -; GFX6-NEXT: s_cselect_b32 s5, s6, s5 +; GFX6-NEXT: s_max_i32 s6, s6, s9 +; GFX6-NEXT: s_min_i32 s5, s6, s5 ; GFX6-NEXT: s_add_i32 s4, s4, s5 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_saddsat_v5i32: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_cmp_gt_i32 s0, 0 -; GFX8-NEXT: s_brev_b32 s10, -2 -; GFX8-NEXT: s_cselect_b32 s12, s0, 0 -; GFX8-NEXT: s_sub_i32 s12, s10, s12 -; GFX8-NEXT: s_cmp_lt_i32 s0, 0 ; GFX8-NEXT: s_brev_b32 s11, 1 -; GFX8-NEXT: s_cselect_b32 s13, s0, 0 +; GFX8-NEXT: s_min_i32 s13, s0, 0 ; GFX8-NEXT: s_sub_i32 s13, s11, s13 -; GFX8-NEXT: s_cmp_gt_i32 s13, s5 -; GFX8-NEXT: s_cselect_b32 s5, s13, s5 -; GFX8-NEXT: s_cmp_lt_i32 s5, s12 -; GFX8-NEXT: s_cselect_b32 s5, s5, s12 +; GFX8-NEXT: s_brev_b32 s10, -2 +; GFX8-NEXT: s_max_i32 s12, s0, 0 +; GFX8-NEXT: s_sub_i32 s12, s10, s12 +; GFX8-NEXT: s_max_i32 s5, s13, s5 +; GFX8-NEXT: s_min_i32 s5, s5, s12 +; GFX8-NEXT: s_min_i32 s12, s1, 0 ; GFX8-NEXT: s_add_i32 s0, s0, s5 -; GFX8-NEXT: s_cmp_gt_i32 s1, 0 -; GFX8-NEXT: s_cselect_b32 s5, s1, 0 -; GFX8-NEXT: s_sub_i32 s5, s10, s5 -; GFX8-NEXT: s_cmp_lt_i32 s1, 0 -; GFX8-NEXT: s_cselect_b32 s12, s1, 0 +; GFX8-NEXT: s_max_i32 s5, s1, 0 ; GFX8-NEXT: s_sub_i32 s12, s11, s12 -; GFX8-NEXT: s_cmp_gt_i32 s12, s6 -; GFX8-NEXT: s_cselect_b32 s6, s12, s6 -; GFX8-NEXT: s_cmp_lt_i32 s6, s5 -; GFX8-NEXT: s_cselect_b32 s5, s6, s5 +; GFX8-NEXT: s_sub_i32 s5, s10, s5 +; GFX8-NEXT: s_max_i32 s6, s12, s6 +; GFX8-NEXT: s_min_i32 s5, s6, s5 +; GFX8-NEXT: s_min_i32 s6, s2, 0 +; GFX8-NEXT: s_sub_i32 s6, s11, s6 ; GFX8-NEXT: s_add_i32 s1, s1, s5 -; GFX8-NEXT: s_cmp_gt_i32 s2, 0 -; GFX8-NEXT: s_cselect_b32 s5, s2, 0 +; GFX8-NEXT: s_max_i32 s5, s2, 0 ; GFX8-NEXT: s_sub_i32 s5, s10, s5 -; GFX8-NEXT: s_cmp_lt_i32 s2, 0 -; GFX8-NEXT: s_cselect_b32 s6, s2, 0 +; GFX8-NEXT: s_max_i32 s6, s6, s7 +; GFX8-NEXT: s_min_i32 s5, s6, s5 +; GFX8-NEXT: s_min_i32 s6, s3, 0 ; GFX8-NEXT: s_sub_i32 s6, s11, s6 -; GFX8-NEXT: s_cmp_gt_i32 s6, s7 -; GFX8-NEXT: s_cselect_b32 s6, s6, s7 -; GFX8-NEXT: s_cmp_lt_i32 s6, s5 -; GFX8-NEXT: s_cselect_b32 s5, s6, s5 ; GFX8-NEXT: s_add_i32 s2, s2, s5 -; GFX8-NEXT: s_cmp_gt_i32 s3, 0 -; GFX8-NEXT: s_cselect_b32 s5, s3, 0 +; GFX8-NEXT: s_max_i32 s5, s3, 0 ; GFX8-NEXT: s_sub_i32 s5, s10, s5 -; GFX8-NEXT: s_cmp_lt_i32 s3, 0 -; GFX8-NEXT: s_cselect_b32 s6, s3, 0 +; GFX8-NEXT: s_max_i32 s6, s6, s8 +; GFX8-NEXT: s_min_i32 s5, s6, s5 +; GFX8-NEXT: s_min_i32 s6, s4, 0 ; GFX8-NEXT: s_sub_i32 s6, s11, s6 -; GFX8-NEXT: s_cmp_gt_i32 s6, s8 -; GFX8-NEXT: s_cselect_b32 s6, s6, s8 -; GFX8-NEXT: s_cmp_lt_i32 s6, s5 -; GFX8-NEXT: s_cselect_b32 s5, s6, s5 ; GFX8-NEXT: s_add_i32 s3, s3, s5 -; GFX8-NEXT: s_cmp_gt_i32 s4, 0 -; GFX8-NEXT: s_cselect_b32 s5, s4, 0 +; GFX8-NEXT: s_max_i32 s5, s4, 0 ; GFX8-NEXT: s_sub_i32 s5, s10, s5 -; GFX8-NEXT: s_cmp_lt_i32 s4, 0 -; GFX8-NEXT: s_cselect_b32 s6, s4, 0 -; GFX8-NEXT: s_sub_i32 s6, s11, s6 -; GFX8-NEXT: s_cmp_gt_i32 s6, s9 -; GFX8-NEXT: s_cselect_b32 s6, s6, s9 -; GFX8-NEXT: s_cmp_lt_i32 s6, s5 -; GFX8-NEXT: s_cselect_b32 s5, s6, s5 +; GFX8-NEXT: s_max_i32 s6, s6, s9 +; GFX8-NEXT: s_min_i32 s5, s6, s5 ; GFX8-NEXT: s_add_i32 s4, s4, s5 ; GFX8-NEXT: ; return to shader part epilog ; @@ -2391,365 +2199,237 @@ define amdgpu_ps <16 x i32> @s_saddsat_v16i32(<16 x i32> inreg %lhs, <16 x i32> inreg %rhs) { ; GFX6-LABEL: s_saddsat_v16i32: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_cmp_gt_i32 s0, 0 -; GFX6-NEXT: s_brev_b32 s32, -2 -; GFX6-NEXT: s_cselect_b32 s34, s0, 0 -; GFX6-NEXT: s_sub_i32 s34, s32, s34 -; GFX6-NEXT: s_cmp_lt_i32 s0, 0 ; GFX6-NEXT: s_brev_b32 s33, 1 -; GFX6-NEXT: s_cselect_b32 s35, s0, 0 +; GFX6-NEXT: s_min_i32 s35, s0, 0 ; GFX6-NEXT: s_sub_i32 s35, s33, s35 -; GFX6-NEXT: s_cmp_gt_i32 s35, s16 -; GFX6-NEXT: s_cselect_b32 s16, s35, s16 -; GFX6-NEXT: s_cmp_lt_i32 s16, s34 -; GFX6-NEXT: s_cselect_b32 s16, s16, s34 +; GFX6-NEXT: s_brev_b32 s32, -2 +; GFX6-NEXT: s_max_i32 s34, s0, 0 +; GFX6-NEXT: s_sub_i32 s34, s32, s34 +; GFX6-NEXT: s_max_i32 s16, s35, s16 +; GFX6-NEXT: s_min_i32 s16, s16, s34 +; GFX6-NEXT: s_min_i32 s34, s1, 0 ; GFX6-NEXT: s_add_i32 s0, s0, s16 -; GFX6-NEXT: s_cmp_gt_i32 s1, 0 -; GFX6-NEXT: s_cselect_b32 s16, s1, 0 -; GFX6-NEXT: s_sub_i32 s16, s32, s16 -; GFX6-NEXT: s_cmp_lt_i32 s1, 0 -; GFX6-NEXT: s_cselect_b32 s34, s1, 0 +; GFX6-NEXT: s_max_i32 s16, s1, 0 ; GFX6-NEXT: s_sub_i32 s34, s33, s34 -; GFX6-NEXT: s_cmp_gt_i32 s34, s17 -; GFX6-NEXT: s_cselect_b32 s17, s34, s17 -; GFX6-NEXT: s_cmp_lt_i32 s17, s16 -; GFX6-NEXT: s_cselect_b32 s16, s17, s16 +; GFX6-NEXT: s_sub_i32 s16, s32, s16 +; GFX6-NEXT: s_max_i32 s17, s34, s17 +; GFX6-NEXT: s_min_i32 s16, s17, s16 +; GFX6-NEXT: s_min_i32 s17, s2, 0 +; GFX6-NEXT: s_sub_i32 s17, s33, s17 ; GFX6-NEXT: s_add_i32 s1, s1, s16 -; GFX6-NEXT: s_cmp_gt_i32 s2, 0 -; GFX6-NEXT: s_cselect_b32 s16, s2, 0 +; GFX6-NEXT: s_max_i32 s16, s2, 0 ; GFX6-NEXT: s_sub_i32 s16, s32, s16 -; GFX6-NEXT: s_cmp_lt_i32 s2, 0 -; GFX6-NEXT: s_cselect_b32 s17, s2, 0 +; GFX6-NEXT: s_max_i32 s17, s17, s18 +; GFX6-NEXT: s_min_i32 s16, s17, s16 +; GFX6-NEXT: s_min_i32 s17, s3, 0 ; GFX6-NEXT: s_sub_i32 s17, s33, s17 -; GFX6-NEXT: s_cmp_gt_i32 s17, s18 -; GFX6-NEXT: s_cselect_b32 s17, s17, s18 -; GFX6-NEXT: s_cmp_lt_i32 s17, s16 -; GFX6-NEXT: s_cselect_b32 s16, s17, s16 ; GFX6-NEXT: s_add_i32 s2, s2, s16 -; GFX6-NEXT: s_cmp_gt_i32 s3, 0 -; GFX6-NEXT: s_cselect_b32 s16, s3, 0 +; GFX6-NEXT: s_max_i32 s16, s3, 0 ; GFX6-NEXT: s_sub_i32 s16, s32, s16 -; GFX6-NEXT: s_cmp_lt_i32 s3, 0 -; GFX6-NEXT: s_cselect_b32 s17, s3, 0 +; GFX6-NEXT: s_max_i32 s17, s17, s19 +; GFX6-NEXT: s_min_i32 s16, s17, s16 +; GFX6-NEXT: s_min_i32 s17, s4, 0 ; GFX6-NEXT: s_sub_i32 s17, s33, s17 -; GFX6-NEXT: s_cmp_gt_i32 s17, s19 -; GFX6-NEXT: s_cselect_b32 s17, s17, s19 -; GFX6-NEXT: s_cmp_lt_i32 s17, s16 -; GFX6-NEXT: s_cselect_b32 s16, s17, s16 ; GFX6-NEXT: s_add_i32 s3, s3, s16 -; GFX6-NEXT: s_cmp_gt_i32 s4, 0 -; GFX6-NEXT: s_cselect_b32 s16, s4, 0 +; GFX6-NEXT: s_max_i32 s16, s4, 0 ; GFX6-NEXT: s_sub_i32 s16, s32, s16 -; GFX6-NEXT: s_cmp_lt_i32 s4, 0 -; GFX6-NEXT: s_cselect_b32 s17, s4, 0 +; GFX6-NEXT: s_max_i32 s17, s17, s20 +; GFX6-NEXT: s_min_i32 s16, s17, s16 +; GFX6-NEXT: s_min_i32 s17, s5, 0 ; GFX6-NEXT: s_sub_i32 s17, s33, s17 -; GFX6-NEXT: s_cmp_gt_i32 s17, s20 -; GFX6-NEXT: s_cselect_b32 s17, s17, s20 -; GFX6-NEXT: s_cmp_lt_i32 s17, s16 -; GFX6-NEXT: s_cselect_b32 s16, s17, s16 ; GFX6-NEXT: s_add_i32 s4, s4, s16 -; GFX6-NEXT: s_cmp_gt_i32 s5, 0 -; GFX6-NEXT: s_cselect_b32 s16, s5, 0 +; GFX6-NEXT: s_max_i32 s16, s5, 0 ; GFX6-NEXT: s_sub_i32 s16, s32, s16 -; GFX6-NEXT: s_cmp_lt_i32 s5, 0 -; GFX6-NEXT: s_cselect_b32 s17, s5, 0 +; GFX6-NEXT: s_max_i32 s17, s17, s21 +; GFX6-NEXT: s_min_i32 s16, s17, s16 +; GFX6-NEXT: s_min_i32 s17, s6, 0 ; GFX6-NEXT: s_sub_i32 s17, s33, s17 -; GFX6-NEXT: s_cmp_gt_i32 s17, s21 -; GFX6-NEXT: s_cselect_b32 s17, s17, s21 -; GFX6-NEXT: s_cmp_lt_i32 s17, s16 -; GFX6-NEXT: s_cselect_b32 s16, s17, s16 ; GFX6-NEXT: s_add_i32 s5, s5, s16 -; GFX6-NEXT: s_cmp_gt_i32 s6, 0 -; GFX6-NEXT: s_cselect_b32 s16, s6, 0 +; GFX6-NEXT: s_max_i32 s16, s6, 0 ; GFX6-NEXT: s_sub_i32 s16, s32, s16 -; GFX6-NEXT: s_cmp_lt_i32 s6, 0 -; GFX6-NEXT: s_cselect_b32 s17, s6, 0 +; GFX6-NEXT: s_max_i32 s17, s17, s22 +; GFX6-NEXT: s_min_i32 s16, s17, s16 +; GFX6-NEXT: s_min_i32 s17, s7, 0 ; GFX6-NEXT: s_sub_i32 s17, s33, s17 -; GFX6-NEXT: s_cmp_gt_i32 s17, s22 -; GFX6-NEXT: s_cselect_b32 s17, s17, s22 -; GFX6-NEXT: s_cmp_lt_i32 s17, s16 -; GFX6-NEXT: s_cselect_b32 s16, s17, s16 ; GFX6-NEXT: s_add_i32 s6, s6, s16 -; GFX6-NEXT: s_cmp_gt_i32 s7, 0 -; GFX6-NEXT: s_cselect_b32 s16, s7, 0 +; GFX6-NEXT: s_max_i32 s16, s7, 0 ; GFX6-NEXT: s_sub_i32 s16, s32, s16 -; GFX6-NEXT: s_cmp_lt_i32 s7, 0 -; GFX6-NEXT: s_cselect_b32 s17, s7, 0 +; GFX6-NEXT: s_max_i32 s17, s17, s23 +; GFX6-NEXT: s_min_i32 s16, s17, s16 +; GFX6-NEXT: s_min_i32 s17, s8, 0 ; GFX6-NEXT: s_sub_i32 s17, s33, s17 -; GFX6-NEXT: s_cmp_gt_i32 s17, s23 -; GFX6-NEXT: s_cselect_b32 s17, s17, s23 -; GFX6-NEXT: s_cmp_lt_i32 s17, s16 -; GFX6-NEXT: s_cselect_b32 s16, s17, s16 ; GFX6-NEXT: s_add_i32 s7, s7, s16 -; GFX6-NEXT: s_cmp_gt_i32 s8, 0 -; GFX6-NEXT: s_cselect_b32 s16, s8, 0 +; GFX6-NEXT: s_max_i32 s16, s8, 0 ; GFX6-NEXT: s_sub_i32 s16, s32, s16 -; GFX6-NEXT: s_cmp_lt_i32 s8, 0 -; GFX6-NEXT: s_cselect_b32 s17, s8, 0 +; GFX6-NEXT: s_max_i32 s17, s17, s24 +; GFX6-NEXT: s_min_i32 s16, s17, s16 +; GFX6-NEXT: s_min_i32 s17, s9, 0 ; GFX6-NEXT: s_sub_i32 s17, s33, s17 -; GFX6-NEXT: s_cmp_gt_i32 s17, s24 -; GFX6-NEXT: s_cselect_b32 s17, s17, s24 -; GFX6-NEXT: s_cmp_lt_i32 s17, s16 -; GFX6-NEXT: s_cselect_b32 s16, s17, s16 ; GFX6-NEXT: s_add_i32 s8, s8, s16 -; GFX6-NEXT: s_cmp_gt_i32 s9, 0 -; GFX6-NEXT: s_cselect_b32 s16, s9, 0 +; GFX6-NEXT: s_max_i32 s16, s9, 0 ; GFX6-NEXT: s_sub_i32 s16, s32, s16 -; GFX6-NEXT: s_cmp_lt_i32 s9, 0 -; GFX6-NEXT: s_cselect_b32 s17, s9, 0 +; GFX6-NEXT: s_max_i32 s17, s17, s25 +; GFX6-NEXT: s_min_i32 s16, s17, s16 +; GFX6-NEXT: s_min_i32 s17, s10, 0 ; GFX6-NEXT: s_sub_i32 s17, s33, s17 -; GFX6-NEXT: s_cmp_gt_i32 s17, s25 -; GFX6-NEXT: s_cselect_b32 s17, s17, s25 -; GFX6-NEXT: s_cmp_lt_i32 s17, s16 -; GFX6-NEXT: s_cselect_b32 s16, s17, s16 ; GFX6-NEXT: s_add_i32 s9, s9, s16 -; GFX6-NEXT: s_cmp_gt_i32 s10, 0 -; GFX6-NEXT: s_cselect_b32 s16, s10, 0 +; GFX6-NEXT: s_max_i32 s16, s10, 0 ; GFX6-NEXT: s_sub_i32 s16, s32, s16 -; GFX6-NEXT: s_cmp_lt_i32 s10, 0 -; GFX6-NEXT: s_cselect_b32 s17, s10, 0 +; GFX6-NEXT: s_max_i32 s17, s17, s26 +; GFX6-NEXT: s_min_i32 s16, s17, s16 +; GFX6-NEXT: s_min_i32 s17, s11, 0 ; GFX6-NEXT: s_sub_i32 s17, s33, s17 -; GFX6-NEXT: s_cmp_gt_i32 s17, s26 -; GFX6-NEXT: s_cselect_b32 s17, s17, s26 -; GFX6-NEXT: s_cmp_lt_i32 s17, s16 -; GFX6-NEXT: s_cselect_b32 s16, s17, s16 ; GFX6-NEXT: s_add_i32 s10, s10, s16 -; GFX6-NEXT: s_cmp_gt_i32 s11, 0 -; GFX6-NEXT: s_cselect_b32 s16, s11, 0 +; GFX6-NEXT: s_max_i32 s16, s11, 0 ; GFX6-NEXT: s_sub_i32 s16, s32, s16 -; GFX6-NEXT: s_cmp_lt_i32 s11, 0 -; GFX6-NEXT: s_cselect_b32 s17, s11, 0 +; GFX6-NEXT: s_max_i32 s17, s17, s27 +; GFX6-NEXT: s_min_i32 s16, s17, s16 +; GFX6-NEXT: s_min_i32 s17, s12, 0 ; GFX6-NEXT: s_sub_i32 s17, s33, s17 -; GFX6-NEXT: s_cmp_gt_i32 s17, s27 -; GFX6-NEXT: s_cselect_b32 s17, s17, s27 -; GFX6-NEXT: s_cmp_lt_i32 s17, s16 -; GFX6-NEXT: s_cselect_b32 s16, s17, s16 ; GFX6-NEXT: s_add_i32 s11, s11, s16 -; GFX6-NEXT: s_cmp_gt_i32 s12, 0 -; GFX6-NEXT: s_cselect_b32 s16, s12, 0 +; GFX6-NEXT: s_max_i32 s16, s12, 0 ; GFX6-NEXT: s_sub_i32 s16, s32, s16 -; GFX6-NEXT: s_cmp_lt_i32 s12, 0 -; GFX6-NEXT: s_cselect_b32 s17, s12, 0 +; GFX6-NEXT: s_max_i32 s17, s17, s28 +; GFX6-NEXT: s_min_i32 s16, s17, s16 +; GFX6-NEXT: s_min_i32 s17, s13, 0 ; GFX6-NEXT: s_sub_i32 s17, s33, s17 -; GFX6-NEXT: s_cmp_gt_i32 s17, s28 -; GFX6-NEXT: s_cselect_b32 s17, s17, s28 -; GFX6-NEXT: s_cmp_lt_i32 s17, s16 -; GFX6-NEXT: s_cselect_b32 s16, s17, s16 ; GFX6-NEXT: s_add_i32 s12, s12, s16 -; GFX6-NEXT: s_cmp_gt_i32 s13, 0 -; GFX6-NEXT: s_cselect_b32 s16, s13, 0 +; GFX6-NEXT: s_max_i32 s16, s13, 0 ; GFX6-NEXT: s_sub_i32 s16, s32, s16 -; GFX6-NEXT: s_cmp_lt_i32 s13, 0 -; GFX6-NEXT: s_cselect_b32 s17, s13, 0 +; GFX6-NEXT: s_max_i32 s17, s17, s29 +; GFX6-NEXT: s_min_i32 s16, s17, s16 +; GFX6-NEXT: s_min_i32 s17, s14, 0 ; GFX6-NEXT: s_sub_i32 s17, s33, s17 -; GFX6-NEXT: s_cmp_gt_i32 s17, s29 -; GFX6-NEXT: s_cselect_b32 s17, s17, s29 -; GFX6-NEXT: s_cmp_lt_i32 s17, s16 -; GFX6-NEXT: s_cselect_b32 s16, s17, s16 ; GFX6-NEXT: s_add_i32 s13, s13, s16 -; GFX6-NEXT: s_cmp_gt_i32 s14, 0 -; GFX6-NEXT: s_cselect_b32 s16, s14, 0 +; GFX6-NEXT: s_max_i32 s16, s14, 0 ; GFX6-NEXT: s_sub_i32 s16, s32, s16 -; GFX6-NEXT: s_cmp_lt_i32 s14, 0 -; GFX6-NEXT: s_cselect_b32 s17, s14, 0 +; GFX6-NEXT: s_max_i32 s17, s17, s30 +; GFX6-NEXT: s_min_i32 s16, s17, s16 +; GFX6-NEXT: s_min_i32 s17, s15, 0 ; GFX6-NEXT: s_sub_i32 s17, s33, s17 -; GFX6-NEXT: s_cmp_gt_i32 s17, s30 -; GFX6-NEXT: s_cselect_b32 s17, s17, s30 -; GFX6-NEXT: s_cmp_lt_i32 s17, s16 -; GFX6-NEXT: s_cselect_b32 s16, s17, s16 ; GFX6-NEXT: s_add_i32 s14, s14, s16 -; GFX6-NEXT: s_cmp_gt_i32 s15, 0 -; GFX6-NEXT: s_cselect_b32 s16, s15, 0 +; GFX6-NEXT: s_max_i32 s16, s15, 0 ; GFX6-NEXT: s_sub_i32 s16, s32, s16 -; GFX6-NEXT: s_cmp_lt_i32 s15, 0 -; GFX6-NEXT: s_cselect_b32 s17, s15, 0 -; GFX6-NEXT: s_sub_i32 s17, s33, s17 -; GFX6-NEXT: s_cmp_gt_i32 s17, s31 -; GFX6-NEXT: s_cselect_b32 s17, s17, s31 -; GFX6-NEXT: s_cmp_lt_i32 s17, s16 -; GFX6-NEXT: s_cselect_b32 s16, s17, s16 +; GFX6-NEXT: s_max_i32 s17, s17, s31 +; GFX6-NEXT: s_min_i32 s16, s17, s16 ; GFX6-NEXT: s_add_i32 s15, s15, s16 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_saddsat_v16i32: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_cmp_gt_i32 s0, 0 -; GFX8-NEXT: s_brev_b32 s32, -2 -; GFX8-NEXT: s_cselect_b32 s34, s0, 0 -; GFX8-NEXT: s_sub_i32 s34, s32, s34 -; GFX8-NEXT: s_cmp_lt_i32 s0, 0 ; GFX8-NEXT: s_brev_b32 s33, 1 -; GFX8-NEXT: s_cselect_b32 s35, s0, 0 +; GFX8-NEXT: s_min_i32 s35, s0, 0 ; GFX8-NEXT: s_sub_i32 s35, s33, s35 -; GFX8-NEXT: s_cmp_gt_i32 s35, s16 -; GFX8-NEXT: s_cselect_b32 s16, s35, s16 -; GFX8-NEXT: s_cmp_lt_i32 s16, s34 -; GFX8-NEXT: s_cselect_b32 s16, s16, s34 +; GFX8-NEXT: s_brev_b32 s32, -2 +; GFX8-NEXT: s_max_i32 s34, s0, 0 +; GFX8-NEXT: s_sub_i32 s34, s32, s34 +; GFX8-NEXT: s_max_i32 s16, s35, s16 +; GFX8-NEXT: s_min_i32 s16, s16, s34 +; GFX8-NEXT: s_min_i32 s34, s1, 0 ; GFX8-NEXT: s_add_i32 s0, s0, s16 -; GFX8-NEXT: s_cmp_gt_i32 s1, 0 -; GFX8-NEXT: s_cselect_b32 s16, s1, 0 -; GFX8-NEXT: s_sub_i32 s16, s32, s16 -; GFX8-NEXT: s_cmp_lt_i32 s1, 0 -; GFX8-NEXT: s_cselect_b32 s34, s1, 0 +; GFX8-NEXT: s_max_i32 s16, s1, 0 ; GFX8-NEXT: s_sub_i32 s34, s33, s34 -; GFX8-NEXT: s_cmp_gt_i32 s34, s17 -; GFX8-NEXT: s_cselect_b32 s17, s34, s17 -; GFX8-NEXT: s_cmp_lt_i32 s17, s16 -; GFX8-NEXT: s_cselect_b32 s16, s17, s16 +; GFX8-NEXT: s_sub_i32 s16, s32, s16 +; GFX8-NEXT: s_max_i32 s17, s34, s17 +; GFX8-NEXT: s_min_i32 s16, s17, s16 +; GFX8-NEXT: s_min_i32 s17, s2, 0 +; GFX8-NEXT: s_sub_i32 s17, s33, s17 ; GFX8-NEXT: s_add_i32 s1, s1, s16 -; GFX8-NEXT: s_cmp_gt_i32 s2, 0 -; GFX8-NEXT: s_cselect_b32 s16, s2, 0 +; GFX8-NEXT: s_max_i32 s16, s2, 0 ; GFX8-NEXT: s_sub_i32 s16, s32, s16 -; GFX8-NEXT: s_cmp_lt_i32 s2, 0 -; GFX8-NEXT: s_cselect_b32 s17, s2, 0 +; GFX8-NEXT: s_max_i32 s17, s17, s18 +; GFX8-NEXT: s_min_i32 s16, s17, s16 +; GFX8-NEXT: s_min_i32 s17, s3, 0 ; GFX8-NEXT: s_sub_i32 s17, s33, s17 -; GFX8-NEXT: s_cmp_gt_i32 s17, s18 -; GFX8-NEXT: s_cselect_b32 s17, s17, s18 -; GFX8-NEXT: s_cmp_lt_i32 s17, s16 -; GFX8-NEXT: s_cselect_b32 s16, s17, s16 ; GFX8-NEXT: s_add_i32 s2, s2, s16 -; GFX8-NEXT: s_cmp_gt_i32 s3, 0 -; GFX8-NEXT: s_cselect_b32 s16, s3, 0 +; GFX8-NEXT: s_max_i32 s16, s3, 0 ; GFX8-NEXT: s_sub_i32 s16, s32, s16 -; GFX8-NEXT: s_cmp_lt_i32 s3, 0 -; GFX8-NEXT: s_cselect_b32 s17, s3, 0 +; GFX8-NEXT: s_max_i32 s17, s17, s19 +; GFX8-NEXT: s_min_i32 s16, s17, s16 +; GFX8-NEXT: s_min_i32 s17, s4, 0 ; GFX8-NEXT: s_sub_i32 s17, s33, s17 -; GFX8-NEXT: s_cmp_gt_i32 s17, s19 -; GFX8-NEXT: s_cselect_b32 s17, s17, s19 -; GFX8-NEXT: s_cmp_lt_i32 s17, s16 -; GFX8-NEXT: s_cselect_b32 s16, s17, s16 ; GFX8-NEXT: s_add_i32 s3, s3, s16 -; GFX8-NEXT: s_cmp_gt_i32 s4, 0 -; GFX8-NEXT: s_cselect_b32 s16, s4, 0 +; GFX8-NEXT: s_max_i32 s16, s4, 0 ; GFX8-NEXT: s_sub_i32 s16, s32, s16 -; GFX8-NEXT: s_cmp_lt_i32 s4, 0 -; GFX8-NEXT: s_cselect_b32 s17, s4, 0 +; GFX8-NEXT: s_max_i32 s17, s17, s20 +; GFX8-NEXT: s_min_i32 s16, s17, s16 +; GFX8-NEXT: s_min_i32 s17, s5, 0 ; GFX8-NEXT: s_sub_i32 s17, s33, s17 -; GFX8-NEXT: s_cmp_gt_i32 s17, s20 -; GFX8-NEXT: s_cselect_b32 s17, s17, s20 -; GFX8-NEXT: s_cmp_lt_i32 s17, s16 -; GFX8-NEXT: s_cselect_b32 s16, s17, s16 ; GFX8-NEXT: s_add_i32 s4, s4, s16 -; GFX8-NEXT: s_cmp_gt_i32 s5, 0 -; GFX8-NEXT: s_cselect_b32 s16, s5, 0 +; GFX8-NEXT: s_max_i32 s16, s5, 0 ; GFX8-NEXT: s_sub_i32 s16, s32, s16 -; GFX8-NEXT: s_cmp_lt_i32 s5, 0 -; GFX8-NEXT: s_cselect_b32 s17, s5, 0 +; GFX8-NEXT: s_max_i32 s17, s17, s21 +; GFX8-NEXT: s_min_i32 s16, s17, s16 +; GFX8-NEXT: s_min_i32 s17, s6, 0 ; GFX8-NEXT: s_sub_i32 s17, s33, s17 -; GFX8-NEXT: s_cmp_gt_i32 s17, s21 -; GFX8-NEXT: s_cselect_b32 s17, s17, s21 -; GFX8-NEXT: s_cmp_lt_i32 s17, s16 -; GFX8-NEXT: s_cselect_b32 s16, s17, s16 ; GFX8-NEXT: s_add_i32 s5, s5, s16 -; GFX8-NEXT: s_cmp_gt_i32 s6, 0 -; GFX8-NEXT: s_cselect_b32 s16, s6, 0 +; GFX8-NEXT: s_max_i32 s16, s6, 0 ; GFX8-NEXT: s_sub_i32 s16, s32, s16 -; GFX8-NEXT: s_cmp_lt_i32 s6, 0 -; GFX8-NEXT: s_cselect_b32 s17, s6, 0 +; GFX8-NEXT: s_max_i32 s17, s17, s22 +; GFX8-NEXT: s_min_i32 s16, s17, s16 +; GFX8-NEXT: s_min_i32 s17, s7, 0 ; GFX8-NEXT: s_sub_i32 s17, s33, s17 -; GFX8-NEXT: s_cmp_gt_i32 s17, s22 -; GFX8-NEXT: s_cselect_b32 s17, s17, s22 -; GFX8-NEXT: s_cmp_lt_i32 s17, s16 -; GFX8-NEXT: s_cselect_b32 s16, s17, s16 ; GFX8-NEXT: s_add_i32 s6, s6, s16 -; GFX8-NEXT: s_cmp_gt_i32 s7, 0 -; GFX8-NEXT: s_cselect_b32 s16, s7, 0 +; GFX8-NEXT: s_max_i32 s16, s7, 0 ; GFX8-NEXT: s_sub_i32 s16, s32, s16 -; GFX8-NEXT: s_cmp_lt_i32 s7, 0 -; GFX8-NEXT: s_cselect_b32 s17, s7, 0 +; GFX8-NEXT: s_max_i32 s17, s17, s23 +; GFX8-NEXT: s_min_i32 s16, s17, s16 +; GFX8-NEXT: s_min_i32 s17, s8, 0 ; GFX8-NEXT: s_sub_i32 s17, s33, s17 -; GFX8-NEXT: s_cmp_gt_i32 s17, s23 -; GFX8-NEXT: s_cselect_b32 s17, s17, s23 -; GFX8-NEXT: s_cmp_lt_i32 s17, s16 -; GFX8-NEXT: s_cselect_b32 s16, s17, s16 ; GFX8-NEXT: s_add_i32 s7, s7, s16 -; GFX8-NEXT: s_cmp_gt_i32 s8, 0 -; GFX8-NEXT: s_cselect_b32 s16, s8, 0 +; GFX8-NEXT: s_max_i32 s16, s8, 0 ; GFX8-NEXT: s_sub_i32 s16, s32, s16 -; GFX8-NEXT: s_cmp_lt_i32 s8, 0 -; GFX8-NEXT: s_cselect_b32 s17, s8, 0 +; GFX8-NEXT: s_max_i32 s17, s17, s24 +; GFX8-NEXT: s_min_i32 s16, s17, s16 +; GFX8-NEXT: s_min_i32 s17, s9, 0 ; GFX8-NEXT: s_sub_i32 s17, s33, s17 -; GFX8-NEXT: s_cmp_gt_i32 s17, s24 -; GFX8-NEXT: s_cselect_b32 s17, s17, s24 -; GFX8-NEXT: s_cmp_lt_i32 s17, s16 -; GFX8-NEXT: s_cselect_b32 s16, s17, s16 ; GFX8-NEXT: s_add_i32 s8, s8, s16 -; GFX8-NEXT: s_cmp_gt_i32 s9, 0 -; GFX8-NEXT: s_cselect_b32 s16, s9, 0 +; GFX8-NEXT: s_max_i32 s16, s9, 0 ; GFX8-NEXT: s_sub_i32 s16, s32, s16 -; GFX8-NEXT: s_cmp_lt_i32 s9, 0 -; GFX8-NEXT: s_cselect_b32 s17, s9, 0 +; GFX8-NEXT: s_max_i32 s17, s17, s25 +; GFX8-NEXT: s_min_i32 s16, s17, s16 +; GFX8-NEXT: s_min_i32 s17, s10, 0 ; GFX8-NEXT: s_sub_i32 s17, s33, s17 -; GFX8-NEXT: s_cmp_gt_i32 s17, s25 -; GFX8-NEXT: s_cselect_b32 s17, s17, s25 -; GFX8-NEXT: s_cmp_lt_i32 s17, s16 -; GFX8-NEXT: s_cselect_b32 s16, s17, s16 ; GFX8-NEXT: s_add_i32 s9, s9, s16 -; GFX8-NEXT: s_cmp_gt_i32 s10, 0 -; GFX8-NEXT: s_cselect_b32 s16, s10, 0 +; GFX8-NEXT: s_max_i32 s16, s10, 0 ; GFX8-NEXT: s_sub_i32 s16, s32, s16 -; GFX8-NEXT: s_cmp_lt_i32 s10, 0 -; GFX8-NEXT: s_cselect_b32 s17, s10, 0 +; GFX8-NEXT: s_max_i32 s17, s17, s26 +; GFX8-NEXT: s_min_i32 s16, s17, s16 +; GFX8-NEXT: s_min_i32 s17, s11, 0 ; GFX8-NEXT: s_sub_i32 s17, s33, s17 -; GFX8-NEXT: s_cmp_gt_i32 s17, s26 -; GFX8-NEXT: s_cselect_b32 s17, s17, s26 -; GFX8-NEXT: s_cmp_lt_i32 s17, s16 -; GFX8-NEXT: s_cselect_b32 s16, s17, s16 ; GFX8-NEXT: s_add_i32 s10, s10, s16 -; GFX8-NEXT: s_cmp_gt_i32 s11, 0 -; GFX8-NEXT: s_cselect_b32 s16, s11, 0 +; GFX8-NEXT: s_max_i32 s16, s11, 0 ; GFX8-NEXT: s_sub_i32 s16, s32, s16 -; GFX8-NEXT: s_cmp_lt_i32 s11, 0 -; GFX8-NEXT: s_cselect_b32 s17, s11, 0 +; GFX8-NEXT: s_max_i32 s17, s17, s27 +; GFX8-NEXT: s_min_i32 s16, s17, s16 +; GFX8-NEXT: s_min_i32 s17, s12, 0 ; GFX8-NEXT: s_sub_i32 s17, s33, s17 -; GFX8-NEXT: s_cmp_gt_i32 s17, s27 -; GFX8-NEXT: s_cselect_b32 s17, s17, s27 -; GFX8-NEXT: s_cmp_lt_i32 s17, s16 -; GFX8-NEXT: s_cselect_b32 s16, s17, s16 ; GFX8-NEXT: s_add_i32 s11, s11, s16 -; GFX8-NEXT: s_cmp_gt_i32 s12, 0 -; GFX8-NEXT: s_cselect_b32 s16, s12, 0 +; GFX8-NEXT: s_max_i32 s16, s12, 0 ; GFX8-NEXT: s_sub_i32 s16, s32, s16 -; GFX8-NEXT: s_cmp_lt_i32 s12, 0 -; GFX8-NEXT: s_cselect_b32 s17, s12, 0 +; GFX8-NEXT: s_max_i32 s17, s17, s28 +; GFX8-NEXT: s_min_i32 s16, s17, s16 +; GFX8-NEXT: s_min_i32 s17, s13, 0 ; GFX8-NEXT: s_sub_i32 s17, s33, s17 -; GFX8-NEXT: s_cmp_gt_i32 s17, s28 -; GFX8-NEXT: s_cselect_b32 s17, s17, s28 -; GFX8-NEXT: s_cmp_lt_i32 s17, s16 -; GFX8-NEXT: s_cselect_b32 s16, s17, s16 ; GFX8-NEXT: s_add_i32 s12, s12, s16 -; GFX8-NEXT: s_cmp_gt_i32 s13, 0 -; GFX8-NEXT: s_cselect_b32 s16, s13, 0 +; GFX8-NEXT: s_max_i32 s16, s13, 0 ; GFX8-NEXT: s_sub_i32 s16, s32, s16 -; GFX8-NEXT: s_cmp_lt_i32 s13, 0 -; GFX8-NEXT: s_cselect_b32 s17, s13, 0 +; GFX8-NEXT: s_max_i32 s17, s17, s29 +; GFX8-NEXT: s_min_i32 s16, s17, s16 +; GFX8-NEXT: s_min_i32 s17, s14, 0 ; GFX8-NEXT: s_sub_i32 s17, s33, s17 -; GFX8-NEXT: s_cmp_gt_i32 s17, s29 -; GFX8-NEXT: s_cselect_b32 s17, s17, s29 -; GFX8-NEXT: s_cmp_lt_i32 s17, s16 -; GFX8-NEXT: s_cselect_b32 s16, s17, s16 ; GFX8-NEXT: s_add_i32 s13, s13, s16 -; GFX8-NEXT: s_cmp_gt_i32 s14, 0 -; GFX8-NEXT: s_cselect_b32 s16, s14, 0 +; GFX8-NEXT: s_max_i32 s16, s14, 0 ; GFX8-NEXT: s_sub_i32 s16, s32, s16 -; GFX8-NEXT: s_cmp_lt_i32 s14, 0 -; GFX8-NEXT: s_cselect_b32 s17, s14, 0 +; GFX8-NEXT: s_max_i32 s17, s17, s30 +; GFX8-NEXT: s_min_i32 s16, s17, s16 +; GFX8-NEXT: s_min_i32 s17, s15, 0 ; GFX8-NEXT: s_sub_i32 s17, s33, s17 -; GFX8-NEXT: s_cmp_gt_i32 s17, s30 -; GFX8-NEXT: s_cselect_b32 s17, s17, s30 -; GFX8-NEXT: s_cmp_lt_i32 s17, s16 -; GFX8-NEXT: s_cselect_b32 s16, s17, s16 ; GFX8-NEXT: s_add_i32 s14, s14, s16 -; GFX8-NEXT: s_cmp_gt_i32 s15, 0 -; GFX8-NEXT: s_cselect_b32 s16, s15, 0 +; GFX8-NEXT: s_max_i32 s16, s15, 0 ; GFX8-NEXT: s_sub_i32 s16, s32, s16 -; GFX8-NEXT: s_cmp_lt_i32 s15, 0 -; GFX8-NEXT: s_cselect_b32 s17, s15, 0 -; GFX8-NEXT: s_sub_i32 s17, s33, s17 -; GFX8-NEXT: s_cmp_gt_i32 s17, s31 -; GFX8-NEXT: s_cselect_b32 s17, s17, s31 -; GFX8-NEXT: s_cmp_lt_i32 s17, s16 -; GFX8-NEXT: s_cselect_b32 s16, s17, s16 +; GFX8-NEXT: s_max_i32 s17, s17, s31 +; GFX8-NEXT: s_min_i32 s16, s17, s16 ; GFX8-NEXT: s_add_i32 s15, s15, s16 ; GFX8-NEXT: ; return to shader part epilog ; @@ -2892,17 +2572,13 @@ ; GFX6-LABEL: s_saddsat_i16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_lshl_b32 s0, s0, 16 +; GFX6-NEXT: s_min_i32 s3, s0, 0 +; GFX6-NEXT: s_max_i32 s2, s0, 0 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16 -; GFX6-NEXT: s_cmp_gt_i32 s0, 0 -; GFX6-NEXT: s_cselect_b32 s2, s0, 0 -; GFX6-NEXT: s_sub_i32 s2, 0x7fffffff, s2 -; GFX6-NEXT: s_cmp_lt_i32 s0, 0 -; GFX6-NEXT: s_cselect_b32 s3, s0, 0 ; GFX6-NEXT: s_sub_i32 s3, 0x80000000, s3 -; GFX6-NEXT: s_cmp_gt_i32 s3, s1 -; GFX6-NEXT: s_cselect_b32 s1, s3, s1 -; GFX6-NEXT: s_cmp_lt_i32 s1, s2 -; GFX6-NEXT: s_cselect_b32 s1, s1, s2 +; GFX6-NEXT: s_sub_i32 s2, 0x7fffffff, s2 +; GFX6-NEXT: s_max_i32 s1, s3, s1 +; GFX6-NEXT: s_min_i32 s1, s1, s2 ; GFX6-NEXT: s_add_i32 s0, s0, s1 ; GFX6-NEXT: s_ashr_i32 s0, s0, 16 ; GFX6-NEXT: ; return to shader part epilog @@ -2911,20 +2587,16 @@ ; GFX8: ; %bb.0: ; GFX8-NEXT: s_sext_i32_i16 s2, s0 ; GFX8-NEXT: s_sext_i32_i16 s3, 0 -; GFX8-NEXT: s_cmp_gt_i32 s2, s3 -; GFX8-NEXT: s_cselect_b32 s4, s2, s3 -; GFX8-NEXT: s_sub_i32 s4, 0x7fff, s4 -; GFX8-NEXT: s_cmp_lt_i32 s2, s3 -; GFX8-NEXT: s_cselect_b32 s2, s2, s3 +; GFX8-NEXT: s_max_i32 s4, s2, s3 +; GFX8-NEXT: s_min_i32 s2, s2, s3 ; GFX8-NEXT: s_sub_i32 s2, 0xffff8000, s2 ; GFX8-NEXT: s_sext_i32_i16 s2, s2 ; GFX8-NEXT: s_sext_i32_i16 s1, s1 -; GFX8-NEXT: s_cmp_gt_i32 s2, s1 -; GFX8-NEXT: s_cselect_b32 s1, s2, s1 +; GFX8-NEXT: s_max_i32 s1, s2, s1 +; GFX8-NEXT: s_sub_i32 s4, 0x7fff, s4 ; GFX8-NEXT: s_sext_i32_i16 s1, s1 ; GFX8-NEXT: s_sext_i32_i16 s2, s4 -; GFX8-NEXT: s_cmp_lt_i32 s1, s2 -; GFX8-NEXT: s_cselect_b32 s1, s1, s2 +; GFX8-NEXT: s_min_i32 s1, s1, s2 ; GFX8-NEXT: s_add_i32 s0, s0, s1 ; GFX8-NEXT: ; return to shader part epilog ; @@ -2948,13 +2620,11 @@ ; GFX6-LABEL: saddsat_i16_sv: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_lshl_b32 s0, s0, 16 -; GFX6-NEXT: s_cmp_gt_i32 s0, 0 -; GFX6-NEXT: s_cselect_b32 s1, s0, 0 -; GFX6-NEXT: s_sub_i32 s1, 0x7fffffff, s1 -; GFX6-NEXT: s_cmp_lt_i32 s0, 0 -; GFX6-NEXT: s_cselect_b32 s2, s0, 0 +; GFX6-NEXT: s_min_i32 s2, s0, 0 +; GFX6-NEXT: s_max_i32 s1, s0, 0 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX6-NEXT: s_sub_i32 s2, 0x80000000, s2 +; GFX6-NEXT: s_sub_i32 s1, 0x7fffffff, s1 ; GFX6-NEXT: v_max_i32_e32 v0, s2, v0 ; GFX6-NEXT: v_min_i32_e32 v0, s1, v0 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v0 @@ -2965,12 +2635,10 @@ ; GFX8: ; %bb.0: ; GFX8-NEXT: s_sext_i32_i16 s1, s0 ; GFX8-NEXT: s_sext_i32_i16 s2, 0 -; GFX8-NEXT: s_cmp_gt_i32 s1, s2 -; GFX8-NEXT: s_cselect_b32 s3, s1, s2 -; GFX8-NEXT: s_sub_i32 s3, 0x7fff, s3 -; GFX8-NEXT: s_cmp_lt_i32 s1, s2 -; GFX8-NEXT: s_cselect_b32 s1, s1, s2 +; GFX8-NEXT: s_max_i32 s3, s1, s2 +; GFX8-NEXT: s_min_i32 s1, s1, s2 ; GFX8-NEXT: s_sub_i32 s1, 0xffff8000, s1 +; GFX8-NEXT: s_sub_i32 s3, 0x7fff, s3 ; GFX8-NEXT: v_max_i16_e32 v0, s1, v0 ; GFX8-NEXT: v_min_i16_e32 v0, s3, v0 ; GFX8-NEXT: v_add_u16_e32 v0, s0, v0 @@ -3101,36 +2769,28 @@ ; GFX6-LABEL: s_saddsat_v2i16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_lshl_b32 s0, s0, 16 +; GFX6-NEXT: s_brev_b32 s5, 1 +; GFX6-NEXT: s_min_i32 s7, s0, 0 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16 -; GFX6-NEXT: s_cmp_gt_i32 s0, 0 +; GFX6-NEXT: s_sub_i32 s7, s5, s7 ; GFX6-NEXT: s_brev_b32 s4, -2 -; GFX6-NEXT: s_cselect_b32 s6, s0, 0 +; GFX6-NEXT: s_max_i32 s6, s0, 0 ; GFX6-NEXT: s_sub_i32 s6, s4, s6 -; GFX6-NEXT: s_cmp_lt_i32 s0, 0 -; GFX6-NEXT: s_brev_b32 s5, 1 -; GFX6-NEXT: s_cselect_b32 s7, s0, 0 -; GFX6-NEXT: s_sub_i32 s7, s5, s7 -; GFX6-NEXT: s_cmp_gt_i32 s7, s2 -; GFX6-NEXT: s_cselect_b32 s2, s7, s2 -; GFX6-NEXT: s_cmp_lt_i32 s2, s6 -; GFX6-NEXT: s_cselect_b32 s2, s2, s6 -; GFX6-NEXT: s_add_i32 s0, s0, s2 -; GFX6-NEXT: s_ashr_i32 s0, s0, 16 +; GFX6-NEXT: s_max_i32 s2, s7, s2 +; GFX6-NEXT: s_min_i32 s2, s2, s6 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16 +; GFX6-NEXT: s_add_i32 s0, s0, s2 ; GFX6-NEXT: s_lshl_b32 s2, s3, 16 -; GFX6-NEXT: s_cmp_gt_i32 s1, 0 -; GFX6-NEXT: s_cselect_b32 s3, s1, 0 +; GFX6-NEXT: s_max_i32 s3, s1, 0 ; GFX6-NEXT: s_sub_i32 s3, s4, s3 -; GFX6-NEXT: s_cmp_lt_i32 s1, 0 -; GFX6-NEXT: s_cselect_b32 s4, s1, 0 +; GFX6-NEXT: s_min_i32 s4, s1, 0 ; GFX6-NEXT: s_sub_i32 s4, s5, s4 -; GFX6-NEXT: s_cmp_gt_i32 s4, s2 -; GFX6-NEXT: s_cselect_b32 s2, s4, s2 -; GFX6-NEXT: s_cmp_lt_i32 s2, s3 -; GFX6-NEXT: s_cselect_b32 s2, s2, s3 +; GFX6-NEXT: s_max_i32 s2, s4, s2 +; GFX6-NEXT: s_min_i32 s2, s2, s3 ; GFX6-NEXT: s_add_i32 s1, s1, s2 -; GFX6-NEXT: s_mov_b32 s2, 0xffff ; GFX6-NEXT: s_ashr_i32 s1, s1, 16 +; GFX6-NEXT: s_mov_b32 s2, 0xffff +; GFX6-NEXT: s_ashr_i32 s0, s0, 16 ; GFX6-NEXT: s_and_b32 s1, s1, s2 ; GFX6-NEXT: s_and_b32 s0, s0, s2 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16 @@ -3139,42 +2799,34 @@ ; ; GFX8-LABEL: s_saddsat_v2i16: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_lshr_b32 s3, s1, 16 -; GFX8-NEXT: s_lshr_b32 s2, s0, 16 ; GFX8-NEXT: s_sext_i32_i16 s6, s0 ; GFX8-NEXT: s_sext_i32_i16 s7, 0 -; GFX8-NEXT: s_cmp_gt_i32 s6, s7 -; GFX8-NEXT: s_movk_i32 s4, 0x7fff -; GFX8-NEXT: s_cselect_b32 s8, s6, s7 -; GFX8-NEXT: s_sub_i32 s8, s4, s8 -; GFX8-NEXT: s_cmp_lt_i32 s6, s7 +; GFX8-NEXT: s_max_i32 s8, s6, s7 ; GFX8-NEXT: s_movk_i32 s5, 0x8000 -; GFX8-NEXT: s_cselect_b32 s6, s6, s7 +; GFX8-NEXT: s_min_i32 s6, s6, s7 ; GFX8-NEXT: s_sub_i32 s6, s5, s6 +; GFX8-NEXT: s_lshr_b32 s3, s1, 16 +; GFX8-NEXT: s_movk_i32 s4, 0x7fff ; GFX8-NEXT: s_sext_i32_i16 s6, s6 ; GFX8-NEXT: s_sext_i32_i16 s1, s1 -; GFX8-NEXT: s_cmp_gt_i32 s6, s1 -; GFX8-NEXT: s_cselect_b32 s1, s6, s1 +; GFX8-NEXT: s_max_i32 s1, s6, s1 +; GFX8-NEXT: s_sub_i32 s8, s4, s8 ; GFX8-NEXT: s_sext_i32_i16 s1, s1 ; GFX8-NEXT: s_sext_i32_i16 s6, s8 -; GFX8-NEXT: s_cmp_lt_i32 s1, s6 -; GFX8-NEXT: s_cselect_b32 s1, s1, s6 +; GFX8-NEXT: s_lshr_b32 s2, s0, 16 +; GFX8-NEXT: s_min_i32 s1, s1, s6 ; GFX8-NEXT: s_add_i32 s0, s0, s1 ; GFX8-NEXT: s_sext_i32_i16 s1, s2 -; GFX8-NEXT: s_cmp_gt_i32 s1, s7 -; GFX8-NEXT: s_cselect_b32 s6, s1, s7 -; GFX8-NEXT: s_sub_i32 s4, s4, s6 -; GFX8-NEXT: s_cmp_lt_i32 s1, s7 -; GFX8-NEXT: s_cselect_b32 s1, s1, s7 +; GFX8-NEXT: s_max_i32 s6, s1, s7 +; GFX8-NEXT: s_min_i32 s1, s1, s7 ; GFX8-NEXT: s_sub_i32 s1, s5, s1 ; GFX8-NEXT: s_sext_i32_i16 s1, s1 ; GFX8-NEXT: s_sext_i32_i16 s3, s3 -; GFX8-NEXT: s_cmp_gt_i32 s1, s3 -; GFX8-NEXT: s_cselect_b32 s1, s1, s3 +; GFX8-NEXT: s_max_i32 s1, s1, s3 +; GFX8-NEXT: s_sub_i32 s4, s4, s6 ; GFX8-NEXT: s_sext_i32_i16 s1, s1 ; GFX8-NEXT: s_sext_i32_i16 s3, s4 -; GFX8-NEXT: s_cmp_lt_i32 s1, s3 -; GFX8-NEXT: s_cselect_b32 s1, s1, s3 +; GFX8-NEXT: s_min_i32 s1, s1, s3 ; GFX8-NEXT: s_add_i32 s2, s2, s1 ; GFX8-NEXT: s_bfe_u32 s1, s2, 0x100000 ; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000 @@ -3203,24 +2855,20 @@ ; GFX6-LABEL: saddsat_v2i16_sv: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_lshl_b32 s0, s0, 16 -; GFX6-NEXT: s_cmp_gt_i32 s0, 0 -; GFX6-NEXT: s_brev_b32 s2, -2 -; GFX6-NEXT: s_cselect_b32 s4, s0, 0 -; GFX6-NEXT: s_sub_i32 s4, s2, s4 -; GFX6-NEXT: s_cmp_lt_i32 s0, 0 ; GFX6-NEXT: s_brev_b32 s3, 1 -; GFX6-NEXT: s_cselect_b32 s5, s0, 0 +; GFX6-NEXT: s_min_i32 s5, s0, 0 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX6-NEXT: s_sub_i32 s5, s3, s5 +; GFX6-NEXT: s_brev_b32 s2, -2 +; GFX6-NEXT: s_max_i32 s4, s0, 0 +; GFX6-NEXT: s_sub_i32 s4, s2, s4 ; GFX6-NEXT: v_max_i32_e32 v0, s5, v0 ; GFX6-NEXT: v_min_i32_e32 v0, s4, v0 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v0 ; GFX6-NEXT: s_lshl_b32 s0, s1, 16 -; GFX6-NEXT: s_cmp_gt_i32 s0, 0 -; GFX6-NEXT: s_cselect_b32 s1, s0, 0 +; GFX6-NEXT: s_max_i32 s1, s0, 0 ; GFX6-NEXT: s_sub_i32 s1, s2, s1 -; GFX6-NEXT: s_cmp_lt_i32 s0, 0 -; GFX6-NEXT: s_cselect_b32 s2, s0, 0 +; GFX6-NEXT: s_min_i32 s2, s0, 0 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX6-NEXT: s_sub_i32 s2, s3, s2 ; GFX6-NEXT: v_max_i32_e32 v1, s2, v1 @@ -3237,28 +2885,24 @@ ; ; GFX8-LABEL: saddsat_v2i16_sv: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_lshr_b32 s1, s0, 16 ; GFX8-NEXT: s_sext_i32_i16 s4, s0 ; GFX8-NEXT: s_sext_i32_i16 s5, 0 -; GFX8-NEXT: s_cmp_gt_i32 s4, s5 -; GFX8-NEXT: s_movk_i32 s2, 0x7fff -; GFX8-NEXT: s_cselect_b32 s6, s4, s5 -; GFX8-NEXT: s_sub_i32 s6, s2, s6 -; GFX8-NEXT: s_cmp_lt_i32 s4, s5 +; GFX8-NEXT: s_max_i32 s6, s4, s5 ; GFX8-NEXT: s_movk_i32 s3, 0x8000 -; GFX8-NEXT: s_cselect_b32 s4, s4, s5 +; GFX8-NEXT: s_min_i32 s4, s4, s5 +; GFX8-NEXT: s_movk_i32 s2, 0x7fff ; GFX8-NEXT: s_sub_i32 s4, s3, s4 +; GFX8-NEXT: s_lshr_b32 s1, s0, 16 ; GFX8-NEXT: v_max_i16_e32 v1, s4, v0 +; GFX8-NEXT: s_sub_i32 s6, s2, s6 ; GFX8-NEXT: s_sext_i32_i16 s4, s1 -; GFX8-NEXT: s_cmp_gt_i32 s4, s5 ; GFX8-NEXT: v_min_i16_e32 v1, s6, v1 -; GFX8-NEXT: s_cselect_b32 s6, s4, s5 -; GFX8-NEXT: s_sub_i32 s2, s2, s6 -; GFX8-NEXT: s_cmp_lt_i32 s4, s5 -; GFX8-NEXT: s_cselect_b32 s4, s4, s5 +; GFX8-NEXT: s_max_i32 s6, s4, s5 +; GFX8-NEXT: s_min_i32 s4, s4, s5 ; GFX8-NEXT: s_sub_i32 s3, s3, s4 ; GFX8-NEXT: v_mov_b32_e32 v2, s3 ; GFX8-NEXT: v_max_i16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: s_sub_i32 s2, s2, s6 ; GFX8-NEXT: v_min_i16_e32 v0, s2, v0 ; GFX8-NEXT: v_mov_b32_e32 v2, s1 ; GFX8-NEXT: v_add_u16_e32 v1, s0, v1 @@ -3481,64 +3125,48 @@ ; GFX6-LABEL: s_saddsat_v4i16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_lshl_b32 s0, s0, 16 +; GFX6-NEXT: s_brev_b32 s9, 1 +; GFX6-NEXT: s_min_i32 s11, s0, 0 ; GFX6-NEXT: s_lshl_b32 s4, s4, 16 -; GFX6-NEXT: s_cmp_gt_i32 s0, 0 +; GFX6-NEXT: s_sub_i32 s11, s9, s11 ; GFX6-NEXT: s_brev_b32 s8, -2 -; GFX6-NEXT: s_cselect_b32 s10, s0, 0 +; GFX6-NEXT: s_max_i32 s10, s0, 0 ; GFX6-NEXT: s_sub_i32 s10, s8, s10 -; GFX6-NEXT: s_cmp_lt_i32 s0, 0 -; GFX6-NEXT: s_brev_b32 s9, 1 -; GFX6-NEXT: s_cselect_b32 s11, s0, 0 -; GFX6-NEXT: s_sub_i32 s11, s9, s11 -; GFX6-NEXT: s_cmp_gt_i32 s11, s4 -; GFX6-NEXT: s_cselect_b32 s4, s11, s4 -; GFX6-NEXT: s_cmp_lt_i32 s4, s10 -; GFX6-NEXT: s_cselect_b32 s4, s4, s10 -; GFX6-NEXT: s_add_i32 s0, s0, s4 -; GFX6-NEXT: s_ashr_i32 s0, s0, 16 +; GFX6-NEXT: s_max_i32 s4, s11, s4 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16 +; GFX6-NEXT: s_min_i32 s4, s4, s10 +; GFX6-NEXT: s_min_i32 s10, s1, 0 +; GFX6-NEXT: s_add_i32 s0, s0, s4 ; GFX6-NEXT: s_lshl_b32 s4, s5, 16 -; GFX6-NEXT: s_cmp_gt_i32 s1, 0 -; GFX6-NEXT: s_cselect_b32 s5, s1, 0 -; GFX6-NEXT: s_sub_i32 s5, s8, s5 -; GFX6-NEXT: s_cmp_lt_i32 s1, 0 -; GFX6-NEXT: s_cselect_b32 s10, s1, 0 +; GFX6-NEXT: s_max_i32 s5, s1, 0 ; GFX6-NEXT: s_sub_i32 s10, s9, s10 -; GFX6-NEXT: s_cmp_gt_i32 s10, s4 -; GFX6-NEXT: s_cselect_b32 s4, s10, s4 -; GFX6-NEXT: s_cmp_lt_i32 s4, s5 -; GFX6-NEXT: s_cselect_b32 s4, s4, s5 -; GFX6-NEXT: s_add_i32 s1, s1, s4 -; GFX6-NEXT: s_ashr_i32 s1, s1, 16 +; GFX6-NEXT: s_sub_i32 s5, s8, s5 +; GFX6-NEXT: s_max_i32 s4, s10, s4 +; GFX6-NEXT: s_min_i32 s4, s4, s5 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16 +; GFX6-NEXT: s_add_i32 s1, s1, s4 ; GFX6-NEXT: s_lshl_b32 s4, s6, 16 -; GFX6-NEXT: s_cmp_gt_i32 s2, 0 -; GFX6-NEXT: s_cselect_b32 s5, s2, 0 -; GFX6-NEXT: s_sub_i32 s5, s8, s5 -; GFX6-NEXT: s_cmp_lt_i32 s2, 0 -; GFX6-NEXT: s_cselect_b32 s6, s2, 0 +; GFX6-NEXT: s_min_i32 s6, s2, 0 +; GFX6-NEXT: s_max_i32 s5, s2, 0 ; GFX6-NEXT: s_sub_i32 s6, s9, s6 -; GFX6-NEXT: s_cmp_gt_i32 s6, s4 -; GFX6-NEXT: s_cselect_b32 s4, s6, s4 -; GFX6-NEXT: s_cmp_lt_i32 s4, s5 -; GFX6-NEXT: s_cselect_b32 s4, s4, s5 -; GFX6-NEXT: s_add_i32 s2, s2, s4 -; GFX6-NEXT: s_ashr_i32 s2, s2, 16 +; GFX6-NEXT: s_sub_i32 s5, s8, s5 +; GFX6-NEXT: s_max_i32 s4, s6, s4 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16 +; GFX6-NEXT: s_min_i32 s4, s4, s5 +; GFX6-NEXT: s_min_i32 s6, s3, 0 +; GFX6-NEXT: s_add_i32 s2, s2, s4 +; GFX6-NEXT: s_max_i32 s5, s3, 0 ; GFX6-NEXT: s_lshl_b32 s4, s7, 16 -; GFX6-NEXT: s_cmp_gt_i32 s3, 0 -; GFX6-NEXT: s_cselect_b32 s5, s3, 0 -; GFX6-NEXT: s_sub_i32 s5, s8, s5 -; GFX6-NEXT: s_cmp_lt_i32 s3, 0 -; GFX6-NEXT: s_cselect_b32 s6, s3, 0 ; GFX6-NEXT: s_sub_i32 s6, s9, s6 -; GFX6-NEXT: s_cmp_gt_i32 s6, s4 -; GFX6-NEXT: s_cselect_b32 s4, s6, s4 -; GFX6-NEXT: s_cmp_lt_i32 s4, s5 -; GFX6-NEXT: s_cselect_b32 s4, s4, s5 +; GFX6-NEXT: s_sub_i32 s5, s8, s5 +; GFX6-NEXT: s_max_i32 s4, s6, s4 +; GFX6-NEXT: s_min_i32 s4, s4, s5 ; GFX6-NEXT: s_add_i32 s3, s3, s4 +; GFX6-NEXT: s_ashr_i32 s1, s1, 16 ; GFX6-NEXT: s_mov_b32 s4, 0xffff +; GFX6-NEXT: s_ashr_i32 s0, s0, 16 ; GFX6-NEXT: s_and_b32 s1, s1, s4 +; GFX6-NEXT: s_ashr_i32 s2, s2, 16 ; GFX6-NEXT: s_ashr_i32 s3, s3, 16 ; GFX6-NEXT: s_and_b32 s0, s0, s4 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16 @@ -3551,76 +3179,60 @@ ; ; GFX8-LABEL: s_saddsat_v4i16: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_lshr_b32 s6, s2, 16 -; GFX8-NEXT: s_lshr_b32 s7, s3, 16 -; GFX8-NEXT: s_lshr_b32 s4, s0, 16 -; GFX8-NEXT: s_lshr_b32 s5, s1, 16 ; GFX8-NEXT: s_sext_i32_i16 s10, s0 ; GFX8-NEXT: s_sext_i32_i16 s11, 0 -; GFX8-NEXT: s_cmp_gt_i32 s10, s11 -; GFX8-NEXT: s_movk_i32 s8, 0x7fff -; GFX8-NEXT: s_cselect_b32 s12, s10, s11 -; GFX8-NEXT: s_sub_i32 s12, s8, s12 -; GFX8-NEXT: s_cmp_lt_i32 s10, s11 +; GFX8-NEXT: s_max_i32 s12, s10, s11 ; GFX8-NEXT: s_movk_i32 s9, 0x8000 -; GFX8-NEXT: s_cselect_b32 s10, s10, s11 +; GFX8-NEXT: s_min_i32 s10, s10, s11 ; GFX8-NEXT: s_sub_i32 s10, s9, s10 +; GFX8-NEXT: s_lshr_b32 s6, s2, 16 +; GFX8-NEXT: s_movk_i32 s8, 0x7fff ; GFX8-NEXT: s_sext_i32_i16 s10, s10 ; GFX8-NEXT: s_sext_i32_i16 s2, s2 -; GFX8-NEXT: s_cmp_gt_i32 s10, s2 -; GFX8-NEXT: s_cselect_b32 s2, s10, s2 +; GFX8-NEXT: s_max_i32 s2, s10, s2 +; GFX8-NEXT: s_sub_i32 s12, s8, s12 ; GFX8-NEXT: s_sext_i32_i16 s2, s2 ; GFX8-NEXT: s_sext_i32_i16 s10, s12 -; GFX8-NEXT: s_cmp_lt_i32 s2, s10 -; GFX8-NEXT: s_cselect_b32 s2, s2, s10 +; GFX8-NEXT: s_lshr_b32 s4, s0, 16 +; GFX8-NEXT: s_min_i32 s2, s2, s10 ; GFX8-NEXT: s_add_i32 s0, s0, s2 ; GFX8-NEXT: s_sext_i32_i16 s2, s4 -; GFX8-NEXT: s_cmp_gt_i32 s2, s11 -; GFX8-NEXT: s_cselect_b32 s10, s2, s11 -; GFX8-NEXT: s_sub_i32 s10, s8, s10 -; GFX8-NEXT: s_cmp_lt_i32 s2, s11 -; GFX8-NEXT: s_cselect_b32 s2, s2, s11 +; GFX8-NEXT: s_max_i32 s10, s2, s11 +; GFX8-NEXT: s_min_i32 s2, s2, s11 ; GFX8-NEXT: s_sub_i32 s2, s9, s2 ; GFX8-NEXT: s_sext_i32_i16 s2, s2 ; GFX8-NEXT: s_sext_i32_i16 s6, s6 -; GFX8-NEXT: s_cmp_gt_i32 s2, s6 -; GFX8-NEXT: s_cselect_b32 s2, s2, s6 +; GFX8-NEXT: s_max_i32 s2, s2, s6 +; GFX8-NEXT: s_sub_i32 s10, s8, s10 ; GFX8-NEXT: s_sext_i32_i16 s2, s2 ; GFX8-NEXT: s_sext_i32_i16 s6, s10 -; GFX8-NEXT: s_cmp_lt_i32 s2, s6 -; GFX8-NEXT: s_cselect_b32 s2, s2, s6 +; GFX8-NEXT: s_min_i32 s2, s2, s6 ; GFX8-NEXT: s_add_i32 s4, s4, s2 ; GFX8-NEXT: s_sext_i32_i16 s2, s1 -; GFX8-NEXT: s_cmp_gt_i32 s2, s11 -; GFX8-NEXT: s_cselect_b32 s6, s2, s11 -; GFX8-NEXT: s_sub_i32 s6, s8, s6 -; GFX8-NEXT: s_cmp_lt_i32 s2, s11 -; GFX8-NEXT: s_cselect_b32 s2, s2, s11 +; GFX8-NEXT: s_max_i32 s6, s2, s11 +; GFX8-NEXT: s_min_i32 s2, s2, s11 ; GFX8-NEXT: s_sub_i32 s2, s9, s2 +; GFX8-NEXT: s_lshr_b32 s7, s3, 16 ; GFX8-NEXT: s_sext_i32_i16 s2, s2 ; GFX8-NEXT: s_sext_i32_i16 s3, s3 -; GFX8-NEXT: s_cmp_gt_i32 s2, s3 -; GFX8-NEXT: s_cselect_b32 s2, s2, s3 +; GFX8-NEXT: s_max_i32 s2, s2, s3 +; GFX8-NEXT: s_sub_i32 s6, s8, s6 ; GFX8-NEXT: s_sext_i32_i16 s3, s6 ; GFX8-NEXT: s_sext_i32_i16 s2, s2 -; GFX8-NEXT: s_cmp_lt_i32 s2, s3 -; GFX8-NEXT: s_cselect_b32 s2, s2, s3 +; GFX8-NEXT: s_lshr_b32 s5, s1, 16 +; GFX8-NEXT: s_min_i32 s2, s2, s3 ; GFX8-NEXT: s_add_i32 s1, s1, s2 ; GFX8-NEXT: s_sext_i32_i16 s2, s5 -; GFX8-NEXT: s_cmp_gt_i32 s2, s11 -; GFX8-NEXT: s_cselect_b32 s3, s2, s11 -; GFX8-NEXT: s_sub_i32 s3, s8, s3 -; GFX8-NEXT: s_cmp_lt_i32 s2, s11 -; GFX8-NEXT: s_cselect_b32 s2, s2, s11 +; GFX8-NEXT: s_max_i32 s3, s2, s11 +; GFX8-NEXT: s_min_i32 s2, s2, s11 ; GFX8-NEXT: s_sub_i32 s2, s9, s2 ; GFX8-NEXT: s_sext_i32_i16 s2, s2 ; GFX8-NEXT: s_sext_i32_i16 s6, s7 -; GFX8-NEXT: s_cmp_gt_i32 s2, s6 -; GFX8-NEXT: s_cselect_b32 s2, s2, s6 +; GFX8-NEXT: s_sub_i32 s3, s8, s3 +; GFX8-NEXT: s_max_i32 s2, s2, s6 ; GFX8-NEXT: s_sext_i32_i16 s2, s2 ; GFX8-NEXT: s_sext_i32_i16 s3, s3 -; GFX8-NEXT: s_cmp_lt_i32 s2, s3 -; GFX8-NEXT: s_cselect_b32 s2, s2, s3 +; GFX8-NEXT: s_min_i32 s2, s2, s3 ; GFX8-NEXT: s_add_i32 s5, s5, s2 ; GFX8-NEXT: s_bfe_u32 s2, s4, 0x100000 ; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000 @@ -3830,92 +3442,67 @@ ; GFX6-LABEL: s_saddsat_v6i16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_lshl_b32 s0, s0, 16 +; GFX6-NEXT: s_brev_b32 s13, 1 +; GFX6-NEXT: s_min_i32 s15, s0, 0 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16 -; GFX6-NEXT: s_cmp_gt_i32 s0, 0 +; GFX6-NEXT: s_sub_i32 s15, s13, s15 ; GFX6-NEXT: s_brev_b32 s12, -2 -; GFX6-NEXT: s_cselect_b32 s14, s0, 0 +; GFX6-NEXT: s_max_i32 s14, s0, 0 ; GFX6-NEXT: s_sub_i32 s14, s12, s14 -; GFX6-NEXT: s_cmp_lt_i32 s0, 0 -; GFX6-NEXT: s_brev_b32 s13, 1 -; GFX6-NEXT: s_cselect_b32 s15, s0, 0 -; GFX6-NEXT: s_sub_i32 s15, s13, s15 -; GFX6-NEXT: s_cmp_gt_i32 s15, s6 -; GFX6-NEXT: s_cselect_b32 s6, s15, s6 -; GFX6-NEXT: s_cmp_lt_i32 s6, s14 -; GFX6-NEXT: s_cselect_b32 s6, s6, s14 -; GFX6-NEXT: s_add_i32 s0, s0, s6 -; GFX6-NEXT: s_ashr_i32 s0, s0, 16 +; GFX6-NEXT: s_max_i32 s6, s15, s6 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16 +; GFX6-NEXT: s_min_i32 s6, s6, s14 +; GFX6-NEXT: s_min_i32 s14, s1, 0 +; GFX6-NEXT: s_add_i32 s0, s0, s6 ; GFX6-NEXT: s_lshl_b32 s6, s7, 16 -; GFX6-NEXT: s_cmp_gt_i32 s1, 0 -; GFX6-NEXT: s_cselect_b32 s7, s1, 0 -; GFX6-NEXT: s_sub_i32 s7, s12, s7 -; GFX6-NEXT: s_cmp_lt_i32 s1, 0 -; GFX6-NEXT: s_cselect_b32 s14, s1, 0 +; GFX6-NEXT: s_max_i32 s7, s1, 0 ; GFX6-NEXT: s_sub_i32 s14, s13, s14 -; GFX6-NEXT: s_cmp_gt_i32 s14, s6 -; GFX6-NEXT: s_cselect_b32 s6, s14, s6 -; GFX6-NEXT: s_cmp_lt_i32 s6, s7 -; GFX6-NEXT: s_cselect_b32 s6, s6, s7 -; GFX6-NEXT: s_add_i32 s1, s1, s6 -; GFX6-NEXT: s_ashr_i32 s1, s1, 16 +; GFX6-NEXT: s_sub_i32 s7, s12, s7 +; GFX6-NEXT: s_max_i32 s6, s14, s6 +; GFX6-NEXT: s_min_i32 s6, s6, s7 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16 +; GFX6-NEXT: s_add_i32 s1, s1, s6 ; GFX6-NEXT: s_lshl_b32 s6, s8, 16 -; GFX6-NEXT: s_cmp_gt_i32 s2, 0 -; GFX6-NEXT: s_cselect_b32 s7, s2, 0 -; GFX6-NEXT: s_sub_i32 s7, s12, s7 -; GFX6-NEXT: s_cmp_lt_i32 s2, 0 -; GFX6-NEXT: s_cselect_b32 s8, s2, 0 +; GFX6-NEXT: s_min_i32 s8, s2, 0 +; GFX6-NEXT: s_max_i32 s7, s2, 0 ; GFX6-NEXT: s_sub_i32 s8, s13, s8 -; GFX6-NEXT: s_cmp_gt_i32 s8, s6 -; GFX6-NEXT: s_cselect_b32 s6, s8, s6 -; GFX6-NEXT: s_cmp_lt_i32 s6, s7 -; GFX6-NEXT: s_cselect_b32 s6, s6, s7 -; GFX6-NEXT: s_add_i32 s2, s2, s6 -; GFX6-NEXT: s_ashr_i32 s2, s2, 16 +; GFX6-NEXT: s_sub_i32 s7, s12, s7 +; GFX6-NEXT: s_max_i32 s6, s8, s6 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16 +; GFX6-NEXT: s_min_i32 s6, s6, s7 +; GFX6-NEXT: s_min_i32 s8, s3, 0 +; GFX6-NEXT: s_add_i32 s2, s2, s6 +; GFX6-NEXT: s_max_i32 s7, s3, 0 ; GFX6-NEXT: s_lshl_b32 s6, s9, 16 -; GFX6-NEXT: s_cmp_gt_i32 s3, 0 -; GFX6-NEXT: s_cselect_b32 s7, s3, 0 -; GFX6-NEXT: s_sub_i32 s7, s12, s7 -; GFX6-NEXT: s_cmp_lt_i32 s3, 0 -; GFX6-NEXT: s_cselect_b32 s8, s3, 0 ; GFX6-NEXT: s_sub_i32 s8, s13, s8 -; GFX6-NEXT: s_cmp_gt_i32 s8, s6 -; GFX6-NEXT: s_cselect_b32 s6, s8, s6 -; GFX6-NEXT: s_cmp_lt_i32 s6, s7 -; GFX6-NEXT: s_cselect_b32 s6, s6, s7 -; GFX6-NEXT: s_add_i32 s3, s3, s6 -; GFX6-NEXT: s_ashr_i32 s3, s3, 16 +; GFX6-NEXT: s_sub_i32 s7, s12, s7 +; GFX6-NEXT: s_max_i32 s6, s8, s6 ; GFX6-NEXT: s_lshl_b32 s4, s4, 16 +; GFX6-NEXT: s_min_i32 s6, s6, s7 +; GFX6-NEXT: s_min_i32 s8, s4, 0 +; GFX6-NEXT: s_add_i32 s3, s3, s6 +; GFX6-NEXT: s_max_i32 s7, s4, 0 ; GFX6-NEXT: s_lshl_b32 s6, s10, 16 -; GFX6-NEXT: s_cmp_gt_i32 s4, 0 -; GFX6-NEXT: s_cselect_b32 s7, s4, 0 -; GFX6-NEXT: s_sub_i32 s7, s12, s7 -; GFX6-NEXT: s_cmp_lt_i32 s4, 0 -; GFX6-NEXT: s_cselect_b32 s8, s4, 0 ; GFX6-NEXT: s_sub_i32 s8, s13, s8 -; GFX6-NEXT: s_cmp_gt_i32 s8, s6 -; GFX6-NEXT: s_cselect_b32 s6, s8, s6 -; GFX6-NEXT: s_cmp_lt_i32 s6, s7 -; GFX6-NEXT: s_cselect_b32 s6, s6, s7 -; GFX6-NEXT: s_add_i32 s4, s4, s6 -; GFX6-NEXT: s_ashr_i32 s4, s4, 16 +; GFX6-NEXT: s_sub_i32 s7, s12, s7 +; GFX6-NEXT: s_max_i32 s6, s8, s6 ; GFX6-NEXT: s_lshl_b32 s5, s5, 16 +; GFX6-NEXT: s_min_i32 s6, s6, s7 +; GFX6-NEXT: s_min_i32 s8, s5, 0 +; GFX6-NEXT: s_add_i32 s4, s4, s6 +; GFX6-NEXT: s_max_i32 s7, s5, 0 ; GFX6-NEXT: s_lshl_b32 s6, s11, 16 -; GFX6-NEXT: s_cmp_gt_i32 s5, 0 -; GFX6-NEXT: s_cselect_b32 s7, s5, 0 -; GFX6-NEXT: s_sub_i32 s7, s12, s7 -; GFX6-NEXT: s_cmp_lt_i32 s5, 0 -; GFX6-NEXT: s_cselect_b32 s8, s5, 0 ; GFX6-NEXT: s_sub_i32 s8, s13, s8 -; GFX6-NEXT: s_cmp_gt_i32 s8, s6 -; GFX6-NEXT: s_cselect_b32 s6, s8, s6 -; GFX6-NEXT: s_cmp_lt_i32 s6, s7 -; GFX6-NEXT: s_cselect_b32 s6, s6, s7 +; GFX6-NEXT: s_sub_i32 s7, s12, s7 +; GFX6-NEXT: s_max_i32 s6, s8, s6 +; GFX6-NEXT: s_min_i32 s6, s6, s7 ; GFX6-NEXT: s_add_i32 s5, s5, s6 +; GFX6-NEXT: s_ashr_i32 s1, s1, 16 ; GFX6-NEXT: s_mov_b32 s6, 0xffff +; GFX6-NEXT: s_ashr_i32 s0, s0, 16 ; GFX6-NEXT: s_and_b32 s1, s1, s6 +; GFX6-NEXT: s_ashr_i32 s2, s2, 16 +; GFX6-NEXT: s_ashr_i32 s3, s3, 16 ; GFX6-NEXT: s_and_b32 s0, s0, s6 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16 ; GFX6-NEXT: s_or_b32 s0, s0, s1 @@ -3924,6 +3511,7 @@ ; GFX6-NEXT: s_ashr_i32 s5, s5, 16 ; GFX6-NEXT: s_and_b32 s3, s5, s6 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16 +; GFX6-NEXT: s_ashr_i32 s4, s4, 16 ; GFX6-NEXT: s_or_b32 s1, s1, s2 ; GFX6-NEXT: s_and_b32 s2, s4, s6 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16 @@ -3932,110 +3520,86 @@ ; ; GFX8-LABEL: s_saddsat_v6i16: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_lshr_b32 s9, s3, 16 -; GFX8-NEXT: s_lshr_b32 s10, s4, 16 -; GFX8-NEXT: s_lshr_b32 s11, s5, 16 -; GFX8-NEXT: s_lshr_b32 s6, s0, 16 -; GFX8-NEXT: s_lshr_b32 s7, s1, 16 -; GFX8-NEXT: s_lshr_b32 s8, s2, 16 ; GFX8-NEXT: s_sext_i32_i16 s14, s0 ; GFX8-NEXT: s_sext_i32_i16 s15, 0 -; GFX8-NEXT: s_cmp_gt_i32 s14, s15 -; GFX8-NEXT: s_movk_i32 s12, 0x7fff -; GFX8-NEXT: s_cselect_b32 s16, s14, s15 -; GFX8-NEXT: s_sub_i32 s16, s12, s16 -; GFX8-NEXT: s_cmp_lt_i32 s14, s15 +; GFX8-NEXT: s_max_i32 s16, s14, s15 ; GFX8-NEXT: s_movk_i32 s13, 0x8000 -; GFX8-NEXT: s_cselect_b32 s14, s14, s15 +; GFX8-NEXT: s_min_i32 s14, s14, s15 ; GFX8-NEXT: s_sub_i32 s14, s13, s14 +; GFX8-NEXT: s_lshr_b32 s9, s3, 16 +; GFX8-NEXT: s_movk_i32 s12, 0x7fff ; GFX8-NEXT: s_sext_i32_i16 s14, s14 ; GFX8-NEXT: s_sext_i32_i16 s3, s3 -; GFX8-NEXT: s_cmp_gt_i32 s14, s3 -; GFX8-NEXT: s_cselect_b32 s3, s14, s3 +; GFX8-NEXT: s_max_i32 s3, s14, s3 +; GFX8-NEXT: s_sub_i32 s16, s12, s16 ; GFX8-NEXT: s_sext_i32_i16 s3, s3 ; GFX8-NEXT: s_sext_i32_i16 s14, s16 -; GFX8-NEXT: s_cmp_lt_i32 s3, s14 -; GFX8-NEXT: s_cselect_b32 s3, s3, s14 +; GFX8-NEXT: s_lshr_b32 s6, s0, 16 +; GFX8-NEXT: s_min_i32 s3, s3, s14 ; GFX8-NEXT: s_add_i32 s0, s0, s3 ; GFX8-NEXT: s_sext_i32_i16 s3, s6 -; GFX8-NEXT: s_cmp_gt_i32 s3, s15 -; GFX8-NEXT: s_cselect_b32 s14, s3, s15 -; GFX8-NEXT: s_sub_i32 s14, s12, s14 -; GFX8-NEXT: s_cmp_lt_i32 s3, s15 -; GFX8-NEXT: s_cselect_b32 s3, s3, s15 +; GFX8-NEXT: s_max_i32 s14, s3, s15 +; GFX8-NEXT: s_min_i32 s3, s3, s15 ; GFX8-NEXT: s_sub_i32 s3, s13, s3 ; GFX8-NEXT: s_sext_i32_i16 s3, s3 ; GFX8-NEXT: s_sext_i32_i16 s9, s9 -; GFX8-NEXT: s_cmp_gt_i32 s3, s9 -; GFX8-NEXT: s_cselect_b32 s3, s3, s9 +; GFX8-NEXT: s_max_i32 s3, s3, s9 +; GFX8-NEXT: s_sub_i32 s14, s12, s14 ; GFX8-NEXT: s_sext_i32_i16 s3, s3 ; GFX8-NEXT: s_sext_i32_i16 s9, s14 -; GFX8-NEXT: s_cmp_lt_i32 s3, s9 -; GFX8-NEXT: s_cselect_b32 s3, s3, s9 +; GFX8-NEXT: s_min_i32 s3, s3, s9 ; GFX8-NEXT: s_add_i32 s6, s6, s3 ; GFX8-NEXT: s_sext_i32_i16 s3, s1 -; GFX8-NEXT: s_cmp_gt_i32 s3, s15 -; GFX8-NEXT: s_cselect_b32 s9, s3, s15 -; GFX8-NEXT: s_sub_i32 s9, s12, s9 -; GFX8-NEXT: s_cmp_lt_i32 s3, s15 -; GFX8-NEXT: s_cselect_b32 s3, s3, s15 +; GFX8-NEXT: s_max_i32 s9, s3, s15 +; GFX8-NEXT: s_min_i32 s3, s3, s15 ; GFX8-NEXT: s_sub_i32 s3, s13, s3 +; GFX8-NEXT: s_lshr_b32 s10, s4, 16 ; GFX8-NEXT: s_sext_i32_i16 s3, s3 ; GFX8-NEXT: s_sext_i32_i16 s4, s4 -; GFX8-NEXT: s_cmp_gt_i32 s3, s4 -; GFX8-NEXT: s_cselect_b32 s3, s3, s4 +; GFX8-NEXT: s_max_i32 s3, s3, s4 +; GFX8-NEXT: s_sub_i32 s9, s12, s9 ; GFX8-NEXT: s_sext_i32_i16 s4, s9 ; GFX8-NEXT: s_sext_i32_i16 s3, s3 -; GFX8-NEXT: s_cmp_lt_i32 s3, s4 -; GFX8-NEXT: s_cselect_b32 s3, s3, s4 +; GFX8-NEXT: s_lshr_b32 s7, s1, 16 +; GFX8-NEXT: s_min_i32 s3, s3, s4 ; GFX8-NEXT: s_add_i32 s1, s1, s3 ; GFX8-NEXT: s_sext_i32_i16 s3, s7 -; GFX8-NEXT: s_cmp_gt_i32 s3, s15 -; GFX8-NEXT: s_cselect_b32 s4, s3, s15 -; GFX8-NEXT: s_sub_i32 s4, s12, s4 -; GFX8-NEXT: s_cmp_lt_i32 s3, s15 -; GFX8-NEXT: s_cselect_b32 s3, s3, s15 +; GFX8-NEXT: s_max_i32 s4, s3, s15 +; GFX8-NEXT: s_min_i32 s3, s3, s15 ; GFX8-NEXT: s_sub_i32 s3, s13, s3 ; GFX8-NEXT: s_sext_i32_i16 s3, s3 ; GFX8-NEXT: s_sext_i32_i16 s9, s10 -; GFX8-NEXT: s_cmp_gt_i32 s3, s9 -; GFX8-NEXT: s_cselect_b32 s3, s3, s9 +; GFX8-NEXT: s_sub_i32 s4, s12, s4 +; GFX8-NEXT: s_max_i32 s3, s3, s9 ; GFX8-NEXT: s_sext_i32_i16 s3, s3 ; GFX8-NEXT: s_sext_i32_i16 s4, s4 -; GFX8-NEXT: s_cmp_lt_i32 s3, s4 -; GFX8-NEXT: s_cselect_b32 s3, s3, s4 +; GFX8-NEXT: s_min_i32 s3, s3, s4 ; GFX8-NEXT: s_add_i32 s7, s7, s3 ; GFX8-NEXT: s_sext_i32_i16 s3, s2 -; GFX8-NEXT: s_cmp_gt_i32 s3, s15 -; GFX8-NEXT: s_cselect_b32 s4, s3, s15 -; GFX8-NEXT: s_sub_i32 s4, s12, s4 -; GFX8-NEXT: s_cmp_lt_i32 s3, s15 -; GFX8-NEXT: s_cselect_b32 s3, s3, s15 +; GFX8-NEXT: s_max_i32 s4, s3, s15 +; GFX8-NEXT: s_min_i32 s3, s3, s15 ; GFX8-NEXT: s_sub_i32 s3, s13, s3 +; GFX8-NEXT: s_lshr_b32 s11, s5, 16 ; GFX8-NEXT: s_sext_i32_i16 s3, s3 ; GFX8-NEXT: s_sext_i32_i16 s5, s5 -; GFX8-NEXT: s_cmp_gt_i32 s3, s5 -; GFX8-NEXT: s_cselect_b32 s3, s3, s5 +; GFX8-NEXT: s_max_i32 s3, s3, s5 +; GFX8-NEXT: s_sub_i32 s4, s12, s4 ; GFX8-NEXT: s_sext_i32_i16 s3, s3 ; GFX8-NEXT: s_sext_i32_i16 s4, s4 -; GFX8-NEXT: s_cmp_lt_i32 s3, s4 -; GFX8-NEXT: s_cselect_b32 s3, s3, s4 +; GFX8-NEXT: s_lshr_b32 s8, s2, 16 +; GFX8-NEXT: s_min_i32 s3, s3, s4 ; GFX8-NEXT: s_add_i32 s2, s2, s3 ; GFX8-NEXT: s_sext_i32_i16 s3, s8 -; GFX8-NEXT: s_cmp_gt_i32 s3, s15 -; GFX8-NEXT: s_cselect_b32 s4, s3, s15 -; GFX8-NEXT: s_sub_i32 s4, s12, s4 -; GFX8-NEXT: s_cmp_lt_i32 s3, s15 -; GFX8-NEXT: s_cselect_b32 s3, s3, s15 +; GFX8-NEXT: s_max_i32 s4, s3, s15 +; GFX8-NEXT: s_min_i32 s3, s3, s15 ; GFX8-NEXT: s_sub_i32 s3, s13, s3 ; GFX8-NEXT: s_sext_i32_i16 s3, s3 ; GFX8-NEXT: s_sext_i32_i16 s5, s11 -; GFX8-NEXT: s_cmp_gt_i32 s3, s5 -; GFX8-NEXT: s_cselect_b32 s3, s3, s5 +; GFX8-NEXT: s_sub_i32 s4, s12, s4 +; GFX8-NEXT: s_max_i32 s3, s3, s5 ; GFX8-NEXT: s_sext_i32_i16 s3, s3 ; GFX8-NEXT: s_sext_i32_i16 s4, s4 -; GFX8-NEXT: s_cmp_lt_i32 s3, s4 -; GFX8-NEXT: s_cselect_b32 s3, s3, s4 +; GFX8-NEXT: s_min_i32 s3, s3, s4 ; GFX8-NEXT: s_add_i32 s8, s8, s3 ; GFX8-NEXT: s_bfe_u32 s3, s6, 0x100000 ; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000 @@ -4285,132 +3849,100 @@ ; GFX6-LABEL: s_saddsat_v8i16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_lshl_b32 s0, s0, 16 +; GFX6-NEXT: s_brev_b32 s17, 1 +; GFX6-NEXT: s_min_i32 s19, s0, 0 ; GFX6-NEXT: s_lshl_b32 s8, s8, 16 -; GFX6-NEXT: s_cmp_gt_i32 s0, 0 +; GFX6-NEXT: s_sub_i32 s19, s17, s19 ; GFX6-NEXT: s_brev_b32 s16, -2 -; GFX6-NEXT: s_cselect_b32 s18, s0, 0 +; GFX6-NEXT: s_max_i32 s18, s0, 0 ; GFX6-NEXT: s_sub_i32 s18, s16, s18 -; GFX6-NEXT: s_cmp_lt_i32 s0, 0 -; GFX6-NEXT: s_brev_b32 s17, 1 -; GFX6-NEXT: s_cselect_b32 s19, s0, 0 -; GFX6-NEXT: s_sub_i32 s19, s17, s19 -; GFX6-NEXT: s_cmp_gt_i32 s19, s8 -; GFX6-NEXT: s_cselect_b32 s8, s19, s8 -; GFX6-NEXT: s_cmp_lt_i32 s8, s18 -; GFX6-NEXT: s_cselect_b32 s8, s8, s18 -; GFX6-NEXT: s_add_i32 s0, s0, s8 -; GFX6-NEXT: s_ashr_i32 s0, s0, 16 +; GFX6-NEXT: s_max_i32 s8, s19, s8 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16 +; GFX6-NEXT: s_min_i32 s8, s8, s18 +; GFX6-NEXT: s_min_i32 s18, s1, 0 +; GFX6-NEXT: s_add_i32 s0, s0, s8 ; GFX6-NEXT: s_lshl_b32 s8, s9, 16 -; GFX6-NEXT: s_cmp_gt_i32 s1, 0 -; GFX6-NEXT: s_cselect_b32 s9, s1, 0 -; GFX6-NEXT: s_sub_i32 s9, s16, s9 -; GFX6-NEXT: s_cmp_lt_i32 s1, 0 -; GFX6-NEXT: s_cselect_b32 s18, s1, 0 +; GFX6-NEXT: s_max_i32 s9, s1, 0 ; GFX6-NEXT: s_sub_i32 s18, s17, s18 -; GFX6-NEXT: s_cmp_gt_i32 s18, s8 -; GFX6-NEXT: s_cselect_b32 s8, s18, s8 -; GFX6-NEXT: s_cmp_lt_i32 s8, s9 -; GFX6-NEXT: s_cselect_b32 s8, s8, s9 -; GFX6-NEXT: s_add_i32 s1, s1, s8 -; GFX6-NEXT: s_ashr_i32 s1, s1, 16 +; GFX6-NEXT: s_sub_i32 s9, s16, s9 +; GFX6-NEXT: s_max_i32 s8, s18, s8 +; GFX6-NEXT: s_min_i32 s8, s8, s9 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16 +; GFX6-NEXT: s_add_i32 s1, s1, s8 ; GFX6-NEXT: s_lshl_b32 s8, s10, 16 -; GFX6-NEXT: s_cmp_gt_i32 s2, 0 -; GFX6-NEXT: s_cselect_b32 s9, s2, 0 -; GFX6-NEXT: s_sub_i32 s9, s16, s9 -; GFX6-NEXT: s_cmp_lt_i32 s2, 0 -; GFX6-NEXT: s_cselect_b32 s10, s2, 0 +; GFX6-NEXT: s_min_i32 s10, s2, 0 +; GFX6-NEXT: s_max_i32 s9, s2, 0 ; GFX6-NEXT: s_sub_i32 s10, s17, s10 -; GFX6-NEXT: s_cmp_gt_i32 s10, s8 -; GFX6-NEXT: s_cselect_b32 s8, s10, s8 -; GFX6-NEXT: s_cmp_lt_i32 s8, s9 -; GFX6-NEXT: s_cselect_b32 s8, s8, s9 -; GFX6-NEXT: s_add_i32 s2, s2, s8 -; GFX6-NEXT: s_ashr_i32 s2, s2, 16 +; GFX6-NEXT: s_sub_i32 s9, s16, s9 +; GFX6-NEXT: s_max_i32 s8, s10, s8 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16 +; GFX6-NEXT: s_min_i32 s8, s8, s9 +; GFX6-NEXT: s_min_i32 s10, s3, 0 +; GFX6-NEXT: s_add_i32 s2, s2, s8 +; GFX6-NEXT: s_max_i32 s9, s3, 0 ; GFX6-NEXT: s_lshl_b32 s8, s11, 16 -; GFX6-NEXT: s_cmp_gt_i32 s3, 0 -; GFX6-NEXT: s_cselect_b32 s9, s3, 0 -; GFX6-NEXT: s_sub_i32 s9, s16, s9 -; GFX6-NEXT: s_cmp_lt_i32 s3, 0 -; GFX6-NEXT: s_cselect_b32 s10, s3, 0 ; GFX6-NEXT: s_sub_i32 s10, s17, s10 -; GFX6-NEXT: s_cmp_gt_i32 s10, s8 -; GFX6-NEXT: s_cselect_b32 s8, s10, s8 -; GFX6-NEXT: s_cmp_lt_i32 s8, s9 -; GFX6-NEXT: s_cselect_b32 s8, s8, s9 -; GFX6-NEXT: s_add_i32 s3, s3, s8 -; GFX6-NEXT: s_ashr_i32 s3, s3, 16 +; GFX6-NEXT: s_sub_i32 s9, s16, s9 +; GFX6-NEXT: s_max_i32 s8, s10, s8 ; GFX6-NEXT: s_lshl_b32 s4, s4, 16 +; GFX6-NEXT: s_min_i32 s8, s8, s9 +; GFX6-NEXT: s_min_i32 s10, s4, 0 +; GFX6-NEXT: s_add_i32 s3, s3, s8 +; GFX6-NEXT: s_max_i32 s9, s4, 0 ; GFX6-NEXT: s_lshl_b32 s8, s12, 16 -; GFX6-NEXT: s_cmp_gt_i32 s4, 0 -; GFX6-NEXT: s_cselect_b32 s9, s4, 0 -; GFX6-NEXT: s_sub_i32 s9, s16, s9 -; GFX6-NEXT: s_cmp_lt_i32 s4, 0 -; GFX6-NEXT: s_cselect_b32 s10, s4, 0 ; GFX6-NEXT: s_sub_i32 s10, s17, s10 -; GFX6-NEXT: s_cmp_gt_i32 s10, s8 -; GFX6-NEXT: s_cselect_b32 s8, s10, s8 -; GFX6-NEXT: s_cmp_lt_i32 s8, s9 -; GFX6-NEXT: s_cselect_b32 s8, s8, s9 -; GFX6-NEXT: s_add_i32 s4, s4, s8 -; GFX6-NEXT: s_ashr_i32 s4, s4, 16 +; GFX6-NEXT: s_sub_i32 s9, s16, s9 +; GFX6-NEXT: s_max_i32 s8, s10, s8 ; GFX6-NEXT: s_lshl_b32 s5, s5, 16 +; GFX6-NEXT: s_min_i32 s8, s8, s9 +; GFX6-NEXT: s_min_i32 s10, s5, 0 +; GFX6-NEXT: s_add_i32 s4, s4, s8 +; GFX6-NEXT: s_max_i32 s9, s5, 0 ; GFX6-NEXT: s_lshl_b32 s8, s13, 16 -; GFX6-NEXT: s_cmp_gt_i32 s5, 0 -; GFX6-NEXT: s_cselect_b32 s9, s5, 0 -; GFX6-NEXT: s_sub_i32 s9, s16, s9 -; GFX6-NEXT: s_cmp_lt_i32 s5, 0 -; GFX6-NEXT: s_cselect_b32 s10, s5, 0 ; GFX6-NEXT: s_sub_i32 s10, s17, s10 -; GFX6-NEXT: s_cmp_gt_i32 s10, s8 -; GFX6-NEXT: s_cselect_b32 s8, s10, s8 -; GFX6-NEXT: s_cmp_lt_i32 s8, s9 -; GFX6-NEXT: s_cselect_b32 s8, s8, s9 -; GFX6-NEXT: s_add_i32 s5, s5, s8 -; GFX6-NEXT: s_ashr_i32 s5, s5, 16 +; GFX6-NEXT: s_sub_i32 s9, s16, s9 +; GFX6-NEXT: s_max_i32 s8, s10, s8 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16 +; GFX6-NEXT: s_min_i32 s8, s8, s9 +; GFX6-NEXT: s_min_i32 s10, s6, 0 +; GFX6-NEXT: s_add_i32 s5, s5, s8 +; GFX6-NEXT: s_max_i32 s9, s6, 0 ; GFX6-NEXT: s_lshl_b32 s8, s14, 16 -; GFX6-NEXT: s_cmp_gt_i32 s6, 0 -; GFX6-NEXT: s_cselect_b32 s9, s6, 0 -; GFX6-NEXT: s_sub_i32 s9, s16, s9 -; GFX6-NEXT: s_cmp_lt_i32 s6, 0 -; GFX6-NEXT: s_cselect_b32 s10, s6, 0 ; GFX6-NEXT: s_sub_i32 s10, s17, s10 -; GFX6-NEXT: s_cmp_gt_i32 s10, s8 -; GFX6-NEXT: s_cselect_b32 s8, s10, s8 -; GFX6-NEXT: s_cmp_lt_i32 s8, s9 -; GFX6-NEXT: s_cselect_b32 s8, s8, s9 -; GFX6-NEXT: s_add_i32 s6, s6, s8 -; GFX6-NEXT: s_ashr_i32 s6, s6, 16 +; GFX6-NEXT: s_sub_i32 s9, s16, s9 +; GFX6-NEXT: s_max_i32 s8, s10, s8 ; GFX6-NEXT: s_lshl_b32 s7, s7, 16 +; GFX6-NEXT: s_min_i32 s8, s8, s9 +; GFX6-NEXT: s_min_i32 s10, s7, 0 +; GFX6-NEXT: s_add_i32 s6, s6, s8 +; GFX6-NEXT: s_max_i32 s9, s7, 0 ; GFX6-NEXT: s_lshl_b32 s8, s15, 16 -; GFX6-NEXT: s_cmp_gt_i32 s7, 0 -; GFX6-NEXT: s_cselect_b32 s9, s7, 0 -; GFX6-NEXT: s_sub_i32 s9, s16, s9 -; GFX6-NEXT: s_cmp_lt_i32 s7, 0 -; GFX6-NEXT: s_cselect_b32 s10, s7, 0 ; GFX6-NEXT: s_sub_i32 s10, s17, s10 -; GFX6-NEXT: s_cmp_gt_i32 s10, s8 -; GFX6-NEXT: s_cselect_b32 s8, s10, s8 -; GFX6-NEXT: s_cmp_lt_i32 s8, s9 -; GFX6-NEXT: s_cselect_b32 s8, s8, s9 +; GFX6-NEXT: s_sub_i32 s9, s16, s9 +; GFX6-NEXT: s_max_i32 s8, s10, s8 +; GFX6-NEXT: s_min_i32 s8, s8, s9 ; GFX6-NEXT: s_add_i32 s7, s7, s8 +; GFX6-NEXT: s_ashr_i32 s1, s1, 16 ; GFX6-NEXT: s_mov_b32 s8, 0xffff +; GFX6-NEXT: s_ashr_i32 s0, s0, 16 ; GFX6-NEXT: s_and_b32 s1, s1, s8 +; GFX6-NEXT: s_ashr_i32 s2, s2, 16 +; GFX6-NEXT: s_ashr_i32 s3, s3, 16 ; GFX6-NEXT: s_and_b32 s0, s0, s8 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16 ; GFX6-NEXT: s_or_b32 s0, s0, s1 ; GFX6-NEXT: s_and_b32 s1, s2, s8 ; GFX6-NEXT: s_and_b32 s2, s3, s8 +; GFX6-NEXT: s_ashr_i32 s5, s5, 16 ; GFX6-NEXT: s_and_b32 s3, s5, s8 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16 +; GFX6-NEXT: s_ashr_i32 s4, s4, 16 ; GFX6-NEXT: s_ashr_i32 s7, s7, 16 ; GFX6-NEXT: s_or_b32 s1, s1, s2 ; GFX6-NEXT: s_and_b32 s2, s4, s8 ; GFX6-NEXT: s_and_b32 s4, s7, s8 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16 +; GFX6-NEXT: s_ashr_i32 s6, s6, 16 ; GFX6-NEXT: s_or_b32 s2, s2, s3 ; GFX6-NEXT: s_and_b32 s3, s6, s8 ; GFX6-NEXT: s_lshl_b32 s4, s4, 16 @@ -4419,144 +3951,112 @@ ; ; GFX8-LABEL: s_saddsat_v8i16: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_lshr_b32 s12, s4, 16 -; GFX8-NEXT: s_lshr_b32 s13, s5, 16 -; GFX8-NEXT: s_lshr_b32 s14, s6, 16 -; GFX8-NEXT: s_lshr_b32 s8, s0, 16 -; GFX8-NEXT: s_lshr_b32 s9, s1, 16 -; GFX8-NEXT: s_lshr_b32 s10, s2, 16 -; GFX8-NEXT: s_lshr_b32 s11, s3, 16 -; GFX8-NEXT: s_lshr_b32 s15, s7, 16 ; GFX8-NEXT: s_sext_i32_i16 s18, s0 ; GFX8-NEXT: s_sext_i32_i16 s19, 0 -; GFX8-NEXT: s_cmp_gt_i32 s18, s19 -; GFX8-NEXT: s_movk_i32 s16, 0x7fff -; GFX8-NEXT: s_cselect_b32 s20, s18, s19 -; GFX8-NEXT: s_sub_i32 s20, s16, s20 -; GFX8-NEXT: s_cmp_lt_i32 s18, s19 +; GFX8-NEXT: s_max_i32 s20, s18, s19 ; GFX8-NEXT: s_movk_i32 s17, 0x8000 -; GFX8-NEXT: s_cselect_b32 s18, s18, s19 +; GFX8-NEXT: s_min_i32 s18, s18, s19 ; GFX8-NEXT: s_sub_i32 s18, s17, s18 +; GFX8-NEXT: s_lshr_b32 s12, s4, 16 +; GFX8-NEXT: s_movk_i32 s16, 0x7fff ; GFX8-NEXT: s_sext_i32_i16 s18, s18 ; GFX8-NEXT: s_sext_i32_i16 s4, s4 -; GFX8-NEXT: s_cmp_gt_i32 s18, s4 -; GFX8-NEXT: s_cselect_b32 s4, s18, s4 +; GFX8-NEXT: s_max_i32 s4, s18, s4 +; GFX8-NEXT: s_sub_i32 s20, s16, s20 ; GFX8-NEXT: s_sext_i32_i16 s4, s4 ; GFX8-NEXT: s_sext_i32_i16 s18, s20 -; GFX8-NEXT: s_cmp_lt_i32 s4, s18 -; GFX8-NEXT: s_cselect_b32 s4, s4, s18 +; GFX8-NEXT: s_lshr_b32 s8, s0, 16 +; GFX8-NEXT: s_min_i32 s4, s4, s18 ; GFX8-NEXT: s_add_i32 s0, s0, s4 ; GFX8-NEXT: s_sext_i32_i16 s4, s8 -; GFX8-NEXT: s_cmp_gt_i32 s4, s19 -; GFX8-NEXT: s_cselect_b32 s18, s4, s19 -; GFX8-NEXT: s_sub_i32 s18, s16, s18 -; GFX8-NEXT: s_cmp_lt_i32 s4, s19 -; GFX8-NEXT: s_cselect_b32 s4, s4, s19 +; GFX8-NEXT: s_max_i32 s18, s4, s19 +; GFX8-NEXT: s_min_i32 s4, s4, s19 ; GFX8-NEXT: s_sub_i32 s4, s17, s4 ; GFX8-NEXT: s_sext_i32_i16 s4, s4 ; GFX8-NEXT: s_sext_i32_i16 s12, s12 -; GFX8-NEXT: s_cmp_gt_i32 s4, s12 -; GFX8-NEXT: s_cselect_b32 s4, s4, s12 +; GFX8-NEXT: s_max_i32 s4, s4, s12 +; GFX8-NEXT: s_sub_i32 s18, s16, s18 ; GFX8-NEXT: s_sext_i32_i16 s4, s4 ; GFX8-NEXT: s_sext_i32_i16 s12, s18 -; GFX8-NEXT: s_cmp_lt_i32 s4, s12 -; GFX8-NEXT: s_cselect_b32 s4, s4, s12 +; GFX8-NEXT: s_min_i32 s4, s4, s12 ; GFX8-NEXT: s_add_i32 s8, s8, s4 ; GFX8-NEXT: s_sext_i32_i16 s4, s1 -; GFX8-NEXT: s_cmp_gt_i32 s4, s19 -; GFX8-NEXT: s_cselect_b32 s12, s4, s19 -; GFX8-NEXT: s_sub_i32 s12, s16, s12 -; GFX8-NEXT: s_cmp_lt_i32 s4, s19 -; GFX8-NEXT: s_cselect_b32 s4, s4, s19 +; GFX8-NEXT: s_max_i32 s12, s4, s19 +; GFX8-NEXT: s_min_i32 s4, s4, s19 ; GFX8-NEXT: s_sub_i32 s4, s17, s4 +; GFX8-NEXT: s_lshr_b32 s13, s5, 16 ; GFX8-NEXT: s_sext_i32_i16 s4, s4 ; GFX8-NEXT: s_sext_i32_i16 s5, s5 -; GFX8-NEXT: s_cmp_gt_i32 s4, s5 -; GFX8-NEXT: s_cselect_b32 s4, s4, s5 +; GFX8-NEXT: s_max_i32 s4, s4, s5 +; GFX8-NEXT: s_sub_i32 s12, s16, s12 ; GFX8-NEXT: s_sext_i32_i16 s5, s12 ; GFX8-NEXT: s_sext_i32_i16 s4, s4 -; GFX8-NEXT: s_cmp_lt_i32 s4, s5 -; GFX8-NEXT: s_cselect_b32 s4, s4, s5 +; GFX8-NEXT: s_lshr_b32 s9, s1, 16 +; GFX8-NEXT: s_min_i32 s4, s4, s5 ; GFX8-NEXT: s_add_i32 s1, s1, s4 ; GFX8-NEXT: s_sext_i32_i16 s4, s9 -; GFX8-NEXT: s_cmp_gt_i32 s4, s19 -; GFX8-NEXT: s_cselect_b32 s5, s4, s19 -; GFX8-NEXT: s_sub_i32 s5, s16, s5 -; GFX8-NEXT: s_cmp_lt_i32 s4, s19 -; GFX8-NEXT: s_cselect_b32 s4, s4, s19 +; GFX8-NEXT: s_max_i32 s5, s4, s19 +; GFX8-NEXT: s_min_i32 s4, s4, s19 ; GFX8-NEXT: s_sub_i32 s4, s17, s4 ; GFX8-NEXT: s_sext_i32_i16 s4, s4 ; GFX8-NEXT: s_sext_i32_i16 s12, s13 -; GFX8-NEXT: s_cmp_gt_i32 s4, s12 -; GFX8-NEXT: s_cselect_b32 s4, s4, s12 +; GFX8-NEXT: s_sub_i32 s5, s16, s5 +; GFX8-NEXT: s_max_i32 s4, s4, s12 ; GFX8-NEXT: s_sext_i32_i16 s4, s4 ; GFX8-NEXT: s_sext_i32_i16 s5, s5 -; GFX8-NEXT: s_cmp_lt_i32 s4, s5 -; GFX8-NEXT: s_cselect_b32 s4, s4, s5 +; GFX8-NEXT: s_min_i32 s4, s4, s5 ; GFX8-NEXT: s_add_i32 s9, s9, s4 ; GFX8-NEXT: s_sext_i32_i16 s4, s2 -; GFX8-NEXT: s_cmp_gt_i32 s4, s19 -; GFX8-NEXT: s_cselect_b32 s5, s4, s19 -; GFX8-NEXT: s_sub_i32 s5, s16, s5 -; GFX8-NEXT: s_cmp_lt_i32 s4, s19 -; GFX8-NEXT: s_cselect_b32 s4, s4, s19 +; GFX8-NEXT: s_max_i32 s5, s4, s19 +; GFX8-NEXT: s_min_i32 s4, s4, s19 ; GFX8-NEXT: s_sub_i32 s4, s17, s4 +; GFX8-NEXT: s_lshr_b32 s14, s6, 16 ; GFX8-NEXT: s_sext_i32_i16 s4, s4 ; GFX8-NEXT: s_sext_i32_i16 s6, s6 -; GFX8-NEXT: s_cmp_gt_i32 s4, s6 -; GFX8-NEXT: s_cselect_b32 s4, s4, s6 +; GFX8-NEXT: s_max_i32 s4, s4, s6 +; GFX8-NEXT: s_sub_i32 s5, s16, s5 ; GFX8-NEXT: s_sext_i32_i16 s4, s4 ; GFX8-NEXT: s_sext_i32_i16 s5, s5 -; GFX8-NEXT: s_cmp_lt_i32 s4, s5 -; GFX8-NEXT: s_cselect_b32 s4, s4, s5 +; GFX8-NEXT: s_lshr_b32 s10, s2, 16 +; GFX8-NEXT: s_min_i32 s4, s4, s5 ; GFX8-NEXT: s_add_i32 s2, s2, s4 ; GFX8-NEXT: s_sext_i32_i16 s4, s10 -; GFX8-NEXT: s_cmp_gt_i32 s4, s19 -; GFX8-NEXT: s_cselect_b32 s5, s4, s19 -; GFX8-NEXT: s_sub_i32 s5, s16, s5 -; GFX8-NEXT: s_cmp_lt_i32 s4, s19 -; GFX8-NEXT: s_cselect_b32 s4, s4, s19 +; GFX8-NEXT: s_max_i32 s5, s4, s19 +; GFX8-NEXT: s_min_i32 s4, s4, s19 ; GFX8-NEXT: s_sub_i32 s4, s17, s4 ; GFX8-NEXT: s_sext_i32_i16 s4, s4 ; GFX8-NEXT: s_sext_i32_i16 s6, s14 -; GFX8-NEXT: s_cmp_gt_i32 s4, s6 -; GFX8-NEXT: s_cselect_b32 s4, s4, s6 +; GFX8-NEXT: s_max_i32 s4, s4, s6 +; GFX8-NEXT: s_sub_i32 s5, s16, s5 ; GFX8-NEXT: s_sext_i32_i16 s4, s4 ; GFX8-NEXT: s_sext_i32_i16 s5, s5 -; GFX8-NEXT: s_cmp_lt_i32 s4, s5 -; GFX8-NEXT: s_cselect_b32 s4, s4, s5 +; GFX8-NEXT: s_min_i32 s4, s4, s5 ; GFX8-NEXT: s_add_i32 s10, s10, s4 ; GFX8-NEXT: s_sext_i32_i16 s4, s3 -; GFX8-NEXT: s_cmp_gt_i32 s4, s19 -; GFX8-NEXT: s_cselect_b32 s5, s4, s19 -; GFX8-NEXT: s_sub_i32 s5, s16, s5 -; GFX8-NEXT: s_cmp_lt_i32 s4, s19 -; GFX8-NEXT: s_cselect_b32 s4, s4, s19 +; GFX8-NEXT: s_max_i32 s5, s4, s19 +; GFX8-NEXT: s_min_i32 s4, s4, s19 ; GFX8-NEXT: s_sub_i32 s4, s17, s4 ; GFX8-NEXT: s_sext_i32_i16 s4, s4 ; GFX8-NEXT: s_sext_i32_i16 s6, s7 -; GFX8-NEXT: s_cmp_gt_i32 s4, s6 -; GFX8-NEXT: s_cselect_b32 s4, s4, s6 +; GFX8-NEXT: s_sub_i32 s5, s16, s5 +; GFX8-NEXT: s_max_i32 s4, s4, s6 ; GFX8-NEXT: s_sext_i32_i16 s4, s4 ; GFX8-NEXT: s_sext_i32_i16 s5, s5 -; GFX8-NEXT: s_cmp_lt_i32 s4, s5 -; GFX8-NEXT: s_cselect_b32 s4, s4, s5 +; GFX8-NEXT: s_lshr_b32 s11, s3, 16 +; GFX8-NEXT: s_min_i32 s4, s4, s5 ; GFX8-NEXT: s_add_i32 s3, s3, s4 ; GFX8-NEXT: s_sext_i32_i16 s4, s11 -; GFX8-NEXT: s_cmp_gt_i32 s4, s19 -; GFX8-NEXT: s_cselect_b32 s5, s4, s19 -; GFX8-NEXT: s_sub_i32 s5, s16, s5 -; GFX8-NEXT: s_cmp_lt_i32 s4, s19 -; GFX8-NEXT: s_cselect_b32 s4, s4, s19 +; GFX8-NEXT: s_max_i32 s5, s4, s19 +; GFX8-NEXT: s_min_i32 s4, s4, s19 ; GFX8-NEXT: s_sub_i32 s4, s17, s4 +; GFX8-NEXT: s_lshr_b32 s15, s7, 16 ; GFX8-NEXT: s_sext_i32_i16 s4, s4 ; GFX8-NEXT: s_sext_i32_i16 s6, s15 -; GFX8-NEXT: s_cmp_gt_i32 s4, s6 -; GFX8-NEXT: s_cselect_b32 s4, s4, s6 +; GFX8-NEXT: s_sub_i32 s5, s16, s5 +; GFX8-NEXT: s_max_i32 s4, s4, s6 ; GFX8-NEXT: s_sext_i32_i16 s4, s4 ; GFX8-NEXT: s_sext_i32_i16 s5, s5 -; GFX8-NEXT: s_cmp_lt_i32 s4, s5 -; GFX8-NEXT: s_cselect_b32 s4, s4, s5 +; GFX8-NEXT: s_min_i32 s4, s4, s5 ; GFX8-NEXT: s_add_i32 s11, s11, s4 ; GFX8-NEXT: s_bfe_u32 s4, s8, 0x100000 ; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll @@ -61,17 +61,13 @@ ; GFX6-LABEL: s_ssubsat_i7: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_lshl_b32 s0, s0, 25 +; GFX6-NEXT: s_max_i32 s2, s0, -1 +; GFX6-NEXT: s_min_i32 s3, s0, -1 ; GFX6-NEXT: s_lshl_b32 s1, s1, 25 -; GFX6-NEXT: s_cmp_gt_i32 s0, -1 -; GFX6-NEXT: s_cselect_b32 s2, s0, -1 ; GFX6-NEXT: s_sub_i32 s2, s2, 0x7fffffff -; GFX6-NEXT: s_cmp_lt_i32 s0, -1 -; GFX6-NEXT: s_cselect_b32 s3, s0, -1 ; GFX6-NEXT: s_sub_i32 s3, s3, 0x80000000 -; GFX6-NEXT: s_cmp_gt_i32 s2, s1 -; GFX6-NEXT: s_cselect_b32 s1, s2, s1 -; GFX6-NEXT: s_cmp_lt_i32 s1, s3 -; GFX6-NEXT: s_cselect_b32 s1, s1, s3 +; GFX6-NEXT: s_max_i32 s1, s2, s1 +; GFX6-NEXT: s_min_i32 s1, s1, s3 ; GFX6-NEXT: s_sub_i32 s0, s0, s1 ; GFX6-NEXT: s_ashr_i32 s0, s0, 25 ; GFX6-NEXT: ; return to shader part epilog @@ -80,23 +76,19 @@ ; GFX8: ; %bb.0: ; GFX8-NEXT: s_bfe_u32 s2, 9, 0x100000 ; GFX8-NEXT: s_lshl_b32 s0, s0, s2 -; GFX8-NEXT: s_lshl_b32 s1, s1, s2 ; GFX8-NEXT: s_sext_i32_i16 s3, s0 ; GFX8-NEXT: s_sext_i32_i16 s4, -1 -; GFX8-NEXT: s_cmp_gt_i32 s3, s4 -; GFX8-NEXT: s_cselect_b32 s5, s3, s4 +; GFX8-NEXT: s_max_i32 s5, s3, s4 +; GFX8-NEXT: s_lshl_b32 s1, s1, s2 ; GFX8-NEXT: s_sub_i32 s5, s5, 0x7fff -; GFX8-NEXT: s_cmp_lt_i32 s3, s4 -; GFX8-NEXT: s_cselect_b32 s3, s3, s4 -; GFX8-NEXT: s_sub_i32 s3, s3, 0xffff8000 +; GFX8-NEXT: s_min_i32 s3, s3, s4 ; GFX8-NEXT: s_sext_i32_i16 s4, s5 ; GFX8-NEXT: s_sext_i32_i16 s1, s1 -; GFX8-NEXT: s_cmp_gt_i32 s4, s1 -; GFX8-NEXT: s_cselect_b32 s1, s4, s1 +; GFX8-NEXT: s_sub_i32 s3, s3, 0xffff8000 +; GFX8-NEXT: s_max_i32 s1, s4, s1 ; GFX8-NEXT: s_sext_i32_i16 s1, s1 ; GFX8-NEXT: s_sext_i32_i16 s3, s3 -; GFX8-NEXT: s_cmp_lt_i32 s1, s3 -; GFX8-NEXT: s_cselect_b32 s1, s1, s3 +; GFX8-NEXT: s_min_i32 s1, s1, s3 ; GFX8-NEXT: s_sub_i32 s0, s0, s1 ; GFX8-NEXT: s_sext_i32_i16 s0, s0 ; GFX8-NEXT: s_ashr_i32 s0, s0, s2 @@ -183,17 +175,13 @@ ; GFX6-LABEL: s_ssubsat_i8: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_lshl_b32 s0, s0, 24 +; GFX6-NEXT: s_max_i32 s2, s0, -1 +; GFX6-NEXT: s_min_i32 s3, s0, -1 ; GFX6-NEXT: s_lshl_b32 s1, s1, 24 -; GFX6-NEXT: s_cmp_gt_i32 s0, -1 -; GFX6-NEXT: s_cselect_b32 s2, s0, -1 ; GFX6-NEXT: s_sub_i32 s2, s2, 0x7fffffff -; GFX6-NEXT: s_cmp_lt_i32 s0, -1 -; GFX6-NEXT: s_cselect_b32 s3, s0, -1 ; GFX6-NEXT: s_sub_i32 s3, s3, 0x80000000 -; GFX6-NEXT: s_cmp_gt_i32 s2, s1 -; GFX6-NEXT: s_cselect_b32 s1, s2, s1 -; GFX6-NEXT: s_cmp_lt_i32 s1, s3 -; GFX6-NEXT: s_cselect_b32 s1, s1, s3 +; GFX6-NEXT: s_max_i32 s1, s2, s1 +; GFX6-NEXT: s_min_i32 s1, s1, s3 ; GFX6-NEXT: s_sub_i32 s0, s0, s1 ; GFX6-NEXT: s_ashr_i32 s0, s0, 24 ; GFX6-NEXT: ; return to shader part epilog @@ -202,23 +190,19 @@ ; GFX8: ; %bb.0: ; GFX8-NEXT: s_bfe_u32 s2, 8, 0x100000 ; GFX8-NEXT: s_lshl_b32 s0, s0, s2 -; GFX8-NEXT: s_lshl_b32 s1, s1, s2 ; GFX8-NEXT: s_sext_i32_i16 s3, s0 ; GFX8-NEXT: s_sext_i32_i16 s4, -1 -; GFX8-NEXT: s_cmp_gt_i32 s3, s4 -; GFX8-NEXT: s_cselect_b32 s5, s3, s4 +; GFX8-NEXT: s_max_i32 s5, s3, s4 +; GFX8-NEXT: s_lshl_b32 s1, s1, s2 ; GFX8-NEXT: s_sub_i32 s5, s5, 0x7fff -; GFX8-NEXT: s_cmp_lt_i32 s3, s4 -; GFX8-NEXT: s_cselect_b32 s3, s3, s4 -; GFX8-NEXT: s_sub_i32 s3, s3, 0xffff8000 +; GFX8-NEXT: s_min_i32 s3, s3, s4 ; GFX8-NEXT: s_sext_i32_i16 s4, s5 ; GFX8-NEXT: s_sext_i32_i16 s1, s1 -; GFX8-NEXT: s_cmp_gt_i32 s4, s1 -; GFX8-NEXT: s_cselect_b32 s1, s4, s1 +; GFX8-NEXT: s_sub_i32 s3, s3, 0xffff8000 +; GFX8-NEXT: s_max_i32 s1, s4, s1 ; GFX8-NEXT: s_sext_i32_i16 s1, s1 ; GFX8-NEXT: s_sext_i32_i16 s3, s3 -; GFX8-NEXT: s_cmp_lt_i32 s1, s3 -; GFX8-NEXT: s_cselect_b32 s1, s1, s3 +; GFX8-NEXT: s_min_i32 s1, s1, s3 ; GFX8-NEXT: s_sub_i32 s0, s0, s1 ; GFX8-NEXT: s_sext_i32_i16 s0, s0 ; GFX8-NEXT: s_ashr_i32 s0, s0, s2 @@ -360,38 +344,30 @@ ; GFX6-LABEL: s_ssubsat_v2i8: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_lshr_b32 s2, s0, 8 -; GFX6-NEXT: s_lshr_b32 s3, s1, 8 ; GFX6-NEXT: s_lshl_b32 s0, s0, 24 -; GFX6-NEXT: s_lshl_b32 s1, s1, 24 -; GFX6-NEXT: s_cmp_gt_i32 s0, -1 ; GFX6-NEXT: s_brev_b32 s4, -2 -; GFX6-NEXT: s_cselect_b32 s6, s0, -1 +; GFX6-NEXT: s_max_i32 s6, s0, -1 +; GFX6-NEXT: s_lshr_b32 s3, s1, 8 +; GFX6-NEXT: s_lshl_b32 s1, s1, 24 ; GFX6-NEXT: s_sub_i32 s6, s6, s4 -; GFX6-NEXT: s_cmp_lt_i32 s0, -1 ; GFX6-NEXT: s_brev_b32 s5, 1 -; GFX6-NEXT: s_cselect_b32 s7, s0, -1 +; GFX6-NEXT: s_min_i32 s7, s0, -1 ; GFX6-NEXT: s_sub_i32 s7, s7, s5 -; GFX6-NEXT: s_cmp_gt_i32 s6, s1 -; GFX6-NEXT: s_cselect_b32 s1, s6, s1 -; GFX6-NEXT: s_cmp_lt_i32 s1, s7 -; GFX6-NEXT: s_cselect_b32 s1, s1, s7 +; GFX6-NEXT: s_max_i32 s1, s6, s1 +; GFX6-NEXT: s_min_i32 s1, s1, s7 ; GFX6-NEXT: s_sub_i32 s0, s0, s1 ; GFX6-NEXT: s_lshl_b32 s1, s2, 24 -; GFX6-NEXT: s_ashr_i32 s0, s0, 24 ; GFX6-NEXT: s_lshl_b32 s2, s3, 24 -; GFX6-NEXT: s_cmp_gt_i32 s1, -1 -; GFX6-NEXT: s_cselect_b32 s3, s1, -1 +; GFX6-NEXT: s_max_i32 s3, s1, -1 ; GFX6-NEXT: s_sub_i32 s3, s3, s4 -; GFX6-NEXT: s_cmp_lt_i32 s1, -1 -; GFX6-NEXT: s_cselect_b32 s4, s1, -1 +; GFX6-NEXT: s_min_i32 s4, s1, -1 ; GFX6-NEXT: s_sub_i32 s4, s4, s5 -; GFX6-NEXT: s_cmp_gt_i32 s3, s2 -; GFX6-NEXT: s_cselect_b32 s2, s3, s2 -; GFX6-NEXT: s_cmp_lt_i32 s2, s4 -; GFX6-NEXT: s_cselect_b32 s2, s2, s4 +; GFX6-NEXT: s_max_i32 s2, s3, s2 +; GFX6-NEXT: s_min_i32 s2, s2, s4 ; GFX6-NEXT: s_sub_i32 s1, s1, s2 -; GFX6-NEXT: s_movk_i32 s2, 0xff ; GFX6-NEXT: s_ashr_i32 s1, s1, 24 +; GFX6-NEXT: s_movk_i32 s2, 0xff +; GFX6-NEXT: s_ashr_i32 s0, s0, 24 ; GFX6-NEXT: s_and_b32 s1, s1, s2 ; GFX6-NEXT: s_and_b32 s0, s0, s2 ; GFX6-NEXT: s_lshl_b32 s1, s1, 8 @@ -403,50 +379,42 @@ ; GFX8-NEXT: s_bfe_u32 s4, 8, 0x100000 ; GFX8-NEXT: s_lshr_b32 s2, s0, 8 ; GFX8-NEXT: s_lshl_b32 s0, s0, s4 -; GFX8-NEXT: s_lshr_b32 s3, s1, 8 -; GFX8-NEXT: s_lshl_b32 s1, s1, s4 ; GFX8-NEXT: s_sext_i32_i16 s7, s0 ; GFX8-NEXT: s_sext_i32_i16 s8, -1 -; GFX8-NEXT: s_cmp_gt_i32 s7, s8 +; GFX8-NEXT: s_max_i32 s9, s7, s8 ; GFX8-NEXT: s_movk_i32 s5, 0x7fff -; GFX8-NEXT: s_cselect_b32 s9, s7, s8 ; GFX8-NEXT: s_sub_i32 s9, s9, s5 -; GFX8-NEXT: s_cmp_lt_i32 s7, s8 +; GFX8-NEXT: s_lshr_b32 s3, s1, 8 +; GFX8-NEXT: s_lshl_b32 s1, s1, s4 ; GFX8-NEXT: s_movk_i32 s6, 0x8000 -; GFX8-NEXT: s_cselect_b32 s7, s7, s8 -; GFX8-NEXT: s_sub_i32 s7, s7, s6 +; GFX8-NEXT: s_min_i32 s7, s7, s8 ; GFX8-NEXT: s_sext_i32_i16 s9, s9 ; GFX8-NEXT: s_sext_i32_i16 s1, s1 -; GFX8-NEXT: s_cmp_gt_i32 s9, s1 -; GFX8-NEXT: s_cselect_b32 s1, s9, s1 +; GFX8-NEXT: s_sub_i32 s7, s7, s6 +; GFX8-NEXT: s_max_i32 s1, s9, s1 ; GFX8-NEXT: s_sext_i32_i16 s1, s1 ; GFX8-NEXT: s_sext_i32_i16 s7, s7 -; GFX8-NEXT: s_cmp_lt_i32 s1, s7 -; GFX8-NEXT: s_cselect_b32 s1, s1, s7 +; GFX8-NEXT: s_min_i32 s1, s1, s7 ; GFX8-NEXT: s_sub_i32 s0, s0, s1 -; GFX8-NEXT: s_sext_i32_i16 s0, s0 ; GFX8-NEXT: s_lshl_b32 s1, s2, s4 ; GFX8-NEXT: s_lshl_b32 s2, s3, s4 -; GFX8-NEXT: s_ashr_i32 s0, s0, s4 ; GFX8-NEXT: s_sext_i32_i16 s3, s1 -; GFX8-NEXT: s_cmp_gt_i32 s3, s8 -; GFX8-NEXT: s_cselect_b32 s7, s3, s8 +; GFX8-NEXT: s_max_i32 s7, s3, s8 ; GFX8-NEXT: s_sub_i32 s5, s7, s5 -; GFX8-NEXT: s_cmp_lt_i32 s3, s8 -; GFX8-NEXT: s_cselect_b32 s3, s3, s8 -; GFX8-NEXT: s_sub_i32 s3, s3, s6 +; GFX8-NEXT: s_min_i32 s3, s3, s8 ; GFX8-NEXT: s_sext_i32_i16 s5, s5 ; GFX8-NEXT: s_sext_i32_i16 s2, s2 -; GFX8-NEXT: s_cmp_gt_i32 s5, s2 -; GFX8-NEXT: s_cselect_b32 s2, s5, s2 +; GFX8-NEXT: s_sub_i32 s3, s3, s6 +; GFX8-NEXT: s_max_i32 s2, s5, s2 ; GFX8-NEXT: s_sext_i32_i16 s2, s2 ; GFX8-NEXT: s_sext_i32_i16 s3, s3 -; GFX8-NEXT: s_cmp_lt_i32 s2, s3 -; GFX8-NEXT: s_cselect_b32 s2, s2, s3 +; GFX8-NEXT: s_min_i32 s2, s2, s3 ; GFX8-NEXT: s_sub_i32 s1, s1, s2 ; GFX8-NEXT: s_sext_i32_i16 s1, s1 -; GFX8-NEXT: s_movk_i32 s2, 0xff +; GFX8-NEXT: s_sext_i32_i16 s0, s0 ; GFX8-NEXT: s_ashr_i32 s1, s1, s4 +; GFX8-NEXT: s_movk_i32 s2, 0xff +; GFX8-NEXT: s_ashr_i32 s0, s0, s4 ; GFX8-NEXT: s_and_b32 s1, s1, s2 ; GFX8-NEXT: s_and_b32 s0, s0, s2 ; GFX8-NEXT: s_lshl_b32 s1, s1, s4 @@ -714,68 +682,52 @@ ; GFX6-NEXT: s_lshr_b32 s2, s0, 8 ; GFX6-NEXT: s_lshr_b32 s3, s0, 16 ; GFX6-NEXT: s_lshr_b32 s4, s0, 24 +; GFX6-NEXT: s_lshl_b32 s0, s0, 24 +; GFX6-NEXT: s_brev_b32 s8, -2 +; GFX6-NEXT: s_max_i32 s10, s0, -1 ; GFX6-NEXT: s_lshr_b32 s5, s1, 8 ; GFX6-NEXT: s_lshr_b32 s6, s1, 16 ; GFX6-NEXT: s_lshr_b32 s7, s1, 24 -; GFX6-NEXT: s_lshl_b32 s0, s0, 24 ; GFX6-NEXT: s_lshl_b32 s1, s1, 24 -; GFX6-NEXT: s_cmp_gt_i32 s0, -1 -; GFX6-NEXT: s_brev_b32 s8, -2 -; GFX6-NEXT: s_cselect_b32 s10, s0, -1 ; GFX6-NEXT: s_sub_i32 s10, s10, s8 -; GFX6-NEXT: s_cmp_lt_i32 s0, -1 ; GFX6-NEXT: s_brev_b32 s9, 1 -; GFX6-NEXT: s_cselect_b32 s11, s0, -1 +; GFX6-NEXT: s_min_i32 s11, s0, -1 ; GFX6-NEXT: s_sub_i32 s11, s11, s9 -; GFX6-NEXT: s_cmp_gt_i32 s10, s1 -; GFX6-NEXT: s_cselect_b32 s1, s10, s1 -; GFX6-NEXT: s_cmp_lt_i32 s1, s11 -; GFX6-NEXT: s_cselect_b32 s1, s1, s11 +; GFX6-NEXT: s_max_i32 s1, s10, s1 +; GFX6-NEXT: s_min_i32 s1, s1, s11 ; GFX6-NEXT: s_sub_i32 s0, s0, s1 ; GFX6-NEXT: s_lshl_b32 s1, s2, 24 -; GFX6-NEXT: s_ashr_i32 s0, s0, 24 ; GFX6-NEXT: s_lshl_b32 s2, s5, 24 -; GFX6-NEXT: s_cmp_gt_i32 s1, -1 -; GFX6-NEXT: s_cselect_b32 s5, s1, -1 +; GFX6-NEXT: s_max_i32 s5, s1, -1 +; GFX6-NEXT: s_min_i32 s10, s1, -1 ; GFX6-NEXT: s_sub_i32 s5, s5, s8 -; GFX6-NEXT: s_cmp_lt_i32 s1, -1 -; GFX6-NEXT: s_cselect_b32 s10, s1, -1 ; GFX6-NEXT: s_sub_i32 s10, s10, s9 -; GFX6-NEXT: s_cmp_gt_i32 s5, s2 -; GFX6-NEXT: s_cselect_b32 s2, s5, s2 -; GFX6-NEXT: s_cmp_lt_i32 s2, s10 -; GFX6-NEXT: s_cselect_b32 s2, s2, s10 +; GFX6-NEXT: s_max_i32 s2, s5, s2 +; GFX6-NEXT: s_min_i32 s2, s2, s10 ; GFX6-NEXT: s_sub_i32 s1, s1, s2 ; GFX6-NEXT: s_lshl_b32 s2, s3, 24 -; GFX6-NEXT: s_ashr_i32 s1, s1, 24 +; GFX6-NEXT: s_max_i32 s5, s2, -1 ; GFX6-NEXT: s_lshl_b32 s3, s6, 24 -; GFX6-NEXT: s_cmp_gt_i32 s2, -1 -; GFX6-NEXT: s_cselect_b32 s5, s2, -1 +; GFX6-NEXT: s_min_i32 s6, s2, -1 ; GFX6-NEXT: s_sub_i32 s5, s5, s8 -; GFX6-NEXT: s_cmp_lt_i32 s2, -1 -; GFX6-NEXT: s_cselect_b32 s6, s2, -1 ; GFX6-NEXT: s_sub_i32 s6, s6, s9 -; GFX6-NEXT: s_cmp_gt_i32 s5, s3 -; GFX6-NEXT: s_cselect_b32 s3, s5, s3 -; GFX6-NEXT: s_cmp_lt_i32 s3, s6 -; GFX6-NEXT: s_cselect_b32 s3, s3, s6 +; GFX6-NEXT: s_max_i32 s3, s5, s3 +; GFX6-NEXT: s_min_i32 s3, s3, s6 ; GFX6-NEXT: s_sub_i32 s2, s2, s3 ; GFX6-NEXT: s_lshl_b32 s3, s4, 24 -; GFX6-NEXT: s_ashr_i32 s2, s2, 24 +; GFX6-NEXT: s_max_i32 s5, s3, -1 +; GFX6-NEXT: s_min_i32 s6, s3, -1 ; GFX6-NEXT: s_lshl_b32 s4, s7, 24 -; GFX6-NEXT: s_cmp_gt_i32 s3, -1 -; GFX6-NEXT: s_cselect_b32 s5, s3, -1 ; GFX6-NEXT: s_sub_i32 s5, s5, s8 -; GFX6-NEXT: s_cmp_lt_i32 s3, -1 -; GFX6-NEXT: s_cselect_b32 s6, s3, -1 ; GFX6-NEXT: s_sub_i32 s6, s6, s9 -; GFX6-NEXT: s_cmp_gt_i32 s5, s4 -; GFX6-NEXT: s_cselect_b32 s4, s5, s4 -; GFX6-NEXT: s_cmp_lt_i32 s4, s6 -; GFX6-NEXT: s_cselect_b32 s4, s4, s6 +; GFX6-NEXT: s_max_i32 s4, s5, s4 +; GFX6-NEXT: s_min_i32 s4, s4, s6 ; GFX6-NEXT: s_sub_i32 s3, s3, s4 +; GFX6-NEXT: s_ashr_i32 s1, s1, 24 ; GFX6-NEXT: s_movk_i32 s4, 0xff +; GFX6-NEXT: s_ashr_i32 s0, s0, 24 ; GFX6-NEXT: s_and_b32 s1, s1, s4 +; GFX6-NEXT: s_ashr_i32 s2, s2, 24 ; GFX6-NEXT: s_and_b32 s0, s0, s4 ; GFX6-NEXT: s_lshl_b32 s1, s1, 8 ; GFX6-NEXT: s_or_b32 s0, s0, s1 @@ -795,91 +747,75 @@ ; GFX8-NEXT: s_lshr_b32 s3, s0, 16 ; GFX8-NEXT: s_lshr_b32 s4, s0, 24 ; GFX8-NEXT: s_lshl_b32 s0, s0, s8 -; GFX8-NEXT: s_lshr_b32 s5, s1, 8 -; GFX8-NEXT: s_lshr_b32 s6, s1, 16 -; GFX8-NEXT: s_lshr_b32 s7, s1, 24 -; GFX8-NEXT: s_lshl_b32 s1, s1, s8 ; GFX8-NEXT: s_sext_i32_i16 s11, s0 ; GFX8-NEXT: s_sext_i32_i16 s12, -1 -; GFX8-NEXT: s_cmp_gt_i32 s11, s12 +; GFX8-NEXT: s_max_i32 s13, s11, s12 ; GFX8-NEXT: s_movk_i32 s9, 0x7fff -; GFX8-NEXT: s_cselect_b32 s13, s11, s12 ; GFX8-NEXT: s_sub_i32 s13, s13, s9 -; GFX8-NEXT: s_cmp_lt_i32 s11, s12 +; GFX8-NEXT: s_lshr_b32 s5, s1, 8 +; GFX8-NEXT: s_lshr_b32 s6, s1, 16 +; GFX8-NEXT: s_lshr_b32 s7, s1, 24 +; GFX8-NEXT: s_lshl_b32 s1, s1, s8 ; GFX8-NEXT: s_movk_i32 s10, 0x8000 -; GFX8-NEXT: s_cselect_b32 s11, s11, s12 -; GFX8-NEXT: s_sub_i32 s11, s11, s10 +; GFX8-NEXT: s_min_i32 s11, s11, s12 ; GFX8-NEXT: s_sext_i32_i16 s13, s13 ; GFX8-NEXT: s_sext_i32_i16 s1, s1 -; GFX8-NEXT: s_cmp_gt_i32 s13, s1 -; GFX8-NEXT: s_cselect_b32 s1, s13, s1 +; GFX8-NEXT: s_sub_i32 s11, s11, s10 +; GFX8-NEXT: s_max_i32 s1, s13, s1 ; GFX8-NEXT: s_sext_i32_i16 s1, s1 ; GFX8-NEXT: s_sext_i32_i16 s11, s11 -; GFX8-NEXT: s_cmp_lt_i32 s1, s11 -; GFX8-NEXT: s_cselect_b32 s1, s1, s11 +; GFX8-NEXT: s_min_i32 s1, s1, s11 ; GFX8-NEXT: s_sub_i32 s0, s0, s1 -; GFX8-NEXT: s_sext_i32_i16 s0, s0 ; GFX8-NEXT: s_lshl_b32 s1, s2, s8 ; GFX8-NEXT: s_lshl_b32 s2, s5, s8 -; GFX8-NEXT: s_ashr_i32 s0, s0, s8 ; GFX8-NEXT: s_sext_i32_i16 s5, s1 -; GFX8-NEXT: s_cmp_gt_i32 s5, s12 -; GFX8-NEXT: s_cselect_b32 s11, s5, s12 +; GFX8-NEXT: s_max_i32 s11, s5, s12 ; GFX8-NEXT: s_sub_i32 s11, s11, s9 -; GFX8-NEXT: s_cmp_lt_i32 s5, s12 -; GFX8-NEXT: s_cselect_b32 s5, s5, s12 -; GFX8-NEXT: s_sub_i32 s5, s5, s10 +; GFX8-NEXT: s_min_i32 s5, s5, s12 ; GFX8-NEXT: s_sext_i32_i16 s11, s11 ; GFX8-NEXT: s_sext_i32_i16 s2, s2 -; GFX8-NEXT: s_cmp_gt_i32 s11, s2 -; GFX8-NEXT: s_cselect_b32 s2, s11, s2 +; GFX8-NEXT: s_sub_i32 s5, s5, s10 +; GFX8-NEXT: s_max_i32 s2, s11, s2 ; GFX8-NEXT: s_sext_i32_i16 s2, s2 ; GFX8-NEXT: s_sext_i32_i16 s5, s5 -; GFX8-NEXT: s_cmp_lt_i32 s2, s5 -; GFX8-NEXT: s_cselect_b32 s2, s2, s5 +; GFX8-NEXT: s_min_i32 s2, s2, s5 ; GFX8-NEXT: s_sub_i32 s1, s1, s2 -; GFX8-NEXT: s_sext_i32_i16 s1, s1 ; GFX8-NEXT: s_lshl_b32 s2, s3, s8 -; GFX8-NEXT: s_lshl_b32 s3, s6, s8 -; GFX8-NEXT: s_ashr_i32 s1, s1, s8 ; GFX8-NEXT: s_sext_i32_i16 s5, s2 -; GFX8-NEXT: s_cmp_gt_i32 s5, s12 -; GFX8-NEXT: s_cselect_b32 s6, s5, s12 +; GFX8-NEXT: s_lshl_b32 s3, s6, s8 +; GFX8-NEXT: s_max_i32 s6, s5, s12 ; GFX8-NEXT: s_sub_i32 s6, s6, s9 -; GFX8-NEXT: s_cmp_lt_i32 s5, s12 -; GFX8-NEXT: s_cselect_b32 s5, s5, s12 -; GFX8-NEXT: s_sub_i32 s5, s5, s10 +; GFX8-NEXT: s_min_i32 s5, s5, s12 ; GFX8-NEXT: s_sext_i32_i16 s6, s6 ; GFX8-NEXT: s_sext_i32_i16 s3, s3 -; GFX8-NEXT: s_cmp_gt_i32 s6, s3 -; GFX8-NEXT: s_cselect_b32 s3, s6, s3 +; GFX8-NEXT: s_sub_i32 s5, s5, s10 +; GFX8-NEXT: s_max_i32 s3, s6, s3 ; GFX8-NEXT: s_sext_i32_i16 s3, s3 ; GFX8-NEXT: s_sext_i32_i16 s5, s5 -; GFX8-NEXT: s_cmp_lt_i32 s3, s5 -; GFX8-NEXT: s_cselect_b32 s3, s3, s5 +; GFX8-NEXT: s_min_i32 s3, s3, s5 ; GFX8-NEXT: s_sub_i32 s2, s2, s3 -; GFX8-NEXT: s_sext_i32_i16 s2, s2 ; GFX8-NEXT: s_lshl_b32 s3, s4, s8 -; GFX8-NEXT: s_lshl_b32 s4, s7, s8 -; GFX8-NEXT: s_ashr_i32 s2, s2, s8 ; GFX8-NEXT: s_sext_i32_i16 s5, s3 -; GFX8-NEXT: s_cmp_gt_i32 s5, s12 -; GFX8-NEXT: s_cselect_b32 s6, s5, s12 +; GFX8-NEXT: s_max_i32 s6, s5, s12 +; GFX8-NEXT: s_lshl_b32 s4, s7, s8 ; GFX8-NEXT: s_sub_i32 s6, s6, s9 -; GFX8-NEXT: s_cmp_lt_i32 s5, s12 -; GFX8-NEXT: s_cselect_b32 s5, s5, s12 -; GFX8-NEXT: s_sub_i32 s5, s5, s10 +; GFX8-NEXT: s_min_i32 s5, s5, s12 ; GFX8-NEXT: s_sext_i32_i16 s6, s6 ; GFX8-NEXT: s_sext_i32_i16 s4, s4 -; GFX8-NEXT: s_cmp_gt_i32 s6, s4 -; GFX8-NEXT: s_cselect_b32 s4, s6, s4 +; GFX8-NEXT: s_sub_i32 s5, s5, s10 +; GFX8-NEXT: s_max_i32 s4, s6, s4 ; GFX8-NEXT: s_sext_i32_i16 s4, s4 ; GFX8-NEXT: s_sext_i32_i16 s5, s5 -; GFX8-NEXT: s_cmp_lt_i32 s4, s5 -; GFX8-NEXT: s_cselect_b32 s4, s4, s5 +; GFX8-NEXT: s_sext_i32_i16 s1, s1 +; GFX8-NEXT: s_min_i32 s4, s4, s5 ; GFX8-NEXT: s_sub_i32 s3, s3, s4 +; GFX8-NEXT: s_sext_i32_i16 s0, s0 +; GFX8-NEXT: s_ashr_i32 s1, s1, s8 ; GFX8-NEXT: s_movk_i32 s4, 0xff +; GFX8-NEXT: s_ashr_i32 s0, s0, s8 +; GFX8-NEXT: s_sext_i32_i16 s2, s2 ; GFX8-NEXT: s_and_b32 s1, s1, s4 +; GFX8-NEXT: s_ashr_i32 s2, s2, s8 ; GFX8-NEXT: s_and_b32 s0, s0, s4 ; GFX8-NEXT: s_lshl_b32 s1, s1, 8 ; GFX8-NEXT: s_sext_i32_i16 s3, s3 @@ -1046,17 +982,13 @@ ; GFX6-LABEL: s_ssubsat_i24: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_lshl_b32 s0, s0, 8 +; GFX6-NEXT: s_max_i32 s2, s0, -1 +; GFX6-NEXT: s_min_i32 s3, s0, -1 ; GFX6-NEXT: s_lshl_b32 s1, s1, 8 -; GFX6-NEXT: s_cmp_gt_i32 s0, -1 -; GFX6-NEXT: s_cselect_b32 s2, s0, -1 ; GFX6-NEXT: s_sub_i32 s2, s2, 0x7fffffff -; GFX6-NEXT: s_cmp_lt_i32 s0, -1 -; GFX6-NEXT: s_cselect_b32 s3, s0, -1 ; GFX6-NEXT: s_sub_i32 s3, s3, 0x80000000 -; GFX6-NEXT: s_cmp_gt_i32 s2, s1 -; GFX6-NEXT: s_cselect_b32 s1, s2, s1 -; GFX6-NEXT: s_cmp_lt_i32 s1, s3 -; GFX6-NEXT: s_cselect_b32 s1, s1, s3 +; GFX6-NEXT: s_max_i32 s1, s2, s1 +; GFX6-NEXT: s_min_i32 s1, s1, s3 ; GFX6-NEXT: s_sub_i32 s0, s0, s1 ; GFX6-NEXT: s_ashr_i32 s0, s0, 8 ; GFX6-NEXT: ; return to shader part epilog @@ -1145,31 +1077,23 @@ define amdgpu_ps i32 @s_ssubsat_i32(i32 inreg %lhs, i32 inreg %rhs) { ; GFX6-LABEL: s_ssubsat_i32: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_cmp_gt_i32 s0, -1 -; GFX6-NEXT: s_cselect_b32 s2, s0, -1 +; GFX6-NEXT: s_max_i32 s2, s0, -1 +; GFX6-NEXT: s_min_i32 s3, s0, -1 ; GFX6-NEXT: s_sub_i32 s2, s2, 0x7fffffff -; GFX6-NEXT: s_cmp_lt_i32 s0, -1 -; GFX6-NEXT: s_cselect_b32 s3, s0, -1 ; GFX6-NEXT: s_sub_i32 s3, s3, 0x80000000 -; GFX6-NEXT: s_cmp_gt_i32 s2, s1 -; GFX6-NEXT: s_cselect_b32 s1, s2, s1 -; GFX6-NEXT: s_cmp_lt_i32 s1, s3 -; GFX6-NEXT: s_cselect_b32 s1, s1, s3 +; GFX6-NEXT: s_max_i32 s1, s2, s1 +; GFX6-NEXT: s_min_i32 s1, s1, s3 ; GFX6-NEXT: s_sub_i32 s0, s0, s1 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_ssubsat_i32: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_cmp_gt_i32 s0, -1 -; GFX8-NEXT: s_cselect_b32 s2, s0, -1 +; GFX8-NEXT: s_max_i32 s2, s0, -1 +; GFX8-NEXT: s_min_i32 s3, s0, -1 ; GFX8-NEXT: s_sub_i32 s2, s2, 0x7fffffff -; GFX8-NEXT: s_cmp_lt_i32 s0, -1 -; GFX8-NEXT: s_cselect_b32 s3, s0, -1 ; GFX8-NEXT: s_sub_i32 s3, s3, 0x80000000 -; GFX8-NEXT: s_cmp_gt_i32 s2, s1 -; GFX8-NEXT: s_cselect_b32 s1, s2, s1 -; GFX8-NEXT: s_cmp_lt_i32 s1, s3 -; GFX8-NEXT: s_cselect_b32 s1, s1, s3 +; GFX8-NEXT: s_max_i32 s1, s2, s1 +; GFX8-NEXT: s_min_i32 s1, s1, s3 ; GFX8-NEXT: s_sub_i32 s0, s0, s1 ; GFX8-NEXT: ; return to shader part epilog ; @@ -1192,11 +1116,9 @@ define amdgpu_ps float @ssubsat_i32_sv(i32 inreg %lhs, i32 %rhs) { ; GFX6-LABEL: ssubsat_i32_sv: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_cmp_gt_i32 s0, -1 -; GFX6-NEXT: s_cselect_b32 s1, s0, -1 +; GFX6-NEXT: s_max_i32 s1, s0, -1 +; GFX6-NEXT: s_min_i32 s2, s0, -1 ; GFX6-NEXT: s_sub_i32 s1, s1, 0x7fffffff -; GFX6-NEXT: s_cmp_lt_i32 s0, -1 -; GFX6-NEXT: s_cselect_b32 s2, s0, -1 ; GFX6-NEXT: s_sub_i32 s2, s2, 0x80000000 ; GFX6-NEXT: v_max_i32_e32 v0, s1, v0 ; GFX6-NEXT: v_min_i32_e32 v0, s2, v0 @@ -1205,11 +1127,9 @@ ; ; GFX8-LABEL: ssubsat_i32_sv: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_cmp_gt_i32 s0, -1 -; GFX8-NEXT: s_cselect_b32 s1, s0, -1 +; GFX8-NEXT: s_max_i32 s1, s0, -1 +; GFX8-NEXT: s_min_i32 s2, s0, -1 ; GFX8-NEXT: s_sub_i32 s1, s1, 0x7fffffff -; GFX8-NEXT: s_cmp_lt_i32 s0, -1 -; GFX8-NEXT: s_cselect_b32 s2, s0, -1 ; GFX8-NEXT: s_sub_i32 s2, s2, 0x80000000 ; GFX8-NEXT: v_max_i32_e32 v0, s1, v0 ; GFX8-NEXT: v_min_i32_e32 v0, s2, v0 @@ -1331,57 +1251,41 @@ define amdgpu_ps <2 x i32> @s_ssubsat_v2i32(<2 x i32> inreg %lhs, <2 x i32> inreg %rhs) { ; GFX6-LABEL: s_ssubsat_v2i32: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_cmp_gt_i32 s0, -1 ; GFX6-NEXT: s_brev_b32 s4, -2 -; GFX6-NEXT: s_cselect_b32 s6, s0, -1 +; GFX6-NEXT: s_max_i32 s6, s0, -1 ; GFX6-NEXT: s_sub_i32 s6, s6, s4 -; GFX6-NEXT: s_cmp_lt_i32 s0, -1 ; GFX6-NEXT: s_brev_b32 s5, 1 -; GFX6-NEXT: s_cselect_b32 s7, s0, -1 +; GFX6-NEXT: s_min_i32 s7, s0, -1 ; GFX6-NEXT: s_sub_i32 s7, s7, s5 -; GFX6-NEXT: s_cmp_gt_i32 s6, s2 -; GFX6-NEXT: s_cselect_b32 s2, s6, s2 -; GFX6-NEXT: s_cmp_lt_i32 s2, s7 -; GFX6-NEXT: s_cselect_b32 s2, s2, s7 +; GFX6-NEXT: s_max_i32 s2, s6, s2 +; GFX6-NEXT: s_min_i32 s2, s2, s7 ; GFX6-NEXT: s_sub_i32 s0, s0, s2 -; GFX6-NEXT: s_cmp_gt_i32 s1, -1 -; GFX6-NEXT: s_cselect_b32 s2, s1, -1 +; GFX6-NEXT: s_max_i32 s2, s1, -1 ; GFX6-NEXT: s_sub_i32 s2, s2, s4 -; GFX6-NEXT: s_cmp_lt_i32 s1, -1 -; GFX6-NEXT: s_cselect_b32 s4, s1, -1 +; GFX6-NEXT: s_min_i32 s4, s1, -1 ; GFX6-NEXT: s_sub_i32 s4, s4, s5 -; GFX6-NEXT: s_cmp_gt_i32 s2, s3 -; GFX6-NEXT: s_cselect_b32 s2, s2, s3 -; GFX6-NEXT: s_cmp_lt_i32 s2, s4 -; GFX6-NEXT: s_cselect_b32 s2, s2, s4 +; GFX6-NEXT: s_max_i32 s2, s2, s3 +; GFX6-NEXT: s_min_i32 s2, s2, s4 ; GFX6-NEXT: s_sub_i32 s1, s1, s2 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_ssubsat_v2i32: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_cmp_gt_i32 s0, -1 ; GFX8-NEXT: s_brev_b32 s4, -2 -; GFX8-NEXT: s_cselect_b32 s6, s0, -1 +; GFX8-NEXT: s_max_i32 s6, s0, -1 ; GFX8-NEXT: s_sub_i32 s6, s6, s4 -; GFX8-NEXT: s_cmp_lt_i32 s0, -1 ; GFX8-NEXT: s_brev_b32 s5, 1 -; GFX8-NEXT: s_cselect_b32 s7, s0, -1 +; GFX8-NEXT: s_min_i32 s7, s0, -1 ; GFX8-NEXT: s_sub_i32 s7, s7, s5 -; GFX8-NEXT: s_cmp_gt_i32 s6, s2 -; GFX8-NEXT: s_cselect_b32 s2, s6, s2 -; GFX8-NEXT: s_cmp_lt_i32 s2, s7 -; GFX8-NEXT: s_cselect_b32 s2, s2, s7 +; GFX8-NEXT: s_max_i32 s2, s6, s2 +; GFX8-NEXT: s_min_i32 s2, s2, s7 ; GFX8-NEXT: s_sub_i32 s0, s0, s2 -; GFX8-NEXT: s_cmp_gt_i32 s1, -1 -; GFX8-NEXT: s_cselect_b32 s2, s1, -1 +; GFX8-NEXT: s_max_i32 s2, s1, -1 ; GFX8-NEXT: s_sub_i32 s2, s2, s4 -; GFX8-NEXT: s_cmp_lt_i32 s1, -1 -; GFX8-NEXT: s_cselect_b32 s4, s1, -1 +; GFX8-NEXT: s_min_i32 s4, s1, -1 ; GFX8-NEXT: s_sub_i32 s4, s4, s5 -; GFX8-NEXT: s_cmp_gt_i32 s2, s3 -; GFX8-NEXT: s_cselect_b32 s2, s2, s3 -; GFX8-NEXT: s_cmp_lt_i32 s2, s4 -; GFX8-NEXT: s_cselect_b32 s2, s2, s4 +; GFX8-NEXT: s_max_i32 s2, s2, s3 +; GFX8-NEXT: s_min_i32 s2, s2, s4 ; GFX8-NEXT: s_sub_i32 s1, s1, s2 ; GFX8-NEXT: ; return to shader part epilog ; @@ -1486,79 +1390,55 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v3i32(<3 x i32> inreg %lhs, <3 x i32> inreg %rhs) { ; GFX6-LABEL: s_ssubsat_v3i32: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_cmp_gt_i32 s0, -1 ; GFX6-NEXT: s_brev_b32 s6, -2 -; GFX6-NEXT: s_cselect_b32 s8, s0, -1 +; GFX6-NEXT: s_max_i32 s8, s0, -1 ; GFX6-NEXT: s_sub_i32 s8, s8, s6 -; GFX6-NEXT: s_cmp_lt_i32 s0, -1 ; GFX6-NEXT: s_brev_b32 s7, 1 -; GFX6-NEXT: s_cselect_b32 s9, s0, -1 +; GFX6-NEXT: s_min_i32 s9, s0, -1 +; GFX6-NEXT: s_max_i32 s3, s8, s3 ; GFX6-NEXT: s_sub_i32 s9, s9, s7 -; GFX6-NEXT: s_cmp_gt_i32 s8, s3 -; GFX6-NEXT: s_cselect_b32 s3, s8, s3 -; GFX6-NEXT: s_cmp_lt_i32 s3, s9 -; GFX6-NEXT: s_cselect_b32 s3, s3, s9 +; GFX6-NEXT: s_min_i32 s3, s3, s9 ; GFX6-NEXT: s_sub_i32 s0, s0, s3 -; GFX6-NEXT: s_cmp_gt_i32 s1, -1 -; GFX6-NEXT: s_cselect_b32 s3, s1, -1 +; GFX6-NEXT: s_max_i32 s3, s1, -1 ; GFX6-NEXT: s_sub_i32 s3, s3, s6 -; GFX6-NEXT: s_cmp_lt_i32 s1, -1 -; GFX6-NEXT: s_cselect_b32 s8, s1, -1 +; GFX6-NEXT: s_min_i32 s8, s1, -1 +; GFX6-NEXT: s_max_i32 s3, s3, s4 ; GFX6-NEXT: s_sub_i32 s8, s8, s7 -; GFX6-NEXT: s_cmp_gt_i32 s3, s4 -; GFX6-NEXT: s_cselect_b32 s3, s3, s4 -; GFX6-NEXT: s_cmp_lt_i32 s3, s8 -; GFX6-NEXT: s_cselect_b32 s3, s3, s8 +; GFX6-NEXT: s_min_i32 s3, s3, s8 ; GFX6-NEXT: s_sub_i32 s1, s1, s3 -; GFX6-NEXT: s_cmp_gt_i32 s2, -1 -; GFX6-NEXT: s_cselect_b32 s3, s2, -1 +; GFX6-NEXT: s_max_i32 s3, s2, -1 ; GFX6-NEXT: s_sub_i32 s3, s3, s6 -; GFX6-NEXT: s_cmp_lt_i32 s2, -1 -; GFX6-NEXT: s_cselect_b32 s4, s2, -1 +; GFX6-NEXT: s_min_i32 s4, s2, -1 ; GFX6-NEXT: s_sub_i32 s4, s4, s7 -; GFX6-NEXT: s_cmp_gt_i32 s3, s5 -; GFX6-NEXT: s_cselect_b32 s3, s3, s5 -; GFX6-NEXT: s_cmp_lt_i32 s3, s4 -; GFX6-NEXT: s_cselect_b32 s3, s3, s4 +; GFX6-NEXT: s_max_i32 s3, s3, s5 +; GFX6-NEXT: s_min_i32 s3, s3, s4 ; GFX6-NEXT: s_sub_i32 s2, s2, s3 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_ssubsat_v3i32: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_cmp_gt_i32 s0, -1 ; GFX8-NEXT: s_brev_b32 s6, -2 -; GFX8-NEXT: s_cselect_b32 s8, s0, -1 +; GFX8-NEXT: s_max_i32 s8, s0, -1 ; GFX8-NEXT: s_sub_i32 s8, s8, s6 -; GFX8-NEXT: s_cmp_lt_i32 s0, -1 ; GFX8-NEXT: s_brev_b32 s7, 1 -; GFX8-NEXT: s_cselect_b32 s9, s0, -1 +; GFX8-NEXT: s_min_i32 s9, s0, -1 +; GFX8-NEXT: s_max_i32 s3, s8, s3 ; GFX8-NEXT: s_sub_i32 s9, s9, s7 -; GFX8-NEXT: s_cmp_gt_i32 s8, s3 -; GFX8-NEXT: s_cselect_b32 s3, s8, s3 -; GFX8-NEXT: s_cmp_lt_i32 s3, s9 -; GFX8-NEXT: s_cselect_b32 s3, s3, s9 +; GFX8-NEXT: s_min_i32 s3, s3, s9 ; GFX8-NEXT: s_sub_i32 s0, s0, s3 -; GFX8-NEXT: s_cmp_gt_i32 s1, -1 -; GFX8-NEXT: s_cselect_b32 s3, s1, -1 +; GFX8-NEXT: s_max_i32 s3, s1, -1 ; GFX8-NEXT: s_sub_i32 s3, s3, s6 -; GFX8-NEXT: s_cmp_lt_i32 s1, -1 -; GFX8-NEXT: s_cselect_b32 s8, s1, -1 +; GFX8-NEXT: s_min_i32 s8, s1, -1 +; GFX8-NEXT: s_max_i32 s3, s3, s4 ; GFX8-NEXT: s_sub_i32 s8, s8, s7 -; GFX8-NEXT: s_cmp_gt_i32 s3, s4 -; GFX8-NEXT: s_cselect_b32 s3, s3, s4 -; GFX8-NEXT: s_cmp_lt_i32 s3, s8 -; GFX8-NEXT: s_cselect_b32 s3, s3, s8 +; GFX8-NEXT: s_min_i32 s3, s3, s8 ; GFX8-NEXT: s_sub_i32 s1, s1, s3 -; GFX8-NEXT: s_cmp_gt_i32 s2, -1 -; GFX8-NEXT: s_cselect_b32 s3, s2, -1 +; GFX8-NEXT: s_max_i32 s3, s2, -1 ; GFX8-NEXT: s_sub_i32 s3, s3, s6 -; GFX8-NEXT: s_cmp_lt_i32 s2, -1 -; GFX8-NEXT: s_cselect_b32 s4, s2, -1 +; GFX8-NEXT: s_min_i32 s4, s2, -1 ; GFX8-NEXT: s_sub_i32 s4, s4, s7 -; GFX8-NEXT: s_cmp_gt_i32 s3, s5 -; GFX8-NEXT: s_cselect_b32 s3, s3, s5 -; GFX8-NEXT: s_cmp_lt_i32 s3, s4 -; GFX8-NEXT: s_cselect_b32 s3, s3, s4 +; GFX8-NEXT: s_max_i32 s3, s3, s5 +; GFX8-NEXT: s_min_i32 s3, s3, s4 ; GFX8-NEXT: s_sub_i32 s2, s2, s3 ; GFX8-NEXT: ; return to shader part epilog ; @@ -1684,101 +1564,69 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v4i32(<4 x i32> inreg %lhs, <4 x i32> inreg %rhs) { ; GFX6-LABEL: s_ssubsat_v4i32: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_cmp_gt_i32 s0, -1 ; GFX6-NEXT: s_brev_b32 s8, -2 -; GFX6-NEXT: s_cselect_b32 s10, s0, -1 +; GFX6-NEXT: s_max_i32 s10, s0, -1 ; GFX6-NEXT: s_sub_i32 s10, s10, s8 -; GFX6-NEXT: s_cmp_lt_i32 s0, -1 ; GFX6-NEXT: s_brev_b32 s9, 1 -; GFX6-NEXT: s_cselect_b32 s11, s0, -1 +; GFX6-NEXT: s_min_i32 s11, s0, -1 +; GFX6-NEXT: s_max_i32 s4, s10, s4 ; GFX6-NEXT: s_sub_i32 s11, s11, s9 -; GFX6-NEXT: s_cmp_gt_i32 s10, s4 -; GFX6-NEXT: s_cselect_b32 s4, s10, s4 -; GFX6-NEXT: s_cmp_lt_i32 s4, s11 -; GFX6-NEXT: s_cselect_b32 s4, s4, s11 +; GFX6-NEXT: s_min_i32 s4, s4, s11 ; GFX6-NEXT: s_sub_i32 s0, s0, s4 -; GFX6-NEXT: s_cmp_gt_i32 s1, -1 -; GFX6-NEXT: s_cselect_b32 s4, s1, -1 +; GFX6-NEXT: s_max_i32 s4, s1, -1 ; GFX6-NEXT: s_sub_i32 s4, s4, s8 -; GFX6-NEXT: s_cmp_lt_i32 s1, -1 -; GFX6-NEXT: s_cselect_b32 s10, s1, -1 +; GFX6-NEXT: s_min_i32 s10, s1, -1 +; GFX6-NEXT: s_max_i32 s4, s4, s5 ; GFX6-NEXT: s_sub_i32 s10, s10, s9 -; GFX6-NEXT: s_cmp_gt_i32 s4, s5 -; GFX6-NEXT: s_cselect_b32 s4, s4, s5 -; GFX6-NEXT: s_cmp_lt_i32 s4, s10 -; GFX6-NEXT: s_cselect_b32 s4, s4, s10 +; GFX6-NEXT: s_min_i32 s4, s4, s10 ; GFX6-NEXT: s_sub_i32 s1, s1, s4 -; GFX6-NEXT: s_cmp_gt_i32 s2, -1 -; GFX6-NEXT: s_cselect_b32 s4, s2, -1 +; GFX6-NEXT: s_max_i32 s4, s2, -1 ; GFX6-NEXT: s_sub_i32 s4, s4, s8 -; GFX6-NEXT: s_cmp_lt_i32 s2, -1 -; GFX6-NEXT: s_cselect_b32 s5, s2, -1 +; GFX6-NEXT: s_min_i32 s5, s2, -1 ; GFX6-NEXT: s_sub_i32 s5, s5, s9 -; GFX6-NEXT: s_cmp_gt_i32 s4, s6 -; GFX6-NEXT: s_cselect_b32 s4, s4, s6 -; GFX6-NEXT: s_cmp_lt_i32 s4, s5 -; GFX6-NEXT: s_cselect_b32 s4, s4, s5 +; GFX6-NEXT: s_max_i32 s4, s4, s6 +; GFX6-NEXT: s_min_i32 s4, s4, s5 ; GFX6-NEXT: s_sub_i32 s2, s2, s4 -; GFX6-NEXT: s_cmp_gt_i32 s3, -1 -; GFX6-NEXT: s_cselect_b32 s4, s3, -1 +; GFX6-NEXT: s_max_i32 s4, s3, -1 ; GFX6-NEXT: s_sub_i32 s4, s4, s8 -; GFX6-NEXT: s_cmp_lt_i32 s3, -1 -; GFX6-NEXT: s_cselect_b32 s5, s3, -1 +; GFX6-NEXT: s_min_i32 s5, s3, -1 ; GFX6-NEXT: s_sub_i32 s5, s5, s9 -; GFX6-NEXT: s_cmp_gt_i32 s4, s7 -; GFX6-NEXT: s_cselect_b32 s4, s4, s7 -; GFX6-NEXT: s_cmp_lt_i32 s4, s5 -; GFX6-NEXT: s_cselect_b32 s4, s4, s5 +; GFX6-NEXT: s_max_i32 s4, s4, s7 +; GFX6-NEXT: s_min_i32 s4, s4, s5 ; GFX6-NEXT: s_sub_i32 s3, s3, s4 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_ssubsat_v4i32: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_cmp_gt_i32 s0, -1 ; GFX8-NEXT: s_brev_b32 s8, -2 -; GFX8-NEXT: s_cselect_b32 s10, s0, -1 +; GFX8-NEXT: s_max_i32 s10, s0, -1 ; GFX8-NEXT: s_sub_i32 s10, s10, s8 -; GFX8-NEXT: s_cmp_lt_i32 s0, -1 ; GFX8-NEXT: s_brev_b32 s9, 1 -; GFX8-NEXT: s_cselect_b32 s11, s0, -1 +; GFX8-NEXT: s_min_i32 s11, s0, -1 +; GFX8-NEXT: s_max_i32 s4, s10, s4 ; GFX8-NEXT: s_sub_i32 s11, s11, s9 -; GFX8-NEXT: s_cmp_gt_i32 s10, s4 -; GFX8-NEXT: s_cselect_b32 s4, s10, s4 -; GFX8-NEXT: s_cmp_lt_i32 s4, s11 -; GFX8-NEXT: s_cselect_b32 s4, s4, s11 +; GFX8-NEXT: s_min_i32 s4, s4, s11 ; GFX8-NEXT: s_sub_i32 s0, s0, s4 -; GFX8-NEXT: s_cmp_gt_i32 s1, -1 -; GFX8-NEXT: s_cselect_b32 s4, s1, -1 +; GFX8-NEXT: s_max_i32 s4, s1, -1 ; GFX8-NEXT: s_sub_i32 s4, s4, s8 -; GFX8-NEXT: s_cmp_lt_i32 s1, -1 -; GFX8-NEXT: s_cselect_b32 s10, s1, -1 +; GFX8-NEXT: s_min_i32 s10, s1, -1 +; GFX8-NEXT: s_max_i32 s4, s4, s5 ; GFX8-NEXT: s_sub_i32 s10, s10, s9 -; GFX8-NEXT: s_cmp_gt_i32 s4, s5 -; GFX8-NEXT: s_cselect_b32 s4, s4, s5 -; GFX8-NEXT: s_cmp_lt_i32 s4, s10 -; GFX8-NEXT: s_cselect_b32 s4, s4, s10 +; GFX8-NEXT: s_min_i32 s4, s4, s10 ; GFX8-NEXT: s_sub_i32 s1, s1, s4 -; GFX8-NEXT: s_cmp_gt_i32 s2, -1 -; GFX8-NEXT: s_cselect_b32 s4, s2, -1 +; GFX8-NEXT: s_max_i32 s4, s2, -1 ; GFX8-NEXT: s_sub_i32 s4, s4, s8 -; GFX8-NEXT: s_cmp_lt_i32 s2, -1 -; GFX8-NEXT: s_cselect_b32 s5, s2, -1 +; GFX8-NEXT: s_min_i32 s5, s2, -1 ; GFX8-NEXT: s_sub_i32 s5, s5, s9 -; GFX8-NEXT: s_cmp_gt_i32 s4, s6 -; GFX8-NEXT: s_cselect_b32 s4, s4, s6 -; GFX8-NEXT: s_cmp_lt_i32 s4, s5 -; GFX8-NEXT: s_cselect_b32 s4, s4, s5 +; GFX8-NEXT: s_max_i32 s4, s4, s6 +; GFX8-NEXT: s_min_i32 s4, s4, s5 ; GFX8-NEXT: s_sub_i32 s2, s2, s4 -; GFX8-NEXT: s_cmp_gt_i32 s3, -1 -; GFX8-NEXT: s_cselect_b32 s4, s3, -1 +; GFX8-NEXT: s_max_i32 s4, s3, -1 ; GFX8-NEXT: s_sub_i32 s4, s4, s8 -; GFX8-NEXT: s_cmp_lt_i32 s3, -1 -; GFX8-NEXT: s_cselect_b32 s5, s3, -1 +; GFX8-NEXT: s_min_i32 s5, s3, -1 ; GFX8-NEXT: s_sub_i32 s5, s5, s9 -; GFX8-NEXT: s_cmp_gt_i32 s4, s7 -; GFX8-NEXT: s_cselect_b32 s4, s4, s7 -; GFX8-NEXT: s_cmp_lt_i32 s4, s5 -; GFX8-NEXT: s_cselect_b32 s4, s4, s5 +; GFX8-NEXT: s_max_i32 s4, s4, s7 +; GFX8-NEXT: s_min_i32 s4, s4, s5 ; GFX8-NEXT: s_sub_i32 s3, s3, s4 ; GFX8-NEXT: ; return to shader part epilog ; @@ -1929,123 +1777,83 @@ define amdgpu_ps <5 x i32> @s_ssubsat_v5i32(<5 x i32> inreg %lhs, <5 x i32> inreg %rhs) { ; GFX6-LABEL: s_ssubsat_v5i32: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_cmp_gt_i32 s0, -1 ; GFX6-NEXT: s_brev_b32 s10, -2 -; GFX6-NEXT: s_cselect_b32 s12, s0, -1 +; GFX6-NEXT: s_max_i32 s12, s0, -1 ; GFX6-NEXT: s_sub_i32 s12, s12, s10 -; GFX6-NEXT: s_cmp_lt_i32 s0, -1 ; GFX6-NEXT: s_brev_b32 s11, 1 -; GFX6-NEXT: s_cselect_b32 s13, s0, -1 +; GFX6-NEXT: s_min_i32 s13, s0, -1 +; GFX6-NEXT: s_max_i32 s5, s12, s5 ; GFX6-NEXT: s_sub_i32 s13, s13, s11 -; GFX6-NEXT: s_cmp_gt_i32 s12, s5 -; GFX6-NEXT: s_cselect_b32 s5, s12, s5 -; GFX6-NEXT: s_cmp_lt_i32 s5, s13 -; GFX6-NEXT: s_cselect_b32 s5, s5, s13 +; GFX6-NEXT: s_min_i32 s5, s5, s13 ; GFX6-NEXT: s_sub_i32 s0, s0, s5 -; GFX6-NEXT: s_cmp_gt_i32 s1, -1 -; GFX6-NEXT: s_cselect_b32 s5, s1, -1 +; GFX6-NEXT: s_max_i32 s5, s1, -1 ; GFX6-NEXT: s_sub_i32 s5, s5, s10 -; GFX6-NEXT: s_cmp_lt_i32 s1, -1 -; GFX6-NEXT: s_cselect_b32 s12, s1, -1 +; GFX6-NEXT: s_min_i32 s12, s1, -1 +; GFX6-NEXT: s_max_i32 s5, s5, s6 ; GFX6-NEXT: s_sub_i32 s12, s12, s11 -; GFX6-NEXT: s_cmp_gt_i32 s5, s6 -; GFX6-NEXT: s_cselect_b32 s5, s5, s6 -; GFX6-NEXT: s_cmp_lt_i32 s5, s12 -; GFX6-NEXT: s_cselect_b32 s5, s5, s12 +; GFX6-NEXT: s_min_i32 s5, s5, s12 ; GFX6-NEXT: s_sub_i32 s1, s1, s5 -; GFX6-NEXT: s_cmp_gt_i32 s2, -1 -; GFX6-NEXT: s_cselect_b32 s5, s2, -1 +; GFX6-NEXT: s_max_i32 s5, s2, -1 ; GFX6-NEXT: s_sub_i32 s5, s5, s10 -; GFX6-NEXT: s_cmp_lt_i32 s2, -1 -; GFX6-NEXT: s_cselect_b32 s6, s2, -1 +; GFX6-NEXT: s_min_i32 s6, s2, -1 ; GFX6-NEXT: s_sub_i32 s6, s6, s11 -; GFX6-NEXT: s_cmp_gt_i32 s5, s7 -; GFX6-NEXT: s_cselect_b32 s5, s5, s7 -; GFX6-NEXT: s_cmp_lt_i32 s5, s6 -; GFX6-NEXT: s_cselect_b32 s5, s5, s6 +; GFX6-NEXT: s_max_i32 s5, s5, s7 +; GFX6-NEXT: s_min_i32 s5, s5, s6 ; GFX6-NEXT: s_sub_i32 s2, s2, s5 -; GFX6-NEXT: s_cmp_gt_i32 s3, -1 -; GFX6-NEXT: s_cselect_b32 s5, s3, -1 +; GFX6-NEXT: s_max_i32 s5, s3, -1 ; GFX6-NEXT: s_sub_i32 s5, s5, s10 -; GFX6-NEXT: s_cmp_lt_i32 s3, -1 -; GFX6-NEXT: s_cselect_b32 s6, s3, -1 +; GFX6-NEXT: s_min_i32 s6, s3, -1 ; GFX6-NEXT: s_sub_i32 s6, s6, s11 -; GFX6-NEXT: s_cmp_gt_i32 s5, s8 -; GFX6-NEXT: s_cselect_b32 s5, s5, s8 -; GFX6-NEXT: s_cmp_lt_i32 s5, s6 -; GFX6-NEXT: s_cselect_b32 s5, s5, s6 +; GFX6-NEXT: s_max_i32 s5, s5, s8 +; GFX6-NEXT: s_min_i32 s5, s5, s6 ; GFX6-NEXT: s_sub_i32 s3, s3, s5 -; GFX6-NEXT: s_cmp_gt_i32 s4, -1 -; GFX6-NEXT: s_cselect_b32 s5, s4, -1 +; GFX6-NEXT: s_max_i32 s5, s4, -1 ; GFX6-NEXT: s_sub_i32 s5, s5, s10 -; GFX6-NEXT: s_cmp_lt_i32 s4, -1 -; GFX6-NEXT: s_cselect_b32 s6, s4, -1 +; GFX6-NEXT: s_min_i32 s6, s4, -1 ; GFX6-NEXT: s_sub_i32 s6, s6, s11 -; GFX6-NEXT: s_cmp_gt_i32 s5, s9 -; GFX6-NEXT: s_cselect_b32 s5, s5, s9 -; GFX6-NEXT: s_cmp_lt_i32 s5, s6 -; GFX6-NEXT: s_cselect_b32 s5, s5, s6 +; GFX6-NEXT: s_max_i32 s5, s5, s9 +; GFX6-NEXT: s_min_i32 s5, s5, s6 ; GFX6-NEXT: s_sub_i32 s4, s4, s5 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_ssubsat_v5i32: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_cmp_gt_i32 s0, -1 ; GFX8-NEXT: s_brev_b32 s10, -2 -; GFX8-NEXT: s_cselect_b32 s12, s0, -1 +; GFX8-NEXT: s_max_i32 s12, s0, -1 ; GFX8-NEXT: s_sub_i32 s12, s12, s10 -; GFX8-NEXT: s_cmp_lt_i32 s0, -1 ; GFX8-NEXT: s_brev_b32 s11, 1 -; GFX8-NEXT: s_cselect_b32 s13, s0, -1 +; GFX8-NEXT: s_min_i32 s13, s0, -1 +; GFX8-NEXT: s_max_i32 s5, s12, s5 ; GFX8-NEXT: s_sub_i32 s13, s13, s11 -; GFX8-NEXT: s_cmp_gt_i32 s12, s5 -; GFX8-NEXT: s_cselect_b32 s5, s12, s5 -; GFX8-NEXT: s_cmp_lt_i32 s5, s13 -; GFX8-NEXT: s_cselect_b32 s5, s5, s13 +; GFX8-NEXT: s_min_i32 s5, s5, s13 ; GFX8-NEXT: s_sub_i32 s0, s0, s5 -; GFX8-NEXT: s_cmp_gt_i32 s1, -1 -; GFX8-NEXT: s_cselect_b32 s5, s1, -1 +; GFX8-NEXT: s_max_i32 s5, s1, -1 ; GFX8-NEXT: s_sub_i32 s5, s5, s10 -; GFX8-NEXT: s_cmp_lt_i32 s1, -1 -; GFX8-NEXT: s_cselect_b32 s12, s1, -1 +; GFX8-NEXT: s_min_i32 s12, s1, -1 +; GFX8-NEXT: s_max_i32 s5, s5, s6 ; GFX8-NEXT: s_sub_i32 s12, s12, s11 -; GFX8-NEXT: s_cmp_gt_i32 s5, s6 -; GFX8-NEXT: s_cselect_b32 s5, s5, s6 -; GFX8-NEXT: s_cmp_lt_i32 s5, s12 -; GFX8-NEXT: s_cselect_b32 s5, s5, s12 +; GFX8-NEXT: s_min_i32 s5, s5, s12 ; GFX8-NEXT: s_sub_i32 s1, s1, s5 -; GFX8-NEXT: s_cmp_gt_i32 s2, -1 -; GFX8-NEXT: s_cselect_b32 s5, s2, -1 +; GFX8-NEXT: s_max_i32 s5, s2, -1 ; GFX8-NEXT: s_sub_i32 s5, s5, s10 -; GFX8-NEXT: s_cmp_lt_i32 s2, -1 -; GFX8-NEXT: s_cselect_b32 s6, s2, -1 +; GFX8-NEXT: s_min_i32 s6, s2, -1 ; GFX8-NEXT: s_sub_i32 s6, s6, s11 -; GFX8-NEXT: s_cmp_gt_i32 s5, s7 -; GFX8-NEXT: s_cselect_b32 s5, s5, s7 -; GFX8-NEXT: s_cmp_lt_i32 s5, s6 -; GFX8-NEXT: s_cselect_b32 s5, s5, s6 +; GFX8-NEXT: s_max_i32 s5, s5, s7 +; GFX8-NEXT: s_min_i32 s5, s5, s6 ; GFX8-NEXT: s_sub_i32 s2, s2, s5 -; GFX8-NEXT: s_cmp_gt_i32 s3, -1 -; GFX8-NEXT: s_cselect_b32 s5, s3, -1 +; GFX8-NEXT: s_max_i32 s5, s3, -1 ; GFX8-NEXT: s_sub_i32 s5, s5, s10 -; GFX8-NEXT: s_cmp_lt_i32 s3, -1 -; GFX8-NEXT: s_cselect_b32 s6, s3, -1 +; GFX8-NEXT: s_min_i32 s6, s3, -1 ; GFX8-NEXT: s_sub_i32 s6, s6, s11 -; GFX8-NEXT: s_cmp_gt_i32 s5, s8 -; GFX8-NEXT: s_cselect_b32 s5, s5, s8 -; GFX8-NEXT: s_cmp_lt_i32 s5, s6 -; GFX8-NEXT: s_cselect_b32 s5, s5, s6 +; GFX8-NEXT: s_max_i32 s5, s5, s8 +; GFX8-NEXT: s_min_i32 s5, s5, s6 ; GFX8-NEXT: s_sub_i32 s3, s3, s5 -; GFX8-NEXT: s_cmp_gt_i32 s4, -1 -; GFX8-NEXT: s_cselect_b32 s5, s4, -1 +; GFX8-NEXT: s_max_i32 s5, s4, -1 ; GFX8-NEXT: s_sub_i32 s5, s5, s10 -; GFX8-NEXT: s_cmp_lt_i32 s4, -1 -; GFX8-NEXT: s_cselect_b32 s6, s4, -1 +; GFX8-NEXT: s_min_i32 s6, s4, -1 ; GFX8-NEXT: s_sub_i32 s6, s6, s11 -; GFX8-NEXT: s_cmp_gt_i32 s5, s9 -; GFX8-NEXT: s_cselect_b32 s5, s5, s9 -; GFX8-NEXT: s_cmp_lt_i32 s5, s6 -; GFX8-NEXT: s_cselect_b32 s5, s5, s6 +; GFX8-NEXT: s_max_i32 s5, s5, s9 +; GFX8-NEXT: s_min_i32 s5, s5, s6 ; GFX8-NEXT: s_sub_i32 s4, s4, s5 ; GFX8-NEXT: ; return to shader part epilog ; @@ -2377,365 +2185,237 @@ define amdgpu_ps <16 x i32> @s_ssubsat_v16i32(<16 x i32> inreg %lhs, <16 x i32> inreg %rhs) { ; GFX6-LABEL: s_ssubsat_v16i32: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_cmp_gt_i32 s0, -1 ; GFX6-NEXT: s_brev_b32 s32, -2 -; GFX6-NEXT: s_cselect_b32 s34, s0, -1 +; GFX6-NEXT: s_max_i32 s34, s0, -1 ; GFX6-NEXT: s_sub_i32 s34, s34, s32 -; GFX6-NEXT: s_cmp_lt_i32 s0, -1 ; GFX6-NEXT: s_brev_b32 s33, 1 -; GFX6-NEXT: s_cselect_b32 s35, s0, -1 +; GFX6-NEXT: s_min_i32 s35, s0, -1 +; GFX6-NEXT: s_max_i32 s16, s34, s16 ; GFX6-NEXT: s_sub_i32 s35, s35, s33 -; GFX6-NEXT: s_cmp_gt_i32 s34, s16 -; GFX6-NEXT: s_cselect_b32 s16, s34, s16 -; GFX6-NEXT: s_cmp_lt_i32 s16, s35 -; GFX6-NEXT: s_cselect_b32 s16, s16, s35 +; GFX6-NEXT: s_min_i32 s16, s16, s35 ; GFX6-NEXT: s_sub_i32 s0, s0, s16 -; GFX6-NEXT: s_cmp_gt_i32 s1, -1 -; GFX6-NEXT: s_cselect_b32 s16, s1, -1 +; GFX6-NEXT: s_max_i32 s16, s1, -1 ; GFX6-NEXT: s_sub_i32 s16, s16, s32 -; GFX6-NEXT: s_cmp_lt_i32 s1, -1 -; GFX6-NEXT: s_cselect_b32 s34, s1, -1 +; GFX6-NEXT: s_min_i32 s34, s1, -1 +; GFX6-NEXT: s_max_i32 s16, s16, s17 ; GFX6-NEXT: s_sub_i32 s34, s34, s33 -; GFX6-NEXT: s_cmp_gt_i32 s16, s17 -; GFX6-NEXT: s_cselect_b32 s16, s16, s17 -; GFX6-NEXT: s_cmp_lt_i32 s16, s34 -; GFX6-NEXT: s_cselect_b32 s16, s16, s34 +; GFX6-NEXT: s_min_i32 s16, s16, s34 ; GFX6-NEXT: s_sub_i32 s1, s1, s16 -; GFX6-NEXT: s_cmp_gt_i32 s2, -1 -; GFX6-NEXT: s_cselect_b32 s16, s2, -1 +; GFX6-NEXT: s_max_i32 s16, s2, -1 ; GFX6-NEXT: s_sub_i32 s16, s16, s32 -; GFX6-NEXT: s_cmp_lt_i32 s2, -1 -; GFX6-NEXT: s_cselect_b32 s17, s2, -1 +; GFX6-NEXT: s_min_i32 s17, s2, -1 ; GFX6-NEXT: s_sub_i32 s17, s17, s33 -; GFX6-NEXT: s_cmp_gt_i32 s16, s18 -; GFX6-NEXT: s_cselect_b32 s16, s16, s18 -; GFX6-NEXT: s_cmp_lt_i32 s16, s17 -; GFX6-NEXT: s_cselect_b32 s16, s16, s17 +; GFX6-NEXT: s_max_i32 s16, s16, s18 +; GFX6-NEXT: s_min_i32 s16, s16, s17 ; GFX6-NEXT: s_sub_i32 s2, s2, s16 -; GFX6-NEXT: s_cmp_gt_i32 s3, -1 -; GFX6-NEXT: s_cselect_b32 s16, s3, -1 +; GFX6-NEXT: s_max_i32 s16, s3, -1 ; GFX6-NEXT: s_sub_i32 s16, s16, s32 -; GFX6-NEXT: s_cmp_lt_i32 s3, -1 -; GFX6-NEXT: s_cselect_b32 s17, s3, -1 +; GFX6-NEXT: s_min_i32 s17, s3, -1 ; GFX6-NEXT: s_sub_i32 s17, s17, s33 -; GFX6-NEXT: s_cmp_gt_i32 s16, s19 -; GFX6-NEXT: s_cselect_b32 s16, s16, s19 -; GFX6-NEXT: s_cmp_lt_i32 s16, s17 -; GFX6-NEXT: s_cselect_b32 s16, s16, s17 +; GFX6-NEXT: s_max_i32 s16, s16, s19 +; GFX6-NEXT: s_min_i32 s16, s16, s17 ; GFX6-NEXT: s_sub_i32 s3, s3, s16 -; GFX6-NEXT: s_cmp_gt_i32 s4, -1 -; GFX6-NEXT: s_cselect_b32 s16, s4, -1 +; GFX6-NEXT: s_max_i32 s16, s4, -1 ; GFX6-NEXT: s_sub_i32 s16, s16, s32 -; GFX6-NEXT: s_cmp_lt_i32 s4, -1 -; GFX6-NEXT: s_cselect_b32 s17, s4, -1 +; GFX6-NEXT: s_min_i32 s17, s4, -1 ; GFX6-NEXT: s_sub_i32 s17, s17, s33 -; GFX6-NEXT: s_cmp_gt_i32 s16, s20 -; GFX6-NEXT: s_cselect_b32 s16, s16, s20 -; GFX6-NEXT: s_cmp_lt_i32 s16, s17 -; GFX6-NEXT: s_cselect_b32 s16, s16, s17 +; GFX6-NEXT: s_max_i32 s16, s16, s20 +; GFX6-NEXT: s_min_i32 s16, s16, s17 ; GFX6-NEXT: s_sub_i32 s4, s4, s16 -; GFX6-NEXT: s_cmp_gt_i32 s5, -1 -; GFX6-NEXT: s_cselect_b32 s16, s5, -1 +; GFX6-NEXT: s_max_i32 s16, s5, -1 ; GFX6-NEXT: s_sub_i32 s16, s16, s32 -; GFX6-NEXT: s_cmp_lt_i32 s5, -1 -; GFX6-NEXT: s_cselect_b32 s17, s5, -1 +; GFX6-NEXT: s_min_i32 s17, s5, -1 ; GFX6-NEXT: s_sub_i32 s17, s17, s33 -; GFX6-NEXT: s_cmp_gt_i32 s16, s21 -; GFX6-NEXT: s_cselect_b32 s16, s16, s21 -; GFX6-NEXT: s_cmp_lt_i32 s16, s17 -; GFX6-NEXT: s_cselect_b32 s16, s16, s17 +; GFX6-NEXT: s_max_i32 s16, s16, s21 +; GFX6-NEXT: s_min_i32 s16, s16, s17 ; GFX6-NEXT: s_sub_i32 s5, s5, s16 -; GFX6-NEXT: s_cmp_gt_i32 s6, -1 -; GFX6-NEXT: s_cselect_b32 s16, s6, -1 +; GFX6-NEXT: s_max_i32 s16, s6, -1 ; GFX6-NEXT: s_sub_i32 s16, s16, s32 -; GFX6-NEXT: s_cmp_lt_i32 s6, -1 -; GFX6-NEXT: s_cselect_b32 s17, s6, -1 +; GFX6-NEXT: s_min_i32 s17, s6, -1 ; GFX6-NEXT: s_sub_i32 s17, s17, s33 -; GFX6-NEXT: s_cmp_gt_i32 s16, s22 -; GFX6-NEXT: s_cselect_b32 s16, s16, s22 -; GFX6-NEXT: s_cmp_lt_i32 s16, s17 -; GFX6-NEXT: s_cselect_b32 s16, s16, s17 +; GFX6-NEXT: s_max_i32 s16, s16, s22 +; GFX6-NEXT: s_min_i32 s16, s16, s17 ; GFX6-NEXT: s_sub_i32 s6, s6, s16 -; GFX6-NEXT: s_cmp_gt_i32 s7, -1 -; GFX6-NEXT: s_cselect_b32 s16, s7, -1 +; GFX6-NEXT: s_max_i32 s16, s7, -1 ; GFX6-NEXT: s_sub_i32 s16, s16, s32 -; GFX6-NEXT: s_cmp_lt_i32 s7, -1 -; GFX6-NEXT: s_cselect_b32 s17, s7, -1 +; GFX6-NEXT: s_min_i32 s17, s7, -1 ; GFX6-NEXT: s_sub_i32 s17, s17, s33 -; GFX6-NEXT: s_cmp_gt_i32 s16, s23 -; GFX6-NEXT: s_cselect_b32 s16, s16, s23 -; GFX6-NEXT: s_cmp_lt_i32 s16, s17 -; GFX6-NEXT: s_cselect_b32 s16, s16, s17 +; GFX6-NEXT: s_max_i32 s16, s16, s23 +; GFX6-NEXT: s_min_i32 s16, s16, s17 ; GFX6-NEXT: s_sub_i32 s7, s7, s16 -; GFX6-NEXT: s_cmp_gt_i32 s8, -1 -; GFX6-NEXT: s_cselect_b32 s16, s8, -1 +; GFX6-NEXT: s_max_i32 s16, s8, -1 ; GFX6-NEXT: s_sub_i32 s16, s16, s32 -; GFX6-NEXT: s_cmp_lt_i32 s8, -1 -; GFX6-NEXT: s_cselect_b32 s17, s8, -1 +; GFX6-NEXT: s_min_i32 s17, s8, -1 ; GFX6-NEXT: s_sub_i32 s17, s17, s33 -; GFX6-NEXT: s_cmp_gt_i32 s16, s24 -; GFX6-NEXT: s_cselect_b32 s16, s16, s24 -; GFX6-NEXT: s_cmp_lt_i32 s16, s17 -; GFX6-NEXT: s_cselect_b32 s16, s16, s17 +; GFX6-NEXT: s_max_i32 s16, s16, s24 +; GFX6-NEXT: s_min_i32 s16, s16, s17 ; GFX6-NEXT: s_sub_i32 s8, s8, s16 -; GFX6-NEXT: s_cmp_gt_i32 s9, -1 -; GFX6-NEXT: s_cselect_b32 s16, s9, -1 +; GFX6-NEXT: s_max_i32 s16, s9, -1 ; GFX6-NEXT: s_sub_i32 s16, s16, s32 -; GFX6-NEXT: s_cmp_lt_i32 s9, -1 -; GFX6-NEXT: s_cselect_b32 s17, s9, -1 +; GFX6-NEXT: s_min_i32 s17, s9, -1 ; GFX6-NEXT: s_sub_i32 s17, s17, s33 -; GFX6-NEXT: s_cmp_gt_i32 s16, s25 -; GFX6-NEXT: s_cselect_b32 s16, s16, s25 -; GFX6-NEXT: s_cmp_lt_i32 s16, s17 -; GFX6-NEXT: s_cselect_b32 s16, s16, s17 +; GFX6-NEXT: s_max_i32 s16, s16, s25 +; GFX6-NEXT: s_min_i32 s16, s16, s17 ; GFX6-NEXT: s_sub_i32 s9, s9, s16 -; GFX6-NEXT: s_cmp_gt_i32 s10, -1 -; GFX6-NEXT: s_cselect_b32 s16, s10, -1 +; GFX6-NEXT: s_max_i32 s16, s10, -1 ; GFX6-NEXT: s_sub_i32 s16, s16, s32 -; GFX6-NEXT: s_cmp_lt_i32 s10, -1 -; GFX6-NEXT: s_cselect_b32 s17, s10, -1 +; GFX6-NEXT: s_min_i32 s17, s10, -1 ; GFX6-NEXT: s_sub_i32 s17, s17, s33 -; GFX6-NEXT: s_cmp_gt_i32 s16, s26 -; GFX6-NEXT: s_cselect_b32 s16, s16, s26 -; GFX6-NEXT: s_cmp_lt_i32 s16, s17 -; GFX6-NEXT: s_cselect_b32 s16, s16, s17 +; GFX6-NEXT: s_max_i32 s16, s16, s26 +; GFX6-NEXT: s_min_i32 s16, s16, s17 ; GFX6-NEXT: s_sub_i32 s10, s10, s16 -; GFX6-NEXT: s_cmp_gt_i32 s11, -1 -; GFX6-NEXT: s_cselect_b32 s16, s11, -1 +; GFX6-NEXT: s_max_i32 s16, s11, -1 ; GFX6-NEXT: s_sub_i32 s16, s16, s32 -; GFX6-NEXT: s_cmp_lt_i32 s11, -1 -; GFX6-NEXT: s_cselect_b32 s17, s11, -1 +; GFX6-NEXT: s_min_i32 s17, s11, -1 ; GFX6-NEXT: s_sub_i32 s17, s17, s33 -; GFX6-NEXT: s_cmp_gt_i32 s16, s27 -; GFX6-NEXT: s_cselect_b32 s16, s16, s27 -; GFX6-NEXT: s_cmp_lt_i32 s16, s17 -; GFX6-NEXT: s_cselect_b32 s16, s16, s17 +; GFX6-NEXT: s_max_i32 s16, s16, s27 +; GFX6-NEXT: s_min_i32 s16, s16, s17 ; GFX6-NEXT: s_sub_i32 s11, s11, s16 -; GFX6-NEXT: s_cmp_gt_i32 s12, -1 -; GFX6-NEXT: s_cselect_b32 s16, s12, -1 +; GFX6-NEXT: s_max_i32 s16, s12, -1 ; GFX6-NEXT: s_sub_i32 s16, s16, s32 -; GFX6-NEXT: s_cmp_lt_i32 s12, -1 -; GFX6-NEXT: s_cselect_b32 s17, s12, -1 +; GFX6-NEXT: s_min_i32 s17, s12, -1 ; GFX6-NEXT: s_sub_i32 s17, s17, s33 -; GFX6-NEXT: s_cmp_gt_i32 s16, s28 -; GFX6-NEXT: s_cselect_b32 s16, s16, s28 -; GFX6-NEXT: s_cmp_lt_i32 s16, s17 -; GFX6-NEXT: s_cselect_b32 s16, s16, s17 +; GFX6-NEXT: s_max_i32 s16, s16, s28 +; GFX6-NEXT: s_min_i32 s16, s16, s17 ; GFX6-NEXT: s_sub_i32 s12, s12, s16 -; GFX6-NEXT: s_cmp_gt_i32 s13, -1 -; GFX6-NEXT: s_cselect_b32 s16, s13, -1 +; GFX6-NEXT: s_max_i32 s16, s13, -1 ; GFX6-NEXT: s_sub_i32 s16, s16, s32 -; GFX6-NEXT: s_cmp_lt_i32 s13, -1 -; GFX6-NEXT: s_cselect_b32 s17, s13, -1 +; GFX6-NEXT: s_min_i32 s17, s13, -1 ; GFX6-NEXT: s_sub_i32 s17, s17, s33 -; GFX6-NEXT: s_cmp_gt_i32 s16, s29 -; GFX6-NEXT: s_cselect_b32 s16, s16, s29 -; GFX6-NEXT: s_cmp_lt_i32 s16, s17 -; GFX6-NEXT: s_cselect_b32 s16, s16, s17 +; GFX6-NEXT: s_max_i32 s16, s16, s29 +; GFX6-NEXT: s_min_i32 s16, s16, s17 ; GFX6-NEXT: s_sub_i32 s13, s13, s16 -; GFX6-NEXT: s_cmp_gt_i32 s14, -1 -; GFX6-NEXT: s_cselect_b32 s16, s14, -1 +; GFX6-NEXT: s_max_i32 s16, s14, -1 ; GFX6-NEXT: s_sub_i32 s16, s16, s32 -; GFX6-NEXT: s_cmp_lt_i32 s14, -1 -; GFX6-NEXT: s_cselect_b32 s17, s14, -1 +; GFX6-NEXT: s_min_i32 s17, s14, -1 ; GFX6-NEXT: s_sub_i32 s17, s17, s33 -; GFX6-NEXT: s_cmp_gt_i32 s16, s30 -; GFX6-NEXT: s_cselect_b32 s16, s16, s30 -; GFX6-NEXT: s_cmp_lt_i32 s16, s17 -; GFX6-NEXT: s_cselect_b32 s16, s16, s17 +; GFX6-NEXT: s_max_i32 s16, s16, s30 +; GFX6-NEXT: s_min_i32 s16, s16, s17 ; GFX6-NEXT: s_sub_i32 s14, s14, s16 -; GFX6-NEXT: s_cmp_gt_i32 s15, -1 -; GFX6-NEXT: s_cselect_b32 s16, s15, -1 +; GFX6-NEXT: s_max_i32 s16, s15, -1 ; GFX6-NEXT: s_sub_i32 s16, s16, s32 -; GFX6-NEXT: s_cmp_lt_i32 s15, -1 -; GFX6-NEXT: s_cselect_b32 s17, s15, -1 +; GFX6-NEXT: s_min_i32 s17, s15, -1 ; GFX6-NEXT: s_sub_i32 s17, s17, s33 -; GFX6-NEXT: s_cmp_gt_i32 s16, s31 -; GFX6-NEXT: s_cselect_b32 s16, s16, s31 -; GFX6-NEXT: s_cmp_lt_i32 s16, s17 -; GFX6-NEXT: s_cselect_b32 s16, s16, s17 +; GFX6-NEXT: s_max_i32 s16, s16, s31 +; GFX6-NEXT: s_min_i32 s16, s16, s17 ; GFX6-NEXT: s_sub_i32 s15, s15, s16 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_ssubsat_v16i32: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_cmp_gt_i32 s0, -1 ; GFX8-NEXT: s_brev_b32 s32, -2 -; GFX8-NEXT: s_cselect_b32 s34, s0, -1 +; GFX8-NEXT: s_max_i32 s34, s0, -1 ; GFX8-NEXT: s_sub_i32 s34, s34, s32 -; GFX8-NEXT: s_cmp_lt_i32 s0, -1 ; GFX8-NEXT: s_brev_b32 s33, 1 -; GFX8-NEXT: s_cselect_b32 s35, s0, -1 +; GFX8-NEXT: s_min_i32 s35, s0, -1 +; GFX8-NEXT: s_max_i32 s16, s34, s16 ; GFX8-NEXT: s_sub_i32 s35, s35, s33 -; GFX8-NEXT: s_cmp_gt_i32 s34, s16 -; GFX8-NEXT: s_cselect_b32 s16, s34, s16 -; GFX8-NEXT: s_cmp_lt_i32 s16, s35 -; GFX8-NEXT: s_cselect_b32 s16, s16, s35 +; GFX8-NEXT: s_min_i32 s16, s16, s35 ; GFX8-NEXT: s_sub_i32 s0, s0, s16 -; GFX8-NEXT: s_cmp_gt_i32 s1, -1 -; GFX8-NEXT: s_cselect_b32 s16, s1, -1 +; GFX8-NEXT: s_max_i32 s16, s1, -1 ; GFX8-NEXT: s_sub_i32 s16, s16, s32 -; GFX8-NEXT: s_cmp_lt_i32 s1, -1 -; GFX8-NEXT: s_cselect_b32 s34, s1, -1 +; GFX8-NEXT: s_min_i32 s34, s1, -1 +; GFX8-NEXT: s_max_i32 s16, s16, s17 ; GFX8-NEXT: s_sub_i32 s34, s34, s33 -; GFX8-NEXT: s_cmp_gt_i32 s16, s17 -; GFX8-NEXT: s_cselect_b32 s16, s16, s17 -; GFX8-NEXT: s_cmp_lt_i32 s16, s34 -; GFX8-NEXT: s_cselect_b32 s16, s16, s34 +; GFX8-NEXT: s_min_i32 s16, s16, s34 ; GFX8-NEXT: s_sub_i32 s1, s1, s16 -; GFX8-NEXT: s_cmp_gt_i32 s2, -1 -; GFX8-NEXT: s_cselect_b32 s16, s2, -1 +; GFX8-NEXT: s_max_i32 s16, s2, -1 ; GFX8-NEXT: s_sub_i32 s16, s16, s32 -; GFX8-NEXT: s_cmp_lt_i32 s2, -1 -; GFX8-NEXT: s_cselect_b32 s17, s2, -1 +; GFX8-NEXT: s_min_i32 s17, s2, -1 ; GFX8-NEXT: s_sub_i32 s17, s17, s33 -; GFX8-NEXT: s_cmp_gt_i32 s16, s18 -; GFX8-NEXT: s_cselect_b32 s16, s16, s18 -; GFX8-NEXT: s_cmp_lt_i32 s16, s17 -; GFX8-NEXT: s_cselect_b32 s16, s16, s17 +; GFX8-NEXT: s_max_i32 s16, s16, s18 +; GFX8-NEXT: s_min_i32 s16, s16, s17 ; GFX8-NEXT: s_sub_i32 s2, s2, s16 -; GFX8-NEXT: s_cmp_gt_i32 s3, -1 -; GFX8-NEXT: s_cselect_b32 s16, s3, -1 +; GFX8-NEXT: s_max_i32 s16, s3, -1 ; GFX8-NEXT: s_sub_i32 s16, s16, s32 -; GFX8-NEXT: s_cmp_lt_i32 s3, -1 -; GFX8-NEXT: s_cselect_b32 s17, s3, -1 +; GFX8-NEXT: s_min_i32 s17, s3, -1 ; GFX8-NEXT: s_sub_i32 s17, s17, s33 -; GFX8-NEXT: s_cmp_gt_i32 s16, s19 -; GFX8-NEXT: s_cselect_b32 s16, s16, s19 -; GFX8-NEXT: s_cmp_lt_i32 s16, s17 -; GFX8-NEXT: s_cselect_b32 s16, s16, s17 +; GFX8-NEXT: s_max_i32 s16, s16, s19 +; GFX8-NEXT: s_min_i32 s16, s16, s17 ; GFX8-NEXT: s_sub_i32 s3, s3, s16 -; GFX8-NEXT: s_cmp_gt_i32 s4, -1 -; GFX8-NEXT: s_cselect_b32 s16, s4, -1 +; GFX8-NEXT: s_max_i32 s16, s4, -1 ; GFX8-NEXT: s_sub_i32 s16, s16, s32 -; GFX8-NEXT: s_cmp_lt_i32 s4, -1 -; GFX8-NEXT: s_cselect_b32 s17, s4, -1 +; GFX8-NEXT: s_min_i32 s17, s4, -1 ; GFX8-NEXT: s_sub_i32 s17, s17, s33 -; GFX8-NEXT: s_cmp_gt_i32 s16, s20 -; GFX8-NEXT: s_cselect_b32 s16, s16, s20 -; GFX8-NEXT: s_cmp_lt_i32 s16, s17 -; GFX8-NEXT: s_cselect_b32 s16, s16, s17 +; GFX8-NEXT: s_max_i32 s16, s16, s20 +; GFX8-NEXT: s_min_i32 s16, s16, s17 ; GFX8-NEXT: s_sub_i32 s4, s4, s16 -; GFX8-NEXT: s_cmp_gt_i32 s5, -1 -; GFX8-NEXT: s_cselect_b32 s16, s5, -1 +; GFX8-NEXT: s_max_i32 s16, s5, -1 ; GFX8-NEXT: s_sub_i32 s16, s16, s32 -; GFX8-NEXT: s_cmp_lt_i32 s5, -1 -; GFX8-NEXT: s_cselect_b32 s17, s5, -1 +; GFX8-NEXT: s_min_i32 s17, s5, -1 ; GFX8-NEXT: s_sub_i32 s17, s17, s33 -; GFX8-NEXT: s_cmp_gt_i32 s16, s21 -; GFX8-NEXT: s_cselect_b32 s16, s16, s21 -; GFX8-NEXT: s_cmp_lt_i32 s16, s17 -; GFX8-NEXT: s_cselect_b32 s16, s16, s17 +; GFX8-NEXT: s_max_i32 s16, s16, s21 +; GFX8-NEXT: s_min_i32 s16, s16, s17 ; GFX8-NEXT: s_sub_i32 s5, s5, s16 -; GFX8-NEXT: s_cmp_gt_i32 s6, -1 -; GFX8-NEXT: s_cselect_b32 s16, s6, -1 +; GFX8-NEXT: s_max_i32 s16, s6, -1 ; GFX8-NEXT: s_sub_i32 s16, s16, s32 -; GFX8-NEXT: s_cmp_lt_i32 s6, -1 -; GFX8-NEXT: s_cselect_b32 s17, s6, -1 +; GFX8-NEXT: s_min_i32 s17, s6, -1 ; GFX8-NEXT: s_sub_i32 s17, s17, s33 -; GFX8-NEXT: s_cmp_gt_i32 s16, s22 -; GFX8-NEXT: s_cselect_b32 s16, s16, s22 -; GFX8-NEXT: s_cmp_lt_i32 s16, s17 -; GFX8-NEXT: s_cselect_b32 s16, s16, s17 +; GFX8-NEXT: s_max_i32 s16, s16, s22 +; GFX8-NEXT: s_min_i32 s16, s16, s17 ; GFX8-NEXT: s_sub_i32 s6, s6, s16 -; GFX8-NEXT: s_cmp_gt_i32 s7, -1 -; GFX8-NEXT: s_cselect_b32 s16, s7, -1 +; GFX8-NEXT: s_max_i32 s16, s7, -1 ; GFX8-NEXT: s_sub_i32 s16, s16, s32 -; GFX8-NEXT: s_cmp_lt_i32 s7, -1 -; GFX8-NEXT: s_cselect_b32 s17, s7, -1 +; GFX8-NEXT: s_min_i32 s17, s7, -1 ; GFX8-NEXT: s_sub_i32 s17, s17, s33 -; GFX8-NEXT: s_cmp_gt_i32 s16, s23 -; GFX8-NEXT: s_cselect_b32 s16, s16, s23 -; GFX8-NEXT: s_cmp_lt_i32 s16, s17 -; GFX8-NEXT: s_cselect_b32 s16, s16, s17 +; GFX8-NEXT: s_max_i32 s16, s16, s23 +; GFX8-NEXT: s_min_i32 s16, s16, s17 ; GFX8-NEXT: s_sub_i32 s7, s7, s16 -; GFX8-NEXT: s_cmp_gt_i32 s8, -1 -; GFX8-NEXT: s_cselect_b32 s16, s8, -1 +; GFX8-NEXT: s_max_i32 s16, s8, -1 ; GFX8-NEXT: s_sub_i32 s16, s16, s32 -; GFX8-NEXT: s_cmp_lt_i32 s8, -1 -; GFX8-NEXT: s_cselect_b32 s17, s8, -1 +; GFX8-NEXT: s_min_i32 s17, s8, -1 ; GFX8-NEXT: s_sub_i32 s17, s17, s33 -; GFX8-NEXT: s_cmp_gt_i32 s16, s24 -; GFX8-NEXT: s_cselect_b32 s16, s16, s24 -; GFX8-NEXT: s_cmp_lt_i32 s16, s17 -; GFX8-NEXT: s_cselect_b32 s16, s16, s17 +; GFX8-NEXT: s_max_i32 s16, s16, s24 +; GFX8-NEXT: s_min_i32 s16, s16, s17 ; GFX8-NEXT: s_sub_i32 s8, s8, s16 -; GFX8-NEXT: s_cmp_gt_i32 s9, -1 -; GFX8-NEXT: s_cselect_b32 s16, s9, -1 +; GFX8-NEXT: s_max_i32 s16, s9, -1 ; GFX8-NEXT: s_sub_i32 s16, s16, s32 -; GFX8-NEXT: s_cmp_lt_i32 s9, -1 -; GFX8-NEXT: s_cselect_b32 s17, s9, -1 +; GFX8-NEXT: s_min_i32 s17, s9, -1 ; GFX8-NEXT: s_sub_i32 s17, s17, s33 -; GFX8-NEXT: s_cmp_gt_i32 s16, s25 -; GFX8-NEXT: s_cselect_b32 s16, s16, s25 -; GFX8-NEXT: s_cmp_lt_i32 s16, s17 -; GFX8-NEXT: s_cselect_b32 s16, s16, s17 +; GFX8-NEXT: s_max_i32 s16, s16, s25 +; GFX8-NEXT: s_min_i32 s16, s16, s17 ; GFX8-NEXT: s_sub_i32 s9, s9, s16 -; GFX8-NEXT: s_cmp_gt_i32 s10, -1 -; GFX8-NEXT: s_cselect_b32 s16, s10, -1 +; GFX8-NEXT: s_max_i32 s16, s10, -1 ; GFX8-NEXT: s_sub_i32 s16, s16, s32 -; GFX8-NEXT: s_cmp_lt_i32 s10, -1 -; GFX8-NEXT: s_cselect_b32 s17, s10, -1 +; GFX8-NEXT: s_min_i32 s17, s10, -1 ; GFX8-NEXT: s_sub_i32 s17, s17, s33 -; GFX8-NEXT: s_cmp_gt_i32 s16, s26 -; GFX8-NEXT: s_cselect_b32 s16, s16, s26 -; GFX8-NEXT: s_cmp_lt_i32 s16, s17 -; GFX8-NEXT: s_cselect_b32 s16, s16, s17 +; GFX8-NEXT: s_max_i32 s16, s16, s26 +; GFX8-NEXT: s_min_i32 s16, s16, s17 ; GFX8-NEXT: s_sub_i32 s10, s10, s16 -; GFX8-NEXT: s_cmp_gt_i32 s11, -1 -; GFX8-NEXT: s_cselect_b32 s16, s11, -1 +; GFX8-NEXT: s_max_i32 s16, s11, -1 ; GFX8-NEXT: s_sub_i32 s16, s16, s32 -; GFX8-NEXT: s_cmp_lt_i32 s11, -1 -; GFX8-NEXT: s_cselect_b32 s17, s11, -1 +; GFX8-NEXT: s_min_i32 s17, s11, -1 ; GFX8-NEXT: s_sub_i32 s17, s17, s33 -; GFX8-NEXT: s_cmp_gt_i32 s16, s27 -; GFX8-NEXT: s_cselect_b32 s16, s16, s27 -; GFX8-NEXT: s_cmp_lt_i32 s16, s17 -; GFX8-NEXT: s_cselect_b32 s16, s16, s17 +; GFX8-NEXT: s_max_i32 s16, s16, s27 +; GFX8-NEXT: s_min_i32 s16, s16, s17 ; GFX8-NEXT: s_sub_i32 s11, s11, s16 -; GFX8-NEXT: s_cmp_gt_i32 s12, -1 -; GFX8-NEXT: s_cselect_b32 s16, s12, -1 +; GFX8-NEXT: s_max_i32 s16, s12, -1 ; GFX8-NEXT: s_sub_i32 s16, s16, s32 -; GFX8-NEXT: s_cmp_lt_i32 s12, -1 -; GFX8-NEXT: s_cselect_b32 s17, s12, -1 +; GFX8-NEXT: s_min_i32 s17, s12, -1 ; GFX8-NEXT: s_sub_i32 s17, s17, s33 -; GFX8-NEXT: s_cmp_gt_i32 s16, s28 -; GFX8-NEXT: s_cselect_b32 s16, s16, s28 -; GFX8-NEXT: s_cmp_lt_i32 s16, s17 -; GFX8-NEXT: s_cselect_b32 s16, s16, s17 +; GFX8-NEXT: s_max_i32 s16, s16, s28 +; GFX8-NEXT: s_min_i32 s16, s16, s17 ; GFX8-NEXT: s_sub_i32 s12, s12, s16 -; GFX8-NEXT: s_cmp_gt_i32 s13, -1 -; GFX8-NEXT: s_cselect_b32 s16, s13, -1 +; GFX8-NEXT: s_max_i32 s16, s13, -1 ; GFX8-NEXT: s_sub_i32 s16, s16, s32 -; GFX8-NEXT: s_cmp_lt_i32 s13, -1 -; GFX8-NEXT: s_cselect_b32 s17, s13, -1 +; GFX8-NEXT: s_min_i32 s17, s13, -1 ; GFX8-NEXT: s_sub_i32 s17, s17, s33 -; GFX8-NEXT: s_cmp_gt_i32 s16, s29 -; GFX8-NEXT: s_cselect_b32 s16, s16, s29 -; GFX8-NEXT: s_cmp_lt_i32 s16, s17 -; GFX8-NEXT: s_cselect_b32 s16, s16, s17 +; GFX8-NEXT: s_max_i32 s16, s16, s29 +; GFX8-NEXT: s_min_i32 s16, s16, s17 ; GFX8-NEXT: s_sub_i32 s13, s13, s16 -; GFX8-NEXT: s_cmp_gt_i32 s14, -1 -; GFX8-NEXT: s_cselect_b32 s16, s14, -1 +; GFX8-NEXT: s_max_i32 s16, s14, -1 ; GFX8-NEXT: s_sub_i32 s16, s16, s32 -; GFX8-NEXT: s_cmp_lt_i32 s14, -1 -; GFX8-NEXT: s_cselect_b32 s17, s14, -1 +; GFX8-NEXT: s_min_i32 s17, s14, -1 ; GFX8-NEXT: s_sub_i32 s17, s17, s33 -; GFX8-NEXT: s_cmp_gt_i32 s16, s30 -; GFX8-NEXT: s_cselect_b32 s16, s16, s30 -; GFX8-NEXT: s_cmp_lt_i32 s16, s17 -; GFX8-NEXT: s_cselect_b32 s16, s16, s17 +; GFX8-NEXT: s_max_i32 s16, s16, s30 +; GFX8-NEXT: s_min_i32 s16, s16, s17 ; GFX8-NEXT: s_sub_i32 s14, s14, s16 -; GFX8-NEXT: s_cmp_gt_i32 s15, -1 -; GFX8-NEXT: s_cselect_b32 s16, s15, -1 +; GFX8-NEXT: s_max_i32 s16, s15, -1 ; GFX8-NEXT: s_sub_i32 s16, s16, s32 -; GFX8-NEXT: s_cmp_lt_i32 s15, -1 -; GFX8-NEXT: s_cselect_b32 s17, s15, -1 +; GFX8-NEXT: s_min_i32 s17, s15, -1 ; GFX8-NEXT: s_sub_i32 s17, s17, s33 -; GFX8-NEXT: s_cmp_gt_i32 s16, s31 -; GFX8-NEXT: s_cselect_b32 s16, s16, s31 -; GFX8-NEXT: s_cmp_lt_i32 s16, s17 -; GFX8-NEXT: s_cselect_b32 s16, s16, s17 +; GFX8-NEXT: s_max_i32 s16, s16, s31 +; GFX8-NEXT: s_min_i32 s16, s16, s17 ; GFX8-NEXT: s_sub_i32 s15, s15, s16 ; GFX8-NEXT: ; return to shader part epilog ; @@ -2878,17 +2558,13 @@ ; GFX6-LABEL: s_ssubsat_i16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_lshl_b32 s0, s0, 16 +; GFX6-NEXT: s_max_i32 s2, s0, -1 +; GFX6-NEXT: s_min_i32 s3, s0, -1 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16 -; GFX6-NEXT: s_cmp_gt_i32 s0, -1 -; GFX6-NEXT: s_cselect_b32 s2, s0, -1 ; GFX6-NEXT: s_sub_i32 s2, s2, 0x7fffffff -; GFX6-NEXT: s_cmp_lt_i32 s0, -1 -; GFX6-NEXT: s_cselect_b32 s3, s0, -1 ; GFX6-NEXT: s_sub_i32 s3, s3, 0x80000000 -; GFX6-NEXT: s_cmp_gt_i32 s2, s1 -; GFX6-NEXT: s_cselect_b32 s1, s2, s1 -; GFX6-NEXT: s_cmp_lt_i32 s1, s3 -; GFX6-NEXT: s_cselect_b32 s1, s1, s3 +; GFX6-NEXT: s_max_i32 s1, s2, s1 +; GFX6-NEXT: s_min_i32 s1, s1, s3 ; GFX6-NEXT: s_sub_i32 s0, s0, s1 ; GFX6-NEXT: s_ashr_i32 s0, s0, 16 ; GFX6-NEXT: ; return to shader part epilog @@ -2897,20 +2573,16 @@ ; GFX8: ; %bb.0: ; GFX8-NEXT: s_sext_i32_i16 s2, s0 ; GFX8-NEXT: s_sext_i32_i16 s3, -1 -; GFX8-NEXT: s_cmp_gt_i32 s2, s3 -; GFX8-NEXT: s_cselect_b32 s4, s2, s3 +; GFX8-NEXT: s_max_i32 s4, s2, s3 ; GFX8-NEXT: s_sub_i32 s4, s4, 0x7fff -; GFX8-NEXT: s_cmp_lt_i32 s2, s3 -; GFX8-NEXT: s_cselect_b32 s2, s2, s3 -; GFX8-NEXT: s_sub_i32 s2, s2, 0xffff8000 +; GFX8-NEXT: s_min_i32 s2, s2, s3 ; GFX8-NEXT: s_sext_i32_i16 s3, s4 ; GFX8-NEXT: s_sext_i32_i16 s1, s1 -; GFX8-NEXT: s_cmp_gt_i32 s3, s1 -; GFX8-NEXT: s_cselect_b32 s1, s3, s1 +; GFX8-NEXT: s_sub_i32 s2, s2, 0xffff8000 +; GFX8-NEXT: s_max_i32 s1, s3, s1 ; GFX8-NEXT: s_sext_i32_i16 s1, s1 ; GFX8-NEXT: s_sext_i32_i16 s2, s2 -; GFX8-NEXT: s_cmp_lt_i32 s1, s2 -; GFX8-NEXT: s_cselect_b32 s1, s1, s2 +; GFX8-NEXT: s_min_i32 s1, s1, s2 ; GFX8-NEXT: s_sub_i32 s0, s0, s1 ; GFX8-NEXT: ; return to shader part epilog ; @@ -2934,12 +2606,10 @@ ; GFX6-LABEL: ssubsat_i16_sv: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_lshl_b32 s0, s0, 16 -; GFX6-NEXT: s_cmp_gt_i32 s0, -1 -; GFX6-NEXT: s_cselect_b32 s1, s0, -1 -; GFX6-NEXT: s_sub_i32 s1, s1, 0x7fffffff -; GFX6-NEXT: s_cmp_lt_i32 s0, -1 -; GFX6-NEXT: s_cselect_b32 s2, s0, -1 +; GFX6-NEXT: s_max_i32 s1, s0, -1 +; GFX6-NEXT: s_min_i32 s2, s0, -1 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX6-NEXT: s_sub_i32 s1, s1, 0x7fffffff ; GFX6-NEXT: s_sub_i32 s2, s2, 0x80000000 ; GFX6-NEXT: v_max_i32_e32 v0, s1, v0 ; GFX6-NEXT: v_min_i32_e32 v0, s2, v0 @@ -2951,11 +2621,9 @@ ; GFX8: ; %bb.0: ; GFX8-NEXT: s_sext_i32_i16 s1, s0 ; GFX8-NEXT: s_sext_i32_i16 s2, -1 -; GFX8-NEXT: s_cmp_gt_i32 s1, s2 -; GFX8-NEXT: s_cselect_b32 s3, s1, s2 +; GFX8-NEXT: s_max_i32 s3, s1, s2 +; GFX8-NEXT: s_min_i32 s1, s1, s2 ; GFX8-NEXT: s_sub_i32 s3, s3, 0x7fff -; GFX8-NEXT: s_cmp_lt_i32 s1, s2 -; GFX8-NEXT: s_cselect_b32 s1, s1, s2 ; GFX8-NEXT: s_sub_i32 s1, s1, 0xffff8000 ; GFX8-NEXT: v_max_i16_e32 v0, s3, v0 ; GFX8-NEXT: v_min_i16_e32 v0, s1, v0 @@ -3087,36 +2755,28 @@ ; GFX6-LABEL: s_ssubsat_v2i16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_lshl_b32 s0, s0, 16 -; GFX6-NEXT: s_lshl_b32 s2, s2, 16 -; GFX6-NEXT: s_cmp_gt_i32 s0, -1 ; GFX6-NEXT: s_brev_b32 s4, -2 -; GFX6-NEXT: s_cselect_b32 s6, s0, -1 +; GFX6-NEXT: s_max_i32 s6, s0, -1 +; GFX6-NEXT: s_lshl_b32 s2, s2, 16 ; GFX6-NEXT: s_sub_i32 s6, s6, s4 -; GFX6-NEXT: s_cmp_lt_i32 s0, -1 ; GFX6-NEXT: s_brev_b32 s5, 1 -; GFX6-NEXT: s_cselect_b32 s7, s0, -1 +; GFX6-NEXT: s_min_i32 s7, s0, -1 ; GFX6-NEXT: s_sub_i32 s7, s7, s5 -; GFX6-NEXT: s_cmp_gt_i32 s6, s2 -; GFX6-NEXT: s_cselect_b32 s2, s6, s2 -; GFX6-NEXT: s_cmp_lt_i32 s2, s7 -; GFX6-NEXT: s_cselect_b32 s2, s2, s7 -; GFX6-NEXT: s_sub_i32 s0, s0, s2 -; GFX6-NEXT: s_ashr_i32 s0, s0, 16 +; GFX6-NEXT: s_max_i32 s2, s6, s2 +; GFX6-NEXT: s_min_i32 s2, s2, s7 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16 +; GFX6-NEXT: s_sub_i32 s0, s0, s2 ; GFX6-NEXT: s_lshl_b32 s2, s3, 16 -; GFX6-NEXT: s_cmp_gt_i32 s1, -1 -; GFX6-NEXT: s_cselect_b32 s3, s1, -1 +; GFX6-NEXT: s_max_i32 s3, s1, -1 ; GFX6-NEXT: s_sub_i32 s3, s3, s4 -; GFX6-NEXT: s_cmp_lt_i32 s1, -1 -; GFX6-NEXT: s_cselect_b32 s4, s1, -1 +; GFX6-NEXT: s_min_i32 s4, s1, -1 ; GFX6-NEXT: s_sub_i32 s4, s4, s5 -; GFX6-NEXT: s_cmp_gt_i32 s3, s2 -; GFX6-NEXT: s_cselect_b32 s2, s3, s2 -; GFX6-NEXT: s_cmp_lt_i32 s2, s4 -; GFX6-NEXT: s_cselect_b32 s2, s2, s4 +; GFX6-NEXT: s_max_i32 s2, s3, s2 +; GFX6-NEXT: s_min_i32 s2, s2, s4 ; GFX6-NEXT: s_sub_i32 s1, s1, s2 -; GFX6-NEXT: s_mov_b32 s2, 0xffff ; GFX6-NEXT: s_ashr_i32 s1, s1, 16 +; GFX6-NEXT: s_mov_b32 s2, 0xffff +; GFX6-NEXT: s_ashr_i32 s0, s0, 16 ; GFX6-NEXT: s_and_b32 s1, s1, s2 ; GFX6-NEXT: s_and_b32 s0, s0, s2 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16 @@ -3125,42 +2785,34 @@ ; ; GFX8-LABEL: s_ssubsat_v2i16: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_lshr_b32 s3, s1, 16 -; GFX8-NEXT: s_lshr_b32 s2, s0, 16 ; GFX8-NEXT: s_sext_i32_i16 s6, s0 ; GFX8-NEXT: s_sext_i32_i16 s7, -1 -; GFX8-NEXT: s_cmp_gt_i32 s6, s7 +; GFX8-NEXT: s_max_i32 s8, s6, s7 ; GFX8-NEXT: s_movk_i32 s4, 0x7fff -; GFX8-NEXT: s_cselect_b32 s8, s6, s7 ; GFX8-NEXT: s_sub_i32 s8, s8, s4 -; GFX8-NEXT: s_cmp_lt_i32 s6, s7 +; GFX8-NEXT: s_lshr_b32 s3, s1, 16 ; GFX8-NEXT: s_movk_i32 s5, 0x8000 -; GFX8-NEXT: s_cselect_b32 s6, s6, s7 -; GFX8-NEXT: s_sub_i32 s6, s6, s5 +; GFX8-NEXT: s_min_i32 s6, s6, s7 ; GFX8-NEXT: s_sext_i32_i16 s8, s8 ; GFX8-NEXT: s_sext_i32_i16 s1, s1 -; GFX8-NEXT: s_cmp_gt_i32 s8, s1 -; GFX8-NEXT: s_cselect_b32 s1, s8, s1 +; GFX8-NEXT: s_sub_i32 s6, s6, s5 +; GFX8-NEXT: s_max_i32 s1, s8, s1 ; GFX8-NEXT: s_sext_i32_i16 s1, s1 ; GFX8-NEXT: s_sext_i32_i16 s6, s6 -; GFX8-NEXT: s_cmp_lt_i32 s1, s6 -; GFX8-NEXT: s_cselect_b32 s1, s1, s6 +; GFX8-NEXT: s_lshr_b32 s2, s0, 16 +; GFX8-NEXT: s_min_i32 s1, s1, s6 ; GFX8-NEXT: s_sub_i32 s0, s0, s1 ; GFX8-NEXT: s_sext_i32_i16 s1, s2 -; GFX8-NEXT: s_cmp_gt_i32 s1, s7 -; GFX8-NEXT: s_cselect_b32 s6, s1, s7 +; GFX8-NEXT: s_max_i32 s6, s1, s7 ; GFX8-NEXT: s_sub_i32 s4, s6, s4 -; GFX8-NEXT: s_cmp_lt_i32 s1, s7 -; GFX8-NEXT: s_cselect_b32 s1, s1, s7 -; GFX8-NEXT: s_sub_i32 s1, s1, s5 +; GFX8-NEXT: s_min_i32 s1, s1, s7 ; GFX8-NEXT: s_sext_i32_i16 s4, s4 ; GFX8-NEXT: s_sext_i32_i16 s3, s3 -; GFX8-NEXT: s_cmp_gt_i32 s4, s3 -; GFX8-NEXT: s_cselect_b32 s3, s4, s3 +; GFX8-NEXT: s_sub_i32 s1, s1, s5 +; GFX8-NEXT: s_max_i32 s3, s4, s3 ; GFX8-NEXT: s_sext_i32_i16 s3, s3 ; GFX8-NEXT: s_sext_i32_i16 s1, s1 -; GFX8-NEXT: s_cmp_lt_i32 s3, s1 -; GFX8-NEXT: s_cselect_b32 s1, s3, s1 +; GFX8-NEXT: s_min_i32 s1, s3, s1 ; GFX8-NEXT: s_sub_i32 s1, s2, s1 ; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 ; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000 @@ -3189,25 +2841,21 @@ ; GFX6-LABEL: ssubsat_v2i16_sv: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_lshl_b32 s0, s0, 16 -; GFX6-NEXT: s_cmp_gt_i32 s0, -1 ; GFX6-NEXT: s_brev_b32 s2, -2 -; GFX6-NEXT: s_cselect_b32 s4, s0, -1 -; GFX6-NEXT: s_sub_i32 s4, s4, s2 -; GFX6-NEXT: s_cmp_lt_i32 s0, -1 +; GFX6-NEXT: s_max_i32 s4, s0, -1 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX6-NEXT: s_sub_i32 s4, s4, s2 ; GFX6-NEXT: s_brev_b32 s3, 1 -; GFX6-NEXT: s_cselect_b32 s5, s0, -1 +; GFX6-NEXT: s_min_i32 s5, s0, -1 ; GFX6-NEXT: s_sub_i32 s5, s5, s3 ; GFX6-NEXT: v_max_i32_e32 v0, s4, v0 ; GFX6-NEXT: v_min_i32_e32 v0, s5, v0 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s0, v0 ; GFX6-NEXT: s_lshl_b32 s0, s1, 16 -; GFX6-NEXT: s_cmp_gt_i32 s0, -1 -; GFX6-NEXT: s_cselect_b32 s1, s0, -1 +; GFX6-NEXT: s_max_i32 s1, s0, -1 ; GFX6-NEXT: s_sub_i32 s1, s1, s2 -; GFX6-NEXT: s_cmp_lt_i32 s0, -1 -; GFX6-NEXT: s_cselect_b32 s2, s0, -1 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX6-NEXT: s_min_i32 s2, s0, -1 ; GFX6-NEXT: s_sub_i32 s2, s2, s3 ; GFX6-NEXT: v_max_i32_e32 v1, s1, v1 ; GFX6-NEXT: v_min_i32_e32 v1, s2, v1 @@ -3223,25 +2871,21 @@ ; ; GFX8-LABEL: ssubsat_v2i16_sv: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_lshr_b32 s1, s0, 16 ; GFX8-NEXT: s_sext_i32_i16 s4, s0 ; GFX8-NEXT: s_sext_i32_i16 s5, -1 -; GFX8-NEXT: s_cmp_gt_i32 s4, s5 +; GFX8-NEXT: s_max_i32 s6, s4, s5 ; GFX8-NEXT: s_movk_i32 s2, 0x7fff -; GFX8-NEXT: s_cselect_b32 s6, s4, s5 ; GFX8-NEXT: s_sub_i32 s6, s6, s2 -; GFX8-NEXT: s_cmp_lt_i32 s4, s5 ; GFX8-NEXT: s_movk_i32 s3, 0x8000 -; GFX8-NEXT: s_cselect_b32 s4, s4, s5 +; GFX8-NEXT: s_min_i32 s4, s4, s5 +; GFX8-NEXT: s_lshr_b32 s1, s0, 16 ; GFX8-NEXT: s_sub_i32 s4, s4, s3 ; GFX8-NEXT: v_max_i16_e32 v1, s6, v0 ; GFX8-NEXT: v_min_i16_e32 v1, s4, v1 ; GFX8-NEXT: s_sext_i32_i16 s4, s1 -; GFX8-NEXT: s_cmp_gt_i32 s4, s5 -; GFX8-NEXT: s_cselect_b32 s6, s4, s5 +; GFX8-NEXT: s_max_i32 s6, s4, s5 ; GFX8-NEXT: s_sub_i32 s2, s6, s2 -; GFX8-NEXT: s_cmp_lt_i32 s4, s5 -; GFX8-NEXT: s_cselect_b32 s4, s4, s5 +; GFX8-NEXT: s_min_i32 s4, s4, s5 ; GFX8-NEXT: v_mov_b32_e32 v2, s2 ; GFX8-NEXT: v_max_i16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX8-NEXT: s_sub_i32 s3, s4, s3 @@ -3467,64 +3111,48 @@ ; GFX6-LABEL: s_ssubsat_v4i16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_lshl_b32 s0, s0, 16 -; GFX6-NEXT: s_lshl_b32 s4, s4, 16 -; GFX6-NEXT: s_cmp_gt_i32 s0, -1 ; GFX6-NEXT: s_brev_b32 s8, -2 -; GFX6-NEXT: s_cselect_b32 s10, s0, -1 +; GFX6-NEXT: s_max_i32 s10, s0, -1 +; GFX6-NEXT: s_lshl_b32 s4, s4, 16 ; GFX6-NEXT: s_sub_i32 s10, s10, s8 -; GFX6-NEXT: s_cmp_lt_i32 s0, -1 ; GFX6-NEXT: s_brev_b32 s9, 1 -; GFX6-NEXT: s_cselect_b32 s11, s0, -1 +; GFX6-NEXT: s_min_i32 s11, s0, -1 ; GFX6-NEXT: s_sub_i32 s11, s11, s9 -; GFX6-NEXT: s_cmp_gt_i32 s10, s4 -; GFX6-NEXT: s_cselect_b32 s4, s10, s4 -; GFX6-NEXT: s_cmp_lt_i32 s4, s11 -; GFX6-NEXT: s_cselect_b32 s4, s4, s11 -; GFX6-NEXT: s_sub_i32 s0, s0, s4 -; GFX6-NEXT: s_ashr_i32 s0, s0, 16 +; GFX6-NEXT: s_max_i32 s4, s10, s4 +; GFX6-NEXT: s_min_i32 s4, s4, s11 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16 +; GFX6-NEXT: s_sub_i32 s0, s0, s4 ; GFX6-NEXT: s_lshl_b32 s4, s5, 16 -; GFX6-NEXT: s_cmp_gt_i32 s1, -1 -; GFX6-NEXT: s_cselect_b32 s5, s1, -1 +; GFX6-NEXT: s_max_i32 s5, s1, -1 +; GFX6-NEXT: s_min_i32 s10, s1, -1 ; GFX6-NEXT: s_sub_i32 s5, s5, s8 -; GFX6-NEXT: s_cmp_lt_i32 s1, -1 -; GFX6-NEXT: s_cselect_b32 s10, s1, -1 ; GFX6-NEXT: s_sub_i32 s10, s10, s9 -; GFX6-NEXT: s_cmp_gt_i32 s5, s4 -; GFX6-NEXT: s_cselect_b32 s4, s5, s4 -; GFX6-NEXT: s_cmp_lt_i32 s4, s10 -; GFX6-NEXT: s_cselect_b32 s4, s4, s10 -; GFX6-NEXT: s_sub_i32 s1, s1, s4 -; GFX6-NEXT: s_ashr_i32 s1, s1, 16 +; GFX6-NEXT: s_max_i32 s4, s5, s4 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16 +; GFX6-NEXT: s_max_i32 s5, s2, -1 +; GFX6-NEXT: s_min_i32 s4, s4, s10 +; GFX6-NEXT: s_sub_i32 s1, s1, s4 ; GFX6-NEXT: s_lshl_b32 s4, s6, 16 -; GFX6-NEXT: s_cmp_gt_i32 s2, -1 -; GFX6-NEXT: s_cselect_b32 s5, s2, -1 +; GFX6-NEXT: s_min_i32 s6, s2, -1 ; GFX6-NEXT: s_sub_i32 s5, s5, s8 -; GFX6-NEXT: s_cmp_lt_i32 s2, -1 -; GFX6-NEXT: s_cselect_b32 s6, s2, -1 ; GFX6-NEXT: s_sub_i32 s6, s6, s9 -; GFX6-NEXT: s_cmp_gt_i32 s5, s4 -; GFX6-NEXT: s_cselect_b32 s4, s5, s4 -; GFX6-NEXT: s_cmp_lt_i32 s4, s6 -; GFX6-NEXT: s_cselect_b32 s4, s4, s6 -; GFX6-NEXT: s_sub_i32 s2, s2, s4 -; GFX6-NEXT: s_ashr_i32 s2, s2, 16 +; GFX6-NEXT: s_max_i32 s4, s5, s4 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16 +; GFX6-NEXT: s_min_i32 s4, s4, s6 +; GFX6-NEXT: s_max_i32 s5, s3, -1 +; GFX6-NEXT: s_sub_i32 s2, s2, s4 +; GFX6-NEXT: s_min_i32 s6, s3, -1 ; GFX6-NEXT: s_lshl_b32 s4, s7, 16 -; GFX6-NEXT: s_cmp_gt_i32 s3, -1 -; GFX6-NEXT: s_cselect_b32 s5, s3, -1 ; GFX6-NEXT: s_sub_i32 s5, s5, s8 -; GFX6-NEXT: s_cmp_lt_i32 s3, -1 -; GFX6-NEXT: s_cselect_b32 s6, s3, -1 ; GFX6-NEXT: s_sub_i32 s6, s6, s9 -; GFX6-NEXT: s_cmp_gt_i32 s5, s4 -; GFX6-NEXT: s_cselect_b32 s4, s5, s4 -; GFX6-NEXT: s_cmp_lt_i32 s4, s6 -; GFX6-NEXT: s_cselect_b32 s4, s4, s6 +; GFX6-NEXT: s_max_i32 s4, s5, s4 +; GFX6-NEXT: s_min_i32 s4, s4, s6 ; GFX6-NEXT: s_sub_i32 s3, s3, s4 +; GFX6-NEXT: s_ashr_i32 s1, s1, 16 ; GFX6-NEXT: s_mov_b32 s4, 0xffff +; GFX6-NEXT: s_ashr_i32 s0, s0, 16 ; GFX6-NEXT: s_and_b32 s1, s1, s4 +; GFX6-NEXT: s_ashr_i32 s2, s2, 16 ; GFX6-NEXT: s_ashr_i32 s3, s3, 16 ; GFX6-NEXT: s_and_b32 s0, s0, s4 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16 @@ -3537,76 +3165,60 @@ ; ; GFX8-LABEL: s_ssubsat_v4i16: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_lshr_b32 s6, s2, 16 -; GFX8-NEXT: s_lshr_b32 s7, s3, 16 -; GFX8-NEXT: s_lshr_b32 s4, s0, 16 -; GFX8-NEXT: s_lshr_b32 s5, s1, 16 ; GFX8-NEXT: s_sext_i32_i16 s10, s0 ; GFX8-NEXT: s_sext_i32_i16 s11, -1 -; GFX8-NEXT: s_cmp_gt_i32 s10, s11 +; GFX8-NEXT: s_max_i32 s12, s10, s11 ; GFX8-NEXT: s_movk_i32 s8, 0x7fff -; GFX8-NEXT: s_cselect_b32 s12, s10, s11 ; GFX8-NEXT: s_sub_i32 s12, s12, s8 -; GFX8-NEXT: s_cmp_lt_i32 s10, s11 +; GFX8-NEXT: s_lshr_b32 s6, s2, 16 ; GFX8-NEXT: s_movk_i32 s9, 0x8000 -; GFX8-NEXT: s_cselect_b32 s10, s10, s11 -; GFX8-NEXT: s_sub_i32 s10, s10, s9 +; GFX8-NEXT: s_min_i32 s10, s10, s11 ; GFX8-NEXT: s_sext_i32_i16 s12, s12 ; GFX8-NEXT: s_sext_i32_i16 s2, s2 -; GFX8-NEXT: s_cmp_gt_i32 s12, s2 -; GFX8-NEXT: s_cselect_b32 s2, s12, s2 +; GFX8-NEXT: s_sub_i32 s10, s10, s9 +; GFX8-NEXT: s_max_i32 s2, s12, s2 ; GFX8-NEXT: s_sext_i32_i16 s2, s2 ; GFX8-NEXT: s_sext_i32_i16 s10, s10 -; GFX8-NEXT: s_cmp_lt_i32 s2, s10 -; GFX8-NEXT: s_cselect_b32 s2, s2, s10 +; GFX8-NEXT: s_lshr_b32 s4, s0, 16 +; GFX8-NEXT: s_min_i32 s2, s2, s10 ; GFX8-NEXT: s_sub_i32 s0, s0, s2 ; GFX8-NEXT: s_sext_i32_i16 s2, s4 -; GFX8-NEXT: s_cmp_gt_i32 s2, s11 -; GFX8-NEXT: s_cselect_b32 s10, s2, s11 +; GFX8-NEXT: s_max_i32 s10, s2, s11 ; GFX8-NEXT: s_sub_i32 s10, s10, s8 -; GFX8-NEXT: s_cmp_lt_i32 s2, s11 -; GFX8-NEXT: s_cselect_b32 s2, s2, s11 -; GFX8-NEXT: s_sub_i32 s2, s2, s9 +; GFX8-NEXT: s_min_i32 s2, s2, s11 ; GFX8-NEXT: s_sext_i32_i16 s10, s10 ; GFX8-NEXT: s_sext_i32_i16 s6, s6 -; GFX8-NEXT: s_cmp_gt_i32 s10, s6 -; GFX8-NEXT: s_cselect_b32 s6, s10, s6 +; GFX8-NEXT: s_sub_i32 s2, s2, s9 +; GFX8-NEXT: s_max_i32 s6, s10, s6 ; GFX8-NEXT: s_sext_i32_i16 s6, s6 ; GFX8-NEXT: s_sext_i32_i16 s2, s2 -; GFX8-NEXT: s_cmp_lt_i32 s6, s2 -; GFX8-NEXT: s_cselect_b32 s2, s6, s2 +; GFX8-NEXT: s_min_i32 s2, s6, s2 ; GFX8-NEXT: s_sub_i32 s2, s4, s2 ; GFX8-NEXT: s_sext_i32_i16 s4, s1 -; GFX8-NEXT: s_cmp_gt_i32 s4, s11 -; GFX8-NEXT: s_cselect_b32 s6, s4, s11 +; GFX8-NEXT: s_max_i32 s6, s4, s11 ; GFX8-NEXT: s_sub_i32 s6, s6, s8 -; GFX8-NEXT: s_cmp_lt_i32 s4, s11 -; GFX8-NEXT: s_cselect_b32 s4, s4, s11 -; GFX8-NEXT: s_sub_i32 s4, s4, s9 +; GFX8-NEXT: s_min_i32 s4, s4, s11 +; GFX8-NEXT: s_lshr_b32 s7, s3, 16 ; GFX8-NEXT: s_sext_i32_i16 s6, s6 ; GFX8-NEXT: s_sext_i32_i16 s3, s3 -; GFX8-NEXT: s_cmp_gt_i32 s6, s3 -; GFX8-NEXT: s_cselect_b32 s3, s6, s3 +; GFX8-NEXT: s_max_i32 s3, s6, s3 +; GFX8-NEXT: s_sub_i32 s4, s4, s9 ; GFX8-NEXT: s_sext_i32_i16 s3, s3 ; GFX8-NEXT: s_sext_i32_i16 s4, s4 -; GFX8-NEXT: s_cmp_lt_i32 s3, s4 -; GFX8-NEXT: s_cselect_b32 s3, s3, s4 +; GFX8-NEXT: s_lshr_b32 s5, s1, 16 +; GFX8-NEXT: s_min_i32 s3, s3, s4 ; GFX8-NEXT: s_sub_i32 s1, s1, s3 ; GFX8-NEXT: s_sext_i32_i16 s3, s5 -; GFX8-NEXT: s_cmp_gt_i32 s3, s11 -; GFX8-NEXT: s_cselect_b32 s4, s3, s11 +; GFX8-NEXT: s_max_i32 s4, s3, s11 ; GFX8-NEXT: s_sub_i32 s4, s4, s8 -; GFX8-NEXT: s_cmp_lt_i32 s3, s11 -; GFX8-NEXT: s_cselect_b32 s3, s3, s11 -; GFX8-NEXT: s_sub_i32 s3, s3, s9 +; GFX8-NEXT: s_min_i32 s3, s3, s11 ; GFX8-NEXT: s_sext_i32_i16 s4, s4 ; GFX8-NEXT: s_sext_i32_i16 s6, s7 -; GFX8-NEXT: s_cmp_gt_i32 s4, s6 -; GFX8-NEXT: s_cselect_b32 s4, s4, s6 +; GFX8-NEXT: s_sub_i32 s3, s3, s9 +; GFX8-NEXT: s_max_i32 s4, s4, s6 ; GFX8-NEXT: s_sext_i32_i16 s4, s4 ; GFX8-NEXT: s_sext_i32_i16 s3, s3 -; GFX8-NEXT: s_cmp_lt_i32 s4, s3 -; GFX8-NEXT: s_cselect_b32 s3, s4, s3 +; GFX8-NEXT: s_min_i32 s3, s4, s3 ; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 ; GFX8-NEXT: s_sub_i32 s3, s5, s3 ; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000 @@ -3816,92 +3428,67 @@ ; GFX6-LABEL: s_ssubsat_v6i16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_lshl_b32 s0, s0, 16 -; GFX6-NEXT: s_lshl_b32 s6, s6, 16 -; GFX6-NEXT: s_cmp_gt_i32 s0, -1 ; GFX6-NEXT: s_brev_b32 s12, -2 -; GFX6-NEXT: s_cselect_b32 s14, s0, -1 +; GFX6-NEXT: s_max_i32 s14, s0, -1 +; GFX6-NEXT: s_lshl_b32 s6, s6, 16 ; GFX6-NEXT: s_sub_i32 s14, s14, s12 -; GFX6-NEXT: s_cmp_lt_i32 s0, -1 ; GFX6-NEXT: s_brev_b32 s13, 1 -; GFX6-NEXT: s_cselect_b32 s15, s0, -1 +; GFX6-NEXT: s_min_i32 s15, s0, -1 ; GFX6-NEXT: s_sub_i32 s15, s15, s13 -; GFX6-NEXT: s_cmp_gt_i32 s14, s6 -; GFX6-NEXT: s_cselect_b32 s6, s14, s6 -; GFX6-NEXT: s_cmp_lt_i32 s6, s15 -; GFX6-NEXT: s_cselect_b32 s6, s6, s15 -; GFX6-NEXT: s_sub_i32 s0, s0, s6 -; GFX6-NEXT: s_ashr_i32 s0, s0, 16 +; GFX6-NEXT: s_max_i32 s6, s14, s6 +; GFX6-NEXT: s_min_i32 s6, s6, s15 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16 +; GFX6-NEXT: s_sub_i32 s0, s0, s6 ; GFX6-NEXT: s_lshl_b32 s6, s7, 16 -; GFX6-NEXT: s_cmp_gt_i32 s1, -1 -; GFX6-NEXT: s_cselect_b32 s7, s1, -1 +; GFX6-NEXT: s_max_i32 s7, s1, -1 +; GFX6-NEXT: s_min_i32 s14, s1, -1 ; GFX6-NEXT: s_sub_i32 s7, s7, s12 -; GFX6-NEXT: s_cmp_lt_i32 s1, -1 -; GFX6-NEXT: s_cselect_b32 s14, s1, -1 ; GFX6-NEXT: s_sub_i32 s14, s14, s13 -; GFX6-NEXT: s_cmp_gt_i32 s7, s6 -; GFX6-NEXT: s_cselect_b32 s6, s7, s6 -; GFX6-NEXT: s_cmp_lt_i32 s6, s14 -; GFX6-NEXT: s_cselect_b32 s6, s6, s14 -; GFX6-NEXT: s_sub_i32 s1, s1, s6 -; GFX6-NEXT: s_ashr_i32 s1, s1, 16 +; GFX6-NEXT: s_max_i32 s6, s7, s6 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16 +; GFX6-NEXT: s_max_i32 s7, s2, -1 +; GFX6-NEXT: s_min_i32 s6, s6, s14 +; GFX6-NEXT: s_sub_i32 s1, s1, s6 ; GFX6-NEXT: s_lshl_b32 s6, s8, 16 -; GFX6-NEXT: s_cmp_gt_i32 s2, -1 -; GFX6-NEXT: s_cselect_b32 s7, s2, -1 +; GFX6-NEXT: s_min_i32 s8, s2, -1 ; GFX6-NEXT: s_sub_i32 s7, s7, s12 -; GFX6-NEXT: s_cmp_lt_i32 s2, -1 -; GFX6-NEXT: s_cselect_b32 s8, s2, -1 ; GFX6-NEXT: s_sub_i32 s8, s8, s13 -; GFX6-NEXT: s_cmp_gt_i32 s7, s6 -; GFX6-NEXT: s_cselect_b32 s6, s7, s6 -; GFX6-NEXT: s_cmp_lt_i32 s6, s8 -; GFX6-NEXT: s_cselect_b32 s6, s6, s8 -; GFX6-NEXT: s_sub_i32 s2, s2, s6 -; GFX6-NEXT: s_ashr_i32 s2, s2, 16 +; GFX6-NEXT: s_max_i32 s6, s7, s6 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16 +; GFX6-NEXT: s_min_i32 s6, s6, s8 +; GFX6-NEXT: s_max_i32 s7, s3, -1 +; GFX6-NEXT: s_sub_i32 s2, s2, s6 +; GFX6-NEXT: s_min_i32 s8, s3, -1 ; GFX6-NEXT: s_lshl_b32 s6, s9, 16 -; GFX6-NEXT: s_cmp_gt_i32 s3, -1 -; GFX6-NEXT: s_cselect_b32 s7, s3, -1 ; GFX6-NEXT: s_sub_i32 s7, s7, s12 -; GFX6-NEXT: s_cmp_lt_i32 s3, -1 -; GFX6-NEXT: s_cselect_b32 s8, s3, -1 ; GFX6-NEXT: s_sub_i32 s8, s8, s13 -; GFX6-NEXT: s_cmp_gt_i32 s7, s6 -; GFX6-NEXT: s_cselect_b32 s6, s7, s6 -; GFX6-NEXT: s_cmp_lt_i32 s6, s8 -; GFX6-NEXT: s_cselect_b32 s6, s6, s8 -; GFX6-NEXT: s_sub_i32 s3, s3, s6 -; GFX6-NEXT: s_ashr_i32 s3, s3, 16 +; GFX6-NEXT: s_max_i32 s6, s7, s6 ; GFX6-NEXT: s_lshl_b32 s4, s4, 16 +; GFX6-NEXT: s_min_i32 s6, s6, s8 +; GFX6-NEXT: s_max_i32 s7, s4, -1 +; GFX6-NEXT: s_sub_i32 s3, s3, s6 +; GFX6-NEXT: s_min_i32 s8, s4, -1 ; GFX6-NEXT: s_lshl_b32 s6, s10, 16 -; GFX6-NEXT: s_cmp_gt_i32 s4, -1 -; GFX6-NEXT: s_cselect_b32 s7, s4, -1 ; GFX6-NEXT: s_sub_i32 s7, s7, s12 -; GFX6-NEXT: s_cmp_lt_i32 s4, -1 -; GFX6-NEXT: s_cselect_b32 s8, s4, -1 ; GFX6-NEXT: s_sub_i32 s8, s8, s13 -; GFX6-NEXT: s_cmp_gt_i32 s7, s6 -; GFX6-NEXT: s_cselect_b32 s6, s7, s6 -; GFX6-NEXT: s_cmp_lt_i32 s6, s8 -; GFX6-NEXT: s_cselect_b32 s6, s6, s8 -; GFX6-NEXT: s_sub_i32 s4, s4, s6 -; GFX6-NEXT: s_ashr_i32 s4, s4, 16 +; GFX6-NEXT: s_max_i32 s6, s7, s6 ; GFX6-NEXT: s_lshl_b32 s5, s5, 16 +; GFX6-NEXT: s_min_i32 s6, s6, s8 +; GFX6-NEXT: s_max_i32 s7, s5, -1 +; GFX6-NEXT: s_sub_i32 s4, s4, s6 +; GFX6-NEXT: s_min_i32 s8, s5, -1 ; GFX6-NEXT: s_lshl_b32 s6, s11, 16 -; GFX6-NEXT: s_cmp_gt_i32 s5, -1 -; GFX6-NEXT: s_cselect_b32 s7, s5, -1 ; GFX6-NEXT: s_sub_i32 s7, s7, s12 -; GFX6-NEXT: s_cmp_lt_i32 s5, -1 -; GFX6-NEXT: s_cselect_b32 s8, s5, -1 ; GFX6-NEXT: s_sub_i32 s8, s8, s13 -; GFX6-NEXT: s_cmp_gt_i32 s7, s6 -; GFX6-NEXT: s_cselect_b32 s6, s7, s6 -; GFX6-NEXT: s_cmp_lt_i32 s6, s8 -; GFX6-NEXT: s_cselect_b32 s6, s6, s8 +; GFX6-NEXT: s_max_i32 s6, s7, s6 +; GFX6-NEXT: s_min_i32 s6, s6, s8 ; GFX6-NEXT: s_sub_i32 s5, s5, s6 +; GFX6-NEXT: s_ashr_i32 s1, s1, 16 ; GFX6-NEXT: s_mov_b32 s6, 0xffff +; GFX6-NEXT: s_ashr_i32 s0, s0, 16 ; GFX6-NEXT: s_and_b32 s1, s1, s6 +; GFX6-NEXT: s_ashr_i32 s2, s2, 16 +; GFX6-NEXT: s_ashr_i32 s3, s3, 16 ; GFX6-NEXT: s_and_b32 s0, s0, s6 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16 ; GFX6-NEXT: s_or_b32 s0, s0, s1 @@ -3910,6 +3497,7 @@ ; GFX6-NEXT: s_ashr_i32 s5, s5, 16 ; GFX6-NEXT: s_and_b32 s3, s5, s6 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16 +; GFX6-NEXT: s_ashr_i32 s4, s4, 16 ; GFX6-NEXT: s_or_b32 s1, s1, s2 ; GFX6-NEXT: s_and_b32 s2, s4, s6 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16 @@ -3918,113 +3506,89 @@ ; ; GFX8-LABEL: s_ssubsat_v6i16: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_lshr_b32 s9, s3, 16 -; GFX8-NEXT: s_lshr_b32 s10, s4, 16 -; GFX8-NEXT: s_lshr_b32 s11, s5, 16 -; GFX8-NEXT: s_lshr_b32 s6, s0, 16 -; GFX8-NEXT: s_lshr_b32 s7, s1, 16 -; GFX8-NEXT: s_lshr_b32 s8, s2, 16 ; GFX8-NEXT: s_sext_i32_i16 s14, s0 ; GFX8-NEXT: s_sext_i32_i16 s15, -1 -; GFX8-NEXT: s_cmp_gt_i32 s14, s15 +; GFX8-NEXT: s_max_i32 s16, s14, s15 ; GFX8-NEXT: s_movk_i32 s12, 0x7fff -; GFX8-NEXT: s_cselect_b32 s16, s14, s15 ; GFX8-NEXT: s_sub_i32 s16, s16, s12 -; GFX8-NEXT: s_cmp_lt_i32 s14, s15 +; GFX8-NEXT: s_lshr_b32 s9, s3, 16 ; GFX8-NEXT: s_movk_i32 s13, 0x8000 -; GFX8-NEXT: s_cselect_b32 s14, s14, s15 -; GFX8-NEXT: s_sub_i32 s14, s14, s13 +; GFX8-NEXT: s_min_i32 s14, s14, s15 ; GFX8-NEXT: s_sext_i32_i16 s16, s16 ; GFX8-NEXT: s_sext_i32_i16 s3, s3 -; GFX8-NEXT: s_cmp_gt_i32 s16, s3 -; GFX8-NEXT: s_cselect_b32 s3, s16, s3 +; GFX8-NEXT: s_sub_i32 s14, s14, s13 +; GFX8-NEXT: s_max_i32 s3, s16, s3 ; GFX8-NEXT: s_sext_i32_i16 s3, s3 ; GFX8-NEXT: s_sext_i32_i16 s14, s14 -; GFX8-NEXT: s_cmp_lt_i32 s3, s14 -; GFX8-NEXT: s_cselect_b32 s3, s3, s14 +; GFX8-NEXT: s_lshr_b32 s6, s0, 16 +; GFX8-NEXT: s_min_i32 s3, s3, s14 ; GFX8-NEXT: s_sub_i32 s0, s0, s3 ; GFX8-NEXT: s_sext_i32_i16 s3, s6 -; GFX8-NEXT: s_cmp_gt_i32 s3, s15 -; GFX8-NEXT: s_cselect_b32 s14, s3, s15 +; GFX8-NEXT: s_max_i32 s14, s3, s15 ; GFX8-NEXT: s_sub_i32 s14, s14, s12 -; GFX8-NEXT: s_cmp_lt_i32 s3, s15 -; GFX8-NEXT: s_cselect_b32 s3, s3, s15 -; GFX8-NEXT: s_sub_i32 s3, s3, s13 +; GFX8-NEXT: s_min_i32 s3, s3, s15 ; GFX8-NEXT: s_sext_i32_i16 s14, s14 ; GFX8-NEXT: s_sext_i32_i16 s9, s9 -; GFX8-NEXT: s_cmp_gt_i32 s14, s9 -; GFX8-NEXT: s_cselect_b32 s9, s14, s9 +; GFX8-NEXT: s_sub_i32 s3, s3, s13 +; GFX8-NEXT: s_max_i32 s9, s14, s9 ; GFX8-NEXT: s_sext_i32_i16 s9, s9 ; GFX8-NEXT: s_sext_i32_i16 s3, s3 -; GFX8-NEXT: s_cmp_lt_i32 s9, s3 -; GFX8-NEXT: s_cselect_b32 s3, s9, s3 +; GFX8-NEXT: s_min_i32 s3, s9, s3 ; GFX8-NEXT: s_sub_i32 s3, s6, s3 ; GFX8-NEXT: s_sext_i32_i16 s6, s1 -; GFX8-NEXT: s_cmp_gt_i32 s6, s15 -; GFX8-NEXT: s_cselect_b32 s9, s6, s15 +; GFX8-NEXT: s_max_i32 s9, s6, s15 ; GFX8-NEXT: s_sub_i32 s9, s9, s12 -; GFX8-NEXT: s_cmp_lt_i32 s6, s15 -; GFX8-NEXT: s_cselect_b32 s6, s6, s15 -; GFX8-NEXT: s_sub_i32 s6, s6, s13 +; GFX8-NEXT: s_min_i32 s6, s6, s15 +; GFX8-NEXT: s_lshr_b32 s10, s4, 16 ; GFX8-NEXT: s_sext_i32_i16 s9, s9 ; GFX8-NEXT: s_sext_i32_i16 s4, s4 -; GFX8-NEXT: s_cmp_gt_i32 s9, s4 -; GFX8-NEXT: s_cselect_b32 s4, s9, s4 +; GFX8-NEXT: s_max_i32 s4, s9, s4 +; GFX8-NEXT: s_sub_i32 s6, s6, s13 ; GFX8-NEXT: s_sext_i32_i16 s4, s4 ; GFX8-NEXT: s_sext_i32_i16 s6, s6 -; GFX8-NEXT: s_cmp_lt_i32 s4, s6 -; GFX8-NEXT: s_cselect_b32 s4, s4, s6 +; GFX8-NEXT: s_lshr_b32 s7, s1, 16 +; GFX8-NEXT: s_min_i32 s4, s4, s6 ; GFX8-NEXT: s_sub_i32 s1, s1, s4 ; GFX8-NEXT: s_sext_i32_i16 s4, s7 -; GFX8-NEXT: s_cmp_gt_i32 s4, s15 -; GFX8-NEXT: s_cselect_b32 s6, s4, s15 +; GFX8-NEXT: s_max_i32 s6, s4, s15 ; GFX8-NEXT: s_sub_i32 s6, s6, s12 -; GFX8-NEXT: s_cmp_lt_i32 s4, s15 -; GFX8-NEXT: s_cselect_b32 s4, s4, s15 -; GFX8-NEXT: s_sub_i32 s4, s4, s13 +; GFX8-NEXT: s_min_i32 s4, s4, s15 ; GFX8-NEXT: s_sext_i32_i16 s6, s6 ; GFX8-NEXT: s_sext_i32_i16 s9, s10 -; GFX8-NEXT: s_cmp_gt_i32 s6, s9 -; GFX8-NEXT: s_cselect_b32 s6, s6, s9 +; GFX8-NEXT: s_sub_i32 s4, s4, s13 +; GFX8-NEXT: s_max_i32 s6, s6, s9 ; GFX8-NEXT: s_sext_i32_i16 s6, s6 ; GFX8-NEXT: s_sext_i32_i16 s4, s4 -; GFX8-NEXT: s_cmp_lt_i32 s6, s4 -; GFX8-NEXT: s_cselect_b32 s4, s6, s4 -; GFX8-NEXT: s_sub_i32 s4, s7, s4 +; GFX8-NEXT: s_min_i32 s4, s6, s4 ; GFX8-NEXT: s_sext_i32_i16 s6, s2 -; GFX8-NEXT: s_cmp_gt_i32 s6, s15 -; GFX8-NEXT: s_cselect_b32 s7, s6, s15 +; GFX8-NEXT: s_sub_i32 s4, s7, s4 +; GFX8-NEXT: s_max_i32 s7, s6, s15 ; GFX8-NEXT: s_sub_i32 s7, s7, s12 -; GFX8-NEXT: s_cmp_lt_i32 s6, s15 -; GFX8-NEXT: s_cselect_b32 s6, s6, s15 -; GFX8-NEXT: s_sub_i32 s6, s6, s13 +; GFX8-NEXT: s_min_i32 s6, s6, s15 +; GFX8-NEXT: s_lshr_b32 s11, s5, 16 ; GFX8-NEXT: s_sext_i32_i16 s7, s7 ; GFX8-NEXT: s_sext_i32_i16 s5, s5 -; GFX8-NEXT: s_cmp_gt_i32 s7, s5 -; GFX8-NEXT: s_cselect_b32 s5, s7, s5 +; GFX8-NEXT: s_max_i32 s5, s7, s5 +; GFX8-NEXT: s_sub_i32 s6, s6, s13 ; GFX8-NEXT: s_sext_i32_i16 s5, s5 ; GFX8-NEXT: s_sext_i32_i16 s6, s6 -; GFX8-NEXT: s_cmp_lt_i32 s5, s6 -; GFX8-NEXT: s_cselect_b32 s5, s5, s6 +; GFX8-NEXT: s_lshr_b32 s8, s2, 16 +; GFX8-NEXT: s_min_i32 s5, s5, s6 ; GFX8-NEXT: s_sub_i32 s2, s2, s5 ; GFX8-NEXT: s_sext_i32_i16 s5, s8 -; GFX8-NEXT: s_cmp_gt_i32 s5, s15 -; GFX8-NEXT: s_cselect_b32 s6, s5, s15 +; GFX8-NEXT: s_max_i32 s6, s5, s15 ; GFX8-NEXT: s_sub_i32 s6, s6, s12 -; GFX8-NEXT: s_cmp_lt_i32 s5, s15 -; GFX8-NEXT: s_cselect_b32 s5, s5, s15 -; GFX8-NEXT: s_sub_i32 s5, s5, s13 +; GFX8-NEXT: s_min_i32 s5, s5, s15 ; GFX8-NEXT: s_sext_i32_i16 s6, s6 ; GFX8-NEXT: s_sext_i32_i16 s7, s11 -; GFX8-NEXT: s_cmp_gt_i32 s6, s7 -; GFX8-NEXT: s_cselect_b32 s6, s6, s7 +; GFX8-NEXT: s_sub_i32 s5, s5, s13 +; GFX8-NEXT: s_max_i32 s6, s6, s7 +; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000 ; GFX8-NEXT: s_sext_i32_i16 s6, s6 ; GFX8-NEXT: s_sext_i32_i16 s5, s5 -; GFX8-NEXT: s_cmp_lt_i32 s6, s5 -; GFX8-NEXT: s_cselect_b32 s5, s6, s5 -; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000 ; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000 ; GFX8-NEXT: s_lshl_b32 s3, s3, 16 +; GFX8-NEXT: s_min_i32 s5, s6, s5 ; GFX8-NEXT: s_or_b32 s0, s0, s3 ; GFX8-NEXT: s_bfe_u32 s3, s4, 0x100000 ; GFX8-NEXT: s_sub_i32 s5, s8, s5 @@ -4271,132 +3835,100 @@ ; GFX6-LABEL: s_ssubsat_v8i16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_lshl_b32 s0, s0, 16 -; GFX6-NEXT: s_lshl_b32 s8, s8, 16 -; GFX6-NEXT: s_cmp_gt_i32 s0, -1 ; GFX6-NEXT: s_brev_b32 s16, -2 -; GFX6-NEXT: s_cselect_b32 s18, s0, -1 +; GFX6-NEXT: s_max_i32 s18, s0, -1 +; GFX6-NEXT: s_lshl_b32 s8, s8, 16 ; GFX6-NEXT: s_sub_i32 s18, s18, s16 -; GFX6-NEXT: s_cmp_lt_i32 s0, -1 ; GFX6-NEXT: s_brev_b32 s17, 1 -; GFX6-NEXT: s_cselect_b32 s19, s0, -1 +; GFX6-NEXT: s_min_i32 s19, s0, -1 ; GFX6-NEXT: s_sub_i32 s19, s19, s17 -; GFX6-NEXT: s_cmp_gt_i32 s18, s8 -; GFX6-NEXT: s_cselect_b32 s8, s18, s8 -; GFX6-NEXT: s_cmp_lt_i32 s8, s19 -; GFX6-NEXT: s_cselect_b32 s8, s8, s19 -; GFX6-NEXT: s_sub_i32 s0, s0, s8 -; GFX6-NEXT: s_ashr_i32 s0, s0, 16 +; GFX6-NEXT: s_max_i32 s8, s18, s8 +; GFX6-NEXT: s_min_i32 s8, s8, s19 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16 +; GFX6-NEXT: s_sub_i32 s0, s0, s8 ; GFX6-NEXT: s_lshl_b32 s8, s9, 16 -; GFX6-NEXT: s_cmp_gt_i32 s1, -1 -; GFX6-NEXT: s_cselect_b32 s9, s1, -1 +; GFX6-NEXT: s_max_i32 s9, s1, -1 +; GFX6-NEXT: s_min_i32 s18, s1, -1 ; GFX6-NEXT: s_sub_i32 s9, s9, s16 -; GFX6-NEXT: s_cmp_lt_i32 s1, -1 -; GFX6-NEXT: s_cselect_b32 s18, s1, -1 ; GFX6-NEXT: s_sub_i32 s18, s18, s17 -; GFX6-NEXT: s_cmp_gt_i32 s9, s8 -; GFX6-NEXT: s_cselect_b32 s8, s9, s8 -; GFX6-NEXT: s_cmp_lt_i32 s8, s18 -; GFX6-NEXT: s_cselect_b32 s8, s8, s18 -; GFX6-NEXT: s_sub_i32 s1, s1, s8 -; GFX6-NEXT: s_ashr_i32 s1, s1, 16 +; GFX6-NEXT: s_max_i32 s8, s9, s8 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16 +; GFX6-NEXT: s_max_i32 s9, s2, -1 +; GFX6-NEXT: s_min_i32 s8, s8, s18 +; GFX6-NEXT: s_sub_i32 s1, s1, s8 ; GFX6-NEXT: s_lshl_b32 s8, s10, 16 -; GFX6-NEXT: s_cmp_gt_i32 s2, -1 -; GFX6-NEXT: s_cselect_b32 s9, s2, -1 +; GFX6-NEXT: s_min_i32 s10, s2, -1 ; GFX6-NEXT: s_sub_i32 s9, s9, s16 -; GFX6-NEXT: s_cmp_lt_i32 s2, -1 -; GFX6-NEXT: s_cselect_b32 s10, s2, -1 ; GFX6-NEXT: s_sub_i32 s10, s10, s17 -; GFX6-NEXT: s_cmp_gt_i32 s9, s8 -; GFX6-NEXT: s_cselect_b32 s8, s9, s8 -; GFX6-NEXT: s_cmp_lt_i32 s8, s10 -; GFX6-NEXT: s_cselect_b32 s8, s8, s10 -; GFX6-NEXT: s_sub_i32 s2, s2, s8 -; GFX6-NEXT: s_ashr_i32 s2, s2, 16 +; GFX6-NEXT: s_max_i32 s8, s9, s8 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16 +; GFX6-NEXT: s_min_i32 s8, s8, s10 +; GFX6-NEXT: s_max_i32 s9, s3, -1 +; GFX6-NEXT: s_sub_i32 s2, s2, s8 +; GFX6-NEXT: s_min_i32 s10, s3, -1 ; GFX6-NEXT: s_lshl_b32 s8, s11, 16 -; GFX6-NEXT: s_cmp_gt_i32 s3, -1 -; GFX6-NEXT: s_cselect_b32 s9, s3, -1 ; GFX6-NEXT: s_sub_i32 s9, s9, s16 -; GFX6-NEXT: s_cmp_lt_i32 s3, -1 -; GFX6-NEXT: s_cselect_b32 s10, s3, -1 ; GFX6-NEXT: s_sub_i32 s10, s10, s17 -; GFX6-NEXT: s_cmp_gt_i32 s9, s8 -; GFX6-NEXT: s_cselect_b32 s8, s9, s8 -; GFX6-NEXT: s_cmp_lt_i32 s8, s10 -; GFX6-NEXT: s_cselect_b32 s8, s8, s10 -; GFX6-NEXT: s_sub_i32 s3, s3, s8 -; GFX6-NEXT: s_ashr_i32 s3, s3, 16 +; GFX6-NEXT: s_max_i32 s8, s9, s8 ; GFX6-NEXT: s_lshl_b32 s4, s4, 16 +; GFX6-NEXT: s_min_i32 s8, s8, s10 +; GFX6-NEXT: s_max_i32 s9, s4, -1 +; GFX6-NEXT: s_sub_i32 s3, s3, s8 +; GFX6-NEXT: s_min_i32 s10, s4, -1 ; GFX6-NEXT: s_lshl_b32 s8, s12, 16 -; GFX6-NEXT: s_cmp_gt_i32 s4, -1 -; GFX6-NEXT: s_cselect_b32 s9, s4, -1 ; GFX6-NEXT: s_sub_i32 s9, s9, s16 -; GFX6-NEXT: s_cmp_lt_i32 s4, -1 -; GFX6-NEXT: s_cselect_b32 s10, s4, -1 ; GFX6-NEXT: s_sub_i32 s10, s10, s17 -; GFX6-NEXT: s_cmp_gt_i32 s9, s8 -; GFX6-NEXT: s_cselect_b32 s8, s9, s8 -; GFX6-NEXT: s_cmp_lt_i32 s8, s10 -; GFX6-NEXT: s_cselect_b32 s8, s8, s10 -; GFX6-NEXT: s_sub_i32 s4, s4, s8 -; GFX6-NEXT: s_ashr_i32 s4, s4, 16 +; GFX6-NEXT: s_max_i32 s8, s9, s8 ; GFX6-NEXT: s_lshl_b32 s5, s5, 16 +; GFX6-NEXT: s_min_i32 s8, s8, s10 +; GFX6-NEXT: s_max_i32 s9, s5, -1 +; GFX6-NEXT: s_sub_i32 s4, s4, s8 +; GFX6-NEXT: s_min_i32 s10, s5, -1 ; GFX6-NEXT: s_lshl_b32 s8, s13, 16 -; GFX6-NEXT: s_cmp_gt_i32 s5, -1 -; GFX6-NEXT: s_cselect_b32 s9, s5, -1 ; GFX6-NEXT: s_sub_i32 s9, s9, s16 -; GFX6-NEXT: s_cmp_lt_i32 s5, -1 -; GFX6-NEXT: s_cselect_b32 s10, s5, -1 ; GFX6-NEXT: s_sub_i32 s10, s10, s17 -; GFX6-NEXT: s_cmp_gt_i32 s9, s8 -; GFX6-NEXT: s_cselect_b32 s8, s9, s8 -; GFX6-NEXT: s_cmp_lt_i32 s8, s10 -; GFX6-NEXT: s_cselect_b32 s8, s8, s10 -; GFX6-NEXT: s_sub_i32 s5, s5, s8 -; GFX6-NEXT: s_ashr_i32 s5, s5, 16 +; GFX6-NEXT: s_max_i32 s8, s9, s8 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16 +; GFX6-NEXT: s_min_i32 s8, s8, s10 +; GFX6-NEXT: s_max_i32 s9, s6, -1 +; GFX6-NEXT: s_sub_i32 s5, s5, s8 +; GFX6-NEXT: s_min_i32 s10, s6, -1 ; GFX6-NEXT: s_lshl_b32 s8, s14, 16 -; GFX6-NEXT: s_cmp_gt_i32 s6, -1 -; GFX6-NEXT: s_cselect_b32 s9, s6, -1 ; GFX6-NEXT: s_sub_i32 s9, s9, s16 -; GFX6-NEXT: s_cmp_lt_i32 s6, -1 -; GFX6-NEXT: s_cselect_b32 s10, s6, -1 ; GFX6-NEXT: s_sub_i32 s10, s10, s17 -; GFX6-NEXT: s_cmp_gt_i32 s9, s8 -; GFX6-NEXT: s_cselect_b32 s8, s9, s8 -; GFX6-NEXT: s_cmp_lt_i32 s8, s10 -; GFX6-NEXT: s_cselect_b32 s8, s8, s10 -; GFX6-NEXT: s_sub_i32 s6, s6, s8 -; GFX6-NEXT: s_ashr_i32 s6, s6, 16 +; GFX6-NEXT: s_max_i32 s8, s9, s8 ; GFX6-NEXT: s_lshl_b32 s7, s7, 16 +; GFX6-NEXT: s_min_i32 s8, s8, s10 +; GFX6-NEXT: s_max_i32 s9, s7, -1 +; GFX6-NEXT: s_sub_i32 s6, s6, s8 +; GFX6-NEXT: s_min_i32 s10, s7, -1 ; GFX6-NEXT: s_lshl_b32 s8, s15, 16 -; GFX6-NEXT: s_cmp_gt_i32 s7, -1 -; GFX6-NEXT: s_cselect_b32 s9, s7, -1 ; GFX6-NEXT: s_sub_i32 s9, s9, s16 -; GFX6-NEXT: s_cmp_lt_i32 s7, -1 -; GFX6-NEXT: s_cselect_b32 s10, s7, -1 ; GFX6-NEXT: s_sub_i32 s10, s10, s17 -; GFX6-NEXT: s_cmp_gt_i32 s9, s8 -; GFX6-NEXT: s_cselect_b32 s8, s9, s8 -; GFX6-NEXT: s_cmp_lt_i32 s8, s10 -; GFX6-NEXT: s_cselect_b32 s8, s8, s10 +; GFX6-NEXT: s_max_i32 s8, s9, s8 +; GFX6-NEXT: s_min_i32 s8, s8, s10 ; GFX6-NEXT: s_sub_i32 s7, s7, s8 +; GFX6-NEXT: s_ashr_i32 s1, s1, 16 ; GFX6-NEXT: s_mov_b32 s8, 0xffff +; GFX6-NEXT: s_ashr_i32 s0, s0, 16 ; GFX6-NEXT: s_and_b32 s1, s1, s8 +; GFX6-NEXT: s_ashr_i32 s2, s2, 16 +; GFX6-NEXT: s_ashr_i32 s3, s3, 16 ; GFX6-NEXT: s_and_b32 s0, s0, s8 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16 ; GFX6-NEXT: s_or_b32 s0, s0, s1 ; GFX6-NEXT: s_and_b32 s1, s2, s8 ; GFX6-NEXT: s_and_b32 s2, s3, s8 +; GFX6-NEXT: s_ashr_i32 s5, s5, 16 ; GFX6-NEXT: s_and_b32 s3, s5, s8 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16 +; GFX6-NEXT: s_ashr_i32 s4, s4, 16 ; GFX6-NEXT: s_ashr_i32 s7, s7, 16 ; GFX6-NEXT: s_or_b32 s1, s1, s2 ; GFX6-NEXT: s_and_b32 s2, s4, s8 ; GFX6-NEXT: s_and_b32 s4, s7, s8 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16 +; GFX6-NEXT: s_ashr_i32 s6, s6, 16 ; GFX6-NEXT: s_or_b32 s2, s2, s3 ; GFX6-NEXT: s_and_b32 s3, s6, s8 ; GFX6-NEXT: s_lshl_b32 s4, s4, 16 @@ -4405,151 +3937,119 @@ ; ; GFX8-LABEL: s_ssubsat_v8i16: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_lshr_b32 s12, s4, 16 -; GFX8-NEXT: s_lshr_b32 s13, s5, 16 -; GFX8-NEXT: s_lshr_b32 s14, s6, 16 -; GFX8-NEXT: s_lshr_b32 s15, s7, 16 -; GFX8-NEXT: s_lshr_b32 s8, s0, 16 -; GFX8-NEXT: s_lshr_b32 s9, s1, 16 -; GFX8-NEXT: s_lshr_b32 s10, s2, 16 -; GFX8-NEXT: s_lshr_b32 s11, s3, 16 ; GFX8-NEXT: s_sext_i32_i16 s18, s0 ; GFX8-NEXT: s_sext_i32_i16 s19, -1 -; GFX8-NEXT: s_cmp_gt_i32 s18, s19 +; GFX8-NEXT: s_max_i32 s20, s18, s19 ; GFX8-NEXT: s_movk_i32 s16, 0x7fff -; GFX8-NEXT: s_cselect_b32 s20, s18, s19 ; GFX8-NEXT: s_sub_i32 s20, s20, s16 -; GFX8-NEXT: s_cmp_lt_i32 s18, s19 +; GFX8-NEXT: s_lshr_b32 s12, s4, 16 ; GFX8-NEXT: s_movk_i32 s17, 0x8000 -; GFX8-NEXT: s_cselect_b32 s18, s18, s19 -; GFX8-NEXT: s_sub_i32 s18, s18, s17 +; GFX8-NEXT: s_min_i32 s18, s18, s19 ; GFX8-NEXT: s_sext_i32_i16 s20, s20 ; GFX8-NEXT: s_sext_i32_i16 s4, s4 -; GFX8-NEXT: s_cmp_gt_i32 s20, s4 -; GFX8-NEXT: s_cselect_b32 s4, s20, s4 +; GFX8-NEXT: s_sub_i32 s18, s18, s17 +; GFX8-NEXT: s_max_i32 s4, s20, s4 ; GFX8-NEXT: s_sext_i32_i16 s4, s4 ; GFX8-NEXT: s_sext_i32_i16 s18, s18 -; GFX8-NEXT: s_cmp_lt_i32 s4, s18 -; GFX8-NEXT: s_cselect_b32 s4, s4, s18 +; GFX8-NEXT: s_lshr_b32 s8, s0, 16 +; GFX8-NEXT: s_min_i32 s4, s4, s18 ; GFX8-NEXT: s_sub_i32 s0, s0, s4 ; GFX8-NEXT: s_sext_i32_i16 s4, s8 -; GFX8-NEXT: s_cmp_gt_i32 s4, s19 -; GFX8-NEXT: s_cselect_b32 s18, s4, s19 +; GFX8-NEXT: s_max_i32 s18, s4, s19 ; GFX8-NEXT: s_sub_i32 s18, s18, s16 -; GFX8-NEXT: s_cmp_lt_i32 s4, s19 -; GFX8-NEXT: s_cselect_b32 s4, s4, s19 -; GFX8-NEXT: s_sub_i32 s4, s4, s17 +; GFX8-NEXT: s_min_i32 s4, s4, s19 ; GFX8-NEXT: s_sext_i32_i16 s18, s18 ; GFX8-NEXT: s_sext_i32_i16 s12, s12 -; GFX8-NEXT: s_cmp_gt_i32 s18, s12 -; GFX8-NEXT: s_cselect_b32 s12, s18, s12 +; GFX8-NEXT: s_sub_i32 s4, s4, s17 +; GFX8-NEXT: s_max_i32 s12, s18, s12 ; GFX8-NEXT: s_sext_i32_i16 s12, s12 ; GFX8-NEXT: s_sext_i32_i16 s4, s4 -; GFX8-NEXT: s_cmp_lt_i32 s12, s4 -; GFX8-NEXT: s_cselect_b32 s4, s12, s4 +; GFX8-NEXT: s_min_i32 s4, s12, s4 ; GFX8-NEXT: s_sub_i32 s4, s8, s4 ; GFX8-NEXT: s_sext_i32_i16 s8, s1 -; GFX8-NEXT: s_cmp_gt_i32 s8, s19 -; GFX8-NEXT: s_cselect_b32 s12, s8, s19 +; GFX8-NEXT: s_max_i32 s12, s8, s19 ; GFX8-NEXT: s_sub_i32 s12, s12, s16 -; GFX8-NEXT: s_cmp_lt_i32 s8, s19 -; GFX8-NEXT: s_cselect_b32 s8, s8, s19 -; GFX8-NEXT: s_sub_i32 s8, s8, s17 +; GFX8-NEXT: s_min_i32 s8, s8, s19 +; GFX8-NEXT: s_lshr_b32 s13, s5, 16 ; GFX8-NEXT: s_sext_i32_i16 s12, s12 ; GFX8-NEXT: s_sext_i32_i16 s5, s5 -; GFX8-NEXT: s_cmp_gt_i32 s12, s5 -; GFX8-NEXT: s_cselect_b32 s5, s12, s5 +; GFX8-NEXT: s_max_i32 s5, s12, s5 +; GFX8-NEXT: s_sub_i32 s8, s8, s17 ; GFX8-NEXT: s_sext_i32_i16 s5, s5 ; GFX8-NEXT: s_sext_i32_i16 s8, s8 -; GFX8-NEXT: s_cmp_lt_i32 s5, s8 -; GFX8-NEXT: s_cselect_b32 s5, s5, s8 +; GFX8-NEXT: s_lshr_b32 s9, s1, 16 +; GFX8-NEXT: s_min_i32 s5, s5, s8 ; GFX8-NEXT: s_sub_i32 s1, s1, s5 ; GFX8-NEXT: s_sext_i32_i16 s5, s9 -; GFX8-NEXT: s_cmp_gt_i32 s5, s19 -; GFX8-NEXT: s_cselect_b32 s8, s5, s19 +; GFX8-NEXT: s_max_i32 s8, s5, s19 ; GFX8-NEXT: s_sub_i32 s8, s8, s16 -; GFX8-NEXT: s_cmp_lt_i32 s5, s19 -; GFX8-NEXT: s_cselect_b32 s5, s5, s19 -; GFX8-NEXT: s_sub_i32 s5, s5, s17 +; GFX8-NEXT: s_min_i32 s5, s5, s19 ; GFX8-NEXT: s_sext_i32_i16 s8, s8 ; GFX8-NEXT: s_sext_i32_i16 s12, s13 -; GFX8-NEXT: s_cmp_gt_i32 s8, s12 -; GFX8-NEXT: s_cselect_b32 s8, s8, s12 +; GFX8-NEXT: s_sub_i32 s5, s5, s17 +; GFX8-NEXT: s_max_i32 s8, s8, s12 ; GFX8-NEXT: s_sext_i32_i16 s8, s8 ; GFX8-NEXT: s_sext_i32_i16 s5, s5 -; GFX8-NEXT: s_cmp_lt_i32 s8, s5 -; GFX8-NEXT: s_cselect_b32 s5, s8, s5 -; GFX8-NEXT: s_sub_i32 s5, s9, s5 +; GFX8-NEXT: s_min_i32 s5, s8, s5 ; GFX8-NEXT: s_sext_i32_i16 s8, s2 -; GFX8-NEXT: s_cmp_gt_i32 s8, s19 -; GFX8-NEXT: s_cselect_b32 s9, s8, s19 +; GFX8-NEXT: s_sub_i32 s5, s9, s5 +; GFX8-NEXT: s_max_i32 s9, s8, s19 ; GFX8-NEXT: s_sub_i32 s9, s9, s16 -; GFX8-NEXT: s_cmp_lt_i32 s8, s19 -; GFX8-NEXT: s_cselect_b32 s8, s8, s19 -; GFX8-NEXT: s_sub_i32 s8, s8, s17 +; GFX8-NEXT: s_min_i32 s8, s8, s19 +; GFX8-NEXT: s_lshr_b32 s14, s6, 16 ; GFX8-NEXT: s_sext_i32_i16 s9, s9 ; GFX8-NEXT: s_sext_i32_i16 s6, s6 -; GFX8-NEXT: s_cmp_gt_i32 s9, s6 -; GFX8-NEXT: s_cselect_b32 s6, s9, s6 +; GFX8-NEXT: s_max_i32 s6, s9, s6 +; GFX8-NEXT: s_sub_i32 s8, s8, s17 ; GFX8-NEXT: s_sext_i32_i16 s6, s6 ; GFX8-NEXT: s_sext_i32_i16 s8, s8 -; GFX8-NEXT: s_cmp_lt_i32 s6, s8 -; GFX8-NEXT: s_cselect_b32 s6, s6, s8 +; GFX8-NEXT: s_lshr_b32 s10, s2, 16 +; GFX8-NEXT: s_min_i32 s6, s6, s8 ; GFX8-NEXT: s_sub_i32 s2, s2, s6 ; GFX8-NEXT: s_sext_i32_i16 s6, s10 -; GFX8-NEXT: s_cmp_gt_i32 s6, s19 -; GFX8-NEXT: s_cselect_b32 s8, s6, s19 +; GFX8-NEXT: s_max_i32 s8, s6, s19 ; GFX8-NEXT: s_sub_i32 s8, s8, s16 -; GFX8-NEXT: s_cmp_lt_i32 s6, s19 -; GFX8-NEXT: s_cselect_b32 s6, s6, s19 -; GFX8-NEXT: s_sub_i32 s6, s6, s17 +; GFX8-NEXT: s_min_i32 s6, s6, s19 ; GFX8-NEXT: s_sext_i32_i16 s8, s8 ; GFX8-NEXT: s_sext_i32_i16 s9, s14 -; GFX8-NEXT: s_cmp_gt_i32 s8, s9 -; GFX8-NEXT: s_cselect_b32 s8, s8, s9 +; GFX8-NEXT: s_sub_i32 s6, s6, s17 +; GFX8-NEXT: s_max_i32 s8, s8, s9 ; GFX8-NEXT: s_sext_i32_i16 s8, s8 ; GFX8-NEXT: s_sext_i32_i16 s6, s6 -; GFX8-NEXT: s_cmp_lt_i32 s8, s6 -; GFX8-NEXT: s_cselect_b32 s6, s8, s6 -; GFX8-NEXT: s_sub_i32 s6, s10, s6 +; GFX8-NEXT: s_min_i32 s6, s8, s6 ; GFX8-NEXT: s_sext_i32_i16 s8, s3 -; GFX8-NEXT: s_cmp_gt_i32 s8, s19 -; GFX8-NEXT: s_cselect_b32 s9, s8, s19 +; GFX8-NEXT: s_max_i32 s9, s8, s19 ; GFX8-NEXT: s_sub_i32 s9, s9, s16 -; GFX8-NEXT: s_cmp_lt_i32 s8, s19 -; GFX8-NEXT: s_cselect_b32 s8, s8, s19 -; GFX8-NEXT: s_sub_i32 s8, s8, s17 +; GFX8-NEXT: s_min_i32 s8, s8, s19 +; GFX8-NEXT: s_lshr_b32 s15, s7, 16 ; GFX8-NEXT: s_sext_i32_i16 s9, s9 ; GFX8-NEXT: s_sext_i32_i16 s7, s7 -; GFX8-NEXT: s_cmp_gt_i32 s9, s7 -; GFX8-NEXT: s_cselect_b32 s7, s9, s7 +; GFX8-NEXT: s_max_i32 s7, s9, s7 +; GFX8-NEXT: s_sub_i32 s8, s8, s17 ; GFX8-NEXT: s_sext_i32_i16 s7, s7 ; GFX8-NEXT: s_sext_i32_i16 s8, s8 -; GFX8-NEXT: s_cmp_lt_i32 s7, s8 -; GFX8-NEXT: s_cselect_b32 s7, s7, s8 +; GFX8-NEXT: s_lshr_b32 s11, s3, 16 +; GFX8-NEXT: s_min_i32 s7, s7, s8 ; GFX8-NEXT: s_sub_i32 s3, s3, s7 ; GFX8-NEXT: s_sext_i32_i16 s7, s11 -; GFX8-NEXT: s_cmp_gt_i32 s7, s19 -; GFX8-NEXT: s_cselect_b32 s8, s7, s19 +; GFX8-NEXT: s_max_i32 s8, s7, s19 ; GFX8-NEXT: s_sub_i32 s8, s8, s16 -; GFX8-NEXT: s_cmp_lt_i32 s7, s19 -; GFX8-NEXT: s_cselect_b32 s7, s7, s19 -; GFX8-NEXT: s_sub_i32 s7, s7, s17 +; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000 +; GFX8-NEXT: s_min_i32 s7, s7, s19 ; GFX8-NEXT: s_sext_i32_i16 s8, s8 ; GFX8-NEXT: s_sext_i32_i16 s9, s15 -; GFX8-NEXT: s_cmp_gt_i32 s8, s9 -; GFX8-NEXT: s_cselect_b32 s8, s8, s9 -; GFX8-NEXT: s_sext_i32_i16 s8, s8 -; GFX8-NEXT: s_sext_i32_i16 s7, s7 -; GFX8-NEXT: s_cmp_lt_i32 s8, s7 -; GFX8-NEXT: s_cselect_b32 s7, s8, s7 -; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000 ; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000 ; GFX8-NEXT: s_lshl_b32 s4, s4, 16 +; GFX8-NEXT: s_sub_i32 s7, s7, s17 +; GFX8-NEXT: s_max_i32 s8, s8, s9 ; GFX8-NEXT: s_or_b32 s0, s0, s4 ; GFX8-NEXT: s_bfe_u32 s4, s5, 0x100000 +; GFX8-NEXT: s_sub_i32 s6, s10, s6 +; GFX8-NEXT: s_sext_i32_i16 s8, s8 +; GFX8-NEXT: s_sext_i32_i16 s7, s7 ; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 ; GFX8-NEXT: s_lshl_b32 s4, s4, 16 +; GFX8-NEXT: s_min_i32 s7, s8, s7 ; GFX8-NEXT: s_or_b32 s1, s1, s4 ; GFX8-NEXT: s_bfe_u32 s4, s6, 0x100000 ; GFX8-NEXT: s_sub_i32 s7, s11, s7 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll @@ -53,8 +53,7 @@ ; GFX6-NEXT: s_lshl_b32 s0, s0, 25 ; GFX6-NEXT: s_lshl_b32 s1, s1, 25 ; GFX6-NEXT: s_not_b32 s2, s0 -; GFX6-NEXT: s_cmp_lt_u32 s2, s1 -; GFX6-NEXT: s_cselect_b32 s1, s2, s1 +; GFX6-NEXT: s_min_u32 s1, s2, s1 ; GFX6-NEXT: s_add_i32 s0, s0, s1 ; GFX6-NEXT: s_lshr_b32 s0, s0, 25 ; GFX6-NEXT: ; return to shader part epilog @@ -143,8 +142,7 @@ ; GFX6-NEXT: s_lshl_b32 s0, s0, 24 ; GFX6-NEXT: s_lshl_b32 s1, s1, 24 ; GFX6-NEXT: s_not_b32 s2, s0 -; GFX6-NEXT: s_cmp_lt_u32 s2, s1 -; GFX6-NEXT: s_cselect_b32 s1, s2, s1 +; GFX6-NEXT: s_min_u32 s1, s2, s1 ; GFX6-NEXT: s_add_i32 s0, s0, s1 ; GFX6-NEXT: s_lshr_b32 s0, s0, 24 ; GFX6-NEXT: ; return to shader part epilog @@ -272,17 +270,15 @@ ; GFX6-NEXT: s_lshr_b32 s3, s1, 8 ; GFX6-NEXT: s_lshl_b32 s1, s1, 24 ; GFX6-NEXT: s_not_b32 s4, s0 -; GFX6-NEXT: s_cmp_lt_u32 s4, s1 -; GFX6-NEXT: s_cselect_b32 s1, s4, s1 +; GFX6-NEXT: s_min_u32 s1, s4, s1 ; GFX6-NEXT: s_add_i32 s0, s0, s1 ; GFX6-NEXT: s_lshl_b32 s1, s2, 24 ; GFX6-NEXT: s_lshl_b32 s2, s3, 24 -; GFX6-NEXT: s_lshr_b32 s0, s0, 24 ; GFX6-NEXT: s_not_b32 s3, s1 -; GFX6-NEXT: s_cmp_lt_u32 s3, s2 -; GFX6-NEXT: s_cselect_b32 s2, s3, s2 +; GFX6-NEXT: s_min_u32 s2, s3, s2 ; GFX6-NEXT: s_add_i32 s1, s1, s2 ; GFX6-NEXT: s_lshr_b32 s1, s1, 24 +; GFX6-NEXT: s_lshr_b32 s0, s0, 24 ; GFX6-NEXT: s_lshl_b32 s1, s1, 8 ; GFX6-NEXT: s_or_b32 s0, s0, s1 ; GFX6-NEXT: ; return to shader part epilog @@ -521,31 +517,27 @@ ; GFX6-NEXT: s_lshr_b32 s7, s1, 24 ; GFX6-NEXT: s_lshl_b32 s1, s1, 24 ; GFX6-NEXT: s_not_b32 s8, s0 -; GFX6-NEXT: s_cmp_lt_u32 s8, s1 -; GFX6-NEXT: s_cselect_b32 s1, s8, s1 +; GFX6-NEXT: s_min_u32 s1, s8, s1 ; GFX6-NEXT: s_add_i32 s0, s0, s1 ; GFX6-NEXT: s_lshl_b32 s1, s2, 24 ; GFX6-NEXT: s_lshl_b32 s2, s5, 24 -; GFX6-NEXT: s_lshr_b32 s0, s0, 24 ; GFX6-NEXT: s_not_b32 s5, s1 -; GFX6-NEXT: s_cmp_lt_u32 s5, s2 -; GFX6-NEXT: s_cselect_b32 s2, s5, s2 +; GFX6-NEXT: s_min_u32 s2, s5, s2 ; GFX6-NEXT: s_add_i32 s1, s1, s2 ; GFX6-NEXT: s_lshl_b32 s2, s3, 24 -; GFX6-NEXT: s_lshr_b32 s1, s1, 24 ; GFX6-NEXT: s_lshl_b32 s3, s6, 24 ; GFX6-NEXT: s_not_b32 s5, s2 -; GFX6-NEXT: s_cmp_lt_u32 s5, s3 -; GFX6-NEXT: s_cselect_b32 s3, s5, s3 +; GFX6-NEXT: s_min_u32 s3, s5, s3 ; GFX6-NEXT: s_add_i32 s2, s2, s3 ; GFX6-NEXT: s_lshl_b32 s3, s4, 24 -; GFX6-NEXT: s_lshr_b32 s2, s2, 24 ; GFX6-NEXT: s_lshl_b32 s4, s7, 24 ; GFX6-NEXT: s_not_b32 s5, s3 -; GFX6-NEXT: s_cmp_lt_u32 s5, s4 -; GFX6-NEXT: s_cselect_b32 s4, s5, s4 +; GFX6-NEXT: s_lshr_b32 s1, s1, 24 +; GFX6-NEXT: s_min_u32 s4, s5, s4 ; GFX6-NEXT: s_add_i32 s3, s3, s4 +; GFX6-NEXT: s_lshr_b32 s0, s0, 24 ; GFX6-NEXT: s_lshl_b32 s1, s1, 8 +; GFX6-NEXT: s_lshr_b32 s2, s2, 24 ; GFX6-NEXT: s_or_b32 s0, s0, s1 ; GFX6-NEXT: s_lshl_b32 s1, s2, 16 ; GFX6-NEXT: s_lshr_b32 s3, s3, 24 @@ -736,8 +728,7 @@ ; GFX6-NEXT: s_lshl_b32 s0, s0, 8 ; GFX6-NEXT: s_lshl_b32 s1, s1, 8 ; GFX6-NEXT: s_not_b32 s2, s0 -; GFX6-NEXT: s_cmp_lt_u32 s2, s1 -; GFX6-NEXT: s_cselect_b32 s1, s2, s1 +; GFX6-NEXT: s_min_u32 s1, s2, s1 ; GFX6-NEXT: s_add_i32 s0, s0, s1 ; GFX6-NEXT: s_lshr_b32 s0, s0, 8 ; GFX6-NEXT: ; return to shader part epilog @@ -809,8 +800,7 @@ ; GFX6-LABEL: s_uaddsat_i32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_not_b32 s2, s0 -; GFX6-NEXT: s_cmp_lt_u32 s2, s1 -; GFX6-NEXT: s_cselect_b32 s1, s2, s1 +; GFX6-NEXT: s_min_u32 s1, s2, s1 ; GFX6-NEXT: s_add_i32 s0, s0, s1 ; GFX6-NEXT: ; return to shader part epilog ; @@ -932,12 +922,10 @@ ; GFX6-LABEL: s_uaddsat_v2i32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_not_b32 s4, s0 -; GFX6-NEXT: s_cmp_lt_u32 s4, s2 -; GFX6-NEXT: s_cselect_b32 s2, s4, s2 +; GFX6-NEXT: s_min_u32 s2, s4, s2 ; GFX6-NEXT: s_add_i32 s0, s0, s2 ; GFX6-NEXT: s_not_b32 s2, s1 -; GFX6-NEXT: s_cmp_lt_u32 s2, s3 -; GFX6-NEXT: s_cselect_b32 s2, s2, s3 +; GFX6-NEXT: s_min_u32 s2, s2, s3 ; GFX6-NEXT: s_add_i32 s1, s1, s2 ; GFX6-NEXT: ; return to shader part epilog ; @@ -1019,16 +1007,13 @@ ; GFX6-LABEL: s_uaddsat_v3i32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_not_b32 s6, s0 -; GFX6-NEXT: s_cmp_lt_u32 s6, s3 -; GFX6-NEXT: s_cselect_b32 s3, s6, s3 +; GFX6-NEXT: s_min_u32 s3, s6, s3 ; GFX6-NEXT: s_add_i32 s0, s0, s3 ; GFX6-NEXT: s_not_b32 s3, s1 -; GFX6-NEXT: s_cmp_lt_u32 s3, s4 -; GFX6-NEXT: s_cselect_b32 s3, s3, s4 +; GFX6-NEXT: s_min_u32 s3, s3, s4 ; GFX6-NEXT: s_add_i32 s1, s1, s3 ; GFX6-NEXT: s_not_b32 s3, s2 -; GFX6-NEXT: s_cmp_lt_u32 s3, s5 -; GFX6-NEXT: s_cselect_b32 s3, s3, s5 +; GFX6-NEXT: s_min_u32 s3, s3, s5 ; GFX6-NEXT: s_add_i32 s2, s2, s3 ; GFX6-NEXT: ; return to shader part epilog ; @@ -1124,20 +1109,16 @@ ; GFX6-LABEL: s_uaddsat_v4i32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_not_b32 s8, s0 -; GFX6-NEXT: s_cmp_lt_u32 s8, s4 -; GFX6-NEXT: s_cselect_b32 s4, s8, s4 +; GFX6-NEXT: s_min_u32 s4, s8, s4 ; GFX6-NEXT: s_add_i32 s0, s0, s4 ; GFX6-NEXT: s_not_b32 s4, s1 -; GFX6-NEXT: s_cmp_lt_u32 s4, s5 -; GFX6-NEXT: s_cselect_b32 s4, s4, s5 +; GFX6-NEXT: s_min_u32 s4, s4, s5 ; GFX6-NEXT: s_add_i32 s1, s1, s4 ; GFX6-NEXT: s_not_b32 s4, s2 -; GFX6-NEXT: s_cmp_lt_u32 s4, s6 -; GFX6-NEXT: s_cselect_b32 s4, s4, s6 +; GFX6-NEXT: s_min_u32 s4, s4, s6 ; GFX6-NEXT: s_add_i32 s2, s2, s4 ; GFX6-NEXT: s_not_b32 s4, s3 -; GFX6-NEXT: s_cmp_lt_u32 s4, s7 -; GFX6-NEXT: s_cselect_b32 s4, s4, s7 +; GFX6-NEXT: s_min_u32 s4, s4, s7 ; GFX6-NEXT: s_add_i32 s3, s3, s4 ; GFX6-NEXT: ; return to shader part epilog ; @@ -1247,24 +1228,19 @@ ; GFX6-LABEL: s_uaddsat_v5i32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_not_b32 s10, s0 -; GFX6-NEXT: s_cmp_lt_u32 s10, s5 -; GFX6-NEXT: s_cselect_b32 s5, s10, s5 +; GFX6-NEXT: s_min_u32 s5, s10, s5 ; GFX6-NEXT: s_add_i32 s0, s0, s5 ; GFX6-NEXT: s_not_b32 s5, s1 -; GFX6-NEXT: s_cmp_lt_u32 s5, s6 -; GFX6-NEXT: s_cselect_b32 s5, s5, s6 +; GFX6-NEXT: s_min_u32 s5, s5, s6 ; GFX6-NEXT: s_add_i32 s1, s1, s5 ; GFX6-NEXT: s_not_b32 s5, s2 -; GFX6-NEXT: s_cmp_lt_u32 s5, s7 -; GFX6-NEXT: s_cselect_b32 s5, s5, s7 +; GFX6-NEXT: s_min_u32 s5, s5, s7 ; GFX6-NEXT: s_add_i32 s2, s2, s5 ; GFX6-NEXT: s_not_b32 s5, s3 -; GFX6-NEXT: s_cmp_lt_u32 s5, s8 -; GFX6-NEXT: s_cselect_b32 s5, s5, s8 +; GFX6-NEXT: s_min_u32 s5, s5, s8 ; GFX6-NEXT: s_add_i32 s3, s3, s5 ; GFX6-NEXT: s_not_b32 s5, s4 -; GFX6-NEXT: s_cmp_lt_u32 s5, s9 -; GFX6-NEXT: s_cselect_b32 s5, s5, s9 +; GFX6-NEXT: s_min_u32 s5, s5, s9 ; GFX6-NEXT: s_add_i32 s4, s4, s5 ; GFX6-NEXT: ; return to shader part epilog ; @@ -1448,68 +1424,52 @@ ; GFX6-LABEL: s_uaddsat_v16i32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_not_b32 s32, s0 -; GFX6-NEXT: s_cmp_lt_u32 s32, s16 -; GFX6-NEXT: s_cselect_b32 s16, s32, s16 +; GFX6-NEXT: s_min_u32 s16, s32, s16 ; GFX6-NEXT: s_add_i32 s0, s0, s16 ; GFX6-NEXT: s_not_b32 s16, s1 -; GFX6-NEXT: s_cmp_lt_u32 s16, s17 -; GFX6-NEXT: s_cselect_b32 s16, s16, s17 +; GFX6-NEXT: s_min_u32 s16, s16, s17 ; GFX6-NEXT: s_add_i32 s1, s1, s16 ; GFX6-NEXT: s_not_b32 s16, s2 -; GFX6-NEXT: s_cmp_lt_u32 s16, s18 -; GFX6-NEXT: s_cselect_b32 s16, s16, s18 +; GFX6-NEXT: s_min_u32 s16, s16, s18 ; GFX6-NEXT: s_add_i32 s2, s2, s16 ; GFX6-NEXT: s_not_b32 s16, s3 -; GFX6-NEXT: s_cmp_lt_u32 s16, s19 -; GFX6-NEXT: s_cselect_b32 s16, s16, s19 +; GFX6-NEXT: s_min_u32 s16, s16, s19 ; GFX6-NEXT: s_add_i32 s3, s3, s16 ; GFX6-NEXT: s_not_b32 s16, s4 -; GFX6-NEXT: s_cmp_lt_u32 s16, s20 -; GFX6-NEXT: s_cselect_b32 s16, s16, s20 +; GFX6-NEXT: s_min_u32 s16, s16, s20 ; GFX6-NEXT: s_add_i32 s4, s4, s16 ; GFX6-NEXT: s_not_b32 s16, s5 -; GFX6-NEXT: s_cmp_lt_u32 s16, s21 -; GFX6-NEXT: s_cselect_b32 s16, s16, s21 +; GFX6-NEXT: s_min_u32 s16, s16, s21 ; GFX6-NEXT: s_add_i32 s5, s5, s16 ; GFX6-NEXT: s_not_b32 s16, s6 -; GFX6-NEXT: s_cmp_lt_u32 s16, s22 -; GFX6-NEXT: s_cselect_b32 s16, s16, s22 +; GFX6-NEXT: s_min_u32 s16, s16, s22 ; GFX6-NEXT: s_add_i32 s6, s6, s16 ; GFX6-NEXT: s_not_b32 s16, s7 -; GFX6-NEXT: s_cmp_lt_u32 s16, s23 -; GFX6-NEXT: s_cselect_b32 s16, s16, s23 +; GFX6-NEXT: s_min_u32 s16, s16, s23 ; GFX6-NEXT: s_add_i32 s7, s7, s16 ; GFX6-NEXT: s_not_b32 s16, s8 -; GFX6-NEXT: s_cmp_lt_u32 s16, s24 -; GFX6-NEXT: s_cselect_b32 s16, s16, s24 +; GFX6-NEXT: s_min_u32 s16, s16, s24 ; GFX6-NEXT: s_add_i32 s8, s8, s16 ; GFX6-NEXT: s_not_b32 s16, s9 -; GFX6-NEXT: s_cmp_lt_u32 s16, s25 -; GFX6-NEXT: s_cselect_b32 s16, s16, s25 +; GFX6-NEXT: s_min_u32 s16, s16, s25 ; GFX6-NEXT: s_add_i32 s9, s9, s16 ; GFX6-NEXT: s_not_b32 s16, s10 -; GFX6-NEXT: s_cmp_lt_u32 s16, s26 -; GFX6-NEXT: s_cselect_b32 s16, s16, s26 +; GFX6-NEXT: s_min_u32 s16, s16, s26 ; GFX6-NEXT: s_add_i32 s10, s10, s16 ; GFX6-NEXT: s_not_b32 s16, s11 -; GFX6-NEXT: s_cmp_lt_u32 s16, s27 -; GFX6-NEXT: s_cselect_b32 s16, s16, s27 +; GFX6-NEXT: s_min_u32 s16, s16, s27 ; GFX6-NEXT: s_add_i32 s11, s11, s16 ; GFX6-NEXT: s_not_b32 s16, s12 -; GFX6-NEXT: s_cmp_lt_u32 s16, s28 -; GFX6-NEXT: s_cselect_b32 s16, s16, s28 +; GFX6-NEXT: s_min_u32 s16, s16, s28 ; GFX6-NEXT: s_add_i32 s12, s12, s16 ; GFX6-NEXT: s_not_b32 s16, s13 -; GFX6-NEXT: s_cmp_lt_u32 s16, s29 -; GFX6-NEXT: s_cselect_b32 s16, s16, s29 +; GFX6-NEXT: s_min_u32 s16, s16, s29 ; GFX6-NEXT: s_add_i32 s13, s13, s16 ; GFX6-NEXT: s_not_b32 s16, s14 -; GFX6-NEXT: s_cmp_lt_u32 s16, s30 -; GFX6-NEXT: s_cselect_b32 s16, s16, s30 +; GFX6-NEXT: s_min_u32 s16, s16, s30 ; GFX6-NEXT: s_add_i32 s14, s14, s16 ; GFX6-NEXT: s_not_b32 s16, s15 -; GFX6-NEXT: s_cmp_lt_u32 s16, s31 -; GFX6-NEXT: s_cselect_b32 s16, s16, s31 +; GFX6-NEXT: s_min_u32 s16, s16, s31 ; GFX6-NEXT: s_add_i32 s15, s15, s16 ; GFX6-NEXT: ; return to shader part epilog ; @@ -1696,8 +1656,7 @@ ; GFX6-NEXT: s_lshl_b32 s0, s0, 16 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16 ; GFX6-NEXT: s_not_b32 s2, s0 -; GFX6-NEXT: s_cmp_lt_u32 s2, s1 -; GFX6-NEXT: s_cselect_b32 s1, s2, s1 +; GFX6-NEXT: s_min_u32 s1, s2, s1 ; GFX6-NEXT: s_add_i32 s0, s0, s1 ; GFX6-NEXT: s_lshr_b32 s0, s0, 16 ; GFX6-NEXT: ; return to shader part epilog @@ -1835,17 +1794,15 @@ ; GFX6-NEXT: s_lshl_b32 s0, s0, 16 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16 ; GFX6-NEXT: s_not_b32 s4, s0 -; GFX6-NEXT: s_cmp_lt_u32 s4, s2 -; GFX6-NEXT: s_cselect_b32 s2, s4, s2 -; GFX6-NEXT: s_add_i32 s0, s0, s2 +; GFX6-NEXT: s_min_u32 s2, s4, s2 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16 +; GFX6-NEXT: s_add_i32 s0, s0, s2 ; GFX6-NEXT: s_lshl_b32 s2, s3, 16 -; GFX6-NEXT: s_lshr_b32 s0, s0, 16 ; GFX6-NEXT: s_not_b32 s3, s1 -; GFX6-NEXT: s_cmp_lt_u32 s3, s2 -; GFX6-NEXT: s_cselect_b32 s2, s3, s2 +; GFX6-NEXT: s_min_u32 s2, s3, s2 ; GFX6-NEXT: s_add_i32 s1, s1, s2 ; GFX6-NEXT: s_lshr_b32 s1, s1, 16 +; GFX6-NEXT: s_lshr_b32 s0, s0, 16 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16 ; GFX6-NEXT: s_or_b32 s0, s0, s1 ; GFX6-NEXT: ; return to shader part epilog @@ -2053,33 +2010,29 @@ ; GFX6-NEXT: s_lshl_b32 s0, s0, 16 ; GFX6-NEXT: s_lshl_b32 s4, s4, 16 ; GFX6-NEXT: s_not_b32 s8, s0 -; GFX6-NEXT: s_cmp_lt_u32 s8, s4 -; GFX6-NEXT: s_cselect_b32 s4, s8, s4 -; GFX6-NEXT: s_add_i32 s0, s0, s4 +; GFX6-NEXT: s_min_u32 s4, s8, s4 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16 +; GFX6-NEXT: s_add_i32 s0, s0, s4 ; GFX6-NEXT: s_lshl_b32 s4, s5, 16 -; GFX6-NEXT: s_lshr_b32 s0, s0, 16 ; GFX6-NEXT: s_not_b32 s5, s1 -; GFX6-NEXT: s_cmp_lt_u32 s5, s4 -; GFX6-NEXT: s_cselect_b32 s4, s5, s4 -; GFX6-NEXT: s_add_i32 s1, s1, s4 +; GFX6-NEXT: s_min_u32 s4, s5, s4 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16 -; GFX6-NEXT: s_lshr_b32 s1, s1, 16 +; GFX6-NEXT: s_add_i32 s1, s1, s4 ; GFX6-NEXT: s_lshl_b32 s4, s6, 16 ; GFX6-NEXT: s_not_b32 s5, s2 -; GFX6-NEXT: s_cmp_lt_u32 s5, s4 -; GFX6-NEXT: s_cselect_b32 s4, s5, s4 -; GFX6-NEXT: s_add_i32 s2, s2, s4 +; GFX6-NEXT: s_min_u32 s4, s5, s4 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16 -; GFX6-NEXT: s_lshr_b32 s2, s2, 16 +; GFX6-NEXT: s_add_i32 s2, s2, s4 ; GFX6-NEXT: s_lshl_b32 s4, s7, 16 ; GFX6-NEXT: s_not_b32 s5, s3 -; GFX6-NEXT: s_cmp_lt_u32 s5, s4 -; GFX6-NEXT: s_cselect_b32 s4, s5, s4 +; GFX6-NEXT: s_min_u32 s4, s5, s4 +; GFX6-NEXT: s_lshr_b32 s1, s1, 16 ; GFX6-NEXT: s_add_i32 s3, s3, s4 +; GFX6-NEXT: s_lshr_b32 s0, s0, 16 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16 ; GFX6-NEXT: s_lshr_b32 s3, s3, 16 ; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: s_lshr_b32 s2, s2, 16 ; GFX6-NEXT: s_lshl_b32 s1, s3, 16 ; GFX6-NEXT: s_or_b32 s1, s2, s1 ; GFX6-NEXT: ; return to shader part epilog @@ -2234,49 +2187,43 @@ ; GFX6-NEXT: s_lshl_b32 s0, s0, 16 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16 ; GFX6-NEXT: s_not_b32 s12, s0 -; GFX6-NEXT: s_cmp_lt_u32 s12, s6 -; GFX6-NEXT: s_cselect_b32 s6, s12, s6 -; GFX6-NEXT: s_add_i32 s0, s0, s6 +; GFX6-NEXT: s_min_u32 s6, s12, s6 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16 +; GFX6-NEXT: s_add_i32 s0, s0, s6 ; GFX6-NEXT: s_lshl_b32 s6, s7, 16 -; GFX6-NEXT: s_lshr_b32 s0, s0, 16 ; GFX6-NEXT: s_not_b32 s7, s1 -; GFX6-NEXT: s_cmp_lt_u32 s7, s6 -; GFX6-NEXT: s_cselect_b32 s6, s7, s6 -; GFX6-NEXT: s_add_i32 s1, s1, s6 +; GFX6-NEXT: s_min_u32 s6, s7, s6 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16 -; GFX6-NEXT: s_lshr_b32 s1, s1, 16 +; GFX6-NEXT: s_add_i32 s1, s1, s6 ; GFX6-NEXT: s_lshl_b32 s6, s8, 16 ; GFX6-NEXT: s_not_b32 s7, s2 -; GFX6-NEXT: s_cmp_lt_u32 s7, s6 -; GFX6-NEXT: s_cselect_b32 s6, s7, s6 -; GFX6-NEXT: s_add_i32 s2, s2, s6 +; GFX6-NEXT: s_min_u32 s6, s7, s6 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16 -; GFX6-NEXT: s_lshr_b32 s2, s2, 16 +; GFX6-NEXT: s_add_i32 s2, s2, s6 ; GFX6-NEXT: s_lshl_b32 s6, s9, 16 ; GFX6-NEXT: s_not_b32 s7, s3 -; GFX6-NEXT: s_cmp_lt_u32 s7, s6 -; GFX6-NEXT: s_cselect_b32 s6, s7, s6 -; GFX6-NEXT: s_add_i32 s3, s3, s6 +; GFX6-NEXT: s_min_u32 s6, s7, s6 ; GFX6-NEXT: s_lshl_b32 s4, s4, 16 -; GFX6-NEXT: s_lshr_b32 s3, s3, 16 +; GFX6-NEXT: s_add_i32 s3, s3, s6 ; GFX6-NEXT: s_lshl_b32 s6, s10, 16 ; GFX6-NEXT: s_not_b32 s7, s4 -; GFX6-NEXT: s_cmp_lt_u32 s7, s6 -; GFX6-NEXT: s_cselect_b32 s6, s7, s6 -; GFX6-NEXT: s_add_i32 s4, s4, s6 +; GFX6-NEXT: s_min_u32 s6, s7, s6 ; GFX6-NEXT: s_lshl_b32 s5, s5, 16 -; GFX6-NEXT: s_lshr_b32 s4, s4, 16 +; GFX6-NEXT: s_add_i32 s4, s4, s6 ; GFX6-NEXT: s_lshl_b32 s6, s11, 16 ; GFX6-NEXT: s_not_b32 s7, s5 -; GFX6-NEXT: s_cmp_lt_u32 s7, s6 -; GFX6-NEXT: s_cselect_b32 s6, s7, s6 +; GFX6-NEXT: s_lshr_b32 s1, s1, 16 +; GFX6-NEXT: s_min_u32 s6, s7, s6 ; GFX6-NEXT: s_add_i32 s5, s5, s6 +; GFX6-NEXT: s_lshr_b32 s0, s0, 16 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16 +; GFX6-NEXT: s_lshr_b32 s3, s3, 16 ; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: s_lshr_b32 s2, s2, 16 ; GFX6-NEXT: s_lshl_b32 s1, s3, 16 ; GFX6-NEXT: s_lshr_b32 s5, s5, 16 ; GFX6-NEXT: s_or_b32 s1, s2, s1 +; GFX6-NEXT: s_lshr_b32 s4, s4, 16 ; GFX6-NEXT: s_lshl_b32 s2, s5, 16 ; GFX6-NEXT: s_or_b32 s2, s4, s2 ; GFX6-NEXT: ; return to shader part epilog @@ -2454,65 +2401,57 @@ ; GFX6-NEXT: s_lshl_b32 s0, s0, 16 ; GFX6-NEXT: s_lshl_b32 s8, s8, 16 ; GFX6-NEXT: s_not_b32 s16, s0 -; GFX6-NEXT: s_cmp_lt_u32 s16, s8 -; GFX6-NEXT: s_cselect_b32 s8, s16, s8 -; GFX6-NEXT: s_add_i32 s0, s0, s8 +; GFX6-NEXT: s_min_u32 s8, s16, s8 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16 +; GFX6-NEXT: s_add_i32 s0, s0, s8 ; GFX6-NEXT: s_lshl_b32 s8, s9, 16 -; GFX6-NEXT: s_lshr_b32 s0, s0, 16 ; GFX6-NEXT: s_not_b32 s9, s1 -; GFX6-NEXT: s_cmp_lt_u32 s9, s8 -; GFX6-NEXT: s_cselect_b32 s8, s9, s8 -; GFX6-NEXT: s_add_i32 s1, s1, s8 +; GFX6-NEXT: s_min_u32 s8, s9, s8 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16 -; GFX6-NEXT: s_lshr_b32 s1, s1, 16 +; GFX6-NEXT: s_add_i32 s1, s1, s8 ; GFX6-NEXT: s_lshl_b32 s8, s10, 16 ; GFX6-NEXT: s_not_b32 s9, s2 -; GFX6-NEXT: s_cmp_lt_u32 s9, s8 -; GFX6-NEXT: s_cselect_b32 s8, s9, s8 -; GFX6-NEXT: s_add_i32 s2, s2, s8 +; GFX6-NEXT: s_min_u32 s8, s9, s8 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16 -; GFX6-NEXT: s_lshr_b32 s2, s2, 16 +; GFX6-NEXT: s_add_i32 s2, s2, s8 ; GFX6-NEXT: s_lshl_b32 s8, s11, 16 ; GFX6-NEXT: s_not_b32 s9, s3 -; GFX6-NEXT: s_cmp_lt_u32 s9, s8 -; GFX6-NEXT: s_cselect_b32 s8, s9, s8 -; GFX6-NEXT: s_add_i32 s3, s3, s8 +; GFX6-NEXT: s_min_u32 s8, s9, s8 ; GFX6-NEXT: s_lshl_b32 s4, s4, 16 -; GFX6-NEXT: s_lshr_b32 s3, s3, 16 +; GFX6-NEXT: s_add_i32 s3, s3, s8 ; GFX6-NEXT: s_lshl_b32 s8, s12, 16 ; GFX6-NEXT: s_not_b32 s9, s4 -; GFX6-NEXT: s_cmp_lt_u32 s9, s8 -; GFX6-NEXT: s_cselect_b32 s8, s9, s8 -; GFX6-NEXT: s_add_i32 s4, s4, s8 +; GFX6-NEXT: s_min_u32 s8, s9, s8 ; GFX6-NEXT: s_lshl_b32 s5, s5, 16 -; GFX6-NEXT: s_lshr_b32 s4, s4, 16 +; GFX6-NEXT: s_add_i32 s4, s4, s8 ; GFX6-NEXT: s_lshl_b32 s8, s13, 16 ; GFX6-NEXT: s_not_b32 s9, s5 -; GFX6-NEXT: s_cmp_lt_u32 s9, s8 -; GFX6-NEXT: s_cselect_b32 s8, s9, s8 -; GFX6-NEXT: s_add_i32 s5, s5, s8 +; GFX6-NEXT: s_min_u32 s8, s9, s8 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16 -; GFX6-NEXT: s_lshr_b32 s5, s5, 16 +; GFX6-NEXT: s_add_i32 s5, s5, s8 ; GFX6-NEXT: s_lshl_b32 s8, s14, 16 ; GFX6-NEXT: s_not_b32 s9, s6 -; GFX6-NEXT: s_cmp_lt_u32 s9, s8 -; GFX6-NEXT: s_cselect_b32 s8, s9, s8 -; GFX6-NEXT: s_add_i32 s6, s6, s8 +; GFX6-NEXT: s_min_u32 s8, s9, s8 ; GFX6-NEXT: s_lshl_b32 s7, s7, 16 -; GFX6-NEXT: s_lshr_b32 s6, s6, 16 +; GFX6-NEXT: s_add_i32 s6, s6, s8 ; GFX6-NEXT: s_lshl_b32 s8, s15, 16 ; GFX6-NEXT: s_not_b32 s9, s7 -; GFX6-NEXT: s_cmp_lt_u32 s9, s8 -; GFX6-NEXT: s_cselect_b32 s8, s9, s8 +; GFX6-NEXT: s_lshr_b32 s1, s1, 16 +; GFX6-NEXT: s_min_u32 s8, s9, s8 ; GFX6-NEXT: s_add_i32 s7, s7, s8 +; GFX6-NEXT: s_lshr_b32 s0, s0, 16 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16 +; GFX6-NEXT: s_lshr_b32 s3, s3, 16 ; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: s_lshr_b32 s2, s2, 16 ; GFX6-NEXT: s_lshl_b32 s1, s3, 16 ; GFX6-NEXT: s_lshr_b32 s7, s7, 16 -; GFX6-NEXT: s_lshl_b32 s3, s7, 16 +; GFX6-NEXT: s_lshr_b32 s5, s5, 16 ; GFX6-NEXT: s_or_b32 s1, s2, s1 +; GFX6-NEXT: s_lshr_b32 s4, s4, 16 ; GFX6-NEXT: s_lshl_b32 s2, s5, 16 +; GFX6-NEXT: s_lshr_b32 s6, s6, 16 +; GFX6-NEXT: s_lshl_b32 s3, s7, 16 ; GFX6-NEXT: s_or_b32 s2, s4, s2 ; GFX6-NEXT: s_or_b32 s3, s6, s3 ; GFX6-NEXT: ; return to shader part epilog diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll @@ -51,8 +51,7 @@ ; GFX6: ; %bb.0: ; GFX6-NEXT: s_lshl_b32 s0, s0, 25 ; GFX6-NEXT: s_lshl_b32 s1, s1, 25 -; GFX6-NEXT: s_cmp_lt_u32 s0, s1 -; GFX6-NEXT: s_cselect_b32 s1, s0, s1 +; GFX6-NEXT: s_min_u32 s1, s0, s1 ; GFX6-NEXT: s_sub_i32 s0, s0, s1 ; GFX6-NEXT: s_lshr_b32 s0, s0, 25 ; GFX6-NEXT: ; return to shader part epilog @@ -139,8 +138,7 @@ ; GFX6: ; %bb.0: ; GFX6-NEXT: s_lshl_b32 s0, s0, 24 ; GFX6-NEXT: s_lshl_b32 s1, s1, 24 -; GFX6-NEXT: s_cmp_lt_u32 s0, s1 -; GFX6-NEXT: s_cselect_b32 s1, s0, s1 +; GFX6-NEXT: s_min_u32 s1, s0, s1 ; GFX6-NEXT: s_sub_i32 s0, s0, s1 ; GFX6-NEXT: s_lshr_b32 s0, s0, 24 ; GFX6-NEXT: ; return to shader part epilog @@ -265,16 +263,14 @@ ; GFX6-NEXT: s_lshr_b32 s3, s1, 8 ; GFX6-NEXT: s_lshl_b32 s0, s0, 24 ; GFX6-NEXT: s_lshl_b32 s1, s1, 24 -; GFX6-NEXT: s_cmp_lt_u32 s0, s1 -; GFX6-NEXT: s_cselect_b32 s1, s0, s1 +; GFX6-NEXT: s_min_u32 s1, s0, s1 ; GFX6-NEXT: s_sub_i32 s0, s0, s1 ; GFX6-NEXT: s_lshl_b32 s1, s2, 24 -; GFX6-NEXT: s_lshr_b32 s0, s0, 24 ; GFX6-NEXT: s_lshl_b32 s2, s3, 24 -; GFX6-NEXT: s_cmp_lt_u32 s1, s2 -; GFX6-NEXT: s_cselect_b32 s2, s1, s2 +; GFX6-NEXT: s_min_u32 s2, s1, s2 ; GFX6-NEXT: s_sub_i32 s1, s1, s2 ; GFX6-NEXT: s_lshr_b32 s1, s1, 24 +; GFX6-NEXT: s_lshr_b32 s0, s0, 24 ; GFX6-NEXT: s_lshl_b32 s1, s1, 8 ; GFX6-NEXT: s_or_b32 s0, s0, s1 ; GFX6-NEXT: ; return to shader part epilog @@ -508,28 +504,24 @@ ; GFX6-NEXT: s_lshr_b32 s7, s1, 24 ; GFX6-NEXT: s_lshl_b32 s0, s0, 24 ; GFX6-NEXT: s_lshl_b32 s1, s1, 24 -; GFX6-NEXT: s_cmp_lt_u32 s0, s1 -; GFX6-NEXT: s_cselect_b32 s1, s0, s1 +; GFX6-NEXT: s_min_u32 s1, s0, s1 ; GFX6-NEXT: s_sub_i32 s0, s0, s1 ; GFX6-NEXT: s_lshl_b32 s1, s2, 24 -; GFX6-NEXT: s_lshr_b32 s0, s0, 24 ; GFX6-NEXT: s_lshl_b32 s2, s5, 24 -; GFX6-NEXT: s_cmp_lt_u32 s1, s2 -; GFX6-NEXT: s_cselect_b32 s2, s1, s2 +; GFX6-NEXT: s_min_u32 s2, s1, s2 ; GFX6-NEXT: s_sub_i32 s1, s1, s2 ; GFX6-NEXT: s_lshl_b32 s2, s3, 24 -; GFX6-NEXT: s_lshr_b32 s1, s1, 24 ; GFX6-NEXT: s_lshl_b32 s3, s6, 24 -; GFX6-NEXT: s_cmp_lt_u32 s2, s3 -; GFX6-NEXT: s_cselect_b32 s3, s2, s3 +; GFX6-NEXT: s_min_u32 s3, s2, s3 ; GFX6-NEXT: s_sub_i32 s2, s2, s3 ; GFX6-NEXT: s_lshl_b32 s3, s4, 24 -; GFX6-NEXT: s_lshr_b32 s2, s2, 24 ; GFX6-NEXT: s_lshl_b32 s4, s7, 24 -; GFX6-NEXT: s_cmp_lt_u32 s3, s4 -; GFX6-NEXT: s_cselect_b32 s4, s3, s4 +; GFX6-NEXT: s_lshr_b32 s1, s1, 24 +; GFX6-NEXT: s_min_u32 s4, s3, s4 ; GFX6-NEXT: s_sub_i32 s3, s3, s4 +; GFX6-NEXT: s_lshr_b32 s0, s0, 24 ; GFX6-NEXT: s_lshl_b32 s1, s1, 8 +; GFX6-NEXT: s_lshr_b32 s2, s2, 24 ; GFX6-NEXT: s_or_b32 s0, s0, s1 ; GFX6-NEXT: s_lshl_b32 s1, s2, 16 ; GFX6-NEXT: s_lshr_b32 s3, s3, 24 @@ -718,8 +710,7 @@ ; GFX6: ; %bb.0: ; GFX6-NEXT: s_lshl_b32 s0, s0, 8 ; GFX6-NEXT: s_lshl_b32 s1, s1, 8 -; GFX6-NEXT: s_cmp_lt_u32 s0, s1 -; GFX6-NEXT: s_cselect_b32 s1, s0, s1 +; GFX6-NEXT: s_min_u32 s1, s0, s1 ; GFX6-NEXT: s_sub_i32 s0, s0, s1 ; GFX6-NEXT: s_lshr_b32 s0, s0, 8 ; GFX6-NEXT: ; return to shader part epilog @@ -789,8 +780,7 @@ define amdgpu_ps i32 @s_usubsat_i32(i32 inreg %lhs, i32 inreg %rhs) { ; GFX6-LABEL: s_usubsat_i32: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_cmp_lt_u32 s0, s1 -; GFX6-NEXT: s_cselect_b32 s1, s0, s1 +; GFX6-NEXT: s_min_u32 s1, s0, s1 ; GFX6-NEXT: s_sub_i32 s0, s0, s1 ; GFX6-NEXT: ; return to shader part epilog ; @@ -907,11 +897,9 @@ define amdgpu_ps <2 x i32> @s_usubsat_v2i32(<2 x i32> inreg %lhs, <2 x i32> inreg %rhs) { ; GFX6-LABEL: s_usubsat_v2i32: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_cmp_lt_u32 s0, s2 -; GFX6-NEXT: s_cselect_b32 s2, s0, s2 +; GFX6-NEXT: s_min_u32 s2, s0, s2 ; GFX6-NEXT: s_sub_i32 s0, s0, s2 -; GFX6-NEXT: s_cmp_lt_u32 s1, s3 -; GFX6-NEXT: s_cselect_b32 s2, s1, s3 +; GFX6-NEXT: s_min_u32 s2, s1, s3 ; GFX6-NEXT: s_sub_i32 s1, s1, s2 ; GFX6-NEXT: ; return to shader part epilog ; @@ -989,14 +977,11 @@ define amdgpu_ps <3 x i32> @s_usubsat_v3i32(<3 x i32> inreg %lhs, <3 x i32> inreg %rhs) { ; GFX6-LABEL: s_usubsat_v3i32: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_cmp_lt_u32 s0, s3 -; GFX6-NEXT: s_cselect_b32 s3, s0, s3 +; GFX6-NEXT: s_min_u32 s3, s0, s3 ; GFX6-NEXT: s_sub_i32 s0, s0, s3 -; GFX6-NEXT: s_cmp_lt_u32 s1, s4 -; GFX6-NEXT: s_cselect_b32 s3, s1, s4 +; GFX6-NEXT: s_min_u32 s3, s1, s4 ; GFX6-NEXT: s_sub_i32 s1, s1, s3 -; GFX6-NEXT: s_cmp_lt_u32 s2, s5 -; GFX6-NEXT: s_cselect_b32 s3, s2, s5 +; GFX6-NEXT: s_min_u32 s3, s2, s5 ; GFX6-NEXT: s_sub_i32 s2, s2, s3 ; GFX6-NEXT: ; return to shader part epilog ; @@ -1087,17 +1072,13 @@ define amdgpu_ps <4 x i32> @s_usubsat_v4i32(<4 x i32> inreg %lhs, <4 x i32> inreg %rhs) { ; GFX6-LABEL: s_usubsat_v4i32: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_cmp_lt_u32 s0, s4 -; GFX6-NEXT: s_cselect_b32 s4, s0, s4 +; GFX6-NEXT: s_min_u32 s4, s0, s4 ; GFX6-NEXT: s_sub_i32 s0, s0, s4 -; GFX6-NEXT: s_cmp_lt_u32 s1, s5 -; GFX6-NEXT: s_cselect_b32 s4, s1, s5 +; GFX6-NEXT: s_min_u32 s4, s1, s5 ; GFX6-NEXT: s_sub_i32 s1, s1, s4 -; GFX6-NEXT: s_cmp_lt_u32 s2, s6 -; GFX6-NEXT: s_cselect_b32 s4, s2, s6 +; GFX6-NEXT: s_min_u32 s4, s2, s6 ; GFX6-NEXT: s_sub_i32 s2, s2, s4 -; GFX6-NEXT: s_cmp_lt_u32 s3, s7 -; GFX6-NEXT: s_cselect_b32 s4, s3, s7 +; GFX6-NEXT: s_min_u32 s4, s3, s7 ; GFX6-NEXT: s_sub_i32 s3, s3, s4 ; GFX6-NEXT: ; return to shader part epilog ; @@ -1201,20 +1182,15 @@ define amdgpu_ps <5 x i32> @s_usubsat_v5i32(<5 x i32> inreg %lhs, <5 x i32> inreg %rhs) { ; GFX6-LABEL: s_usubsat_v5i32: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_cmp_lt_u32 s0, s5 -; GFX6-NEXT: s_cselect_b32 s5, s0, s5 +; GFX6-NEXT: s_min_u32 s5, s0, s5 ; GFX6-NEXT: s_sub_i32 s0, s0, s5 -; GFX6-NEXT: s_cmp_lt_u32 s1, s6 -; GFX6-NEXT: s_cselect_b32 s5, s1, s6 +; GFX6-NEXT: s_min_u32 s5, s1, s6 ; GFX6-NEXT: s_sub_i32 s1, s1, s5 -; GFX6-NEXT: s_cmp_lt_u32 s2, s7 -; GFX6-NEXT: s_cselect_b32 s5, s2, s7 +; GFX6-NEXT: s_min_u32 s5, s2, s7 ; GFX6-NEXT: s_sub_i32 s2, s2, s5 -; GFX6-NEXT: s_cmp_lt_u32 s3, s8 -; GFX6-NEXT: s_cselect_b32 s5, s3, s8 +; GFX6-NEXT: s_min_u32 s5, s3, s8 ; GFX6-NEXT: s_sub_i32 s3, s3, s5 -; GFX6-NEXT: s_cmp_lt_u32 s4, s9 -; GFX6-NEXT: s_cselect_b32 s5, s4, s9 +; GFX6-NEXT: s_min_u32 s5, s4, s9 ; GFX6-NEXT: s_sub_i32 s4, s4, s5 ; GFX6-NEXT: ; return to shader part epilog ; @@ -1381,53 +1357,37 @@ define amdgpu_ps <16 x i32> @s_usubsat_v16i32(<16 x i32> inreg %lhs, <16 x i32> inreg %rhs) { ; GFX6-LABEL: s_usubsat_v16i32: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_cmp_lt_u32 s0, s16 -; GFX6-NEXT: s_cselect_b32 s16, s0, s16 +; GFX6-NEXT: s_min_u32 s16, s0, s16 ; GFX6-NEXT: s_sub_i32 s0, s0, s16 -; GFX6-NEXT: s_cmp_lt_u32 s1, s17 -; GFX6-NEXT: s_cselect_b32 s16, s1, s17 +; GFX6-NEXT: s_min_u32 s16, s1, s17 ; GFX6-NEXT: s_sub_i32 s1, s1, s16 -; GFX6-NEXT: s_cmp_lt_u32 s2, s18 -; GFX6-NEXT: s_cselect_b32 s16, s2, s18 +; GFX6-NEXT: s_min_u32 s16, s2, s18 ; GFX6-NEXT: s_sub_i32 s2, s2, s16 -; GFX6-NEXT: s_cmp_lt_u32 s3, s19 -; GFX6-NEXT: s_cselect_b32 s16, s3, s19 +; GFX6-NEXT: s_min_u32 s16, s3, s19 ; GFX6-NEXT: s_sub_i32 s3, s3, s16 -; GFX6-NEXT: s_cmp_lt_u32 s4, s20 -; GFX6-NEXT: s_cselect_b32 s16, s4, s20 +; GFX6-NEXT: s_min_u32 s16, s4, s20 ; GFX6-NEXT: s_sub_i32 s4, s4, s16 -; GFX6-NEXT: s_cmp_lt_u32 s5, s21 -; GFX6-NEXT: s_cselect_b32 s16, s5, s21 +; GFX6-NEXT: s_min_u32 s16, s5, s21 ; GFX6-NEXT: s_sub_i32 s5, s5, s16 -; GFX6-NEXT: s_cmp_lt_u32 s6, s22 -; GFX6-NEXT: s_cselect_b32 s16, s6, s22 +; GFX6-NEXT: s_min_u32 s16, s6, s22 ; GFX6-NEXT: s_sub_i32 s6, s6, s16 -; GFX6-NEXT: s_cmp_lt_u32 s7, s23 -; GFX6-NEXT: s_cselect_b32 s16, s7, s23 +; GFX6-NEXT: s_min_u32 s16, s7, s23 ; GFX6-NEXT: s_sub_i32 s7, s7, s16 -; GFX6-NEXT: s_cmp_lt_u32 s8, s24 -; GFX6-NEXT: s_cselect_b32 s16, s8, s24 +; GFX6-NEXT: s_min_u32 s16, s8, s24 ; GFX6-NEXT: s_sub_i32 s8, s8, s16 -; GFX6-NEXT: s_cmp_lt_u32 s9, s25 -; GFX6-NEXT: s_cselect_b32 s16, s9, s25 +; GFX6-NEXT: s_min_u32 s16, s9, s25 ; GFX6-NEXT: s_sub_i32 s9, s9, s16 -; GFX6-NEXT: s_cmp_lt_u32 s10, s26 -; GFX6-NEXT: s_cselect_b32 s16, s10, s26 +; GFX6-NEXT: s_min_u32 s16, s10, s26 ; GFX6-NEXT: s_sub_i32 s10, s10, s16 -; GFX6-NEXT: s_cmp_lt_u32 s11, s27 -; GFX6-NEXT: s_cselect_b32 s16, s11, s27 +; GFX6-NEXT: s_min_u32 s16, s11, s27 ; GFX6-NEXT: s_sub_i32 s11, s11, s16 -; GFX6-NEXT: s_cmp_lt_u32 s12, s28 -; GFX6-NEXT: s_cselect_b32 s16, s12, s28 +; GFX6-NEXT: s_min_u32 s16, s12, s28 ; GFX6-NEXT: s_sub_i32 s12, s12, s16 -; GFX6-NEXT: s_cmp_lt_u32 s13, s29 -; GFX6-NEXT: s_cselect_b32 s16, s13, s29 +; GFX6-NEXT: s_min_u32 s16, s13, s29 ; GFX6-NEXT: s_sub_i32 s13, s13, s16 -; GFX6-NEXT: s_cmp_lt_u32 s14, s30 -; GFX6-NEXT: s_cselect_b32 s16, s14, s30 +; GFX6-NEXT: s_min_u32 s16, s14, s30 ; GFX6-NEXT: s_sub_i32 s14, s14, s16 -; GFX6-NEXT: s_cmp_lt_u32 s15, s31 -; GFX6-NEXT: s_cselect_b32 s16, s15, s31 +; GFX6-NEXT: s_min_u32 s16, s15, s31 ; GFX6-NEXT: s_sub_i32 s15, s15, s16 ; GFX6-NEXT: ; return to shader part epilog ; @@ -1612,8 +1572,7 @@ ; GFX6: ; %bb.0: ; GFX6-NEXT: s_lshl_b32 s0, s0, 16 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16 -; GFX6-NEXT: s_cmp_lt_u32 s0, s1 -; GFX6-NEXT: s_cselect_b32 s1, s0, s1 +; GFX6-NEXT: s_min_u32 s1, s0, s1 ; GFX6-NEXT: s_sub_i32 s0, s0, s1 ; GFX6-NEXT: s_lshr_b32 s0, s0, 16 ; GFX6-NEXT: ; return to shader part epilog @@ -1746,16 +1705,14 @@ ; GFX6: ; %bb.0: ; GFX6-NEXT: s_lshl_b32 s0, s0, 16 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16 -; GFX6-NEXT: s_cmp_lt_u32 s0, s2 -; GFX6-NEXT: s_cselect_b32 s2, s0, s2 +; GFX6-NEXT: s_min_u32 s2, s0, s2 ; GFX6-NEXT: s_sub_i32 s0, s0, s2 -; GFX6-NEXT: s_lshr_b32 s0, s0, 16 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16 ; GFX6-NEXT: s_lshl_b32 s2, s3, 16 -; GFX6-NEXT: s_cmp_lt_u32 s1, s2 -; GFX6-NEXT: s_cselect_b32 s2, s1, s2 +; GFX6-NEXT: s_min_u32 s2, s1, s2 ; GFX6-NEXT: s_sub_i32 s1, s1, s2 ; GFX6-NEXT: s_lshr_b32 s1, s1, 16 +; GFX6-NEXT: s_lshr_b32 s0, s0, 16 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16 ; GFX6-NEXT: s_or_b32 s0, s0, s1 ; GFX6-NEXT: ; return to shader part epilog @@ -1954,30 +1911,26 @@ ; GFX6: ; %bb.0: ; GFX6-NEXT: s_lshl_b32 s0, s0, 16 ; GFX6-NEXT: s_lshl_b32 s4, s4, 16 -; GFX6-NEXT: s_cmp_lt_u32 s0, s4 -; GFX6-NEXT: s_cselect_b32 s4, s0, s4 +; GFX6-NEXT: s_min_u32 s4, s0, s4 ; GFX6-NEXT: s_sub_i32 s0, s0, s4 -; GFX6-NEXT: s_lshr_b32 s0, s0, 16 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16 ; GFX6-NEXT: s_lshl_b32 s4, s5, 16 -; GFX6-NEXT: s_cmp_lt_u32 s1, s4 -; GFX6-NEXT: s_cselect_b32 s4, s1, s4 +; GFX6-NEXT: s_min_u32 s4, s1, s4 ; GFX6-NEXT: s_sub_i32 s1, s1, s4 -; GFX6-NEXT: s_lshr_b32 s1, s1, 16 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16 ; GFX6-NEXT: s_lshl_b32 s4, s6, 16 -; GFX6-NEXT: s_cmp_lt_u32 s2, s4 -; GFX6-NEXT: s_cselect_b32 s4, s2, s4 +; GFX6-NEXT: s_min_u32 s4, s2, s4 ; GFX6-NEXT: s_sub_i32 s2, s2, s4 -; GFX6-NEXT: s_lshr_b32 s2, s2, 16 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16 ; GFX6-NEXT: s_lshl_b32 s4, s7, 16 -; GFX6-NEXT: s_cmp_lt_u32 s3, s4 -; GFX6-NEXT: s_cselect_b32 s4, s3, s4 +; GFX6-NEXT: s_min_u32 s4, s3, s4 +; GFX6-NEXT: s_lshr_b32 s1, s1, 16 ; GFX6-NEXT: s_sub_i32 s3, s3, s4 +; GFX6-NEXT: s_lshr_b32 s0, s0, 16 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16 ; GFX6-NEXT: s_lshr_b32 s3, s3, 16 ; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: s_lshr_b32 s2, s2, 16 ; GFX6-NEXT: s_lshl_b32 s1, s3, 16 ; GFX6-NEXT: s_or_b32 s1, s2, s1 ; GFX6-NEXT: ; return to shader part epilog @@ -2125,44 +2078,38 @@ ; GFX6: ; %bb.0: ; GFX6-NEXT: s_lshl_b32 s0, s0, 16 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16 -; GFX6-NEXT: s_cmp_lt_u32 s0, s6 -; GFX6-NEXT: s_cselect_b32 s6, s0, s6 +; GFX6-NEXT: s_min_u32 s6, s0, s6 ; GFX6-NEXT: s_sub_i32 s0, s0, s6 -; GFX6-NEXT: s_lshr_b32 s0, s0, 16 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16 ; GFX6-NEXT: s_lshl_b32 s6, s7, 16 -; GFX6-NEXT: s_cmp_lt_u32 s1, s6 -; GFX6-NEXT: s_cselect_b32 s6, s1, s6 +; GFX6-NEXT: s_min_u32 s6, s1, s6 ; GFX6-NEXT: s_sub_i32 s1, s1, s6 -; GFX6-NEXT: s_lshr_b32 s1, s1, 16 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16 ; GFX6-NEXT: s_lshl_b32 s6, s8, 16 -; GFX6-NEXT: s_cmp_lt_u32 s2, s6 -; GFX6-NEXT: s_cselect_b32 s6, s2, s6 +; GFX6-NEXT: s_min_u32 s6, s2, s6 ; GFX6-NEXT: s_sub_i32 s2, s2, s6 -; GFX6-NEXT: s_lshr_b32 s2, s2, 16 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16 ; GFX6-NEXT: s_lshl_b32 s6, s9, 16 -; GFX6-NEXT: s_cmp_lt_u32 s3, s6 -; GFX6-NEXT: s_cselect_b32 s6, s3, s6 +; GFX6-NEXT: s_min_u32 s6, s3, s6 ; GFX6-NEXT: s_sub_i32 s3, s3, s6 -; GFX6-NEXT: s_lshr_b32 s3, s3, 16 ; GFX6-NEXT: s_lshl_b32 s4, s4, 16 ; GFX6-NEXT: s_lshl_b32 s6, s10, 16 -; GFX6-NEXT: s_cmp_lt_u32 s4, s6 -; GFX6-NEXT: s_cselect_b32 s6, s4, s6 +; GFX6-NEXT: s_min_u32 s6, s4, s6 ; GFX6-NEXT: s_sub_i32 s4, s4, s6 -; GFX6-NEXT: s_lshr_b32 s4, s4, 16 ; GFX6-NEXT: s_lshl_b32 s5, s5, 16 ; GFX6-NEXT: s_lshl_b32 s6, s11, 16 -; GFX6-NEXT: s_cmp_lt_u32 s5, s6 -; GFX6-NEXT: s_cselect_b32 s6, s5, s6 +; GFX6-NEXT: s_lshr_b32 s1, s1, 16 +; GFX6-NEXT: s_min_u32 s6, s5, s6 ; GFX6-NEXT: s_sub_i32 s5, s5, s6 +; GFX6-NEXT: s_lshr_b32 s0, s0, 16 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16 +; GFX6-NEXT: s_lshr_b32 s3, s3, 16 ; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: s_lshr_b32 s2, s2, 16 ; GFX6-NEXT: s_lshl_b32 s1, s3, 16 ; GFX6-NEXT: s_lshr_b32 s5, s5, 16 ; GFX6-NEXT: s_or_b32 s1, s2, s1 +; GFX6-NEXT: s_lshr_b32 s4, s4, 16 ; GFX6-NEXT: s_lshl_b32 s2, s5, 16 ; GFX6-NEXT: s_or_b32 s2, s4, s2 ; GFX6-NEXT: ; return to shader part epilog @@ -2331,58 +2278,50 @@ ; GFX6: ; %bb.0: ; GFX6-NEXT: s_lshl_b32 s0, s0, 16 ; GFX6-NEXT: s_lshl_b32 s8, s8, 16 -; GFX6-NEXT: s_cmp_lt_u32 s0, s8 -; GFX6-NEXT: s_cselect_b32 s8, s0, s8 +; GFX6-NEXT: s_min_u32 s8, s0, s8 ; GFX6-NEXT: s_sub_i32 s0, s0, s8 -; GFX6-NEXT: s_lshr_b32 s0, s0, 16 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16 ; GFX6-NEXT: s_lshl_b32 s8, s9, 16 -; GFX6-NEXT: s_cmp_lt_u32 s1, s8 -; GFX6-NEXT: s_cselect_b32 s8, s1, s8 +; GFX6-NEXT: s_min_u32 s8, s1, s8 ; GFX6-NEXT: s_sub_i32 s1, s1, s8 -; GFX6-NEXT: s_lshr_b32 s1, s1, 16 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16 ; GFX6-NEXT: s_lshl_b32 s8, s10, 16 -; GFX6-NEXT: s_cmp_lt_u32 s2, s8 -; GFX6-NEXT: s_cselect_b32 s8, s2, s8 +; GFX6-NEXT: s_min_u32 s8, s2, s8 ; GFX6-NEXT: s_sub_i32 s2, s2, s8 -; GFX6-NEXT: s_lshr_b32 s2, s2, 16 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16 ; GFX6-NEXT: s_lshl_b32 s8, s11, 16 -; GFX6-NEXT: s_cmp_lt_u32 s3, s8 -; GFX6-NEXT: s_cselect_b32 s8, s3, s8 +; GFX6-NEXT: s_min_u32 s8, s3, s8 ; GFX6-NEXT: s_sub_i32 s3, s3, s8 -; GFX6-NEXT: s_lshr_b32 s3, s3, 16 ; GFX6-NEXT: s_lshl_b32 s4, s4, 16 ; GFX6-NEXT: s_lshl_b32 s8, s12, 16 -; GFX6-NEXT: s_cmp_lt_u32 s4, s8 -; GFX6-NEXT: s_cselect_b32 s8, s4, s8 +; GFX6-NEXT: s_min_u32 s8, s4, s8 ; GFX6-NEXT: s_sub_i32 s4, s4, s8 -; GFX6-NEXT: s_lshr_b32 s4, s4, 16 ; GFX6-NEXT: s_lshl_b32 s5, s5, 16 ; GFX6-NEXT: s_lshl_b32 s8, s13, 16 -; GFX6-NEXT: s_cmp_lt_u32 s5, s8 -; GFX6-NEXT: s_cselect_b32 s8, s5, s8 +; GFX6-NEXT: s_min_u32 s8, s5, s8 ; GFX6-NEXT: s_sub_i32 s5, s5, s8 -; GFX6-NEXT: s_lshr_b32 s5, s5, 16 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16 ; GFX6-NEXT: s_lshl_b32 s8, s14, 16 -; GFX6-NEXT: s_cmp_lt_u32 s6, s8 -; GFX6-NEXT: s_cselect_b32 s8, s6, s8 +; GFX6-NEXT: s_min_u32 s8, s6, s8 ; GFX6-NEXT: s_sub_i32 s6, s6, s8 -; GFX6-NEXT: s_lshr_b32 s6, s6, 16 ; GFX6-NEXT: s_lshl_b32 s7, s7, 16 ; GFX6-NEXT: s_lshl_b32 s8, s15, 16 -; GFX6-NEXT: s_cmp_lt_u32 s7, s8 -; GFX6-NEXT: s_cselect_b32 s8, s7, s8 +; GFX6-NEXT: s_lshr_b32 s1, s1, 16 +; GFX6-NEXT: s_min_u32 s8, s7, s8 ; GFX6-NEXT: s_sub_i32 s7, s7, s8 +; GFX6-NEXT: s_lshr_b32 s0, s0, 16 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16 +; GFX6-NEXT: s_lshr_b32 s3, s3, 16 ; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: s_lshr_b32 s2, s2, 16 ; GFX6-NEXT: s_lshl_b32 s1, s3, 16 ; GFX6-NEXT: s_lshr_b32 s7, s7, 16 -; GFX6-NEXT: s_lshl_b32 s3, s7, 16 +; GFX6-NEXT: s_lshr_b32 s5, s5, 16 ; GFX6-NEXT: s_or_b32 s1, s2, s1 +; GFX6-NEXT: s_lshr_b32 s4, s4, 16 ; GFX6-NEXT: s_lshl_b32 s2, s5, 16 +; GFX6-NEXT: s_lshr_b32 s6, s6, 16 +; GFX6-NEXT: s_lshl_b32 s3, s7, 16 ; GFX6-NEXT: s_or_b32 s2, s4, s2 ; GFX6-NEXT: s_or_b32 s3, s6, s3 ; GFX6-NEXT: ; return to shader part epilog