diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -285,6 +285,8 @@ LegalizeResult lowerBswap(MachineInstr &MI); LegalizeResult lowerBitreverse(MachineInstr &MI); LegalizeResult lowerReadWriteRegister(MachineInstr &MI); + LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI); + LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI); private: MachineRegisterInfo &MRI; diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -4956,6 +4956,151 @@ return Legalized; } +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) { + Register Res = MI.getOperand(0).getReg(); + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + LLT Ty = MRI.getType(Res); + bool IsSigned; + bool IsAdd; + unsigned BaseOp; + switch (MI.getOpcode()) { + default: + llvm_unreachable("unexpected addsat/subsat opcode"); + case TargetOpcode::G_UADDSAT: + IsSigned = false; + IsAdd = true; + BaseOp = TargetOpcode::G_ADD; + break; + case TargetOpcode::G_SADDSAT: + IsSigned = true; + IsAdd = true; + BaseOp = TargetOpcode::G_ADD; + break; + case TargetOpcode::G_USUBSAT: + IsSigned = false; + IsAdd = false; + BaseOp = TargetOpcode::G_SUB; + break; + case TargetOpcode::G_SSUBSAT: + IsSigned = true; + IsAdd = false; + BaseOp = TargetOpcode::G_SUB; + break; + } + + if (IsSigned) { + // sadd.sat(a, b) -> + // hi = 0x7fffffff - smax(a, 0) + // lo = 0x80000000 - smin(a, 0) + // a + smin(smax(lo, b), hi) + // ssub.sat(a, b) -> + // lo = smax(a, -1) - 0x7fffffff + // hi = smin(a, -1) - 0x80000000 + // a - smin(smax(lo, b), hi) + // TODO: AMDGPU can use a "median of 3" instruction here: + // a +/- med3(lo, b, hi) + uint64_t NumBits = Ty.getScalarSizeInBits(); + auto MaxVal = + MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits)); + auto MinVal = + MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits)); + MachineInstrBuilder Hi, Lo; + if (IsAdd) { + auto Zero = MIRBuilder.buildConstant(Ty, 0); + Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero)); + Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero)); + } else { + auto NegOne = MIRBuilder.buildConstant(Ty, -1); + Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne), + MaxVal); + Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne), + MinVal); + } + auto RHSClamped = + MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi); + MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped}); + } else { + // uadd.sat(a, b) -> a + umin(~a, b) + // usub.sat(a, b) -> a - umin(a, b) + Register Not = IsAdd ? 
MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS; + auto Min = MIRBuilder.buildUMin(Ty, Not, RHS); + MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min}); + } + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) { + Register Res = MI.getOperand(0).getReg(); + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + LLT Ty = MRI.getType(Res); + LLT BoolTy = Ty.changeElementSize(1); + bool IsSigned; + bool IsAdd; + unsigned OverflowOp; + switch (MI.getOpcode()) { + default: + llvm_unreachable("unexpected addsat/subsat opcode"); + case TargetOpcode::G_UADDSAT: + IsSigned = false; + IsAdd = true; + OverflowOp = TargetOpcode::G_UADDO; + break; + case TargetOpcode::G_SADDSAT: + IsSigned = true; + IsAdd = true; + OverflowOp = TargetOpcode::G_SADDO; + break; + case TargetOpcode::G_USUBSAT: + IsSigned = false; + IsAdd = false; + OverflowOp = TargetOpcode::G_USUBO; + break; + case TargetOpcode::G_SSUBSAT: + IsSigned = true; + IsAdd = false; + OverflowOp = TargetOpcode::G_SSUBO; + break; + } + + auto OverflowRes = + MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS}); + Register Tmp = OverflowRes.getReg(0); + Register Ov = OverflowRes.getReg(1); + MachineInstrBuilder Clamp; + if (IsSigned) { + // sadd.sat(a, b) -> + // {tmp, ov} = saddo(a, b) + // ov ? (tmp >>s 31) + 0x80000000 : r + // ssub.sat(a, b) -> + // {tmp, ov} = ssubo(a, b) + // ov ? (tmp >>s 31) + 0x80000000 : r + uint64_t NumBits = Ty.getScalarSizeInBits(); + auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1); + auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount); + auto MinVal = + MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits)); + Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal); + } else { + // uadd.sat(a, b) -> + // {tmp, ov} = uaddo(a, b) + // ov ? 0xffffffff : tmp + // usub.sat(a, b) -> + // {tmp, ov} = usubo(a, b) + // ov ? 0 : tmp + Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? 
-1 : 0); + } + MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp); + + MI.eraseFromParent(); + return Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::lowerBswap(MachineInstr &MI) { Register Dst = MI.getOperand(0).getReg(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -117,6 +117,9 @@ bool legalizeSDIV_SREM(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; + bool legalizeAddSubSat(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const; + bool legalizeFDIV(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; bool legalizeFDIV16(MachineInstr &MI, MachineRegisterInfo &MRI, diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1330,6 +1330,10 @@ G_FMINIMUM, G_FMAXIMUM }).lower(); + getActionDefinitionsBuilder({G_SADDSAT, G_SSUBSAT, G_UADDSAT, G_USUBSAT}) + .legalFor({S16, V2S16, S32}) + .custom(); + getActionDefinitionsBuilder({G_VASTART, G_VAARG, G_BRJT, G_JUMP_TABLE, G_DYN_STACKALLOC, G_INDEXED_LOAD, G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD, G_INDEXED_STORE}) @@ -1402,6 +1406,11 @@ return legalizeFFloor(MI, MRI, B); case TargetOpcode::G_BUILD_VECTOR: return legalizeBuildVector(MI, MRI, B); + case TargetOpcode::G_SADDSAT: + case TargetOpcode::G_SSUBSAT: + case TargetOpcode::G_UADDSAT: + case TargetOpcode::G_USUBSAT: + return legalizeAddSubSat(MI, MRI, B); default: return false; } @@ -2587,6 +2596,38 @@ return false; } +bool AMDGPULegalizerInfo::legalizeAddSubSat(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { + bool IsSigned; + switch (MI.getOpcode()) { + default: + llvm_unreachable("unhandled opcode"); + case TargetOpcode::G_SADDSAT: + case TargetOpcode::G_SSUBSAT: + IsSigned = true; + break; + case TargetOpcode::G_UADDSAT: + case TargetOpcode::G_USUBSAT: + IsSigned = false; + break; + } + + MachineIRBuilder HelperBuilder(MI); + GISelObserverWrapper DummyObserver; + LegalizerHelper Helper(B.getMF(), DummyObserver, HelperBuilder); + HelperBuilder.setInstr(MI); + if (IsSigned) { + Register Dst = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(Dst); + if (!Ty.isVector()) + return Helper.lowerAddSubSatToAddoSubo(MI) == LegalizerHelper::Legalized; + // Lowering to min/max produces more instructions but they are all + // trivially vectorizable. 
+ } + return Helper.lowerAddSubSatToMinMax(MI) == LegalizerHelper::Legalized; +} + bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sat.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sat.mir @@ -0,0 +1,288 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=GCN +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=GCN +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=GCN + +--- +name: uaddsat_s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GCN-LABEL: name: uaddsat_s16 + ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GCN: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC1]] + ; GCN: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s16) + ; GCN: $vgpr0 = COPY [[ANYEXT]](s32) + %3:_(s32) = COPY $vgpr0 + %0:_(s16) = G_TRUNC %3(s32) + %4:_(s32) = COPY $vgpr1 + %1:_(s16) = G_TRUNC %4(s32) + %2:sgpr_64 = COPY $sgpr30_sgpr31 + %5:_(s16) = G_UADDSAT %0, %1 + %7:_(s32) = G_ANYEXT %5(s16) + $vgpr0 = COPY %7(s32) +... + +--- +name: uaddsat_s32 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GCN-LABEL: name: uaddsat_s32 + ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[COPY]], [[COPY1]] + ; GCN: $vgpr0 = COPY [[UADDSAT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = G_UADDSAT %0, %1 + $vgpr0 = COPY %2 +... + +--- +name: uaddsat_s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GCN-LABEL: name: uaddsat_s64 + ; GCN: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GCN: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GCN: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GCN: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY]], [[C]] + ; GCN: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[XOR]](s64), [[COPY1]] + ; GCN: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[XOR]], [[COPY1]] + ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GCN: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SELECT]](s64) + ; GCN: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GCN: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GCN: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GCN: $vgpr0_vgpr1 = COPY [[MV]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_UADDSAT %0, %1 + $vgpr0_vgpr1 = COPY %2 +... 
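+
+# The saddsat tests below cover both of the new lowering paths: s16 and s32
+# are handled by the new legalFor rule and are left as G_SADDSAT, while the
+# s64 case goes through lowerAddSubSatToAddoSubo, i.e.
+#   {tmp, ov} = saddo(a, b); ov ? (tmp >>s 63) + 0x8000000000000000 : tmp
+# (per the lowering comments above), with the wide add, compares and clamp
+# then narrowed to 32-bit G_UADDO/G_UADDE pieces in the CHECK lines.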
+ +--- +name: saddsat_s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GCN-LABEL: name: saddsat_s16 + ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GCN: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[TRUNC]], [[TRUNC1]] + ; GCN: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SADDSAT]](s16) + ; GCN: $vgpr0 = COPY [[ANYEXT]](s32) + %3:_(s32) = COPY $vgpr0 + %0:_(s16) = G_TRUNC %3(s32) + %4:_(s32) = COPY $vgpr1 + %1:_(s16) = G_TRUNC %4(s32) + %2:sgpr_64 = COPY $sgpr30_sgpr31 + %5:_(s16) = G_SADDSAT %0, %1 + %7:_(s32) = G_ANYEXT %5(s16) + $vgpr0 = COPY %7(s32) +... + +--- +name: saddsat_s32 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GCN-LABEL: name: saddsat_s32 + ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[SADDSAT:%[0-9]+]]:_(s32) = G_SADDSAT [[COPY]], [[COPY1]] + ; GCN: $vgpr0 = COPY [[SADDSAT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = G_SADDSAT %0, %1 + $vgpr0 = COPY %2 +... + +--- +name: saddsat_s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GCN-LABEL: name: saddsat_s64 + ; GCN: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GCN: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GCN: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GCN: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GCN: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GCN: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GCN: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GCN: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] + ; GCN: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s64), [[C]] + ; GCN: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GCN: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) + ; GCN: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GCN: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GCN: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) + ; GCN: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GCN: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GCN: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GCN: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] + ; GCN: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_SADDSAT %0, %1 + $vgpr0_vgpr1 = COPY %2 +... 
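+
+# The usubsat tests below take the min/max path from lowerAddSubSatToMinMax:
+#   usub.sat(a, b) -> a - umin(a, b)
+# s16 and s32 stay as G_USUBSAT under the new legalFor rule; for s64 the
+# G_UMIN is expanded to an icmp/select and the subtract is narrowed to a
+# G_USUBO/G_USUBE pair, which is what the CHECK lines encode.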
+ +--- +name: usubsat_s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GCN-LABEL: name: usubsat_s16 + ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GCN: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC1]] + ; GCN: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s16) + ; GCN: $vgpr0 = COPY [[ANYEXT]](s32) + %3:_(s32) = COPY $vgpr0 + %0:_(s16) = G_TRUNC %3(s32) + %4:_(s32) = COPY $vgpr1 + %1:_(s16) = G_TRUNC %4(s32) + %2:sgpr_64 = COPY $sgpr30_sgpr31 + %5:_(s16) = G_USUBSAT %0, %1 + %7:_(s32) = G_ANYEXT %5(s16) + $vgpr0 = COPY %7(s32) +... + +--- +name: usubsat_s32 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GCN-LABEL: name: usubsat_s32 + ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[COPY]], [[COPY1]] + ; GCN: $vgpr0 = COPY [[USUBSAT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = G_USUBSAT %0, %1 + $vgpr0 = COPY %2 +... + +--- +name: usubsat_s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GCN-LABEL: name: usubsat_s64 + ; GCN: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GCN: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GCN: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] + ; GCN: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] + ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GCN: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SELECT]](s64) + ; GCN: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] + ; GCN: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; GCN: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; GCN: $vgpr0_vgpr1 = COPY [[MV]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_USUBSAT %0, %1 + $vgpr0_vgpr1 = COPY %2 +... + +--- +name: ssubsat_s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GCN-LABEL: name: ssubsat_s16 + ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GCN: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[TRUNC]], [[TRUNC1]] + ; GCN: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSUBSAT]](s16) + ; GCN: $vgpr0 = COPY [[ANYEXT]](s32) + %3:_(s32) = COPY $vgpr0 + %0:_(s16) = G_TRUNC %3(s32) + %4:_(s32) = COPY $vgpr1 + %1:_(s16) = G_TRUNC %4(s32) + %2:sgpr_64 = COPY $sgpr30_sgpr31 + %5:_(s16) = G_SSUBSAT %0, %1 + %7:_(s32) = G_ANYEXT %5(s16) + $vgpr0 = COPY %7(s32) +... + +--- +name: ssubsat_s32 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GCN-LABEL: name: ssubsat_s32 + ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[SSUBSAT:%[0-9]+]]:_(s32) = G_SSUBSAT [[COPY]], [[COPY1]] + ; GCN: $vgpr0 = COPY [[SSUBSAT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = G_SSUBSAT %0, %1 + $vgpr0 = COPY %2 +... 
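+
+# ssubsat_s64 uses the addo/subo path:
+#   {tmp, ov} = ssubo(a, b); ov ? (tmp >>s 63) + 0x8000000000000000 : tmp
+# The s64 G_SSUBO is in turn narrowed to G_USUBO/G_USUBE, with the overflow
+# bit recomputed from the two signed compares xor'd together, as reflected
+# in the CHECK lines below.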
+ +--- +name: ssubsat_s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GCN-LABEL: name: ssubsat_s64 + ; GCN: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GCN: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GCN: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GCN: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] + ; GCN: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; GCN: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; GCN: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GCN: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] + ; GCN: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY1]](s64), [[C]] + ; GCN: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GCN: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) + ; GCN: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GCN: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GCN: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) + ; GCN: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GCN: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GCN: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GCN: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] + ; GCN: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_SSUBSAT %0, %1 + $vgpr0_vgpr1 = COPY %2 +...