diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -88,6 +88,12 @@ bool legalizeBuildVector(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; + + void buildMultiply(LegalizerHelper &Helper, MutableArrayRef Accum, + ArrayRef Src0, ArrayRef Src1, + bool UsePartialMad64_32, + bool SeparateOddAlignedProducts) const; + bool legalizeMul(LegalizerHelper &Helper, MachineInstr &MI) const; bool legalizeCTLZ_CTTZ(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -530,13 +530,22 @@ if (ST.hasVOP3PInsts() && ST.hasAddNoCarry() && ST.hasIntClamp()) { // Full set of gfx9 features. - getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL}) + getActionDefinitionsBuilder({G_ADD, G_SUB}) .legalFor({S32, S16, V2S16}) + .clampMaxNumElementsStrict(0, S16, 2) + .scalarize(0) .minScalar(0, S16) + .widenScalarToNextMultipleOf(0, 32) + .maxScalar(0, S32); + + getActionDefinitionsBuilder(G_MUL) + .legalFor({S32, S16, V2S16}) .clampMaxNumElementsStrict(0, S16, 2) + .scalarize(0) + .minScalar(0, S16) .widenScalarToNextMultipleOf(0, 32) - .maxScalar(0, S32) - .scalarize(0); + .custom(); + assert(ST.hasMad64_32()); getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT, G_SADDSAT, G_SSUBSAT}) .legalFor({S32, S16, V2S16}) // Clamp modifier @@ -546,13 +555,21 @@ .widenScalarToNextPow2(0, 32) .lower(); } else if (ST.has16BitInsts()) { - getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL}) + getActionDefinitionsBuilder({G_ADD, G_SUB}) .legalFor({S32, S16}) .minScalar(0, S16) .widenScalarToNextMultipleOf(0, 32) .maxScalar(0, S32) .scalarize(0); + getActionDefinitionsBuilder(G_MUL) + .legalFor({S32, S16}) + .scalarize(0) + .minScalar(0, S16) + .widenScalarToNextMultipleOf(0, 32) + .custom(); + assert(ST.hasMad64_32()); + // Technically the saturating operations require clamp bit support, but this // was introduced at the same time as 16-bit operations. getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT}) @@ -569,12 +586,23 @@ .scalarize(0) .lower(); } else { - getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL}) + getActionDefinitionsBuilder({G_ADD, G_SUB}) .legalFor({S32}) .widenScalarToNextMultipleOf(0, 32) .clampScalar(0, S32, S32) .scalarize(0); + auto &Mul = getActionDefinitionsBuilder(G_MUL) + .legalFor({S32}) + .scalarize(0) + .minScalar(0, S32) + .widenScalarToNextMultipleOf(0, 32); + + if (ST.hasMad64_32()) + Mul.custom(); + else + Mul.maxScalar(0, S32); + if (ST.hasIntClamp()) { getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT}) .legalFor({S32}) // Clamp modifier. @@ -1763,6 +1791,8 @@ return legalizeFFloor(MI, MRI, B); case TargetOpcode::G_BUILD_VECTOR: return legalizeBuildVector(MI, MRI, B); + case TargetOpcode::G_MUL: + return legalizeMul(Helper, MI); case TargetOpcode::G_CTLZ: case TargetOpcode::G_CTTZ: return legalizeCTLZ_CTTZ(MI, MRI, B); @@ -2861,6 +2891,299 @@ return true; } +// Build a big integer multiply or multiply-add using MAD_64_32 instructions. +// +// Source and accumulation registers must all be 32-bits. +// +// TODO: When the multiply is uniform, we should produce a code sequence +// that is better suited to instruction selection on the SALU. Instead of +// the outer loop going over parts of the result, the outer loop should go +// over parts of one of the factors. This should result in instruction +// selection that makes full use of S_ADDC_U32 instructions. +void AMDGPULegalizerInfo::buildMultiply( + LegalizerHelper &Helper, MutableArrayRef Accum, + ArrayRef Src0, ArrayRef Src1, + bool UsePartialMad64_32, bool SeparateOddAlignedProducts) const { + // Use (possibly empty) vectors of S1 registers to represent the set of + // carries from one pair of positions to the next. + using Carry = SmallVector; + + MachineIRBuilder &B = Helper.MIRBuilder; + + const LLT S1 = LLT::scalar(1); + const LLT S32 = LLT::scalar(32); + const LLT S64 = LLT::scalar(64); + + Register Zero32; + Register Zero64; + + auto getZero32 = [&]() -> Register { + if (!Zero32) + Zero32 = B.buildConstant(S32, 0).getReg(0); + return Zero32; + }; + auto getZero64 = [&]() -> Register { + if (!Zero64) + Zero64 = B.buildConstant(S64, 0).getReg(0); + return Zero64; + }; + + // Merge the given carries into the 32-bit LocalAccum, which is modified + // in-place. + // + // Returns the carry-out, which is a single S1 register or null. + auto mergeCarry = + [&](Register &LocalAccum, const Carry &CarryIn) -> Register { + if (CarryIn.empty()) + return Register(); + + bool HaveCarryOut = true; + Register CarryAccum; + if (CarryIn.size() == 1) { + if (!LocalAccum) { + LocalAccum = B.buildZExt(S32, CarryIn[0]).getReg(0); + return Register(); + } + + CarryAccum = getZero32(); + } else { + CarryAccum = B.buildZExt(S32, CarryIn[0]).getReg(0); + for (unsigned i = 1; i + 1 < CarryIn.size(); ++i) { + CarryAccum = + B.buildUAdde(S32, S1, CarryAccum, getZero32(), CarryIn[i]) + .getReg(0); + } + + if (!LocalAccum) { + LocalAccum = getZero32(); + HaveCarryOut = false; + } + } + + auto Add = + B.buildUAdde(S32, S1, CarryAccum, LocalAccum, CarryIn.back()); + LocalAccum = Add.getReg(0); + return HaveCarryOut ? Add.getReg(1) : Register(); + }; + + // Build a multiply-add chain to compute + // + // LocalAccum + (partial products at DstIndex) + // + (opportunistic subset of CarryIn) + // + // LocalAccum is an array of one or two 32-bit registers that are updated + // in-place. The incoming registers may be null. + // + // In some edge cases, carry-ins can be consumed "for free". In that case, + // the consumed carry bits are removed from CarryIn in-place. + auto buildMadChain = + [&](MutableArrayRef LocalAccum, unsigned DstIndex, Carry &CarryIn) + -> Carry { + assert((DstIndex + 1 < Accum.size() && LocalAccum.size() == 2) || + (DstIndex + 1 >= Accum.size() && LocalAccum.size() == 1)); + + Register Tmp; + Carry CarryOut; + unsigned j0 = 0; + + // Use plain 32-bit multiplication for the most significant part of the + // result by default. + if (LocalAccum.size() == 1 && + (!UsePartialMad64_32 || !CarryIn.empty())) { + do { + unsigned j1 = DstIndex - j0; + auto Mul = B.buildMul(S32, Src0[j0], Src1[j1]); + if (!LocalAccum[0]) { + LocalAccum[0] = Mul.getReg(0); + } else { + if (CarryIn.empty()) { + LocalAccum[0] = B.buildAdd(S32, LocalAccum[0], Mul).getReg(0); + } else { + LocalAccum[0] = + B.buildUAdde(S32, S1, LocalAccum[0], Mul, CarryIn.back()) + .getReg(0); + CarryIn.pop_back(); + } + } + ++j0; + } while (j0 <= DstIndex && (!UsePartialMad64_32 || !CarryIn.empty())); + } + + // Build full 64-bit multiplies. + if (j0 <= DstIndex) { + bool HaveSmallAccum = false; + Register Tmp; + + if (LocalAccum[0]) { + if (LocalAccum.size() == 1) { + Tmp = B.buildAnyExt(S64, LocalAccum[0]).getReg(0); + HaveSmallAccum = true; + } else if (LocalAccum[1]) { + Tmp = B.buildMerge(S64, LocalAccum).getReg(0); + HaveSmallAccum = false; + } else { + Tmp = B.buildZExt(S64, LocalAccum[0]).getReg(0); + HaveSmallAccum = true; + } + } else { + assert(LocalAccum.size() == 1 || !LocalAccum[1]); + Tmp = getZero64(); + HaveSmallAccum = true; + } + + do { + unsigned j1 = DstIndex - j0; + auto Mad = B.buildInstr(AMDGPU::G_AMDGPU_MAD_U64_U32, {S64, S1}, + {Src0[j0], Src1[j1], Tmp}); + Tmp = Mad.getReg(0); + if (!HaveSmallAccum) + CarryOut.push_back(Mad.getReg(1)); + HaveSmallAccum = false; + ++j0; + } while (j0 <= DstIndex); + + auto Unmerge = B.buildUnmerge(S32, Tmp); + LocalAccum[0] = Unmerge.getReg(0); + if (LocalAccum.size() > 1) + LocalAccum[1] = Unmerge.getReg(1); + } + + return CarryOut; + }; + + // Outer multiply loop, iterating over destination parts from least + // significant to most significant parts. + // + // The columns of the following diagram correspond to the destination parts + // affected by one iteration of the outer loops (ignoring boundary + // conditions). + // + // Dest index relative to 2 * i: 1 0 -1 + // ------ + // Carries from previous iteration: e o + // Even-aligned partial product sum: E E . + // Odd-aligned partial product sum: O O + // + // 'o' is OddCarry, 'e' is EvenCarry. + // EE and OO are computed from partial products via buildMadChain and use + // accumulation where possible and appropriate. + // + Register SeparateOddCarry; + Carry EvenCarry; + Carry OddCarry; + + for (unsigned i = 0; i <= Accum.size() / 2; ++i) { + Carry OddCarryIn = std::move(OddCarry); + Carry EvenCarryIn = std::move(EvenCarry); + OddCarry.clear(); + EvenCarry.clear(); + + // Partial products at offset 2 * i. + if (2 * i < Accum.size()) { + auto LocalAccum = Accum.drop_front(2 * i).take_front(2); + EvenCarry = buildMadChain(LocalAccum, 2 * i, EvenCarryIn); + } + + // Partial products at offset 2 * i - 1. + if (i > 0) { + if (!SeparateOddAlignedProducts) { + auto LocalAccum = Accum.drop_front(2 * i - 1).take_front(2); + OddCarry = buildMadChain(LocalAccum, 2 * i - 1, OddCarryIn); + } else { + bool IsHighest = 2 * i >= Accum.size(); + Register SeparateOddOut[2]; + auto LocalAccum = makeMutableArrayRef(SeparateOddOut) + .take_front(IsHighest ? 1 : 2); + OddCarry = buildMadChain(LocalAccum, 2 * i - 1, OddCarryIn); + + MachineInstr *Lo; + + if (i == 1) { + if (!IsHighest) + Lo = B.buildUAddo(S32, S1, Accum[2 * i - 1], SeparateOddOut[0]); + else + Lo = B.buildAdd(S32, Accum[2 * i - 1], SeparateOddOut[0]); + } else { + Lo = B.buildUAdde(S32, S1, Accum[2 * i - 1], SeparateOddOut[0], + SeparateOddCarry); + } + Accum[2 * i - 1] = Lo->getOperand(0).getReg(); + + if (!IsHighest) { + auto Hi = B.buildUAdde(S32, S1, Accum[2 * i], SeparateOddOut[1], + Lo->getOperand(1).getReg()); + Accum[2 * i] = Hi.getReg(0); + SeparateOddCarry = Hi.getReg(1); + } + } + } + + // Add in the carries from the previous iteration + if (i > 0) { + if (Register CarryOut = mergeCarry(Accum[2 * i - 1], OddCarryIn)) + EvenCarryIn.push_back(CarryOut); + + if (2 * i < Accum.size()) { + if (Register CarryOut = mergeCarry(Accum[2 * i], EvenCarryIn)) + OddCarry.push_back(CarryOut); + } + } + } +} + +// Custom narrowing of wide multiplies using wide multiply-add instructions. +// +// If the multiply is followed by an addition, we attempt to integrate it to +// make better use of multiply-add capabilities. +bool AMDGPULegalizerInfo::legalizeMul(LegalizerHelper &Helper, + MachineInstr &MI) const { + assert(ST.hasMad64_32()); + assert(MI.getOpcode() == TargetOpcode::G_MUL); + + MachineIRBuilder &B = Helper.MIRBuilder; + MachineRegisterInfo &MRI = *B.getMRI(); + + Register DstReg = MI.getOperand(0).getReg(); + Register Src0 = MI.getOperand(1).getReg(); + Register Src1 = MI.getOperand(2).getReg(); + + LLT Ty = MRI.getType(DstReg); + assert(Ty.isScalar()); + + unsigned Size = Ty.getSizeInBits(); + unsigned NumParts = Size / 32; + assert((Size % 32) == 0); + assert(NumParts >= 2); + + // Whether to use MAD_64_32 for partial products whose high half is + // discarded. This avoids some ADD instructions but risks false dependency + // stalls on some subtargets in some cases. + const bool UsePartialMad64_32 = ST.getGeneration() < AMDGPUSubtarget::GFX10; + + // Whether to compute odd-aligned partial products separately. This is + // advisable on subtargets where the accumulator of MAD_64_32 must be placed + // in an even-aligned VGPR. + const bool SeparateOddAlignedProducts = ST.hasFullRate64Ops(); + + LLT S32 = LLT::scalar(32); + SmallVector Src0Parts, Src1Parts; + for (unsigned i = 0; i < NumParts; ++i) { + Src0Parts.push_back(MRI.createGenericVirtualRegister(S32)); + Src1Parts.push_back(MRI.createGenericVirtualRegister(S32)); + } + B.buildUnmerge(Src0Parts, Src0); + B.buildUnmerge(Src1Parts, Src1); + + SmallVector AccumRegs(NumParts); + buildMultiply(Helper, AccumRegs, Src0Parts, Src1Parts, UsePartialMad64_32, + SeparateOddAlignedProducts); + + B.buildMerge(DstReg, AccumRegs); + MI.eraseFromParent(); + return true; + +} + // Legalize ctlz/cttz to ffbh/ffbl instead of the default legalization to // ctlz/cttz_zero_undef. This allows us to fix up the result for the zero input // case with a single min instruction instead of a compare+select. diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir @@ -108,39 +108,42 @@ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV]](s32), [[UV2]], [[C]] + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV5]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV]](s32), [[UV3]], [[ANYEXT]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV1]](s32), [[UV2]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV4]](s32), [[UV6]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) ; GFX9-LABEL: name: test_mul_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV]](s32), [[UV2]], [[C]] + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV5]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV]](s32), [[UV3]], [[ANYEXT]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV1]](s32), [[UV2]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV4]](s32), [[UV6]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) ; GFX10-LABEL: name: test_mul_s64 ; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]] + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV]](s32), [[UV2]], [[C]] + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[MUL]] ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] - ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32) + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV4]](s32), [[ADD1]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 @@ -186,22 +189,23 @@ ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV6]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32) - ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV10]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]] - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[MUL4]], [[MUL5]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ADD2]], [[UMULH1]] - ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD3]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV4]](s32), [[UV6]], [[C]] + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV9]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV4]](s32), [[UV7]], [[ANYEXT]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV5]](s32), [[UV6]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV8]](s32), [[UV10]](s32) + ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV12]](s32), [[UV14]], [[C]] + ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV17]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV12]](s32), [[UV15]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV13]](s32), [[UV14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV16]](s32), [[UV18]](s32) ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: test_mul_v2s64 @@ -211,22 +215,23 @@ ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV6]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32) - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV10]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]] - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[MUL4]], [[MUL5]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ADD2]], [[UMULH1]] - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD3]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV4]](s32), [[UV6]], [[C]] + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV9]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV4]](s32), [[UV7]], [[ANYEXT]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV5]](s32), [[UV6]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV8]](s32), [[UV10]](s32) + ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV12]](s32), [[UV14]], [[C]] + ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV17]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV12]](s32), [[UV15]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV13]](s32), [[UV14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV16]](s32), [[UV18]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX10-LABEL: name: test_mul_v2s64 @@ -236,22 +241,23 @@ ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV6]] + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV4]](s32), [[UV6]], [[C]] + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[MUL]] ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] - ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32) - ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV10]] - ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]] - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]] - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[MUL4]], [[MUL5]] - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ADD2]], [[UMULH1]] - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD3]](s32) + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV8]](s32), [[ADD1]](s32) + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV10]](s32), [[UV12]], [[C]] + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UV13]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[MUL2]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UV12]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ADD2]], [[MUL3]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV14]](s32), [[ADD3]](s32) ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 @@ -623,39 +629,42 @@ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV]](s32), [[UV2]], [[C]] + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV5]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV]](s32), [[UV3]], [[ANYEXT]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV1]](s32), [[UV2]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV4]](s32), [[UV6]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) ; GFX9-LABEL: name: test_mul_s33 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV]](s32), [[UV2]], [[C]] + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV5]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV]](s32), [[UV3]], [[ANYEXT]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV1]](s32), [[UV2]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV4]](s32), [[UV6]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) ; GFX10-LABEL: name: test_mul_s33 ; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]] + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV]](s32), [[UV2]], [[C]] + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[MUL]] ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] - ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32) + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV4]](s32), [[ADD1]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 @@ -703,79 +712,56 @@ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV4]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL1]], [[MUL2]] - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV2]], [[UV3]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV4]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV5]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] - ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV4]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[MUL3]], [[MUL4]] - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[MUL5]] - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ADD2]], [[UMULH1]] - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH2]] - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[ADD]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MUL]](s32), [[UADDO2]](s32), [[ADD5]](s32) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV]](s32), [[UV3]], [[C]] + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV]](s32), [[UV5]], [[C]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV1]](s32), [[UV4]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV2]](s32), [[UV3]], [[AMDGPU_MAD_U64_U32_4]] + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV7]](s32), [[UV8]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV]](s32), [[UV4]], [[MV]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV1]](s32), [[UV3]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UV6]](s32), [[UV10]](s32), [[UV11]](s32) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV1]](s96) ; GFX9-LABEL: name: test_mul_s96 ; GFX9: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV4]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] - ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL1]], [[MUL2]] - ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV2]], [[UV3]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV4]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV5]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] - ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV4]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[MUL3]], [[MUL4]] - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[MUL5]] - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ADD2]], [[UMULH1]] - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH2]] - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[ADD]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MUL]](s32), [[UADDO2]](s32), [[ADD5]](s32) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV]](s32), [[UV3]], [[C]] + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV]](s32), [[UV5]], [[C]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV1]](s32), [[UV4]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV2]](s32), [[UV3]], [[AMDGPU_MAD_U64_U32_4]] + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV7]](s32), [[UV8]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV]](s32), [[UV4]], [[MV]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV1]](s32), [[UV3]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UV6]](s32), [[UV10]](s32), [[UV11]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV1]](s96) ; GFX10-LABEL: name: test_mul_s96 ; GFX10: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) ; GFX10-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] - ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] - ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV4]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] - ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL1]], [[MUL2]] - ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV2]], [[UV3]] - ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV4]] - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV5]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] - ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV4]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[MUL3]], [[MUL4]] - ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[MUL5]] - ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ADD2]], [[UMULH1]] - ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH2]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[ADD]] - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MUL]](s32), [[UADDO2]](s32), [[ADD5]](s32) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV]](s32), [[UV3]], [[C]] + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV5]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV4]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL]], [[MUL1]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV2]], [[UV3]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL2]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV7]](s32), [[ADD1]](s32) + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV]](s32), [[UV4]], [[MV]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV1]](s32), [[UV3]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UV6]](s32), [[UV8]](s32), [[UV9]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV1]](s96) %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5 %2:_(s96) = G_MUL %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir @@ -656,123 +656,123 @@ ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] + ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV15]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV16]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] - ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]] + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]] + ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] + ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] - ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] - ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] + ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] - ; GFX8-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] + ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX8-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] + ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX8-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]] - ; GFX8-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32) - ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] - ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] - ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] - ; GFX8-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] - ; GFX8-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX8-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] - ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] - ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] + ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] + ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD11]](s32) + ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]] + ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) + ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV29]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) + ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] + ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV23]], [[UV30]] + ; GFX8-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV33]] ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV32]] ; GFX8-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]] + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV33]] ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]] - ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] + ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV32]] + ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV33]], [[USUBO3]] ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX8-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV22]] - ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV23]], [[UADDO39]] + ; GFX8-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV34]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV35]], [[UADDO39]] ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV33]] ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] + ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV32]] ; GFX8-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]] + ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV33]] ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV24]] - ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV25]], [[UADDO41]] + ; GFX8-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV36]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV37]], [[UADDO41]] ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] @@ -780,10 +780,10 @@ ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]] ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]] - ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]] - ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO7]] + ; GFX8-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX8-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV38]], [[UV40]] + ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV39]], [[UV41]], [[USUBO7]] ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) ; GFX9-LABEL: name: test_sdiv_s64 @@ -826,123 +826,123 @@ ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] + ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV15]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV16]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] - ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]] + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]] + ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] + ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] - ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] - ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] + ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] - ; GFX9-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] + ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX9-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] + ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX9-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]] - ; GFX9-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32) - ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] - ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] - ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] - ; GFX9-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] - ; GFX9-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX9-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] - ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] - ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] + ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] + ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD11]](s32) + ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]] + ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV29]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) + ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] + ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV23]], [[UV30]] + ; GFX9-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV33]] ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV32]] ; GFX9-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]] + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV33]] ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]] - ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] + ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV32]] + ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV33]], [[USUBO3]] ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV22]] - ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV23]], [[UADDO39]] + ; GFX9-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV34]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV35]], [[UADDO39]] ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV33]] ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] + ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV32]] ; GFX9-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]] + ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV33]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV24]] - ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV25]], [[UADDO41]] + ; GFX9-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV36]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV37]], [[UADDO41]] ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] @@ -950,10 +950,10 @@ ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]] ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]] - ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]] - ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO7]] + ; GFX9-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX9-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV38]], [[UV40]] + ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV39]], [[UV41]], [[USUBO7]] ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) ; GFX10-LABEL: name: test_sdiv_s64 @@ -996,123 +996,123 @@ ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[MUL]] ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV16]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV16]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV16]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] - ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] - ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE6]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32) - ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] - ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] - ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] - ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] - ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] + ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO36]], [[C5]] + ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]] + ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV27]] ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV26]] ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV27]] ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]] - ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV26]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV27]], [[USUBO3]] ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV22]] - ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV23]], [[UADDO39]] + ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV28]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV29]], [[UADDO39]] ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) - ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV27]] ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV26]] ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]] + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV27]] ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV24]] - ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV25]], [[UADDO41]] + ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV30]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV31]], [[UADDO41]] ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] @@ -1120,10 +1120,10 @@ ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]] ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]] - ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]] - ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO7]] + ; GFX10-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX10-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV32]], [[UV34]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV33]], [[UV35]], [[USUBO7]] ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 @@ -1511,123 +1511,123 @@ ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV16]] ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] + ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV18]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV20]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV18]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV20]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV18]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV20]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV20]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] - ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV23]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV22]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV24]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV22]] + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV24]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV22]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV24]] + ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV24]] + ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] - ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]] - ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO26]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDE6]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO26]] + ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]] - ; GFX8-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE6]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDE6]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDO26]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDE6]] + ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX8-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] + ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX8-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]] - ; GFX8-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32) - ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[UADDO36]] - ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]] - ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]] - ; GFX8-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV22]], [[UADDO36]] - ; GFX8-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX8-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[MUL15]] - ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]] - ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]] + ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDE6]] + ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD11]](s32) + ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[UADDO36]], [[C5]] + ; GFX8-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) + ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV33]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[ADD11]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV31]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) + ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV32]] + ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV34]], [[USUBO3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV27]], [[UV34]] + ; GFX8-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV37]] ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV36]] ; GFX8-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]] + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV37]] ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]] - ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] + ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV36]] + ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV37]], [[USUBO3]] ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX8-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV26]] - ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV27]], [[UADDO39]] + ; GFX8-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV38]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV39]], [[UADDO39]] ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV37]] ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] + ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV36]] ; GFX8-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] + ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV37]] ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV28]] - ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV29]], [[UADDO41]] + ; GFX8-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV40]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV41]], [[UADDO41]] ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] @@ -1635,28 +1635,28 @@ ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]] ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]] - ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX8-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV30]], [[UV32]] - ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV31]], [[UV33]], [[USUBO7]] + ; GFX8-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX8-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV42]], [[UV44]] + ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV43]], [[UV45]], [[USUBO7]] ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32) - ; GFX8-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) - ; GFX8-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UV34]], [[UV36]] - ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UV35]], [[UV37]], [[UADDO43]] + ; GFX8-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX8-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) + ; GFX8-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UV46]], [[UV48]] + ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UV47]], [[UV49]], [[UADDO43]] ; GFX8-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO42]](s32), [[UADDE12]](s32) - ; GFX8-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) - ; GFX8-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UV38]], [[UV40]] - ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UV39]], [[UV41]], [[UADDO45]] + ; GFX8-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX8-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) + ; GFX8-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UV50]], [[UV52]] + ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UV51]], [[UV53]], [[UADDO45]] ; GFX8-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE14]](s32) ; GFX8-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] ; GFX8-NEXT: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] - ; GFX8-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX8-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV42]](s32) - ; GFX8-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV43]](s32) + ; GFX8-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX8-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV54]](s32) + ; GFX8-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV55]](s32) ; GFX8-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C1]] ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) @@ -1667,125 +1667,125 @@ ; GFX8-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] ; GFX8-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) ; GFX8-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) - ; GFX8-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX8-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX8-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV44]], [[UV46]] - ; GFX8-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[UV47]], [[USUBO9]] - ; GFX8-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI2]] - ; GFX8-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE10]], [[FPTOUI2]] - ; GFX8-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]] - ; GFX8-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[FPTOUI2]] - ; GFX8-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] - ; GFX8-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[UMULH15]] - ; GFX8-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] - ; GFX8-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] - ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] - ; GFX8-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX8-NEXT: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX8-NEXT: [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX8-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV56]], [[UV58]] + ; GFX8-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV57]], [[UV59]], [[USUBO9]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_18:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_19:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[FPTOUI2]], [[C5]] + ; GFX8-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_18]](s64) + ; GFX8-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[UV61]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_20:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_21:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[FPTOUI3]], [[ANYEXT3]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_22:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_23:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE10]](s32), [[FPTOUI2]], [[AMDGPU_MAD_U64_U32_20]] + ; GFX8-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_22]](s64) + ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV60]] + ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV62]] + ; GFX8-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV60]] + ; GFX8-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] ; GFX8-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX8-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH16]] + ; GFX8-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH12]] ; GFX8-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX8-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX8-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] - ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] - ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] - ; GFX8-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX8-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV62]] + ; GFX8-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV60]] + ; GFX8-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV62]] + ; GFX8-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] ; GFX8-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX8-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH18]] + ; GFX8-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH14]] ; GFX8-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX8-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX8-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD20]] + ; GFX8-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX8-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD12]] ; GFX8-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX8-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] - ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] - ; GFX8-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]] + ; GFX8-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]] + ; GFX8-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV62]] + ; GFX8-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]] ; GFX8-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO54]] - ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO57]] - ; GFX8-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDO56]] - ; GFX8-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE10]], [[UADDO56]] - ; GFX8-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE16]] - ; GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[UADDO56]] - ; GFX8-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] - ; GFX8-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]] - ; GFX8-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[MUL24]] - ; GFX8-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO56]], [[ADD25]] - ; GFX8-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[MUL24]] - ; GFX8-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO57]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO56]], [[C5]] + ; GFX8-NEXT: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64) + ; GFX8-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV65]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDE16]], [[ANYEXT4]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE10]](s32), [[UADDO56]], [[AMDGPU_MAD_U64_U32_26]] + ; GFX8-NEXT: [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64) + ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV64]] + ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO56]], [[UV66]] + ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[UV64]] + ; GFX8-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] ; GFX8-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX8-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH21]] + ; GFX8-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH16]] ; GFX8-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX8-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX8-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD25]] - ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[MUL24]] - ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[ADD25]] - ; GFX8-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX8-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV66]] + ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV64]] + ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[UV66]] + ; GFX8-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] ; GFX8-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX8-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH23]] + ; GFX8-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH18]] ; GFX8-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX8-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX8-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD26]] + ; GFX8-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX8-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD16]] ; GFX8-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX8-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] - ; GFX8-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD25]] - ; GFX8-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]] + ; GFX8-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]] + ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV66]] + ; GFX8-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD18]] ; GFX8-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[UADDO66]] - ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD29]], [[UADDO69]] - ; GFX8-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX8-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX8-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDO68]] - ; GFX8-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV50]], [[UADDE18]] - ; GFX8-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDO68]] - ; GFX8-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD19]], [[UADDO69]] + ; GFX8-NEXT: [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX8-NEXT: [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV71]], [[UADDO68]] + ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV70]], [[UADDE18]] + ; GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV70]], [[UADDO68]] + ; GFX8-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] ; GFX8-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX8-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH25]] + ; GFX8-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH20]] ; GFX8-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) - ; GFX8-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX8-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDE18]] - ; GFX8-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDO68]] - ; GFX8-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDE18]] - ; GFX8-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX8-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV71]], [[UADDE18]] + ; GFX8-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV71]], [[UADDO68]] + ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV70]], [[UADDE18]] + ; GFX8-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] ; GFX8-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) - ; GFX8-NEXT: [[UADDO76:%[0-9]+]]:_(s32), [[UADDO77:%[0-9]+]]:_(s1) = G_UADDO [[UADDO74]], [[UMULH27]] + ; GFX8-NEXT: [[UADDO76:%[0-9]+]]:_(s32), [[UADDO77:%[0-9]+]]:_(s1) = G_UADDO [[UADDO74]], [[UMULH22]] ; GFX8-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO77]](s1) - ; GFX8-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX8-NEXT: [[UADDO78:%[0-9]+]]:_(s32), [[UADDO79:%[0-9]+]]:_(s1) = G_UADDO [[UADDO76]], [[ADD30]] + ; GFX8-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX8-NEXT: [[UADDO78:%[0-9]+]]:_(s32), [[UADDO79:%[0-9]+]]:_(s1) = G_UADDO [[UADDO76]], [[ADD20]] ; GFX8-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO79]](s1) - ; GFX8-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] - ; GFX8-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDE18]] - ; GFX8-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]] - ; GFX8-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO78]](s32), [[ADD33]](s32) - ; GFX8-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX8-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV52]], [[UADDO78]] - ; GFX8-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV53]], [[UADDO78]] - ; GFX8-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV52]], [[ADD33]] - ; GFX8-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV52]], [[UADDO78]] - ; GFX8-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] - ; GFX8-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]] - ; GFX8-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV48]], [[MUL33]] - ; GFX8-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV49]], [[ADD35]], [[USUBO11]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV49]], [[ADD35]] - ; GFX8-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX8-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE12]](s32), [[UV55]] + ; GFX8-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]] + ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV71]], [[UADDE18]] + ; GFX8-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]] + ; GFX8-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO78]](s32), [[ADD23]](s32) + ; GFX8-NEXT: [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV72]](s32), [[UADDO78]], [[C5]] + ; GFX8-NEXT: [[UV74:%[0-9]+]]:_(s32), [[UV75:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64) + ; GFX8-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV75]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV72]](s32), [[ADD23]], [[ANYEXT5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV73]](s32), [[UADDO78]], [[AMDGPU_MAD_U64_U32_32]] + ; GFX8-NEXT: [[UV76:%[0-9]+]]:_(s32), [[UV77:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64) + ; GFX8-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV68]], [[UV74]] + ; GFX8-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV69]], [[UV76]], [[USUBO11]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV69]], [[UV76]] + ; GFX8-NEXT: [[UV78:%[0-9]+]]:_(s32), [[UV79:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX8-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE12]](s32), [[UV79]] ; GFX8-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) - ; GFX8-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV54]] + ; GFX8-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV78]] ; GFX8-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) - ; GFX8-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE12]](s32), [[UV55]] + ; GFX8-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE12]](s32), [[UV79]] ; GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] - ; GFX8-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[USUBO10]], [[UV54]] - ; GFX8-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV55]], [[USUBO11]] + ; GFX8-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[USUBO10]], [[UV78]] + ; GFX8-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV79]], [[USUBO11]] ; GFX8-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[USUBE14]], [[C6]], [[USUBO13]] - ; GFX8-NEXT: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO80:%[0-9]+]]:_(s32), [[UADDO81:%[0-9]+]]:_(s1) = G_UADDO [[UADDO78]], [[UV56]] - ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[ADD33]], [[UV57]], [[UADDO81]] + ; GFX8-NEXT: [[UV80:%[0-9]+]]:_(s32), [[UV81:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX8-NEXT: [[UADDO80:%[0-9]+]]:_(s32), [[UADDO81:%[0-9]+]]:_(s1) = G_UADDO [[UADDO78]], [[UV80]] + ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[ADD23]], [[UV81]], [[UADDO81]] ; GFX8-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO80]](s32), [[UADDE20]](s32) - ; GFX8-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV55]] + ; GFX8-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV79]] ; GFX8-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) - ; GFX8-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV54]] + ; GFX8-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV78]] ; GFX8-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) - ; GFX8-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV55]] + ; GFX8-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV79]] ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] - ; GFX8-NEXT: [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO82:%[0-9]+]]:_(s32), [[UADDO83:%[0-9]+]]:_(s1) = G_UADDO [[UADDO80]], [[UV58]] - ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[UV59]], [[UADDO83]] + ; GFX8-NEXT: [[UV82:%[0-9]+]]:_(s32), [[UV83:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX8-NEXT: [[UADDO82:%[0-9]+]]:_(s32), [[UADDO83:%[0-9]+]]:_(s1) = G_UADDO [[UADDO80]], [[UV82]] + ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[UV83]], [[UADDO83]] ; GFX8-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO82]](s32), [[UADDE22]](s32) ; GFX8-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]] ; GFX8-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]] @@ -1793,10 +1793,10 @@ ; GFX8-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV8]] ; GFX8-NEXT: [[XOR6:%[0-9]+]]:_(s64) = G_XOR [[ASHR2]], [[ASHR3]] ; GFX8-NEXT: [[XOR7:%[0-9]+]]:_(s64) = G_XOR [[SELECT7]], [[XOR6]] - ; GFX8-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR7]](s64) - ; GFX8-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR6]](s64) - ; GFX8-NEXT: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[UV60]], [[UV62]] - ; GFX8-NEXT: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[UV61]], [[UV63]], [[USUBO15]] + ; GFX8-NEXT: [[UV84:%[0-9]+]]:_(s32), [[UV85:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR7]](s64) + ; GFX8-NEXT: [[UV86:%[0-9]+]]:_(s32), [[UV87:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR6]](s64) + ; GFX8-NEXT: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[UV84]], [[UV86]] + ; GFX8-NEXT: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[UV85]], [[UV87]], [[USUBO15]] ; GFX8-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE18]](s32) ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -1842,123 +1842,123 @@ ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV16]] ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] + ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV18]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV20]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV18]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV20]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV18]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV20]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV20]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] - ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV23]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV22]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV24]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV22]] + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV24]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV22]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV24]] + ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV24]] + ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] - ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]] - ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO26]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDE6]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO26]] + ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]] - ; GFX9-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE6]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDE6]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDO26]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDE6]] + ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX9-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] + ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX9-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]] - ; GFX9-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32) - ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[UADDO36]] - ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]] - ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]] - ; GFX9-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV22]], [[UADDO36]] - ; GFX9-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX9-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[MUL15]] - ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]] - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]] - ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]] + ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDE6]] + ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD11]](s32) + ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[UADDO36]], [[C5]] + ; GFX9-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV33]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[ADD11]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV31]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) + ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV32]] + ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV34]], [[USUBO3]] + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV27]], [[UV34]] + ; GFX9-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV37]] ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV36]] ; GFX9-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]] + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV37]] ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]] - ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] + ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV36]] + ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV37]], [[USUBO3]] ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV26]] - ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV27]], [[UADDO39]] + ; GFX9-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV38]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV39]], [[UADDO39]] ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV37]] ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] + ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV36]] ; GFX9-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] + ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV37]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV28]] - ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV29]], [[UADDO41]] + ; GFX9-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV40]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV41]], [[UADDO41]] ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] @@ -1966,28 +1966,28 @@ ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]] ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]] - ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX9-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV30]], [[UV32]] - ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV31]], [[UV33]], [[USUBO7]] + ; GFX9-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX9-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV42]], [[UV44]] + ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV43]], [[UV45]], [[USUBO7]] ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) ; GFX9-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) ; GFX9-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32) - ; GFX9-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) - ; GFX9-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UV34]], [[UV36]] - ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UV35]], [[UV37]], [[UADDO43]] + ; GFX9-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX9-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) + ; GFX9-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UV46]], [[UV48]] + ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UV47]], [[UV49]], [[UADDO43]] ; GFX9-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO42]](s32), [[UADDE12]](s32) - ; GFX9-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) - ; GFX9-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UV38]], [[UV40]] - ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UV39]], [[UV41]], [[UADDO45]] + ; GFX9-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX9-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) + ; GFX9-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UV50]], [[UV52]] + ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UV51]], [[UV53]], [[UADDO45]] ; GFX9-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE14]](s32) ; GFX9-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] ; GFX9-NEXT: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] - ; GFX9-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX9-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV42]](s32) - ; GFX9-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV43]](s32) + ; GFX9-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX9-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV54]](s32) + ; GFX9-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV55]](s32) ; GFX9-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C1]] ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) @@ -1998,125 +1998,125 @@ ; GFX9-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] ; GFX9-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) ; GFX9-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) - ; GFX9-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX9-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX9-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV44]], [[UV46]] - ; GFX9-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[UV47]], [[USUBO9]] - ; GFX9-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI2]] - ; GFX9-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE10]], [[FPTOUI2]] - ; GFX9-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]] - ; GFX9-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[FPTOUI2]] - ; GFX9-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] - ; GFX9-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[UMULH15]] - ; GFX9-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] - ; GFX9-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] - ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] - ; GFX9-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX9-NEXT: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX9-NEXT: [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX9-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV56]], [[UV58]] + ; GFX9-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV57]], [[UV59]], [[USUBO9]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_18:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_19:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[FPTOUI2]], [[C5]] + ; GFX9-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_18]](s64) + ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[UV61]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_20:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_21:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[FPTOUI3]], [[ANYEXT3]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_22:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_23:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE10]](s32), [[FPTOUI2]], [[AMDGPU_MAD_U64_U32_20]] + ; GFX9-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_22]](s64) + ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV60]] + ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV62]] + ; GFX9-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV60]] + ; GFX9-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] ; GFX9-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX9-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH16]] + ; GFX9-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH12]] ; GFX9-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX9-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX9-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] - ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] - ; GFX9-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] - ; GFX9-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX9-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV62]] + ; GFX9-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV60]] + ; GFX9-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV62]] + ; GFX9-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] ; GFX9-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX9-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH18]] + ; GFX9-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH14]] ; GFX9-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX9-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX9-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD20]] + ; GFX9-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX9-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD12]] ; GFX9-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX9-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] - ; GFX9-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] - ; GFX9-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]] + ; GFX9-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]] + ; GFX9-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV62]] + ; GFX9-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]] ; GFX9-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO54]] - ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO57]] - ; GFX9-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDO56]] - ; GFX9-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE10]], [[UADDO56]] - ; GFX9-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE16]] - ; GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[UADDO56]] - ; GFX9-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] - ; GFX9-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]] - ; GFX9-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[MUL24]] - ; GFX9-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO56]], [[ADD25]] - ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[MUL24]] - ; GFX9-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO57]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO56]], [[C5]] + ; GFX9-NEXT: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64) + ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV65]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDE16]], [[ANYEXT4]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE10]](s32), [[UADDO56]], [[AMDGPU_MAD_U64_U32_26]] + ; GFX9-NEXT: [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64) + ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV64]] + ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO56]], [[UV66]] + ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[UV64]] + ; GFX9-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] ; GFX9-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX9-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH21]] + ; GFX9-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH16]] ; GFX9-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX9-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX9-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD25]] - ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[MUL24]] - ; GFX9-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[ADD25]] - ; GFX9-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX9-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV66]] + ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV64]] + ; GFX9-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[UV66]] + ; GFX9-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] ; GFX9-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX9-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH23]] + ; GFX9-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH18]] ; GFX9-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX9-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX9-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD26]] + ; GFX9-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX9-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD16]] ; GFX9-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX9-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] - ; GFX9-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD25]] - ; GFX9-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]] + ; GFX9-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]] + ; GFX9-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV66]] + ; GFX9-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD18]] ; GFX9-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[UADDO66]] - ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD29]], [[UADDO69]] - ; GFX9-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX9-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX9-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDO68]] - ; GFX9-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV50]], [[UADDE18]] - ; GFX9-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDO68]] - ; GFX9-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD19]], [[UADDO69]] + ; GFX9-NEXT: [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX9-NEXT: [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV71]], [[UADDO68]] + ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV70]], [[UADDE18]] + ; GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV70]], [[UADDO68]] + ; GFX9-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] ; GFX9-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX9-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH25]] + ; GFX9-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH20]] ; GFX9-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) - ; GFX9-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX9-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDE18]] - ; GFX9-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDO68]] - ; GFX9-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDE18]] - ; GFX9-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX9-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV71]], [[UADDE18]] + ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV71]], [[UADDO68]] + ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV70]], [[UADDE18]] + ; GFX9-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] ; GFX9-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) - ; GFX9-NEXT: [[UADDO76:%[0-9]+]]:_(s32), [[UADDO77:%[0-9]+]]:_(s1) = G_UADDO [[UADDO74]], [[UMULH27]] + ; GFX9-NEXT: [[UADDO76:%[0-9]+]]:_(s32), [[UADDO77:%[0-9]+]]:_(s1) = G_UADDO [[UADDO74]], [[UMULH22]] ; GFX9-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO77]](s1) - ; GFX9-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX9-NEXT: [[UADDO78:%[0-9]+]]:_(s32), [[UADDO79:%[0-9]+]]:_(s1) = G_UADDO [[UADDO76]], [[ADD30]] + ; GFX9-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX9-NEXT: [[UADDO78:%[0-9]+]]:_(s32), [[UADDO79:%[0-9]+]]:_(s1) = G_UADDO [[UADDO76]], [[ADD20]] ; GFX9-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO79]](s1) - ; GFX9-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] - ; GFX9-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDE18]] - ; GFX9-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]] - ; GFX9-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO78]](s32), [[ADD33]](s32) - ; GFX9-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX9-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV52]], [[UADDO78]] - ; GFX9-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV53]], [[UADDO78]] - ; GFX9-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV52]], [[ADD33]] - ; GFX9-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV52]], [[UADDO78]] - ; GFX9-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] - ; GFX9-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]] - ; GFX9-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV48]], [[MUL33]] - ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV49]], [[ADD35]], [[USUBO11]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV49]], [[ADD35]] - ; GFX9-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX9-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE12]](s32), [[UV55]] + ; GFX9-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]] + ; GFX9-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV71]], [[UADDE18]] + ; GFX9-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]] + ; GFX9-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO78]](s32), [[ADD23]](s32) + ; GFX9-NEXT: [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV72]](s32), [[UADDO78]], [[C5]] + ; GFX9-NEXT: [[UV74:%[0-9]+]]:_(s32), [[UV75:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64) + ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV75]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV72]](s32), [[ADD23]], [[ANYEXT5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV73]](s32), [[UADDO78]], [[AMDGPU_MAD_U64_U32_32]] + ; GFX9-NEXT: [[UV76:%[0-9]+]]:_(s32), [[UV77:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64) + ; GFX9-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV68]], [[UV74]] + ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV69]], [[UV76]], [[USUBO11]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV69]], [[UV76]] + ; GFX9-NEXT: [[UV78:%[0-9]+]]:_(s32), [[UV79:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX9-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE12]](s32), [[UV79]] ; GFX9-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) - ; GFX9-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV54]] + ; GFX9-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV78]] ; GFX9-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) - ; GFX9-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE12]](s32), [[UV55]] + ; GFX9-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE12]](s32), [[UV79]] ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] - ; GFX9-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[USUBO10]], [[UV54]] - ; GFX9-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV55]], [[USUBO11]] + ; GFX9-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[USUBO10]], [[UV78]] + ; GFX9-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV79]], [[USUBO11]] ; GFX9-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[USUBE14]], [[C6]], [[USUBO13]] - ; GFX9-NEXT: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX9-NEXT: [[UADDO80:%[0-9]+]]:_(s32), [[UADDO81:%[0-9]+]]:_(s1) = G_UADDO [[UADDO78]], [[UV56]] - ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[ADD33]], [[UV57]], [[UADDO81]] + ; GFX9-NEXT: [[UV80:%[0-9]+]]:_(s32), [[UV81:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX9-NEXT: [[UADDO80:%[0-9]+]]:_(s32), [[UADDO81:%[0-9]+]]:_(s1) = G_UADDO [[UADDO78]], [[UV80]] + ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[ADD23]], [[UV81]], [[UADDO81]] ; GFX9-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO80]](s32), [[UADDE20]](s32) - ; GFX9-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV55]] + ; GFX9-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV79]] ; GFX9-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) - ; GFX9-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV54]] + ; GFX9-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV78]] ; GFX9-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) - ; GFX9-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV55]] + ; GFX9-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV79]] ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] - ; GFX9-NEXT: [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX9-NEXT: [[UADDO82:%[0-9]+]]:_(s32), [[UADDO83:%[0-9]+]]:_(s1) = G_UADDO [[UADDO80]], [[UV58]] - ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[UV59]], [[UADDO83]] + ; GFX9-NEXT: [[UV82:%[0-9]+]]:_(s32), [[UV83:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX9-NEXT: [[UADDO82:%[0-9]+]]:_(s32), [[UADDO83:%[0-9]+]]:_(s1) = G_UADDO [[UADDO80]], [[UV82]] + ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[UV83]], [[UADDO83]] ; GFX9-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO82]](s32), [[UADDE22]](s32) ; GFX9-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]] ; GFX9-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]] @@ -2124,10 +2124,10 @@ ; GFX9-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV8]] ; GFX9-NEXT: [[XOR6:%[0-9]+]]:_(s64) = G_XOR [[ASHR2]], [[ASHR3]] ; GFX9-NEXT: [[XOR7:%[0-9]+]]:_(s64) = G_XOR [[SELECT7]], [[XOR6]] - ; GFX9-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR7]](s64) - ; GFX9-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR6]](s64) - ; GFX9-NEXT: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[UV60]], [[UV62]] - ; GFX9-NEXT: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[UV61]], [[UV63]], [[USUBO15]] + ; GFX9-NEXT: [[UV84:%[0-9]+]]:_(s32), [[UV85:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR7]](s64) + ; GFX9-NEXT: [[UV86:%[0-9]+]]:_(s32), [[UV87:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR6]](s64) + ; GFX9-NEXT: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[UV84]], [[UV86]] + ; GFX9-NEXT: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[UV85]], [[UV87]], [[USUBO15]] ; GFX9-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE18]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -2173,123 +2173,123 @@ ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV16]] ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]] - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV19]], [[MUL]] ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV18]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV18]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV18]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] - ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]] - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]] - ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE6]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32) - ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[UADDO36]] - ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]] - ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]] - ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV22]], [[UADDO36]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[MUL15]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]] - ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]] + ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]] + ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV26]], [[ADD15]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV29]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDO36]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV23]], [[ADD17]] + ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV31]] ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV30]] ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV31]] ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]] - ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV30]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV31]], [[USUBO3]] ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV26]] - ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV27]], [[UADDO39]] + ; GFX10-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV32]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV33]], [[UADDO39]] ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) - ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV31]] ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV30]] ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV31]] ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV28]] - ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV29]], [[UADDO41]] + ; GFX10-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV34]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV35]], [[UADDO41]] ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] @@ -2297,28 +2297,28 @@ ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]] ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]] - ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX10-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV30]], [[UV32]] - ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV31]], [[UV33]], [[USUBO7]] + ; GFX10-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX10-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV36]], [[UV38]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV37]], [[UV39]], [[USUBO7]] ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) ; GFX10-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) ; GFX10-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32) - ; GFX10-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX10-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) - ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UV34]], [[UV36]] - ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UV35]], [[UV37]], [[UADDO43]] + ; GFX10-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX10-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) + ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UV40]], [[UV42]] + ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UV41]], [[UV43]], [[UADDO43]] ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO42]](s32), [[UADDE12]](s32) - ; GFX10-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX10-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) - ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UV38]], [[UV40]] - ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UV39]], [[UV41]], [[UADDO45]] + ; GFX10-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX10-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) + ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UV44]], [[UV46]] + ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UV45]], [[UV47]], [[UADDO45]] ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE14]](s32) ; GFX10-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] ; GFX10-NEXT: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] - ; GFX10-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX10-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV42]](s32) - ; GFX10-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV43]](s32) + ; GFX10-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX10-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV48]](s32) + ; GFX10-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV49]](s32) ; GFX10-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C1]] ; GFX10-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) @@ -2329,125 +2329,125 @@ ; GFX10-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] ; GFX10-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) ; GFX10-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) - ; GFX10-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX10-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV44]], [[UV46]] - ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[UV47]], [[USUBO9]] - ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI2]] - ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE10]], [[FPTOUI2]] - ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]] - ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[FPTOUI2]] - ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] - ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[UMULH15]] - ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] - ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] - ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] - ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX10-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX10-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV50]], [[UV52]] + ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV51]], [[UV53]], [[USUBO9]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[FPTOUI2]], [[C5]] + ; GFX10-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) + ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]] + ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV55]], [[MUL15]] + ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[USUBE10]], [[FPTOUI2]] + ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[MUL16]] + ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV54]] + ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV54]] + ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[MUL18]] ; GFX10-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX10-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH16]] + ; GFX10-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH12]] ; GFX10-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) ; GFX10-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] - ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] - ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] - ; GFX10-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] + ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV54]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL19]], [[UMULH13]] ; GFX10-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX10-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH18]] + ; GFX10-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH14]] ; GFX10-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) ; GFX10-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] ; GFX10-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD20]] ; GFX10-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) ; GFX10-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] - ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] - ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]] + ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] + ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD22]] ; GFX10-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO54]] ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO57]] - ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDO56]] - ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE10]], [[UADDO56]] - ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE16]] - ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[UADDO56]] - ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] - ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]] - ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[MUL24]] - ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO56]], [[ADD25]] - ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[MUL24]] - ; GFX10-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO56]], [[C5]] + ; GFX10-NEXT: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_8]](s64) + ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE16]] + ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV57]], [[MUL20]] + ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[USUBE10]], [[UADDO56]] + ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[MUL21]] + ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV56]] + ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[UADDO56]], [[ADD25]] + ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[UV56]] + ; GFX10-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL22]], [[MUL23]] ; GFX10-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX10-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH21]] + ; GFX10-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH16]] ; GFX10-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) ; GFX10-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD25]] - ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[MUL24]] - ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[ADD25]] - ; GFX10-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD25]] + ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV56]] + ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[ADD25]] + ; GFX10-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL24]], [[UMULH17]] ; GFX10-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX10-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH23]] + ; GFX10-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH18]] ; GFX10-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) ; GFX10-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] ; GFX10-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD26]] ; GFX10-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) ; GFX10-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] - ; GFX10-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD25]] - ; GFX10-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]] + ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD25]] + ; GFX10-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD28]] ; GFX10-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[UADDO66]] ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD29]], [[UADDO69]] - ; GFX10-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX10-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX10-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDO68]] - ; GFX10-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV50]], [[UADDE18]] - ; GFX10-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDO68]] - ; GFX10-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX10-NEXT: [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX10-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[UV61]], [[UADDO68]] + ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[UV60]], [[UADDE18]] + ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV60]], [[UADDO68]] + ; GFX10-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL25]], [[MUL26]] ; GFX10-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX10-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH25]] + ; GFX10-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH20]] ; GFX10-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) ; GFX10-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX10-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDE18]] - ; GFX10-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDO68]] - ; GFX10-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDE18]] - ; GFX10-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UV61]], [[UADDE18]] + ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV61]], [[UADDO68]] + ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV60]], [[UADDE18]] + ; GFX10-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[UMULH21]] ; GFX10-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) - ; GFX10-NEXT: [[UADDO76:%[0-9]+]]:_(s32), [[UADDO77:%[0-9]+]]:_(s1) = G_UADDO [[UADDO74]], [[UMULH27]] + ; GFX10-NEXT: [[UADDO76:%[0-9]+]]:_(s32), [[UADDO77:%[0-9]+]]:_(s1) = G_UADDO [[UADDO74]], [[UMULH22]] ; GFX10-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO77]](s1) ; GFX10-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] ; GFX10-NEXT: [[UADDO78:%[0-9]+]]:_(s32), [[UADDO79:%[0-9]+]]:_(s1) = G_UADDO [[UADDO76]], [[ADD30]] ; GFX10-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO79]](s1) ; GFX10-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] - ; GFX10-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDE18]] - ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]] + ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV61]], [[UADDE18]] + ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD32]] ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO78]](s32), [[ADD33]](s32) - ; GFX10-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX10-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV52]], [[UADDO78]] - ; GFX10-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV53]], [[UADDO78]] - ; GFX10-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV52]], [[ADD33]] - ; GFX10-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV52]], [[UADDO78]] - ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] - ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]] - ; GFX10-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV48]], [[MUL33]] - ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV49]], [[ADD35]], [[USUBO11]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV49]], [[ADD35]] - ; GFX10-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE12]](s32), [[UV55]] + ; GFX10-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV62]](s32), [[UADDO78]], [[C5]] + ; GFX10-NEXT: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) + ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UV62]], [[ADD33]] + ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[UV65]], [[MUL28]] + ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UV63]], [[UADDO78]] + ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[MUL29]] + ; GFX10-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV58]], [[UV64]] + ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV59]], [[ADD35]], [[USUBO11]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV59]], [[ADD35]] + ; GFX10-NEXT: [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE12]](s32), [[UV67]] ; GFX10-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) - ; GFX10-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV54]] + ; GFX10-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV66]] ; GFX10-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) - ; GFX10-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE12]](s32), [[UV55]] + ; GFX10-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE12]](s32), [[UV67]] ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] - ; GFX10-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[USUBO10]], [[UV54]] - ; GFX10-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV55]], [[USUBO11]] + ; GFX10-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[USUBO10]], [[UV66]] + ; GFX10-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV67]], [[USUBO11]] ; GFX10-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[USUBE14]], [[C6]], [[USUBO13]] - ; GFX10-NEXT: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO80:%[0-9]+]]:_(s32), [[UADDO81:%[0-9]+]]:_(s1) = G_UADDO [[UADDO78]], [[UV56]] - ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[ADD33]], [[UV57]], [[UADDO81]] + ; GFX10-NEXT: [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX10-NEXT: [[UADDO80:%[0-9]+]]:_(s32), [[UADDO81:%[0-9]+]]:_(s1) = G_UADDO [[UADDO78]], [[UV68]] + ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[ADD33]], [[UV69]], [[UADDO81]] ; GFX10-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO80]](s32), [[UADDE20]](s32) - ; GFX10-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV55]] + ; GFX10-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV67]] ; GFX10-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) - ; GFX10-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV54]] + ; GFX10-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV66]] ; GFX10-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) - ; GFX10-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV55]] + ; GFX10-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV67]] ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] - ; GFX10-NEXT: [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO82:%[0-9]+]]:_(s32), [[UADDO83:%[0-9]+]]:_(s1) = G_UADDO [[UADDO80]], [[UV58]] - ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[UV59]], [[UADDO83]] + ; GFX10-NEXT: [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX10-NEXT: [[UADDO82:%[0-9]+]]:_(s32), [[UADDO83:%[0-9]+]]:_(s1) = G_UADDO [[UADDO80]], [[UV70]] + ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[UV71]], [[UADDO83]] ; GFX10-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO82]](s32), [[UADDE22]](s32) ; GFX10-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]] ; GFX10-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]] @@ -2455,10 +2455,10 @@ ; GFX10-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV8]] ; GFX10-NEXT: [[XOR6:%[0-9]+]]:_(s64) = G_XOR [[ASHR2]], [[ASHR3]] ; GFX10-NEXT: [[XOR7:%[0-9]+]]:_(s64) = G_XOR [[SELECT7]], [[XOR6]] - ; GFX10-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR7]](s64) - ; GFX10-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR6]](s64) - ; GFX10-NEXT: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[UV60]], [[UV62]] - ; GFX10-NEXT: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[UV61]], [[UV63]], [[USUBO15]] + ; GFX10-NEXT: [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR7]](s64) + ; GFX10-NEXT: [[UV74:%[0-9]+]]:_(s32), [[UV75:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR6]](s64) + ; GFX10-NEXT: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[UV72]], [[UV74]] + ; GFX10-NEXT: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[UV73]], [[UV75]], [[USUBO15]] ; GFX10-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE18]](s32) ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -3507,123 +3507,123 @@ ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] + ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV15]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV16]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] - ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]] + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]] + ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] + ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] - ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] - ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] + ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] - ; GFX8-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] + ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX8-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] + ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX8-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]] - ; GFX8-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32) - ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] - ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] - ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] - ; GFX8-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] - ; GFX8-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX8-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] - ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] - ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] + ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] + ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD11]](s32) + ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]] + ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) + ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV29]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) + ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] + ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV23]], [[UV30]] + ; GFX8-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV33]] ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV32]] ; GFX8-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]] + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV33]] ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]] - ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] + ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV32]] + ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV33]], [[USUBO3]] ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX8-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV22]] - ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV23]], [[UADDO39]] + ; GFX8-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV34]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV35]], [[UADDO39]] ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV33]] ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] + ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV32]] ; GFX8-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]] + ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV33]] ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV24]] - ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV25]], [[UADDO41]] + ; GFX8-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV36]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV37]], [[UADDO41]] ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] @@ -3631,10 +3631,10 @@ ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]] ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]] - ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]] - ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO7]] + ; GFX8-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX8-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV38]], [[UV40]] + ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV39]], [[UV41]], [[USUBO7]] ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) ; GFX9-LABEL: name: test_sdiv_s33 @@ -3679,123 +3679,123 @@ ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] + ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV15]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV16]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] - ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]] + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]] + ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] + ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] - ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] - ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] + ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] - ; GFX9-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] + ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX9-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] + ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX9-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]] - ; GFX9-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32) - ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] - ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] - ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] - ; GFX9-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] - ; GFX9-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX9-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] - ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] - ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] + ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] + ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD11]](s32) + ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]] + ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV29]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) + ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] + ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV23]], [[UV30]] + ; GFX9-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV33]] ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV32]] ; GFX9-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]] + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV33]] ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]] - ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] + ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV32]] + ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV33]], [[USUBO3]] ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV22]] - ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV23]], [[UADDO39]] + ; GFX9-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV34]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV35]], [[UADDO39]] ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV33]] ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] + ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV32]] ; GFX9-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]] + ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV33]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV24]] - ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV25]], [[UADDO41]] + ; GFX9-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV36]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV37]], [[UADDO41]] ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] @@ -3803,10 +3803,10 @@ ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]] ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]] - ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]] - ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO7]] + ; GFX9-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX9-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV38]], [[UV40]] + ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV39]], [[UV41]], [[USUBO7]] ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) ; GFX10-LABEL: name: test_sdiv_s33 @@ -3851,123 +3851,123 @@ ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[MUL]] ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV16]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV16]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV16]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] - ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] - ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE6]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32) - ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] - ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] - ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] - ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] - ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] + ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO36]], [[C5]] + ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]] + ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV27]] ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV26]] ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV27]] ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]] - ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV26]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV27]], [[USUBO3]] ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV22]] - ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV23]], [[UADDO39]] + ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV28]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV29]], [[UADDO39]] ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) - ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV27]] ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV26]] ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]] + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV27]] ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV24]] - ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV25]], [[UADDO41]] + ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV30]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV31]], [[UADDO41]] ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] @@ -3975,10 +3975,10 @@ ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]] ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]] - ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]] - ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO7]] + ; GFX10-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX10-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV32]], [[UV34]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV33]], [[UV35]], [[USUBO7]] ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir @@ -607,118 +607,118 @@ ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] + ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV15]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV16]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] - ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]] + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]] + ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] + ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] - ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] - ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] + ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] - ; GFX8-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] + ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX8-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] + ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX8-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]] - ; GFX8-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] - ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] - ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] - ; GFX8-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] - ; GFX8-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX8-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] - ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] + ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] + ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]] + ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) + ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV29]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) + ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] + ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV23]], [[UV30]] ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] + ; GFX8-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV33]] ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV32]] ; GFX8-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]] + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV33]] ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]] - ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] + ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV32]] + ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV33]], [[USUBO3]] ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV33]] ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] + ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV32]] ; GFX8-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]] + ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV33]] ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV20]] - ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]] + ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV32]] + ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV33]], [[USUBO5]] ; GFX8-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] @@ -726,10 +726,10 @@ ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]] - ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX8-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV24]] - ; GFX8-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]] + ; GFX8-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX8-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX8-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV34]], [[UV36]] + ; GFX8-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV35]], [[UV37]], [[USUBO9]] ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) ; GFX9-LABEL: name: test_srem_s64 @@ -772,118 +772,118 @@ ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] + ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV15]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV16]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] - ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]] + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]] + ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] + ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] - ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] - ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] + ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] - ; GFX9-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] + ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX9-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] + ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX9-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]] - ; GFX9-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] - ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] - ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] - ; GFX9-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] - ; GFX9-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX9-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] - ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] + ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] + ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]] + ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV29]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) + ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] + ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV23]], [[UV30]] ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] + ; GFX9-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV33]] ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV32]] ; GFX9-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]] + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV33]] ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]] - ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] + ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV32]] + ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV33]], [[USUBO3]] ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV33]] ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] + ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV32]] ; GFX9-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]] + ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV33]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV20]] - ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]] + ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV32]] + ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV33]], [[USUBO5]] ; GFX9-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] @@ -891,10 +891,10 @@ ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]] - ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX9-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV24]] - ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]] + ; GFX9-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX9-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX9-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV34]], [[UV36]] + ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV35]], [[UV37]], [[USUBO9]] ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) ; GFX10-LABEL: name: test_srem_s64 @@ -937,118 +937,118 @@ ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[MUL]] ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV16]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV16]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV16]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] - ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] - ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE6]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] - ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] - ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] - ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] + ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO36]], [[C5]] + ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]] ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] + ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV27]] ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV26]] ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV27]] ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]] - ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV26]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV27]], [[USUBO3]] ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV27]] ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV26]] ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]] + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV27]] ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV20]] - ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]] + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV26]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV27]], [[USUBO5]] ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] @@ -1056,10 +1056,10 @@ ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]] - ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV24]] - ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]] + ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV28]], [[UV30]] + ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV29]], [[UV31]], [[USUBO9]] ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 @@ -1438,118 +1438,118 @@ ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV16]] ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] + ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV18]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV20]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV18]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV20]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV18]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV20]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV20]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] - ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV23]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV22]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV24]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV22]] + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV24]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV22]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV24]] + ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV24]] + ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] - ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]] - ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO26]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDE6]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO26]] + ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]] - ; GFX8-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE6]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDE6]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDO26]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDE6]] + ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX8-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] + ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX8-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]] - ; GFX8-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[UADDO36]] - ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]] - ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]] - ; GFX8-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV22]], [[UADDO36]] - ; GFX8-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX8-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[MUL15]] - ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]] + ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDE6]] + ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[UADDO36]], [[C5]] + ; GFX8-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) + ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV33]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[ADD11]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV31]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) + ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV32]] + ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV34]], [[USUBO3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV27]], [[UV34]] ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]] + ; GFX8-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV37]] ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV36]] ; GFX8-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]] + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV37]] ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]] - ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] + ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV36]] + ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV37]], [[USUBO3]] ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV37]] ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] + ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV36]] ; GFX8-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] + ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV37]] ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV24]] - ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV25]], [[USUBO5]] + ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV36]] + ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV37]], [[USUBO5]] ; GFX8-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] @@ -1557,28 +1557,28 @@ ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]] - ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX8-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]] - ; GFX8-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO9]] + ; GFX8-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX8-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX8-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV38]], [[UV40]] + ; GFX8-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV39]], [[UV41]], [[USUBO9]] ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32) - ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) - ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UV30]], [[UV32]] - ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UV31]], [[UV33]], [[UADDO39]] + ; GFX8-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX8-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) + ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UV42]], [[UV44]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UV43]], [[UV45]], [[UADDO39]] ; GFX8-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) - ; GFX8-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) - ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UV34]], [[UV36]] - ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UV35]], [[UV37]], [[UADDO41]] + ; GFX8-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX8-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) + ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UV46]], [[UV48]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UV47]], [[UV49]], [[UADDO41]] ; GFX8-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] ; GFX8-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] - ; GFX8-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX8-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV38]](s32) - ; GFX8-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV39]](s32) + ; GFX8-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX8-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV50]](s32) + ; GFX8-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV51]](s32) ; GFX8-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C1]] ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) @@ -1589,121 +1589,121 @@ ; GFX8-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] ; GFX8-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) ; GFX8-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) - ; GFX8-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX8-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX8-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV40]], [[UV42]] - ; GFX8-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV41]], [[UV43]], [[USUBO11]] - ; GFX8-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI2]] - ; GFX8-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[FPTOUI2]] - ; GFX8-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI3]] - ; GFX8-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[FPTOUI2]] - ; GFX8-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] - ; GFX8-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[UMULH15]] - ; GFX8-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] - ; GFX8-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] - ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] - ; GFX8-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX8-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX8-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX8-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV52]], [[UV54]] + ; GFX8-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV53]], [[UV55]], [[USUBO11]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_18:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_19:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[FPTOUI2]], [[C5]] + ; GFX8-NEXT: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_18]](s64) + ; GFX8-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[UV57]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_20:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_21:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[FPTOUI3]], [[ANYEXT3]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_22:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_23:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE14]](s32), [[FPTOUI2]], [[AMDGPU_MAD_U64_U32_20]] + ; GFX8-NEXT: [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_22]](s64) + ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV56]] + ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV58]] + ; GFX8-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV56]] + ; GFX8-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] ; GFX8-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX8-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH16]] + ; GFX8-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH12]] ; GFX8-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) - ; GFX8-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX8-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] - ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] - ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] - ; GFX8-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX8-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV58]] + ; GFX8-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV56]] + ; GFX8-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV58]] + ; GFX8-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] ; GFX8-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX8-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH18]] + ; GFX8-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH14]] ; GFX8-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX8-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX8-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[ADD20]] + ; GFX8-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX8-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[ADD12]] ; GFX8-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX8-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] - ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] - ; GFX8-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]] + ; GFX8-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]] + ; GFX8-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV58]] + ; GFX8-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]] ; GFX8-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO50]] - ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO53]] - ; GFX8-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDO52]] - ; GFX8-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[UADDO52]] - ; GFX8-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDE12]] - ; GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[UADDO52]] - ; GFX8-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] - ; GFX8-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]] - ; GFX8-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[MUL24]] - ; GFX8-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO52]], [[ADD25]] - ; GFX8-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[MUL24]] - ; GFX8-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO53]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDO52]], [[C5]] + ; GFX8-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64) + ; GFX8-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV61]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDE12]], [[ANYEXT4]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE14]](s32), [[UADDO52]], [[AMDGPU_MAD_U64_U32_26]] + ; GFX8-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64) + ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV60]] + ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO52]], [[UV62]] + ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[UV60]] + ; GFX8-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] ; GFX8-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX8-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH21]] + ; GFX8-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH16]] ; GFX8-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) - ; GFX8-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX8-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD25]] - ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[MUL24]] - ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[ADD25]] - ; GFX8-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX8-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV62]] + ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV60]] + ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[UV62]] + ; GFX8-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] ; GFX8-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX8-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH23]] + ; GFX8-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH18]] ; GFX8-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX8-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX8-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[UADDO60]], [[ADD26]] + ; GFX8-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX8-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[UADDO60]], [[ADD16]] ; GFX8-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX8-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] - ; GFX8-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD25]] - ; GFX8-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]] + ; GFX8-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]] + ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV62]] + ; GFX8-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD18]] ; GFX8-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[UADDO62]] - ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD29]], [[UADDO65]] - ; GFX8-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX8-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX8-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDO64]] - ; GFX8-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV46]], [[UADDE14]] - ; GFX8-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDO64]] - ; GFX8-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD19]], [[UADDO65]] + ; GFX8-NEXT: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX8-NEXT: [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV67]], [[UADDO64]] + ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV66]], [[UADDE14]] + ; GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV66]], [[UADDO64]] + ; GFX8-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] ; GFX8-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX8-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH25]] + ; GFX8-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH20]] ; GFX8-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) - ; GFX8-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX8-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDE14]] - ; GFX8-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDO64]] - ; GFX8-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDE14]] - ; GFX8-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX8-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV67]], [[UADDE14]] + ; GFX8-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV67]], [[UADDO64]] + ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV66]], [[UADDE14]] + ; GFX8-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] ; GFX8-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX8-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH27]] + ; GFX8-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH22]] ; GFX8-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) - ; GFX8-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX8-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[ADD30]] + ; GFX8-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX8-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[ADD20]] ; GFX8-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) - ; GFX8-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] - ; GFX8-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDE14]] - ; GFX8-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]] - ; GFX8-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX8-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV48]], [[UADDO74]] - ; GFX8-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV49]], [[UADDO74]] - ; GFX8-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV48]], [[ADD33]] - ; GFX8-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV48]], [[UADDO74]] - ; GFX8-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] - ; GFX8-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]] - ; GFX8-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[UV44]], [[MUL33]] - ; GFX8-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[ADD35]], [[USUBO13]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV45]], [[ADD35]] + ; GFX8-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]] + ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV67]], [[UADDE14]] + ; GFX8-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]] + ; GFX8-NEXT: [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV68]](s32), [[UADDO74]], [[C5]] + ; GFX8-NEXT: [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64) + ; GFX8-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV71]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV68]](s32), [[ADD23]], [[ANYEXT5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV69]](s32), [[UADDO74]], [[AMDGPU_MAD_U64_U32_32]] + ; GFX8-NEXT: [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64) + ; GFX8-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[UV64]], [[UV70]] + ; GFX8-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV65]], [[UV72]], [[USUBO13]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV65]], [[UV72]] ; GFX8-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO12]](s32), [[USUBE16]](s32) - ; GFX8-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX8-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV51]] + ; GFX8-NEXT: [[UV74:%[0-9]+]]:_(s32), [[UV75:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX8-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV75]] ; GFX8-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) - ; GFX8-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV50]] + ; GFX8-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV74]] ; GFX8-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) - ; GFX8-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV51]] + ; GFX8-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV75]] ; GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] - ; GFX8-NEXT: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV50]] - ; GFX8-NEXT: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV51]], [[USUBO13]] + ; GFX8-NEXT: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV74]] + ; GFX8-NEXT: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV75]], [[USUBO13]] ; GFX8-NEXT: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[C6]], [[USUBO15]] ; GFX8-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE20]](s32) - ; GFX8-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE20]](s32), [[UV51]] + ; GFX8-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE20]](s32), [[UV75]] ; GFX8-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) - ; GFX8-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO14]](s32), [[UV50]] + ; GFX8-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO14]](s32), [[UV74]] ; GFX8-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) - ; GFX8-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE20]](s32), [[UV51]] + ; GFX8-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE20]](s32), [[UV75]] ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] - ; GFX8-NEXT: [[USUBO16:%[0-9]+]]:_(s32), [[USUBO17:%[0-9]+]]:_(s1) = G_USUBO [[USUBO14]], [[UV50]] - ; GFX8-NEXT: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[UV51]], [[USUBO15]] + ; GFX8-NEXT: [[USUBO16:%[0-9]+]]:_(s32), [[USUBO17:%[0-9]+]]:_(s1) = G_USUBO [[USUBO14]], [[UV74]] + ; GFX8-NEXT: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[UV75]], [[USUBO15]] ; GFX8-NEXT: [[USUBE24:%[0-9]+]]:_(s32), [[USUBE25:%[0-9]+]]:_(s1) = G_USUBE [[USUBE22]], [[C6]], [[USUBO17]] ; GFX8-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO16]](s32), [[USUBE24]](s32) ; GFX8-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]] @@ -1711,10 +1711,10 @@ ; GFX8-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]] ; GFX8-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV8]] ; GFX8-NEXT: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[SELECT7]], [[ASHR2]] - ; GFX8-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX8-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) - ; GFX8-NEXT: [[USUBO18:%[0-9]+]]:_(s32), [[USUBO19:%[0-9]+]]:_(s1) = G_USUBO [[UV52]], [[UV54]] - ; GFX8-NEXT: [[USUBE26:%[0-9]+]]:_(s32), [[USUBE27:%[0-9]+]]:_(s1) = G_USUBE [[UV53]], [[UV55]], [[USUBO19]] + ; GFX8-NEXT: [[UV76:%[0-9]+]]:_(s32), [[UV77:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX8-NEXT: [[UV78:%[0-9]+]]:_(s32), [[UV79:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) + ; GFX8-NEXT: [[USUBO18:%[0-9]+]]:_(s32), [[USUBO19:%[0-9]+]]:_(s1) = G_USUBO [[UV76]], [[UV78]] + ; GFX8-NEXT: [[USUBE26:%[0-9]+]]:_(s32), [[USUBE27:%[0-9]+]]:_(s1) = G_USUBE [[UV77]], [[UV79]], [[USUBO19]] ; GFX8-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO18]](s32), [[USUBE26]](s32) ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -1760,118 +1760,118 @@ ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV16]] ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] + ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV18]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV20]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV18]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV20]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV18]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV20]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV20]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] - ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV23]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV22]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV24]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV22]] + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV24]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV22]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV24]] + ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV24]] + ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] - ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]] - ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO26]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDE6]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO26]] + ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]] - ; GFX9-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE6]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDE6]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDO26]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDE6]] + ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX9-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] + ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX9-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]] - ; GFX9-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[UADDO36]] - ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]] - ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]] - ; GFX9-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV22]], [[UADDO36]] - ; GFX9-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX9-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[MUL15]] - ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]] - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]] + ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDE6]] + ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[UADDO36]], [[C5]] + ; GFX9-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV33]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[ADD11]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV31]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) + ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV32]] + ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV34]], [[USUBO3]] + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV27]], [[UV34]] ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]] + ; GFX9-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV37]] ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV36]] ; GFX9-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]] + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV37]] ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]] - ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] + ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV36]] + ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV37]], [[USUBO3]] ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV37]] ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] + ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV36]] ; GFX9-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] + ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV37]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV24]] - ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV25]], [[USUBO5]] + ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV36]] + ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV37]], [[USUBO5]] ; GFX9-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] @@ -1879,28 +1879,28 @@ ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]] - ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX9-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]] - ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO9]] + ; GFX9-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX9-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX9-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV38]], [[UV40]] + ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV39]], [[UV41]], [[USUBO9]] ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) ; GFX9-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) ; GFX9-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32) - ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) - ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UV30]], [[UV32]] - ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UV31]], [[UV33]], [[UADDO39]] + ; GFX9-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX9-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) + ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UV42]], [[UV44]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UV43]], [[UV45]], [[UADDO39]] ; GFX9-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) - ; GFX9-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) - ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UV34]], [[UV36]] - ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UV35]], [[UV37]], [[UADDO41]] + ; GFX9-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX9-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) + ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UV46]], [[UV48]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UV47]], [[UV49]], [[UADDO41]] ; GFX9-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] ; GFX9-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] - ; GFX9-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX9-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV38]](s32) - ; GFX9-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV39]](s32) + ; GFX9-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX9-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV50]](s32) + ; GFX9-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV51]](s32) ; GFX9-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C1]] ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) @@ -1911,121 +1911,121 @@ ; GFX9-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] ; GFX9-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) ; GFX9-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) - ; GFX9-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX9-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX9-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV40]], [[UV42]] - ; GFX9-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV41]], [[UV43]], [[USUBO11]] - ; GFX9-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI2]] - ; GFX9-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[FPTOUI2]] - ; GFX9-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI3]] - ; GFX9-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[FPTOUI2]] - ; GFX9-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] - ; GFX9-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[UMULH15]] - ; GFX9-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] - ; GFX9-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] - ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] - ; GFX9-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX9-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX9-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX9-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV52]], [[UV54]] + ; GFX9-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV53]], [[UV55]], [[USUBO11]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_18:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_19:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[FPTOUI2]], [[C5]] + ; GFX9-NEXT: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_18]](s64) + ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[UV57]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_20:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_21:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[FPTOUI3]], [[ANYEXT3]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_22:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_23:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE14]](s32), [[FPTOUI2]], [[AMDGPU_MAD_U64_U32_20]] + ; GFX9-NEXT: [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_22]](s64) + ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV56]] + ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV58]] + ; GFX9-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV56]] + ; GFX9-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] ; GFX9-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX9-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH16]] + ; GFX9-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH12]] ; GFX9-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) - ; GFX9-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX9-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] - ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] - ; GFX9-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] - ; GFX9-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX9-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV58]] + ; GFX9-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV56]] + ; GFX9-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV58]] + ; GFX9-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] ; GFX9-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX9-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH18]] + ; GFX9-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH14]] ; GFX9-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX9-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX9-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[ADD20]] + ; GFX9-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX9-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[ADD12]] ; GFX9-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX9-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] - ; GFX9-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] - ; GFX9-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]] + ; GFX9-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]] + ; GFX9-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV58]] + ; GFX9-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]] ; GFX9-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO50]] - ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO53]] - ; GFX9-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDO52]] - ; GFX9-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[UADDO52]] - ; GFX9-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDE12]] - ; GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[UADDO52]] - ; GFX9-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] - ; GFX9-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]] - ; GFX9-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[MUL24]] - ; GFX9-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO52]], [[ADD25]] - ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[MUL24]] - ; GFX9-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO53]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDO52]], [[C5]] + ; GFX9-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64) + ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV61]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDE12]], [[ANYEXT4]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE14]](s32), [[UADDO52]], [[AMDGPU_MAD_U64_U32_26]] + ; GFX9-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64) + ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV60]] + ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO52]], [[UV62]] + ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[UV60]] + ; GFX9-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] ; GFX9-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX9-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH21]] + ; GFX9-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH16]] ; GFX9-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) - ; GFX9-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX9-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD25]] - ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[MUL24]] - ; GFX9-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[ADD25]] - ; GFX9-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX9-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV62]] + ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV60]] + ; GFX9-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[UV62]] + ; GFX9-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] ; GFX9-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX9-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH23]] + ; GFX9-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH18]] ; GFX9-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX9-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX9-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[UADDO60]], [[ADD26]] + ; GFX9-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX9-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[UADDO60]], [[ADD16]] ; GFX9-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX9-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] - ; GFX9-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD25]] - ; GFX9-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]] + ; GFX9-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]] + ; GFX9-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV62]] + ; GFX9-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD18]] ; GFX9-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[UADDO62]] - ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD29]], [[UADDO65]] - ; GFX9-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX9-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX9-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDO64]] - ; GFX9-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV46]], [[UADDE14]] - ; GFX9-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDO64]] - ; GFX9-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD19]], [[UADDO65]] + ; GFX9-NEXT: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX9-NEXT: [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV67]], [[UADDO64]] + ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV66]], [[UADDE14]] + ; GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV66]], [[UADDO64]] + ; GFX9-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] ; GFX9-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX9-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH25]] + ; GFX9-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH20]] ; GFX9-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) - ; GFX9-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX9-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDE14]] - ; GFX9-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDO64]] - ; GFX9-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDE14]] - ; GFX9-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX9-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV67]], [[UADDE14]] + ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV67]], [[UADDO64]] + ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV66]], [[UADDE14]] + ; GFX9-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] ; GFX9-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX9-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH27]] + ; GFX9-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH22]] ; GFX9-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) - ; GFX9-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX9-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[ADD30]] + ; GFX9-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX9-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[ADD20]] ; GFX9-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) - ; GFX9-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] - ; GFX9-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDE14]] - ; GFX9-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]] - ; GFX9-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX9-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV48]], [[UADDO74]] - ; GFX9-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV49]], [[UADDO74]] - ; GFX9-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV48]], [[ADD33]] - ; GFX9-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV48]], [[UADDO74]] - ; GFX9-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] - ; GFX9-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]] - ; GFX9-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[UV44]], [[MUL33]] - ; GFX9-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[ADD35]], [[USUBO13]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV45]], [[ADD35]] + ; GFX9-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]] + ; GFX9-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV67]], [[UADDE14]] + ; GFX9-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]] + ; GFX9-NEXT: [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV68]](s32), [[UADDO74]], [[C5]] + ; GFX9-NEXT: [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64) + ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV71]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV68]](s32), [[ADD23]], [[ANYEXT5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV69]](s32), [[UADDO74]], [[AMDGPU_MAD_U64_U32_32]] + ; GFX9-NEXT: [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64) + ; GFX9-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[UV64]], [[UV70]] + ; GFX9-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV65]], [[UV72]], [[USUBO13]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV65]], [[UV72]] ; GFX9-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO12]](s32), [[USUBE16]](s32) - ; GFX9-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX9-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV51]] + ; GFX9-NEXT: [[UV74:%[0-9]+]]:_(s32), [[UV75:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX9-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV75]] ; GFX9-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) - ; GFX9-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV50]] + ; GFX9-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV74]] ; GFX9-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) - ; GFX9-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV51]] + ; GFX9-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV75]] ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] - ; GFX9-NEXT: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV50]] - ; GFX9-NEXT: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV51]], [[USUBO13]] + ; GFX9-NEXT: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV74]] + ; GFX9-NEXT: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV75]], [[USUBO13]] ; GFX9-NEXT: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[C6]], [[USUBO15]] ; GFX9-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE20]](s32) - ; GFX9-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE20]](s32), [[UV51]] + ; GFX9-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE20]](s32), [[UV75]] ; GFX9-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) - ; GFX9-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO14]](s32), [[UV50]] + ; GFX9-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO14]](s32), [[UV74]] ; GFX9-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) - ; GFX9-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE20]](s32), [[UV51]] + ; GFX9-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE20]](s32), [[UV75]] ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] - ; GFX9-NEXT: [[USUBO16:%[0-9]+]]:_(s32), [[USUBO17:%[0-9]+]]:_(s1) = G_USUBO [[USUBO14]], [[UV50]] - ; GFX9-NEXT: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[UV51]], [[USUBO15]] + ; GFX9-NEXT: [[USUBO16:%[0-9]+]]:_(s32), [[USUBO17:%[0-9]+]]:_(s1) = G_USUBO [[USUBO14]], [[UV74]] + ; GFX9-NEXT: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[UV75]], [[USUBO15]] ; GFX9-NEXT: [[USUBE24:%[0-9]+]]:_(s32), [[USUBE25:%[0-9]+]]:_(s1) = G_USUBE [[USUBE22]], [[C6]], [[USUBO17]] ; GFX9-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO16]](s32), [[USUBE24]](s32) ; GFX9-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]] @@ -2033,10 +2033,10 @@ ; GFX9-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]] ; GFX9-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV8]] ; GFX9-NEXT: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[SELECT7]], [[ASHR2]] - ; GFX9-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX9-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) - ; GFX9-NEXT: [[USUBO18:%[0-9]+]]:_(s32), [[USUBO19:%[0-9]+]]:_(s1) = G_USUBO [[UV52]], [[UV54]] - ; GFX9-NEXT: [[USUBE26:%[0-9]+]]:_(s32), [[USUBE27:%[0-9]+]]:_(s1) = G_USUBE [[UV53]], [[UV55]], [[USUBO19]] + ; GFX9-NEXT: [[UV76:%[0-9]+]]:_(s32), [[UV77:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX9-NEXT: [[UV78:%[0-9]+]]:_(s32), [[UV79:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) + ; GFX9-NEXT: [[USUBO18:%[0-9]+]]:_(s32), [[USUBO19:%[0-9]+]]:_(s1) = G_USUBO [[UV76]], [[UV78]] + ; GFX9-NEXT: [[USUBE26:%[0-9]+]]:_(s32), [[USUBE27:%[0-9]+]]:_(s1) = G_USUBE [[UV77]], [[UV79]], [[USUBO19]] ; GFX9-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO18]](s32), [[USUBE26]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -2082,118 +2082,118 @@ ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV16]] ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]] - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV19]], [[MUL]] ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV18]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV18]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV18]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] - ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]] - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]] - ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE6]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[UADDO36]] - ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]] - ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]] - ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV22]], [[UADDO36]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[MUL15]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] + ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]] + ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV26]], [[ADD15]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV29]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDO36]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV23]], [[ADD17]] ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]] + ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV31]] ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV30]] ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV31]] ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]] - ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV30]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV31]], [[USUBO3]] ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV31]] ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV30]] ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV31]] ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV24]] - ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV25]], [[USUBO5]] + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV30]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV31]], [[USUBO5]] ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] @@ -2201,28 +2201,28 @@ ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]] - ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]] - ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO9]] + ; GFX10-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX10-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV32]], [[UV34]] + ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV33]], [[UV35]], [[USUBO9]] ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) ; GFX10-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) ; GFX10-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32) - ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX10-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) - ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UV30]], [[UV32]] - ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UV31]], [[UV33]], [[UADDO39]] + ; GFX10-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX10-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) + ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UV36]], [[UV38]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UV37]], [[UV39]], [[UADDO39]] ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) - ; GFX10-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX10-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) - ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UV34]], [[UV36]] - ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UV35]], [[UV37]], [[UADDO41]] + ; GFX10-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX10-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) + ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UV40]], [[UV42]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UV41]], [[UV43]], [[UADDO41]] ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] ; GFX10-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] - ; GFX10-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX10-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV38]](s32) - ; GFX10-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV39]](s32) + ; GFX10-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX10-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV44]](s32) + ; GFX10-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV45]](s32) ; GFX10-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C1]] ; GFX10-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) @@ -2233,121 +2233,121 @@ ; GFX10-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] ; GFX10-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) ; GFX10-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) - ; GFX10-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) - ; GFX10-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX10-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV40]], [[UV42]] - ; GFX10-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV41]], [[UV43]], [[USUBO11]] - ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI2]] - ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[FPTOUI2]] - ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI3]] - ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[FPTOUI2]] - ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] - ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[UMULH15]] - ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] - ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] - ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] - ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX10-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX10-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX10-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV46]], [[UV48]] + ; GFX10-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV47]], [[UV49]], [[USUBO11]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[FPTOUI2]], [[C5]] + ; GFX10-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) + ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI3]] + ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV51]], [[MUL15]] + ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[FPTOUI2]] + ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[MUL16]] + ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV50]] + ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV50]] + ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[MUL18]] ; GFX10-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH16]] + ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH12]] ; GFX10-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) ; GFX10-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] - ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] - ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] - ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] + ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV50]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL19]], [[UMULH13]] ; GFX10-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX10-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH18]] + ; GFX10-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH14]] ; GFX10-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) ; GFX10-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] ; GFX10-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[ADD20]] ; GFX10-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) ; GFX10-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] - ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] - ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]] + ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] + ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD22]] ; GFX10-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO50]] ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO53]] - ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDO52]] - ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[UADDO52]] - ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDE12]] - ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[UADDO52]] - ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] - ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]] - ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[MUL24]] - ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO52]], [[ADD25]] - ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[MUL24]] - ; GFX10-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDO52]], [[C5]] + ; GFX10-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_8]](s64) + ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDE12]] + ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV53]], [[MUL20]] + ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[UADDO52]] + ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[MUL21]] + ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV52]] + ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[UADDO52]], [[ADD25]] + ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[UV52]] + ; GFX10-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL22]], [[MUL23]] ; GFX10-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX10-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH21]] + ; GFX10-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH16]] ; GFX10-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) ; GFX10-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD25]] - ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[MUL24]] - ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[ADD25]] - ; GFX10-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD25]] + ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV52]] + ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[ADD25]] + ; GFX10-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL24]], [[UMULH17]] ; GFX10-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX10-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH23]] + ; GFX10-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH18]] ; GFX10-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) ; GFX10-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] ; GFX10-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[UADDO60]], [[ADD26]] ; GFX10-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) ; GFX10-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] - ; GFX10-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD25]] - ; GFX10-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]] + ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD25]] + ; GFX10-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD28]] ; GFX10-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[UADDO62]] ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD29]], [[UADDO65]] - ; GFX10-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX10-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) - ; GFX10-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDO64]] - ; GFX10-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV46]], [[UADDE14]] - ; GFX10-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDO64]] - ; GFX10-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX10-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX10-NEXT: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[UV57]], [[UADDO64]] + ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[UV56]], [[UADDE14]] + ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV56]], [[UADDO64]] + ; GFX10-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL25]], [[MUL26]] ; GFX10-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX10-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH25]] + ; GFX10-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH20]] ; GFX10-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) ; GFX10-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX10-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDE14]] - ; GFX10-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDO64]] - ; GFX10-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDE14]] - ; GFX10-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UV57]], [[UADDE14]] + ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV57]], [[UADDO64]] + ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV56]], [[UADDE14]] + ; GFX10-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[UMULH21]] ; GFX10-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX10-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH27]] + ; GFX10-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH22]] ; GFX10-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) ; GFX10-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] ; GFX10-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[ADD30]] ; GFX10-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) ; GFX10-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] - ; GFX10-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDE14]] - ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]] - ; GFX10-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX10-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV48]], [[UADDO74]] - ; GFX10-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV49]], [[UADDO74]] - ; GFX10-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV48]], [[ADD33]] - ; GFX10-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV48]], [[UADDO74]] - ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] - ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]] - ; GFX10-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[UV44]], [[MUL33]] - ; GFX10-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[ADD35]], [[USUBO13]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV45]], [[ADD35]] + ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV57]], [[UADDE14]] + ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD32]] + ; GFX10-NEXT: [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV58]](s32), [[UADDO74]], [[C5]] + ; GFX10-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) + ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UV58]], [[ADD33]] + ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[UV61]], [[MUL28]] + ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UV59]], [[UADDO74]] + ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[MUL29]] + ; GFX10-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[UV54]], [[UV60]] + ; GFX10-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV55]], [[ADD35]], [[USUBO13]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV55]], [[ADD35]] ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO12]](s32), [[USUBE16]](s32) - ; GFX10-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) - ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV51]] + ; GFX10-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV63]] ; GFX10-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) - ; GFX10-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV50]] + ; GFX10-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV62]] ; GFX10-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) - ; GFX10-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV51]] + ; GFX10-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV63]] ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] - ; GFX10-NEXT: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV50]] - ; GFX10-NEXT: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV51]], [[USUBO13]] + ; GFX10-NEXT: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV62]] + ; GFX10-NEXT: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV63]], [[USUBO13]] ; GFX10-NEXT: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[C6]], [[USUBO15]] ; GFX10-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE20]](s32) - ; GFX10-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE20]](s32), [[UV51]] + ; GFX10-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE20]](s32), [[UV63]] ; GFX10-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) - ; GFX10-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO14]](s32), [[UV50]] + ; GFX10-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO14]](s32), [[UV62]] ; GFX10-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) - ; GFX10-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE20]](s32), [[UV51]] + ; GFX10-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE20]](s32), [[UV63]] ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] - ; GFX10-NEXT: [[USUBO16:%[0-9]+]]:_(s32), [[USUBO17:%[0-9]+]]:_(s1) = G_USUBO [[USUBO14]], [[UV50]] - ; GFX10-NEXT: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[UV51]], [[USUBO15]] + ; GFX10-NEXT: [[USUBO16:%[0-9]+]]:_(s32), [[USUBO17:%[0-9]+]]:_(s1) = G_USUBO [[USUBO14]], [[UV62]] + ; GFX10-NEXT: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[UV63]], [[USUBO15]] ; GFX10-NEXT: [[USUBE24:%[0-9]+]]:_(s32), [[USUBE25:%[0-9]+]]:_(s1) = G_USUBE [[USUBE22]], [[C6]], [[USUBO17]] ; GFX10-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO16]](s32), [[USUBE24]](s32) ; GFX10-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]] @@ -2355,10 +2355,10 @@ ; GFX10-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]] ; GFX10-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV8]] ; GFX10-NEXT: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[SELECT7]], [[ASHR2]] - ; GFX10-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) - ; GFX10-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) - ; GFX10-NEXT: [[USUBO18:%[0-9]+]]:_(s32), [[USUBO19:%[0-9]+]]:_(s1) = G_USUBO [[UV52]], [[UV54]] - ; GFX10-NEXT: [[USUBE26:%[0-9]+]]:_(s32), [[USUBE27:%[0-9]+]]:_(s1) = G_USUBE [[UV53]], [[UV55]], [[USUBO19]] + ; GFX10-NEXT: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX10-NEXT: [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) + ; GFX10-NEXT: [[USUBO18:%[0-9]+]]:_(s32), [[USUBO19:%[0-9]+]]:_(s1) = G_USUBO [[UV64]], [[UV66]] + ; GFX10-NEXT: [[USUBE26:%[0-9]+]]:_(s32), [[USUBE27:%[0-9]+]]:_(s1) = G_USUBE [[UV65]], [[UV67]], [[USUBO19]] ; GFX10-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO18]](s32), [[USUBE26]](s32) ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -3334,118 +3334,118 @@ ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] + ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV15]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV16]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] - ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]] + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]] + ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] + ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] - ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] - ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] + ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] - ; GFX8-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] + ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX8-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] + ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX8-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]] - ; GFX8-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] - ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] - ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] - ; GFX8-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] - ; GFX8-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX8-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] - ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] + ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] + ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]] + ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) + ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV29]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) + ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] + ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV23]], [[UV30]] ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] + ; GFX8-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV33]] ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV32]] ; GFX8-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]] + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV33]] ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]] - ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] + ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV32]] + ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV33]], [[USUBO3]] ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV33]] ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] + ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV32]] ; GFX8-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]] + ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV33]] ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV20]] - ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]] + ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV32]] + ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV33]], [[USUBO5]] ; GFX8-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] @@ -3453,10 +3453,10 @@ ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]] - ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX8-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV24]] - ; GFX8-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]] + ; GFX8-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX8-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX8-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV34]], [[UV36]] + ; GFX8-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV35]], [[UV37]], [[USUBO9]] ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) ; GFX9-LABEL: name: test_srem_s33 @@ -3501,118 +3501,118 @@ ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] + ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV15]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]] ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV16]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] - ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]] + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]] + ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]] ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]] + ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] - ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] - ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]] + ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]] + ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] - ; GFX9-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]] + ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX9-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] + ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]] ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX9-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]] - ; GFX9-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] - ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] - ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] - ; GFX9-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] - ; GFX9-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX9-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] - ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] + ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]] + ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]] + ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV29]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) + ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]] + ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]] + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV23]], [[UV30]] ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] + ; GFX9-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV33]] ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV32]] ; GFX9-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]] + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV33]] ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]] - ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] + ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV32]] + ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV33]], [[USUBO3]] ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV33]] ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] + ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV32]] ; GFX9-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]] + ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV33]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV20]] - ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]] + ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV32]] + ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV33]], [[USUBO5]] ; GFX9-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] @@ -3620,10 +3620,10 @@ ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]] - ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX9-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV24]] - ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]] + ; GFX9-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX9-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX9-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV34]], [[UV36]] + ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV35]], [[UV37]], [[USUBO9]] ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) ; GFX10-LABEL: name: test_srem_s33 @@ -3668,118 +3668,118 @@ ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[MUL]] ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]] ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]] ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]] + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV16]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV16]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]] ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] - ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV16]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]] ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] - ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]] ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] - ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]] - ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE6]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]] ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] - ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] - ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] - ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] + ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO36]], [[C5]] + ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]] ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] + ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV27]] ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV26]] ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV27]] ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]] - ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV26]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV27]], [[USUBO3]] ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV27]] ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV26]] ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]] + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV27]] ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV20]] - ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]] + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV26]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV27]], [[USUBO5]] ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] @@ -3787,10 +3787,10 @@ ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]] - ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) - ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV24]] - ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]] + ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV28]], [[UV30]] + ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV29]], [[UV31]], [[USUBO9]] ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir @@ -503,123 +503,123 @@ ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C4]] + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV7]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV8]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] - ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV12]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] + ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] - ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]] + ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX8-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] - ; GFX8-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]] + ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] + ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]] - ; GFX8-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32) - ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] - ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] - ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]] - ; GFX8-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] - ; GFX8-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX8-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] - ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]] - ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] + ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] + ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD11]](s32) + ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C4]] + ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) + ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) + ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] + ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[UV22]] + ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]] ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]] ; GFX8-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]] + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]] ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]] - ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] + ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]] + ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV14]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV15]], [[UADDO35]] + ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV26]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV27]], [[UADDO35]] ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]] + ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] ; GFX8-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] + ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV16]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV17]], [[UADDO37]] + ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV28]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV29]], [[UADDO37]] ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] @@ -651,123 +651,123 @@ ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C4]] + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV7]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV8]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] - ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV12]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] + ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] - ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]] + ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX9-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] - ; GFX9-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]] + ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] + ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]] - ; GFX9-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32) - ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] - ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] - ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]] - ; GFX9-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] - ; GFX9-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX9-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] - ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]] - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]] - ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] + ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] + ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD11]](s32) + ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C4]] + ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) + ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] + ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[UV22]] + ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]] ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]] ; GFX9-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]] + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]] ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]] - ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] + ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]] + ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV14]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV15]], [[UADDO35]] + ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV26]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV27]], [[UADDO35]] ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]] + ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] ; GFX9-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] + ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV16]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV17]], [[UADDO37]] + ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV28]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV29]], [[UADDO37]] ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] @@ -799,123 +799,123 @@ ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C4]] + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[MUL]] ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV8]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV8]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV8]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] - ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] - ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32) - ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] - ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] - ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]] - ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]] - ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV14]](s32), [[UADDO32]], [[C4]] + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV16]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]] + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV19]] ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV18]] ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV19]] ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]] - ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV18]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV19]], [[USUBO3]] ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV14]] - ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV15]], [[UADDO35]] + ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV20]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV21]], [[UADDO35]] ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) - ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV19]] ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]] + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV18]] ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV19]] ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV16]] - ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV17]], [[UADDO37]] + ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV22]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV23]], [[UADDO37]] ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] @@ -1249,131 +1249,131 @@ ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]] ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C4]] + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV10]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV12]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV10]] + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV12]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV10]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV12]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV12]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] - ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] + ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV15]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV14]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV16]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV14]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV16]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV14]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV16]] + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV16]] + ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] - ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]] - ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO22]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE2]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO22]] + ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX8-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]] - ; GFX8-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE2]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO22]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE2]] + ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] + ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]] - ; GFX8-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32) - ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[UADDO32]] - ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]] - ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]] - ; GFX8-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV14]], [[UADDO32]] - ; GFX8-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX8-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[MUL15]] - ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]] - ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV17]] + ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE2]] + ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD11]](s32) + ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO32]], [[C4]] + ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) + ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV25]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[ADD11]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV23]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) + ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]] + ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[UV26]], [[USUBO3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[UV26]] + ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV29]] ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV16]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV28]] ; GFX8-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV17]] + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV29]] ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV16]] - ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV17]], [[USUBO3]] + ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV28]] + ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV29]], [[USUBO3]] ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV18]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV19]], [[UADDO35]] + ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV30]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV31]], [[UADDO35]] ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV17]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV29]] ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV16]] + ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV28]] ; GFX8-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV17]] + ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV29]] ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV20]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV21]], [[UADDO37]] + ; GFX8-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV32]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV33]], [[UADDO37]] ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] - ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV22]](s32) - ; GFX8-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV23]](s32) + ; GFX8-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX8-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV34]](s32) + ; GFX8-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV35]](s32) ; GFX8-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C]] ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) @@ -1384,125 +1384,125 @@ ; GFX8-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] ; GFX8-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) ; GFX8-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) - ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) - ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV24]], [[UV26]] - ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV25]], [[UV27]], [[USUBO7]] - ; GFX8-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[FPTOUI2]] - ; GFX8-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE8]], [[FPTOUI2]] - ; GFX8-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[FPTOUI3]] - ; GFX8-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO6]], [[FPTOUI2]] - ; GFX8-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] - ; GFX8-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[UMULH15]] - ; GFX8-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] - ; GFX8-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] - ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] - ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX8-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) + ; GFX8-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV36]], [[UV38]] + ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV37]], [[UV39]], [[USUBO7]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_18:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_19:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[FPTOUI2]], [[C4]] + ; GFX8-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_18]](s64) + ; GFX8-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[UV41]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_20:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_21:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[FPTOUI3]], [[ANYEXT3]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_22:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_23:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE8]](s32), [[FPTOUI2]], [[AMDGPU_MAD_U64_U32_20]] + ; GFX8-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_22]](s64) + ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV40]] + ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV42]] + ; GFX8-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV40]] + ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] ; GFX8-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) - ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH16]] + ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH12]] ; GFX8-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1) - ; GFX8-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX8-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] - ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] - ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] - ; GFX8-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX8-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV42]] + ; GFX8-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV40]] + ; GFX8-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV42]] + ; GFX8-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] ; GFX8-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX8-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH18]] + ; GFX8-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH14]] ; GFX8-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) - ; GFX8-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX8-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[ADD20]] + ; GFX8-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX8-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[ADD12]] ; GFX8-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX8-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] - ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] - ; GFX8-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]] + ; GFX8-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]] + ; GFX8-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV42]] + ; GFX8-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]] ; GFX8-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO46]] - ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO49]] - ; GFX8-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[UADDO48]] - ; GFX8-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE8]], [[UADDO48]] - ; GFX8-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[UADDE8]] - ; GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO6]], [[UADDO48]] - ; GFX8-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] - ; GFX8-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]] - ; GFX8-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[MUL24]] - ; GFX8-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO48]], [[ADD25]] - ; GFX8-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[MUL24]] - ; GFX8-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO49]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDO48]], [[C4]] + ; GFX8-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64) + ; GFX8-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV45]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDE8]], [[ANYEXT4]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE8]](s32), [[UADDO48]], [[AMDGPU_MAD_U64_U32_26]] + ; GFX8-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64) + ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[UV44]] + ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO48]], [[UV46]] + ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[UV44]] + ; GFX8-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] ; GFX8-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX8-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH21]] + ; GFX8-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH16]] ; GFX8-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX8-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX8-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[ADD25]] - ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[MUL24]] - ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[ADD25]] - ; GFX8-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX8-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[UV46]] + ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[UV44]] + ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[UV46]] + ; GFX8-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] ; GFX8-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX8-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH23]] + ; GFX8-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH18]] ; GFX8-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) - ; GFX8-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX8-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[ADD26]] + ; GFX8-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX8-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[ADD16]] ; GFX8-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX8-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] - ; GFX8-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[ADD25]] - ; GFX8-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]] + ; GFX8-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]] + ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[UV46]] + ; GFX8-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD18]] ; GFX8-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[UADDO58]] - ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[ADD29]], [[UADDO61]] - ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV31]], [[UADDO60]] - ; GFX8-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV30]], [[UADDE10]] - ; GFX8-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV30]], [[UADDO60]] - ; GFX8-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[ADD19]], [[UADDO61]] + ; GFX8-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX8-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDO60]] + ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV50]], [[UADDE10]] + ; GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDO60]] + ; GFX8-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] ; GFX8-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX8-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH25]] + ; GFX8-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH20]] ; GFX8-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX8-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX8-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV31]], [[UADDE10]] - ; GFX8-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV31]], [[UADDO60]] - ; GFX8-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV30]], [[UADDE10]] - ; GFX8-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX8-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDE10]] + ; GFX8-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDO60]] + ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDE10]] + ; GFX8-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] ; GFX8-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX8-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH27]] + ; GFX8-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH22]] ; GFX8-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) - ; GFX8-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX8-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[UADDO68]], [[ADD30]] + ; GFX8-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX8-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[UADDO68]], [[ADD20]] ; GFX8-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX8-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] - ; GFX8-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV31]], [[UADDE10]] - ; GFX8-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]] - ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO70]](s32), [[ADD33]](s32) - ; GFX8-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV32]], [[UADDO70]] - ; GFX8-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV33]], [[UADDO70]] - ; GFX8-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV32]], [[ADD33]] - ; GFX8-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV32]], [[UADDO70]] - ; GFX8-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] - ; GFX8-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]] - ; GFX8-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV28]], [[MUL33]] - ; GFX8-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV29]], [[ADD35]], [[USUBO9]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV29]], [[ADD35]] - ; GFX8-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE10]](s32), [[UV35]] + ; GFX8-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]] + ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDE10]] + ; GFX8-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]] + ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO70]](s32), [[ADD23]](s32) + ; GFX8-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV52]](s32), [[UADDO70]], [[C4]] + ; GFX8-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64) + ; GFX8-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV55]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV52]](s32), [[ADD23]], [[ANYEXT5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV53]](s32), [[UADDO70]], [[AMDGPU_MAD_U64_U32_32]] + ; GFX8-NEXT: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64) + ; GFX8-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV48]], [[UV54]] + ; GFX8-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV49]], [[UV56]], [[USUBO9]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV49]], [[UV56]] + ; GFX8-NEXT: [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX8-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE10]](s32), [[UV59]] ; GFX8-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) - ; GFX8-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO8]](s32), [[UV34]] + ; GFX8-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO8]](s32), [[UV58]] ; GFX8-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) - ; GFX8-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE10]](s32), [[UV35]] + ; GFX8-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE10]](s32), [[UV59]] ; GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] - ; GFX8-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[USUBO8]], [[UV34]] - ; GFX8-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV35]], [[USUBO9]] + ; GFX8-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[USUBO8]], [[UV58]] + ; GFX8-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV59]], [[USUBO9]] ; GFX8-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[USUBE12]], [[C5]], [[USUBO11]] - ; GFX8-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX8-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UV36]] - ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[ADD33]], [[UV37]], [[UADDO73]] + ; GFX8-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX8-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UV60]] + ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[ADD23]], [[UV61]], [[UADDO73]] ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO72]](s32), [[UADDE12]](s32) - ; GFX8-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV35]] + ; GFX8-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV59]] ; GFX8-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) - ; GFX8-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV34]] + ; GFX8-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV58]] ; GFX8-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) - ; GFX8-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV35]] + ; GFX8-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV59]] ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] - ; GFX8-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX8-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[UV38]] - ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[UV39]], [[UADDO75]] + ; GFX8-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX8-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[UV62]] + ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[UV63]], [[UADDO75]] ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO74]](s32), [[UADDE14]](s32) ; GFX8-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]] ; GFX8-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]] @@ -1537,131 +1537,131 @@ ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]] ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C4]] + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV10]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV12]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV10]] + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV12]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV10]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV12]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV12]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] - ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] + ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV15]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV14]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV16]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV14]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV16]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV14]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV16]] + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV16]] + ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] - ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]] - ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO22]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE2]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO22]] + ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX9-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]] - ; GFX9-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE2]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO22]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE2]] + ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] + ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]] - ; GFX9-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32) - ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[UADDO32]] - ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]] - ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]] - ; GFX9-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV14]], [[UADDO32]] - ; GFX9-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX9-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[MUL15]] - ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]] - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]] - ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV17]] + ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE2]] + ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD11]](s32) + ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO32]], [[C4]] + ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV25]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[ADD11]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV23]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) + ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]] + ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[UV26]], [[USUBO3]] + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[UV26]] + ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV29]] ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV16]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV28]] ; GFX9-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV17]] + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV29]] ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV16]] - ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV17]], [[USUBO3]] + ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV28]] + ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV29]], [[USUBO3]] ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV18]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV19]], [[UADDO35]] + ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV30]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV31]], [[UADDO35]] ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV17]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV29]] ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV16]] + ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV28]] ; GFX9-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV17]] + ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV29]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV20]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV21]], [[UADDO37]] + ; GFX9-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV32]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV33]], [[UADDO37]] ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] - ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV22]](s32) - ; GFX9-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV23]](s32) + ; GFX9-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX9-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV34]](s32) + ; GFX9-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV35]](s32) ; GFX9-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C]] ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) @@ -1672,125 +1672,125 @@ ; GFX9-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] ; GFX9-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) ; GFX9-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) - ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) - ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV24]], [[UV26]] - ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV25]], [[UV27]], [[USUBO7]] - ; GFX9-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[FPTOUI2]] - ; GFX9-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE8]], [[FPTOUI2]] - ; GFX9-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[FPTOUI3]] - ; GFX9-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO6]], [[FPTOUI2]] - ; GFX9-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] - ; GFX9-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[UMULH15]] - ; GFX9-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] - ; GFX9-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] - ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] - ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX9-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) + ; GFX9-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV36]], [[UV38]] + ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV37]], [[UV39]], [[USUBO7]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_18:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_19:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[FPTOUI2]], [[C4]] + ; GFX9-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_18]](s64) + ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[UV41]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_20:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_21:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[FPTOUI3]], [[ANYEXT3]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_22:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_23:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE8]](s32), [[FPTOUI2]], [[AMDGPU_MAD_U64_U32_20]] + ; GFX9-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_22]](s64) + ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV40]] + ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV42]] + ; GFX9-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV40]] + ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] ; GFX9-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) - ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH16]] + ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH12]] ; GFX9-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1) - ; GFX9-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX9-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] - ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] - ; GFX9-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] - ; GFX9-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX9-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV42]] + ; GFX9-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV40]] + ; GFX9-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV42]] + ; GFX9-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] ; GFX9-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX9-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH18]] + ; GFX9-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH14]] ; GFX9-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) - ; GFX9-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX9-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[ADD20]] + ; GFX9-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX9-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[ADD12]] ; GFX9-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX9-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] - ; GFX9-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] - ; GFX9-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]] + ; GFX9-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]] + ; GFX9-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV42]] + ; GFX9-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]] ; GFX9-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO46]] - ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO49]] - ; GFX9-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[UADDO48]] - ; GFX9-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE8]], [[UADDO48]] - ; GFX9-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[UADDE8]] - ; GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO6]], [[UADDO48]] - ; GFX9-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] - ; GFX9-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]] - ; GFX9-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[MUL24]] - ; GFX9-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO48]], [[ADD25]] - ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[MUL24]] - ; GFX9-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO49]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDO48]], [[C4]] + ; GFX9-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64) + ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV45]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDE8]], [[ANYEXT4]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE8]](s32), [[UADDO48]], [[AMDGPU_MAD_U64_U32_26]] + ; GFX9-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64) + ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[UV44]] + ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO48]], [[UV46]] + ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[UV44]] + ; GFX9-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] ; GFX9-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX9-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH21]] + ; GFX9-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH16]] ; GFX9-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX9-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX9-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[ADD25]] - ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[MUL24]] - ; GFX9-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[ADD25]] - ; GFX9-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX9-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[UV46]] + ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[UV44]] + ; GFX9-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[UV46]] + ; GFX9-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] ; GFX9-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX9-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH23]] + ; GFX9-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH18]] ; GFX9-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) - ; GFX9-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX9-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[ADD26]] + ; GFX9-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX9-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[ADD16]] ; GFX9-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX9-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] - ; GFX9-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[ADD25]] - ; GFX9-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]] + ; GFX9-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]] + ; GFX9-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[UV46]] + ; GFX9-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD18]] ; GFX9-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[UADDO58]] - ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[ADD29]], [[UADDO61]] - ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV31]], [[UADDO60]] - ; GFX9-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV30]], [[UADDE10]] - ; GFX9-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV30]], [[UADDO60]] - ; GFX9-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[ADD19]], [[UADDO61]] + ; GFX9-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX9-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDO60]] + ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV50]], [[UADDE10]] + ; GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDO60]] + ; GFX9-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] ; GFX9-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX9-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH25]] + ; GFX9-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH20]] ; GFX9-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX9-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX9-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV31]], [[UADDE10]] - ; GFX9-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV31]], [[UADDO60]] - ; GFX9-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV30]], [[UADDE10]] - ; GFX9-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX9-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDE10]] + ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDO60]] + ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDE10]] + ; GFX9-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] ; GFX9-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX9-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH27]] + ; GFX9-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH22]] ; GFX9-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) - ; GFX9-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX9-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[UADDO68]], [[ADD30]] + ; GFX9-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX9-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[UADDO68]], [[ADD20]] ; GFX9-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) - ; GFX9-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] - ; GFX9-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV31]], [[UADDE10]] - ; GFX9-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]] - ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO70]](s32), [[ADD33]](s32) - ; GFX9-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV32]], [[UADDO70]] - ; GFX9-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV33]], [[UADDO70]] - ; GFX9-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV32]], [[ADD33]] - ; GFX9-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV32]], [[UADDO70]] - ; GFX9-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] - ; GFX9-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]] - ; GFX9-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV28]], [[MUL33]] - ; GFX9-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV29]], [[ADD35]], [[USUBO9]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV29]], [[ADD35]] - ; GFX9-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE10]](s32), [[UV35]] + ; GFX9-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]] + ; GFX9-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDE10]] + ; GFX9-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]] + ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO70]](s32), [[ADD23]](s32) + ; GFX9-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV52]](s32), [[UADDO70]], [[C4]] + ; GFX9-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64) + ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV55]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV52]](s32), [[ADD23]], [[ANYEXT5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV53]](s32), [[UADDO70]], [[AMDGPU_MAD_U64_U32_32]] + ; GFX9-NEXT: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64) + ; GFX9-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV48]], [[UV54]] + ; GFX9-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV49]], [[UV56]], [[USUBO9]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV49]], [[UV56]] + ; GFX9-NEXT: [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX9-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE10]](s32), [[UV59]] ; GFX9-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) - ; GFX9-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO8]](s32), [[UV34]] + ; GFX9-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO8]](s32), [[UV58]] ; GFX9-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) - ; GFX9-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE10]](s32), [[UV35]] + ; GFX9-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE10]](s32), [[UV59]] ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] - ; GFX9-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[USUBO8]], [[UV34]] - ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV35]], [[USUBO9]] + ; GFX9-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[USUBO8]], [[UV58]] + ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV59]], [[USUBO9]] ; GFX9-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[USUBE12]], [[C5]], [[USUBO11]] - ; GFX9-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX9-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UV36]] - ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[ADD33]], [[UV37]], [[UADDO73]] + ; GFX9-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX9-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UV60]] + ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[ADD23]], [[UV61]], [[UADDO73]] ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO72]](s32), [[UADDE12]](s32) - ; GFX9-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV35]] + ; GFX9-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV59]] ; GFX9-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) - ; GFX9-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV34]] + ; GFX9-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV58]] ; GFX9-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) - ; GFX9-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV35]] + ; GFX9-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV59]] ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] - ; GFX9-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX9-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[UV38]] - ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[UV39]], [[UADDO75]] + ; GFX9-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX9-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[UV62]] + ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[UV63]], [[UADDO75]] ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO74]](s32), [[UADDE14]](s32) ; GFX9-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]] ; GFX9-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]] @@ -1825,131 +1825,131 @@ ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]] ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C4]] + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[MUL]] ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV10]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV10]] + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV10]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] - ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]] - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]] - ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32) - ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[UADDO32]] - ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]] - ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]] - ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV14]], [[UADDO32]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[MUL15]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]] - ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV17]] + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C4]] + ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO32]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] + ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV23]] ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV16]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV22]] ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV17]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV23]] ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV16]] - ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV17]], [[USUBO3]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV22]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV23]], [[USUBO3]] ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV18]] - ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV19]], [[UADDO35]] + ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV24]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV25]], [[UADDO35]] ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) - ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV17]] + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV23]] ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV16]] + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV22]] ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV17]] + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV23]] ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV20]] - ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV21]], [[UADDO37]] + ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV26]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV27]], [[UADDO37]] ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] - ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX10-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV22]](s32) - ; GFX10-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV23]](s32) + ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX10-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV28]](s32) + ; GFX10-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV29]](s32) ; GFX10-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C]] ; GFX10-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) @@ -1960,125 +1960,125 @@ ; GFX10-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] ; GFX10-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) ; GFX10-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) - ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) - ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV24]], [[UV26]] - ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV25]], [[UV27]], [[USUBO7]] - ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[FPTOUI2]] - ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE8]], [[FPTOUI2]] - ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[FPTOUI3]] - ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO6]], [[FPTOUI2]] - ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] - ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[UMULH15]] - ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] - ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] - ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] - ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) + ; GFX10-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV30]], [[UV32]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV31]], [[UV33]], [[USUBO7]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[FPTOUI2]], [[C4]] + ; GFX10-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) + ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[FPTOUI3]] + ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV35]], [[MUL15]] + ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[USUBE8]], [[FPTOUI2]] + ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[MUL16]] + ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV34]] + ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV34]] + ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[MUL18]] ; GFX10-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) - ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH16]] + ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH12]] ; GFX10-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1) ; GFX10-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] - ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] - ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] - ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] + ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV34]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL19]], [[UMULH13]] ; GFX10-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH18]] + ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH14]] ; GFX10-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) ; GFX10-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[ADD20]] ; GFX10-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) ; GFX10-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] - ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] - ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]] + ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] + ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD22]] ; GFX10-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO46]] ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO49]] - ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[UADDO48]] - ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE8]], [[UADDO48]] - ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[UADDE8]] - ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO6]], [[UADDO48]] - ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] - ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]] - ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[MUL24]] - ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO48]], [[ADD25]] - ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[MUL24]] - ; GFX10-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDO48]], [[C4]] + ; GFX10-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_8]](s64) + ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[UADDE8]] + ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV37]], [[MUL20]] + ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[USUBE8]], [[UADDO48]] + ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[MUL21]] + ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[UV36]] + ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[UADDO48]], [[ADD25]] + ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[UV36]] + ; GFX10-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL22]], [[MUL23]] ; GFX10-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX10-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH21]] + ; GFX10-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH16]] ; GFX10-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) ; GFX10-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[ADD25]] - ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[MUL24]] - ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[ADD25]] - ; GFX10-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[ADD25]] + ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[UV36]] + ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[ADD25]] + ; GFX10-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL24]], [[UMULH17]] ; GFX10-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX10-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH23]] + ; GFX10-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH18]] ; GFX10-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) ; GFX10-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] ; GFX10-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[ADD26]] ; GFX10-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) ; GFX10-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] - ; GFX10-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[ADD25]] - ; GFX10-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]] + ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[ADD25]] + ; GFX10-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD28]] ; GFX10-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[UADDO58]] ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[ADD29]], [[UADDO61]] - ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX10-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV31]], [[UADDO60]] - ; GFX10-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV30]], [[UADDE10]] - ; GFX10-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV30]], [[UADDO60]] - ; GFX10-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX10-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX10-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[UV41]], [[UADDO60]] + ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[UV40]], [[UADDE10]] + ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV40]], [[UADDO60]] + ; GFX10-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL25]], [[MUL26]] ; GFX10-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX10-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH25]] + ; GFX10-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH20]] ; GFX10-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) ; GFX10-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX10-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV31]], [[UADDE10]] - ; GFX10-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV31]], [[UADDO60]] - ; GFX10-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV30]], [[UADDE10]] - ; GFX10-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UV41]], [[UADDE10]] + ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV41]], [[UADDO60]] + ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV40]], [[UADDE10]] + ; GFX10-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[UMULH21]] ; GFX10-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX10-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH27]] + ; GFX10-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH22]] ; GFX10-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) ; GFX10-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] ; GFX10-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[UADDO68]], [[ADD30]] ; GFX10-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) ; GFX10-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] - ; GFX10-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV31]], [[UADDE10]] - ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]] + ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV41]], [[UADDE10]] + ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD32]] ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO70]](s32), [[ADD33]](s32) - ; GFX10-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX10-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV32]], [[UADDO70]] - ; GFX10-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV33]], [[UADDO70]] - ; GFX10-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV32]], [[ADD33]] - ; GFX10-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV32]], [[UADDO70]] - ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] - ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]] - ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV28]], [[MUL33]] - ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV29]], [[ADD35]], [[USUBO9]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV29]], [[ADD35]] - ; GFX10-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE10]](s32), [[UV35]] + ; GFX10-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV42]](s32), [[UADDO70]], [[C4]] + ; GFX10-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) + ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UV42]], [[ADD33]] + ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[UV45]], [[MUL28]] + ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UV43]], [[UADDO70]] + ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[MUL29]] + ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV38]], [[UV44]] + ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV39]], [[ADD35]], [[USUBO9]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV39]], [[ADD35]] + ; GFX10-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE10]](s32), [[UV47]] ; GFX10-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) - ; GFX10-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO8]](s32), [[UV34]] + ; GFX10-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO8]](s32), [[UV46]] ; GFX10-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) - ; GFX10-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE10]](s32), [[UV35]] + ; GFX10-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE10]](s32), [[UV47]] ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] - ; GFX10-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[USUBO8]], [[UV34]] - ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV35]], [[USUBO9]] + ; GFX10-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[USUBO8]], [[UV46]] + ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV47]], [[USUBO9]] ; GFX10-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[USUBE12]], [[C5]], [[USUBO11]] - ; GFX10-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX10-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UV36]] - ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[ADD33]], [[UV37]], [[UADDO73]] + ; GFX10-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX10-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UV48]] + ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[ADD33]], [[UV49]], [[UADDO73]] ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO72]](s32), [[UADDE12]](s32) - ; GFX10-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV35]] + ; GFX10-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV47]] ; GFX10-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) - ; GFX10-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV34]] + ; GFX10-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV46]] ; GFX10-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) - ; GFX10-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV35]] + ; GFX10-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV47]] ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] - ; GFX10-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) - ; GFX10-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[UV38]] - ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[UV39]], [[UADDO75]] + ; GFX10-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX10-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[UV50]] + ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[UV51]], [[UADDO75]] ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO74]](s32), [[UADDE14]](s32) ; GFX10-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]] ; GFX10-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]] @@ -2918,123 +2918,123 @@ ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV7]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV8]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] - ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C5]] + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV12]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] + ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] - ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]] + ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX8-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] - ; GFX8-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]] + ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] + ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]] - ; GFX8-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32) - ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] - ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] - ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]] - ; GFX8-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] - ; GFX8-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX8-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] - ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]] - ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] + ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] + ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD11]](s32) + ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C5]] + ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) + ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) + ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] + ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[UV22]] + ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]] ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]] ; GFX8-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]] + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]] ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]] - ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] + ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]] + ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX8-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV14]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV15]], [[UADDO35]] + ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV26]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV27]], [[UADDO35]] ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]] + ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] ; GFX8-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] + ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV16]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV17]], [[UADDO37]] + ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV28]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV29]], [[UADDO37]] ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] @@ -3069,123 +3069,123 @@ ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV7]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV8]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] - ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C5]] + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV12]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] + ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] - ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]] + ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX9-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] - ; GFX9-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]] + ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] + ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]] - ; GFX9-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32) - ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] - ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] - ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]] - ; GFX9-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] - ; GFX9-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX9-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] - ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]] - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]] - ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] + ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] + ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD11]](s32) + ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C5]] + ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) + ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] + ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[UV22]] + ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]] ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]] ; GFX9-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]] + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]] ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]] - ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] + ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]] + ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV14]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV15]], [[UADDO35]] + ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV26]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV27]], [[UADDO35]] ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]] + ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] ; GFX9-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] + ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV16]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV17]], [[UADDO37]] + ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV28]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV29]], [[UADDO37]] ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] @@ -3220,123 +3220,123 @@ ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[MUL]] ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C5]] + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV8]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV8]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV8]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] - ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] - ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32) - ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] - ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] - ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]] - ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]] - ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV14]](s32), [[UADDO32]], [[C5]] + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV16]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]] + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV19]] ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV18]] ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV19]] ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]] - ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV18]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV19]], [[USUBO3]] ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV14]] - ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV15]], [[UADDO35]] + ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV20]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV21]], [[UADDO35]] ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) - ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV19]] ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]] + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV18]] ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV19]] ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) - ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV16]] - ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV17]], [[UADDO37]] + ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV22]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV23]], [[UADDO37]] ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir @@ -128,13 +128,13 @@ ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV6]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[MUL4]], [[MUL5]] - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[UMULH4]] - ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD5]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV4]](s32), [[UV6]], [[C]] + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV9]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV4]](s32), [[UV7]], [[ANYEXT]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV5]](s32), [[UV6]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV8]](s32), [[UV10]](s32) ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]] ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP]](s1) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV1]](s64) @@ -169,13 +169,13 @@ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV6]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[MUL4]], [[MUL5]] - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[UMULH4]] - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD5]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV4]](s32), [[UV6]], [[C]] + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV9]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV4]](s32), [[UV7]], [[ANYEXT]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV5]](s32), [[UV6]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV8]](s32), [[UV10]](s32) ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]] ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP]](s1) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV1]](s64) @@ -226,54 +226,54 @@ ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV10]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]] - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[MUL4]], [[MUL5]] - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[UMULH4]] - ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD5]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV8]](s32), [[UV10]], [[C]] + ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV13]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV8]](s32), [[UV11]], [[ANYEXT]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV9]](s32), [[UV10]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV12]](s32), [[UV14]](s32) ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]] - ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UV14]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UV15]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UV14]] - ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UV18]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV19]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UV18]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UMULH5]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UMULH4]] ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UV15]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UV14]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UV15]] - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH6]] + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UV19]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UV18]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UV19]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH7]] + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH6]] ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD6]] + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD4]] ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UV15]] - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UMULH8]], [[ADD8]] - ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD9]](s32) - ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV18]] - ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UV18]] - ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV19]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UV18]] - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[MUL10]], [[MUL11]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[UMULH9]] - ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL9]](s32), [[ADD11]](s32) + ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UV19]] + ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] + ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD7]](s32) + ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV20]](s32), [[UV22]], [[C]] + ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV25]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV20]](s32), [[UV23]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV21]](s32), [[UV22]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) + ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV24]](s32), [[UV26]](s32) ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV2]](s64), [[C]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV1]](s64), [[MV3]](s64) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1) - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C1]] - ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT1]], [[C1]] + ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1) + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT2]], [[C1]] + ; GFX8-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT3]], [[C1]] ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[AND]](s64), [[AND1]](s64) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX8-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR1]](<2 x s64>) @@ -309,54 +309,54 @@ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV10]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]] - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[MUL4]], [[MUL5]] - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[UMULH4]] - ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD5]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV8]](s32), [[UV10]], [[C]] + ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV13]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV8]](s32), [[UV11]], [[ANYEXT]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV9]](s32), [[UV10]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV12]](s32), [[UV14]](s32) ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]] - ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UV14]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UV15]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UV14]] - ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UV18]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV19]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UV18]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UMULH5]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UMULH4]] ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UV15]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UV14]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UV15]] - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH6]] + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UV19]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UV18]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UV19]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH7]] + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH6]] ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD6]] + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD4]] ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UV15]] - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UMULH8]], [[ADD8]] - ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD9]](s32) - ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV18]] - ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UV18]] - ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV19]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UV18]] - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[MUL10]], [[MUL11]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[UMULH9]] - ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL9]](s32), [[ADD11]](s32) + ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UV19]] + ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD7]](s32) + ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV20]](s32), [[UV22]], [[C]] + ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV25]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV20]](s32), [[UV23]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV21]](s32), [[UV22]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) + ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV24]](s32), [[UV26]](s32) ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV2]](s64), [[C]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV1]](s64), [[MV3]](s64) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1) - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C1]] - ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT1]], [[C1]] + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT2]], [[C1]] + ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT3]], [[C1]] ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[AND]](s64), [[AND1]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR1]](<2 x s64>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir @@ -467,118 +467,118 @@ ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C4]] + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV7]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV8]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] - ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV12]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] + ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] - ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]] + ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX8-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] - ; GFX8-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]] + ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] + ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]] - ; GFX8-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] - ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] - ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]] - ; GFX8-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] - ; GFX8-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX8-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] - ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]] + ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] + ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C4]] + ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) + ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) + ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] + ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[UV22]] ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] + ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]] ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]] ; GFX8-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]] + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]] ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]] - ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] + ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]] + ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]] + ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] ; GFX8-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] + ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV12]] - ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]] + ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV24]] + ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV25]], [[USUBO5]] ; GFX8-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]] ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] @@ -611,118 +611,118 @@ ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C4]] + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV7]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV8]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] - ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV12]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] + ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] - ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]] + ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX9-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] - ; GFX9-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]] + ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] + ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]] - ; GFX9-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] - ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] - ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]] - ; GFX9-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] - ; GFX9-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX9-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] - ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]] - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]] + ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] + ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C4]] + ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) + ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] + ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[UV22]] ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] + ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]] ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]] ; GFX9-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]] + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]] ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]] - ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] + ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]] + ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]] + ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] ; GFX9-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] + ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV12]] - ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]] + ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV24]] + ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV25]], [[USUBO5]] ; GFX9-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]] ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] @@ -755,118 +755,118 @@ ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C4]] + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[MUL]] ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV8]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV8]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV8]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] - ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] - ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] - ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] - ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]] - ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV14]](s32), [[UADDO32]], [[C4]] + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV16]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]] ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV19]] ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV18]] ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV19]] ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]] - ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV18]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV19]], [[USUBO3]] ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV19]] ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]] + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV18]] ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV19]] ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV12]] - ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]] + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV18]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV19]], [[USUBO5]] ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]] ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] @@ -1194,127 +1194,127 @@ ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]] ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C4]] + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV10]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV12]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV10]] + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV12]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV10]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV12]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV12]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] - ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] + ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV15]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV14]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV16]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV14]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV16]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV14]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV16]] + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV16]] + ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] - ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]] - ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO22]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE2]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO22]] + ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX8-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]] - ; GFX8-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE2]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO22]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE2]] + ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] + ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]] - ; GFX8-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[UADDO32]] - ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]] - ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]] - ; GFX8-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV14]], [[UADDO32]] - ; GFX8-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX8-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[MUL15]] - ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]] + ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE2]] + ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO32]], [[C4]] + ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) + ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV25]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[ADD11]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV23]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) + ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]] + ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[UV26]], [[USUBO3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[UV26]] ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV17]] + ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV29]] ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV16]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV28]] ; GFX8-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV17]] + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV29]] ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV16]] - ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV17]], [[USUBO3]] + ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV28]] + ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV29]], [[USUBO3]] ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV17]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV29]] ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV16]] + ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV28]] ; GFX8-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV17]] + ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV29]] ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV16]] - ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV17]], [[USUBO5]] + ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV28]] + ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV29]], [[USUBO5]] ; GFX8-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]] ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] - ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV18]](s32) - ; GFX8-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV19]](s32) + ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX8-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV30]](s32) + ; GFX8-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV31]](s32) ; GFX8-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C]] ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) @@ -1325,121 +1325,121 @@ ; GFX8-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] ; GFX8-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) ; GFX8-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) - ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) - ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV20]], [[UV22]] - ; GFX8-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV21]], [[UV23]], [[USUBO9]] - ; GFX8-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI2]] - ; GFX8-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[FPTOUI2]] - ; GFX8-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]] - ; GFX8-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[FPTOUI2]] - ; GFX8-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] - ; GFX8-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[UMULH15]] - ; GFX8-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] - ; GFX8-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] - ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] - ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX8-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) + ; GFX8-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX8-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV32]], [[UV34]] + ; GFX8-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV33]], [[UV35]], [[USUBO9]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_18:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_19:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[FPTOUI2]], [[C4]] + ; GFX8-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_18]](s64) + ; GFX8-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[UV37]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_20:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_21:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[FPTOUI3]], [[ANYEXT3]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_22:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_23:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE12]](s32), [[FPTOUI2]], [[AMDGPU_MAD_U64_U32_20]] + ; GFX8-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_22]](s64) + ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV36]] + ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV38]] + ; GFX8-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV36]] + ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] ; GFX8-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UMULH16]] + ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UMULH12]] ; GFX8-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX8-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX8-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] - ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] - ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] - ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX8-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV38]] + ; GFX8-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV36]] + ; GFX8-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV38]] + ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] ; GFX8-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) - ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH18]] + ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH14]] ; GFX8-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1) - ; GFX8-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX8-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UADDO40]], [[ADD20]] + ; GFX8-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX8-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UADDO40]], [[ADD12]] ; GFX8-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX8-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] - ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] - ; GFX8-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]] + ; GFX8-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]] + ; GFX8-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV38]] + ; GFX8-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]] ; GFX8-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO42]] - ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO45]] - ; GFX8-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDO44]] - ; GFX8-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[UADDO44]] - ; GFX8-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE4]] - ; GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[UADDO44]] - ; GFX8-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] - ; GFX8-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]] - ; GFX8-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL24]] - ; GFX8-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO44]], [[ADD25]] - ; GFX8-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[MUL24]] - ; GFX8-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO45]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO44]], [[C4]] + ; GFX8-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64) + ; GFX8-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV41]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDE4]], [[ANYEXT4]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE12]](s32), [[UADDO44]], [[AMDGPU_MAD_U64_U32_26]] + ; GFX8-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64) + ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV40]] + ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO44]], [[UV42]] + ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[UV40]] + ; GFX8-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] ; GFX8-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX8-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH21]] + ; GFX8-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH16]] ; GFX8-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX8-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX8-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD25]] - ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL24]] - ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[ADD25]] - ; GFX8-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX8-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV42]] + ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV40]] + ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[UV42]] + ; GFX8-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] ; GFX8-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX8-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH23]] + ; GFX8-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH18]] ; GFX8-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX8-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX8-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD26]] + ; GFX8-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX8-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD16]] ; GFX8-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX8-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] - ; GFX8-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD25]] - ; GFX8-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]] + ; GFX8-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]] + ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV42]] + ; GFX8-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD18]] ; GFX8-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UADDO54]] - ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD29]], [[UADDO57]] - ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDO56]] - ; GFX8-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV26]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV26]], [[UADDO56]] - ; GFX8-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD19]], [[UADDO57]] + ; GFX8-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX8-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDO56]] + ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV46]], [[UADDE6]] + ; GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDO56]] + ; GFX8-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] ; GFX8-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX8-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH25]] + ; GFX8-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH20]] ; GFX8-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX8-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX8-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDE6]] - ; GFX8-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV27]], [[UADDO56]] - ; GFX8-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV26]], [[UADDE6]] - ; GFX8-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX8-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDE6]] + ; GFX8-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDO56]] + ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDE6]] + ; GFX8-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] ; GFX8-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX8-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH27]] + ; GFX8-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH22]] ; GFX8-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX8-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX8-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD30]] + ; GFX8-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX8-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD20]] ; GFX8-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX8-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] - ; GFX8-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV27]], [[UADDE6]] - ; GFX8-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]] - ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDO66]] - ; GFX8-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO66]] - ; GFX8-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[ADD33]] - ; GFX8-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO66]] - ; GFX8-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] - ; GFX8-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]] - ; GFX8-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV24]], [[MUL33]] - ; GFX8-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV25]], [[ADD35]], [[USUBO11]] - ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV25]], [[ADD35]] + ; GFX8-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]] + ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDE6]] + ; GFX8-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]] + ; GFX8-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV48]](s32), [[UADDO66]], [[C4]] + ; GFX8-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64) + ; GFX8-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV51]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV48]](s32), [[ADD23]], [[ANYEXT5]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV49]](s32), [[UADDO66]], [[AMDGPU_MAD_U64_U32_32]] + ; GFX8-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64) + ; GFX8-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV44]], [[UV50]] + ; GFX8-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[UV52]], [[USUBO11]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV45]], [[UV52]] ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO10]](s32), [[USUBE14]](s32) - ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV31]] + ; GFX8-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX8-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV55]] ; GFX8-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) - ; GFX8-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV30]] + ; GFX8-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV54]] ; GFX8-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) - ; GFX8-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV31]] + ; GFX8-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV55]] ; GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] - ; GFX8-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[USUBO10]], [[UV30]] - ; GFX8-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV31]], [[USUBO11]] + ; GFX8-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[USUBO10]], [[UV54]] + ; GFX8-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV55]], [[USUBO11]] ; GFX8-NEXT: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[C5]], [[USUBO13]] ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO12]](s32), [[USUBE18]](s32) - ; GFX8-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE18]](s32), [[UV31]] + ; GFX8-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE18]](s32), [[UV55]] ; GFX8-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) - ; GFX8-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV30]] + ; GFX8-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV54]] ; GFX8-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) - ; GFX8-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE18]](s32), [[UV31]] + ; GFX8-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE18]](s32), [[UV55]] ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] - ; GFX8-NEXT: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV30]] - ; GFX8-NEXT: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[UV31]], [[USUBO13]] + ; GFX8-NEXT: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV54]] + ; GFX8-NEXT: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[UV55]], [[USUBO13]] ; GFX8-NEXT: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE20]], [[C5]], [[USUBO15]] ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE22]](s32) ; GFX8-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]] @@ -1475,127 +1475,127 @@ ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]] ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C4]] + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV10]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV12]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV10]] + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV12]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV10]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV12]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV12]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] - ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] + ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV15]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV14]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV16]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV14]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV16]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV14]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV16]] + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV16]] + ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] - ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]] - ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO22]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE2]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO22]] + ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX9-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]] - ; GFX9-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE2]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO22]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE2]] + ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] + ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]] - ; GFX9-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[UADDO32]] - ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]] - ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]] - ; GFX9-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV14]], [[UADDO32]] - ; GFX9-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX9-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[MUL15]] - ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]] - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]] + ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE2]] + ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO32]], [[C4]] + ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV25]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[ADD11]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV23]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) + ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]] + ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[UV26]], [[USUBO3]] + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[UV26]] ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV17]] + ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV29]] ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV16]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV28]] ; GFX9-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV17]] + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV29]] ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV16]] - ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV17]], [[USUBO3]] + ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV28]] + ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV29]], [[USUBO3]] ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV17]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV29]] ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV16]] + ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV28]] ; GFX9-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV17]] + ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV29]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV16]] - ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV17]], [[USUBO5]] + ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV28]] + ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV29]], [[USUBO5]] ; GFX9-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]] ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] - ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV18]](s32) - ; GFX9-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV19]](s32) + ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX9-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV30]](s32) + ; GFX9-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV31]](s32) ; GFX9-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C]] ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) @@ -1606,121 +1606,121 @@ ; GFX9-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] ; GFX9-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) ; GFX9-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) - ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) - ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV20]], [[UV22]] - ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV21]], [[UV23]], [[USUBO9]] - ; GFX9-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI2]] - ; GFX9-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[FPTOUI2]] - ; GFX9-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]] - ; GFX9-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[FPTOUI2]] - ; GFX9-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] - ; GFX9-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[UMULH15]] - ; GFX9-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] - ; GFX9-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] - ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] - ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX9-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) + ; GFX9-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX9-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV32]], [[UV34]] + ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV33]], [[UV35]], [[USUBO9]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_18:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_19:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[FPTOUI2]], [[C4]] + ; GFX9-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_18]](s64) + ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[UV37]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_20:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_21:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[FPTOUI3]], [[ANYEXT3]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_22:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_23:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE12]](s32), [[FPTOUI2]], [[AMDGPU_MAD_U64_U32_20]] + ; GFX9-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_22]](s64) + ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV36]] + ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV38]] + ; GFX9-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV36]] + ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] ; GFX9-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UMULH16]] + ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UMULH12]] ; GFX9-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) - ; GFX9-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX9-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] - ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] - ; GFX9-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] - ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX9-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV38]] + ; GFX9-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV36]] + ; GFX9-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV38]] + ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]] ; GFX9-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) - ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH18]] + ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH14]] ; GFX9-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1) - ; GFX9-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] - ; GFX9-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UADDO40]], [[ADD20]] + ; GFX9-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX9-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UADDO40]], [[ADD12]] ; GFX9-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) - ; GFX9-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] - ; GFX9-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] - ; GFX9-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]] + ; GFX9-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]] + ; GFX9-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV38]] + ; GFX9-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]] ; GFX9-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO42]] - ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO45]] - ; GFX9-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDO44]] - ; GFX9-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[UADDO44]] - ; GFX9-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE4]] - ; GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[UADDO44]] - ; GFX9-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] - ; GFX9-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]] - ; GFX9-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL24]] - ; GFX9-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO44]], [[ADD25]] - ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[MUL24]] - ; GFX9-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO45]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO44]], [[C4]] + ; GFX9-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64) + ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV41]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDE4]], [[ANYEXT4]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE12]](s32), [[UADDO44]], [[AMDGPU_MAD_U64_U32_26]] + ; GFX9-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64) + ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV40]] + ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO44]], [[UV42]] + ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[UV40]] + ; GFX9-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] ; GFX9-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX9-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH21]] + ; GFX9-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH16]] ; GFX9-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) - ; GFX9-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX9-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD25]] - ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL24]] - ; GFX9-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[ADD25]] - ; GFX9-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX9-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV42]] + ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV40]] + ; GFX9-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[UV42]] + ; GFX9-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]] ; GFX9-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX9-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH23]] + ; GFX9-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH18]] ; GFX9-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) - ; GFX9-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] - ; GFX9-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD26]] + ; GFX9-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX9-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD16]] ; GFX9-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) - ; GFX9-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] - ; GFX9-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD25]] - ; GFX9-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]] + ; GFX9-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]] + ; GFX9-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV42]] + ; GFX9-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD18]] ; GFX9-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UADDO54]] - ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD29]], [[UADDO57]] - ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDO56]] - ; GFX9-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV26]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV26]], [[UADDO56]] - ; GFX9-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD19]], [[UADDO57]] + ; GFX9-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX9-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDO56]] + ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV46]], [[UADDE6]] + ; GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDO56]] + ; GFX9-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]] ; GFX9-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX9-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH25]] + ; GFX9-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH20]] ; GFX9-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) - ; GFX9-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX9-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDE6]] - ; GFX9-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV27]], [[UADDO56]] - ; GFX9-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV26]], [[UADDE6]] - ; GFX9-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX9-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDE6]] + ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDO56]] + ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDE6]] + ; GFX9-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]] ; GFX9-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX9-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH27]] + ; GFX9-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH22]] ; GFX9-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) - ; GFX9-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] - ; GFX9-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD30]] + ; GFX9-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX9-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD20]] ; GFX9-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) - ; GFX9-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] - ; GFX9-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV27]], [[UADDE6]] - ; GFX9-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]] - ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDO66]] - ; GFX9-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO66]] - ; GFX9-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[ADD33]] - ; GFX9-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO66]] - ; GFX9-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] - ; GFX9-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]] - ; GFX9-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV24]], [[MUL33]] - ; GFX9-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV25]], [[ADD35]], [[USUBO11]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV25]], [[ADD35]] + ; GFX9-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]] + ; GFX9-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDE6]] + ; GFX9-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]] + ; GFX9-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV48]](s32), [[UADDO66]], [[C4]] + ; GFX9-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64) + ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV51]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV48]](s32), [[ADD23]], [[ANYEXT5]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV49]](s32), [[UADDO66]], [[AMDGPU_MAD_U64_U32_32]] + ; GFX9-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64) + ; GFX9-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV44]], [[UV50]] + ; GFX9-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[UV52]], [[USUBO11]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV45]], [[UV52]] ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO10]](s32), [[USUBE14]](s32) - ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV31]] + ; GFX9-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX9-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV55]] ; GFX9-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) - ; GFX9-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV30]] + ; GFX9-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV54]] ; GFX9-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) - ; GFX9-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV31]] + ; GFX9-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV55]] ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] - ; GFX9-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[USUBO10]], [[UV30]] - ; GFX9-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV31]], [[USUBO11]] + ; GFX9-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[USUBO10]], [[UV54]] + ; GFX9-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV55]], [[USUBO11]] ; GFX9-NEXT: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[C5]], [[USUBO13]] ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO12]](s32), [[USUBE18]](s32) - ; GFX9-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE18]](s32), [[UV31]] + ; GFX9-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE18]](s32), [[UV55]] ; GFX9-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) - ; GFX9-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV30]] + ; GFX9-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV54]] ; GFX9-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) - ; GFX9-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE18]](s32), [[UV31]] + ; GFX9-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE18]](s32), [[UV55]] ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] - ; GFX9-NEXT: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV30]] - ; GFX9-NEXT: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[UV31]], [[USUBO13]] + ; GFX9-NEXT: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV54]] + ; GFX9-NEXT: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[UV55]], [[USUBO13]] ; GFX9-NEXT: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE20]], [[C5]], [[USUBO15]] ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE22]](s32) ; GFX9-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]] @@ -1756,127 +1756,127 @@ ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]] ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C4]] + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[MUL]] ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV10]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV10]] + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV10]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]] + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] - ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]] - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]] - ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[UADDO32]] - ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]] - ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]] - ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV14]], [[UADDO32]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[MUL15]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C4]] + ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO32]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV17]] + ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV23]] ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV16]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV22]] ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV17]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV23]] ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV16]] - ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV17]], [[USUBO3]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV22]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV23]], [[USUBO3]] ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV17]] + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV23]] ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV16]] + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV22]] ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV17]] + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV23]] ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV16]] - ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV17]], [[USUBO5]] + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV22]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV23]], [[USUBO5]] ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]] ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] - ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX10-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV18]](s32) - ; GFX10-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV19]](s32) + ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX10-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV24]](s32) + ; GFX10-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV25]](s32) ; GFX10-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C]] ; GFX10-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) @@ -1887,121 +1887,121 @@ ; GFX10-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] ; GFX10-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) ; GFX10-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) - ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) - ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV20]], [[UV22]] - ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV21]], [[UV23]], [[USUBO9]] - ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI2]] - ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[FPTOUI2]] - ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]] - ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[FPTOUI2]] - ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] - ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[UMULH15]] - ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] - ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] - ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] - ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) + ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]] + ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO9]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[FPTOUI2]], [[C4]] + ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) + ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]] + ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV31]], [[MUL15]] + ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[FPTOUI2]] + ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[MUL16]] + ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV30]] + ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV30]] + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[MUL18]] ; GFX10-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) - ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UMULH16]] + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UMULH12]] ; GFX10-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) ; GFX10-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] - ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] - ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] - ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] - ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] + ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV30]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL19]], [[UMULH13]] ; GFX10-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) - ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH18]] + ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH14]] ; GFX10-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1) ; GFX10-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UADDO40]], [[ADD20]] ; GFX10-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) ; GFX10-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] - ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] - ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]] + ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] + ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD22]] ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO42]] ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO45]] - ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDO44]] - ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[UADDO44]] - ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE4]] - ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[UADDO44]] - ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] - ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]] - ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL24]] - ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO44]], [[ADD25]] - ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[MUL24]] - ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO44]], [[C4]] + ; GFX10-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_8]](s64) + ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE4]] + ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV33]], [[MUL20]] + ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[UADDO44]] + ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[MUL21]] + ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV32]] + ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[UADDO44]], [[ADD25]] + ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[UV32]] + ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL22]], [[MUL23]] ; GFX10-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) - ; GFX10-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH21]] + ; GFX10-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH16]] ; GFX10-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) ; GFX10-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] - ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD25]] - ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL24]] - ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[ADD25]] - ; GFX10-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD25]] + ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV32]] + ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[ADD25]] + ; GFX10-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL24]], [[UMULH17]] ; GFX10-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) - ; GFX10-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH23]] + ; GFX10-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH18]] ; GFX10-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) ; GFX10-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] ; GFX10-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD26]] ; GFX10-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) ; GFX10-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] - ; GFX10-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD25]] - ; GFX10-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]] + ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD25]] + ; GFX10-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD28]] ; GFX10-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UADDO54]] ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD29]], [[UADDO57]] - ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX10-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDO56]] - ; GFX10-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV26]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV26]], [[UADDO56]] - ; GFX10-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX10-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX10-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[UV37]], [[UADDO56]] + ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[UV36]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV36]], [[UADDO56]] + ; GFX10-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL25]], [[MUL26]] ; GFX10-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) - ; GFX10-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH25]] + ; GFX10-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH20]] ; GFX10-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) ; GFX10-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] - ; GFX10-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDE6]] - ; GFX10-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV27]], [[UADDO56]] - ; GFX10-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV26]], [[UADDE6]] - ; GFX10-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UV37]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV37]], [[UADDO56]] + ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV36]], [[UADDE6]] + ; GFX10-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[UMULH21]] ; GFX10-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) - ; GFX10-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH27]] + ; GFX10-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH22]] ; GFX10-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) ; GFX10-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] ; GFX10-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD30]] ; GFX10-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) ; GFX10-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] - ; GFX10-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV27]], [[UADDE6]] - ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]] - ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX10-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDO66]] - ; GFX10-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO66]] - ; GFX10-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[ADD33]] - ; GFX10-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO66]] - ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] - ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]] - ; GFX10-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV24]], [[MUL33]] - ; GFX10-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV25]], [[ADD35]], [[USUBO11]] - ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV25]], [[ADD35]] + ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV37]], [[UADDE6]] + ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD32]] + ; GFX10-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV38]](s32), [[UADDO66]], [[C4]] + ; GFX10-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) + ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UV38]], [[ADD33]] + ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[UV41]], [[MUL28]] + ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UV39]], [[UADDO66]] + ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[MUL29]] + ; GFX10-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV34]], [[UV40]] + ; GFX10-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV35]], [[ADD35]], [[USUBO11]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV35]], [[ADD35]] ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO10]](s32), [[USUBE14]](s32) - ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV31]] + ; GFX10-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV43]] ; GFX10-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) - ; GFX10-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV30]] + ; GFX10-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV42]] ; GFX10-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) - ; GFX10-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV31]] + ; GFX10-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV43]] ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] - ; GFX10-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[USUBO10]], [[UV30]] - ; GFX10-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV31]], [[USUBO11]] + ; GFX10-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[USUBO10]], [[UV42]] + ; GFX10-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV43]], [[USUBO11]] ; GFX10-NEXT: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[C5]], [[USUBO13]] ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO12]](s32), [[USUBE18]](s32) - ; GFX10-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE18]](s32), [[UV31]] + ; GFX10-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE18]](s32), [[UV43]] ; GFX10-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) - ; GFX10-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV30]] + ; GFX10-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV42]] ; GFX10-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) - ; GFX10-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE18]](s32), [[UV31]] + ; GFX10-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE18]](s32), [[UV43]] ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] - ; GFX10-NEXT: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV30]] - ; GFX10-NEXT: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[UV31]], [[USUBO13]] + ; GFX10-NEXT: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV42]] + ; GFX10-NEXT: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[UV43]], [[USUBO13]] ; GFX10-NEXT: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE20]], [[C5]], [[USUBO15]] ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE22]](s32) ; GFX10-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]] @@ -2782,118 +2782,118 @@ ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV7]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV8]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] - ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] - ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] - ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] - ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C5]] + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV12]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]] + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] - ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] + ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] - ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] - ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]] + ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX8-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]] - ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] - ; GFX8-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]] - ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]] + ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]] + ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX8-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] + ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX8-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX8-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]] - ; GFX8-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] - ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] - ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]] - ; GFX8-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] - ; GFX8-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX8-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] - ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]] - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]] + ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] + ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C5]] + ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) + ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]] + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) + ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] + ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[UV22]] ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] + ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]] ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]] ; GFX8-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]] + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]] ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]] - ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] + ; GFX8-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]] + ; GFX8-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]] + ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] ; GFX8-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] + ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV12]] - ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]] + ; GFX8-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV24]] + ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV25]], [[USUBO5]] ; GFX8-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] @@ -2929,118 +2929,118 @@ ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] - ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV7]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]] + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV8]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] - ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] - ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] - ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] - ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] - ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C5]] + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]] + ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64) + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV12]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]] + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] - ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]] + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]] ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) - ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]] + ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] - ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] - ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] - ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]] + ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]] + ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) - ; GFX9-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]] - ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] - ; GFX9-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]] - ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]] + ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]] + ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]] ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) - ; GFX9-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] - ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] + ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]] ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) - ; GFX9-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX9-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]] - ; GFX9-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] - ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] - ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]] - ; GFX9-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] - ; GFX9-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX9-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] - ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]] - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]] + ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]] + ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]] + ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]] + ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C5]] + ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32) + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]] + ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]] + ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64) + ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]] + ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]] + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[UV22]] ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] + ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]] ; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]] ; GFX9-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]] + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]] ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]] - ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] + ; GFX9-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]] + ; GFX9-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]] + ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] ; GFX9-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] + ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV12]] - ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]] + ; GFX9-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV24]] + ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV25]], [[USUBO5]] ; GFX9-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] @@ -3076,118 +3076,118 @@ ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] - ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI]], [[C5]] + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[MUL]] ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] - ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] - ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]] + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]] ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] - ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] - ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]] ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] - ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] - ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]] ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] - ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] - ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] - ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] - ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] - ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] - ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] - ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] - ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] - ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C5]] + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64) + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[MUL5]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV8]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV8]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]] ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]] ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] - ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] - ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV8]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]] ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] - ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] - ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]] ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] - ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] - ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] - ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]] ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) - ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]] ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] - ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]] - ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] - ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]] - ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]] ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) - ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] - ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]] - ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] - ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] - ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] - ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]] - ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] - ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] - ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] - ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] - ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]] - ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]] + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV14]](s32), [[UADDO32]], [[C5]] + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64) + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL13]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV16]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]] ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV19]] ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV18]] ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV19]] ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] - ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]] - ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV18]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV19]], [[USUBO3]] ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) - ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV19]] ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) - ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]] + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV18]] ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) - ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV19]] ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] - ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV12]] - ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]] + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV18]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV19]], [[USUBO5]] ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll @@ -282,11 +282,11 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s2 ; GFX7-NEXT: v_mul_hi_u32 v0, s0, v0 ; GFX7-NEXT: s_mul_i32 s4, s0, s2 -; GFX7-NEXT: s_mul_i32 s1, s1, s2 ; GFX7-NEXT: s_mul_i32 s0, s0, s3 -; GFX7-NEXT: s_add_i32 s1, s1, s0 -; GFX7-NEXT: v_add_i32_e32 v0, vcc, s1, v0 -; GFX7-NEXT: v_readfirstlane_b32 s1, v0 +; GFX7-NEXT: s_mul_i32 s1, s1, s2 +; GFX7-NEXT: v_readfirstlane_b32 s5, v0 +; GFX7-NEXT: s_add_u32 s0, s0, s5 +; GFX7-NEXT: s_add_u32 s1, s1, s0 ; GFX7-NEXT: s_mov_b32 s0, s4 ; GFX7-NEXT: ; return to shader part epilog ; @@ -295,33 +295,33 @@ ; GFX8-NEXT: v_mov_b32_e32 v0, s2 ; GFX8-NEXT: v_mul_hi_u32 v0, s0, v0 ; GFX8-NEXT: s_mul_i32 s4, s0, s2 -; GFX8-NEXT: s_mul_i32 s1, s1, s2 ; GFX8-NEXT: s_mul_i32 s0, s0, s3 -; GFX8-NEXT: s_add_i32 s1, s1, s0 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, s1, v0 -; GFX8-NEXT: v_readfirstlane_b32 s1, v0 +; GFX8-NEXT: s_mul_i32 s1, s1, s2 +; GFX8-NEXT: v_readfirstlane_b32 s5, v0 +; GFX8-NEXT: s_add_u32 s0, s0, s5 +; GFX8-NEXT: s_add_u32 s1, s1, s0 ; GFX8-NEXT: s_mov_b32 s0, s4 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_mul_i33: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mul_i32 s1, s1, s2 -; GFX9-NEXT: s_mul_i32 s3, s0, s3 ; GFX9-NEXT: s_mul_i32 s4, s0, s2 -; GFX9-NEXT: s_mul_hi_u32 s0, s0, s2 -; GFX9-NEXT: s_add_i32 s1, s1, s3 -; GFX9-NEXT: s_add_i32 s1, s1, s0 +; GFX9-NEXT: s_mul_hi_u32 s5, s0, s2 +; GFX9-NEXT: s_mul_i32 s0, s0, s3 +; GFX9-NEXT: s_add_u32 s0, s0, s5 +; GFX9-NEXT: s_mul_i32 s1, s1, s2 +; GFX9-NEXT: s_add_u32 s1, s1, s0 ; GFX9-NEXT: s_mov_b32 s0, s4 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: s_mul_i33: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_mul_i32 s1, s1, s2 -; GFX10-NEXT: s_mul_i32 s3, s0, s3 ; GFX10-NEXT: s_mul_hi_u32 s4, s0, s2 -; GFX10-NEXT: s_add_i32 s1, s1, s3 +; GFX10-NEXT: s_mul_i32 s3, s0, s3 +; GFX10-NEXT: s_mul_i32 s1, s1, s2 +; GFX10-NEXT: s_add_i32 s3, s4, s3 ; GFX10-NEXT: s_mul_i32 s0, s0, s2 -; GFX10-NEXT: s_add_i32 s1, s1, s4 +; GFX10-NEXT: s_add_i32 s1, s3, s1 ; GFX10-NEXT: ; return to shader part epilog %result = mul i33 %num, %den ret i33 %result @@ -333,11 +333,11 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s2 ; GFX7-NEXT: v_mul_hi_u32 v0, s0, v0 ; GFX7-NEXT: s_mul_i32 s4, s0, s2 -; GFX7-NEXT: s_mul_i32 s1, s1, s2 ; GFX7-NEXT: s_mul_i32 s0, s0, s3 -; GFX7-NEXT: s_add_i32 s1, s1, s0 -; GFX7-NEXT: v_add_i32_e32 v0, vcc, s1, v0 -; GFX7-NEXT: v_readfirstlane_b32 s1, v0 +; GFX7-NEXT: s_mul_i32 s1, s1, s2 +; GFX7-NEXT: v_readfirstlane_b32 s5, v0 +; GFX7-NEXT: s_add_u32 s0, s0, s5 +; GFX7-NEXT: s_add_u32 s1, s1, s0 ; GFX7-NEXT: s_mov_b32 s0, s4 ; GFX7-NEXT: ; return to shader part epilog ; @@ -346,82 +346,59 @@ ; GFX8-NEXT: v_mov_b32_e32 v0, s2 ; GFX8-NEXT: v_mul_hi_u32 v0, s0, v0 ; GFX8-NEXT: s_mul_i32 s4, s0, s2 -; GFX8-NEXT: s_mul_i32 s1, s1, s2 ; GFX8-NEXT: s_mul_i32 s0, s0, s3 -; GFX8-NEXT: s_add_i32 s1, s1, s0 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, s1, v0 -; GFX8-NEXT: v_readfirstlane_b32 s1, v0 +; GFX8-NEXT: s_mul_i32 s1, s1, s2 +; GFX8-NEXT: v_readfirstlane_b32 s5, v0 +; GFX8-NEXT: s_add_u32 s0, s0, s5 +; GFX8-NEXT: s_add_u32 s1, s1, s0 ; GFX8-NEXT: s_mov_b32 s0, s4 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_mul_i64: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mul_i32 s1, s1, s2 -; GFX9-NEXT: s_mul_i32 s3, s0, s3 ; GFX9-NEXT: s_mul_i32 s4, s0, s2 -; GFX9-NEXT: s_mul_hi_u32 s0, s0, s2 -; GFX9-NEXT: s_add_i32 s1, s1, s3 -; GFX9-NEXT: s_add_i32 s1, s1, s0 +; GFX9-NEXT: s_mul_hi_u32 s5, s0, s2 +; GFX9-NEXT: s_mul_i32 s0, s0, s3 +; GFX9-NEXT: s_add_u32 s0, s0, s5 +; GFX9-NEXT: s_mul_i32 s1, s1, s2 +; GFX9-NEXT: s_add_u32 s1, s1, s0 ; GFX9-NEXT: s_mov_b32 s0, s4 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: s_mul_i64: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_mul_i32 s1, s1, s2 -; GFX10-NEXT: s_mul_i32 s3, s0, s3 ; GFX10-NEXT: s_mul_hi_u32 s4, s0, s2 -; GFX10-NEXT: s_add_i32 s1, s1, s3 +; GFX10-NEXT: s_mul_i32 s3, s0, s3 +; GFX10-NEXT: s_mul_i32 s1, s1, s2 +; GFX10-NEXT: s_add_i32 s3, s4, s3 ; GFX10-NEXT: s_mul_i32 s0, s0, s2 -; GFX10-NEXT: s_add_i32 s1, s1, s4 +; GFX10-NEXT: s_add_i32 s1, s3, s1 ; GFX10-NEXT: ; return to shader part epilog %result = mul i64 %num, %den ret i64 %result } define i64 @v_mul_i64(i64 %num, i64 %den) { -; GFX7-LABEL: v_mul_i64: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mul_lo_u32 v1, v1, v2 -; GFX7-NEXT: v_mul_lo_u32 v4, v0, v3 -; GFX7-NEXT: v_mul_lo_u32 v3, v0, v2 -; GFX7-NEXT: v_mul_hi_u32 v0, v0, v2 -; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v4 -; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v0 -; GFX7-NEXT: v_mov_b32_e32 v0, v3 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_mul_i64: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mul_lo_u32 v1, v1, v2 -; GFX8-NEXT: v_mul_lo_u32 v4, v0, v3 -; GFX8-NEXT: v_mul_lo_u32 v3, v0, v2 -; GFX8-NEXT: v_mul_hi_u32 v0, v0, v2 -; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v4 -; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v0 -; GFX8-NEXT: v_mov_b32_e32 v0, v3 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_mul_i64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mul_lo_u32 v1, v1, v2 -; GFX9-NEXT: v_mul_lo_u32 v3, v0, v3 -; GFX9-NEXT: v_mul_hi_u32 v4, v0, v2 -; GFX9-NEXT: v_mul_lo_u32 v0, v0, v2 -; GFX9-NEXT: v_add3_u32 v1, v1, v3, v4 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: v_mul_i64: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v4, v0 +; GCN-NEXT: v_mov_b32_e32 v5, v1 +; GCN-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v2, 0 +; GCN-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v4, v3, v[1:2] +; GCN-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v5, v2, v[3:4] +; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_mul_i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_mul_lo_u32 v1, v1, v2 -; GFX10-NEXT: v_mul_lo_u32 v3, v0, v3 -; GFX10-NEXT: v_mul_hi_u32 v4, v0, v2 -; GFX10-NEXT: v_mul_lo_u32 v0, v0, v2 -; GFX10-NEXT: v_add3_u32 v1, v1, v3, v4 +; GFX10-NEXT: v_mov_b32_e32 v4, v0 +; GFX10-NEXT: v_mov_b32_e32 v5, v1 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v4, v2, 0 +; GFX10-NEXT: v_mul_lo_u32 v3, v4, v3 +; GFX10-NEXT: v_mul_lo_u32 v2, v5, v2 +; GFX10-NEXT: v_add3_u32 v1, v1, v3, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = mul i64 %num, %den ret i64 %result @@ -432,28 +409,25 @@ ; GFX7: ; %bb.0: ; GFX7-NEXT: v_mov_b32_e32 v0, s3 ; GFX7-NEXT: v_mul_hi_u32 v0, s0, v0 -; GFX7-NEXT: s_mul_i32 s7, s1, s3 -; GFX7-NEXT: s_mul_i32 s8, s0, s4 -; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_add_u32 s7, s7, s8 -; GFX7-NEXT: v_mul_hi_u32 v2, v2, s3 -; GFX7-NEXT: v_mov_b32_e32 v3, s4 -; GFX7-NEXT: v_add_i32_e32 v0, vcc, s7, v0 +; GFX7-NEXT: v_mov_b32_e32 v1, s4 +; GFX7-NEXT: v_mul_hi_u32 v1, s0, v1 +; GFX7-NEXT: s_mul_i32 s5, s0, s5 +; GFX7-NEXT: v_readfirstlane_b32 s7, v0 +; GFX7-NEXT: s_mul_i32 s8, s1, s4 +; GFX7-NEXT: v_mov_b32_e32 v0, s1 +; GFX7-NEXT: s_add_u32 s5, s8, s5 ; GFX7-NEXT: s_mul_i32 s2, s2, s3 -; GFX7-NEXT: s_mul_i32 s7, s1, s4 -; GFX7-NEXT: v_mul_hi_u32 v3, s0, v3 +; GFX7-NEXT: v_mul_hi_u32 v0, v0, s3 ; GFX7-NEXT: s_mul_i32 s6, s0, s3 -; GFX7-NEXT: s_cselect_b32 s8, 1, 0 -; GFX7-NEXT: s_mul_i32 s5, s0, s5 -; GFX7-NEXT: s_add_i32 s0, s2, s7 -; GFX7-NEXT: s_add_i32 s0, s0, s5 -; GFX7-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v2, vcc, s0, v2 -; GFX7-NEXT: v_add_i32_e32 v1, vcc, s8, v1 -; GFX7-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; GFX7-NEXT: v_add_i32_e32 v1, vcc, v2, v1 -; GFX7-NEXT: v_readfirstlane_b32 s1, v0 -; GFX7-NEXT: v_readfirstlane_b32 s2, v1 +; GFX7-NEXT: s_add_u32 s2, s2, s5 +; GFX7-NEXT: s_mul_i32 s0, s0, s4 +; GFX7-NEXT: v_readfirstlane_b32 s4, v1 +; GFX7-NEXT: s_add_u32 s0, s0, s7 +; GFX7-NEXT: s_addc_u32 s2, s4, s2 +; GFX7-NEXT: s_mul_i32 s1, s1, s3 +; GFX7-NEXT: v_readfirstlane_b32 s3, v0 +; GFX7-NEXT: s_add_u32 s1, s1, s0 +; GFX7-NEXT: s_addc_u32 s2, s3, s2 ; GFX7-NEXT: s_mov_b32 s0, s6 ; GFX7-NEXT: ; return to shader part epilog ; @@ -461,78 +435,66 @@ ; GFX8: ; %bb.0: ; GFX8-NEXT: v_mov_b32_e32 v0, s3 ; GFX8-NEXT: v_mul_hi_u32 v0, s0, v0 -; GFX8-NEXT: s_mul_i32 s7, s1, s3 -; GFX8-NEXT: s_mul_i32 s8, s0, s4 -; GFX8-NEXT: v_mov_b32_e32 v2, s1 -; GFX8-NEXT: s_add_u32 s7, s7, s8 -; GFX8-NEXT: v_mul_hi_u32 v2, v2, s3 -; GFX8-NEXT: v_mov_b32_e32 v3, s4 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, s7, v0 +; GFX8-NEXT: v_mov_b32_e32 v1, s4 +; GFX8-NEXT: v_mul_hi_u32 v1, s0, v1 +; GFX8-NEXT: s_mul_i32 s5, s0, s5 +; GFX8-NEXT: v_readfirstlane_b32 s7, v0 +; GFX8-NEXT: s_mul_i32 s8, s1, s4 +; GFX8-NEXT: v_mov_b32_e32 v0, s1 +; GFX8-NEXT: s_add_u32 s5, s8, s5 ; GFX8-NEXT: s_mul_i32 s2, s2, s3 -; GFX8-NEXT: s_mul_i32 s7, s1, s4 -; GFX8-NEXT: v_mul_hi_u32 v3, s0, v3 +; GFX8-NEXT: v_mul_hi_u32 v0, v0, s3 ; GFX8-NEXT: s_mul_i32 s6, s0, s3 -; GFX8-NEXT: s_cselect_b32 s8, 1, 0 -; GFX8-NEXT: s_mul_i32 s5, s0, s5 -; GFX8-NEXT: s_add_i32 s0, s2, s7 -; GFX8-NEXT: s_add_i32 s0, s0, s5 -; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, s0, v2 -; GFX8-NEXT: v_add_u32_e32 v1, vcc, s8, v1 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 -; GFX8-NEXT: v_add_u32_e32 v1, vcc, v2, v1 -; GFX8-NEXT: v_readfirstlane_b32 s1, v0 -; GFX8-NEXT: v_readfirstlane_b32 s2, v1 +; GFX8-NEXT: s_add_u32 s2, s2, s5 +; GFX8-NEXT: s_mul_i32 s0, s0, s4 +; GFX8-NEXT: v_readfirstlane_b32 s4, v1 +; GFX8-NEXT: s_add_u32 s0, s0, s7 +; GFX8-NEXT: s_addc_u32 s2, s4, s2 +; GFX8-NEXT: s_mul_i32 s1, s1, s3 +; GFX8-NEXT: v_readfirstlane_b32 s3, v0 +; GFX8-NEXT: s_add_u32 s1, s1, s0 +; GFX8-NEXT: s_addc_u32 s2, s3, s2 ; GFX8-NEXT: s_mov_b32 s0, s6 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_mul_i96: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mul_i32 s7, s1, s3 -; GFX9-NEXT: s_mul_i32 s8, s0, s4 -; GFX9-NEXT: s_mul_hi_u32 s9, s0, s3 -; GFX9-NEXT: s_add_u32 s7, s7, s8 -; GFX9-NEXT: s_cselect_b32 s8, 1, 0 -; GFX9-NEXT: s_add_u32 s7, s7, s9 -; GFX9-NEXT: s_cselect_b32 s9, 1, 0 -; GFX9-NEXT: s_add_i32 s8, s8, s9 -; GFX9-NEXT: s_mul_i32 s2, s2, s3 -; GFX9-NEXT: s_mul_i32 s9, s1, s4 ; GFX9-NEXT: s_mul_i32 s5, s0, s5 -; GFX9-NEXT: s_add_i32 s2, s2, s9 -; GFX9-NEXT: s_mul_hi_u32 s1, s1, s3 -; GFX9-NEXT: s_add_i32 s2, s2, s5 +; GFX9-NEXT: s_mul_i32 s8, s1, s4 +; GFX9-NEXT: s_add_u32 s5, s8, s5 +; GFX9-NEXT: s_mul_i32 s2, s2, s3 +; GFX9-NEXT: s_mul_hi_u32 s7, s0, s3 +; GFX9-NEXT: s_add_u32 s2, s2, s5 +; GFX9-NEXT: s_mul_i32 s5, s0, s4 ; GFX9-NEXT: s_mul_i32 s6, s0, s3 ; GFX9-NEXT: s_mul_hi_u32 s0, s0, s4 -; GFX9-NEXT: s_add_i32 s1, s2, s1 -; GFX9-NEXT: s_add_i32 s0, s1, s0 -; GFX9-NEXT: s_add_i32 s2, s0, s8 +; GFX9-NEXT: s_add_u32 s4, s5, s7 +; GFX9-NEXT: s_addc_u32 s0, s0, s2 +; GFX9-NEXT: s_mul_i32 s2, s1, s3 +; GFX9-NEXT: s_mul_hi_u32 s3, s1, s3 +; GFX9-NEXT: s_add_u32 s1, s2, s4 +; GFX9-NEXT: s_addc_u32 s2, s3, s0 ; GFX9-NEXT: s_mov_b32 s0, s6 -; GFX9-NEXT: s_mov_b32 s1, s7 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: s_mul_i96: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_mul_i32 s6, s1, s3 -; GFX10-NEXT: s_mul_i32 s7, s0, s4 -; GFX10-NEXT: s_mul_hi_u32 s8, s0, s3 -; GFX10-NEXT: s_add_u32 s6, s6, s7 +; GFX10-NEXT: s_mul_i32 s6, s0, s5 +; GFX10-NEXT: s_mul_i32 s7, s1, s4 ; GFX10-NEXT: s_mul_i32 s2, s2, s3 -; GFX10-NEXT: s_mul_i32 s9, s1, s4 -; GFX10-NEXT: s_cselect_b32 s7, 1, 0 -; GFX10-NEXT: s_add_u32 s6, s6, s8 -; GFX10-NEXT: s_cselect_b32 s8, 1, 0 -; GFX10-NEXT: s_mul_i32 s5, s0, s5 -; GFX10-NEXT: s_add_i32 s2, s2, s9 -; GFX10-NEXT: s_mul_hi_u32 s1, s1, s3 -; GFX10-NEXT: s_add_i32 s2, s2, s5 -; GFX10-NEXT: s_mul_hi_u32 s4, s0, s4 -; GFX10-NEXT: s_add_i32 s1, s2, s1 -; GFX10-NEXT: s_add_i32 s7, s7, s8 -; GFX10-NEXT: s_add_i32 s1, s1, s4 -; GFX10-NEXT: s_mul_i32 s0, s0, s3 -; GFX10-NEXT: s_add_i32 s2, s1, s7 -; GFX10-NEXT: s_mov_b32 s1, s6 +; GFX10-NEXT: s_add_i32 s6, s6, s7 +; GFX10-NEXT: s_mul_hi_u32 s7, s0, s3 +; GFX10-NEXT: s_add_i32 s6, s6, s2 +; GFX10-NEXT: s_mul_i32 s2, s0, s4 +; GFX10-NEXT: s_mul_i32 s5, s0, s3 +; GFX10-NEXT: s_mul_hi_u32 s0, s0, s4 +; GFX10-NEXT: s_add_u32 s2, s2, s7 +; GFX10-NEXT: s_mul_i32 s4, s1, s3 +; GFX10-NEXT: s_addc_u32 s0, s0, s6 +; GFX10-NEXT: s_mul_hi_u32 s3, s1, s3 +; GFX10-NEXT: s_add_u32 s1, s4, s2 +; GFX10-NEXT: s_addc_u32 s2, s3, s0 +; GFX10-NEXT: s_mov_b32 s0, s5 ; GFX10-NEXT: ; return to shader part epilog %result = mul i96 %num, %den %cast = bitcast i96 %result to <3 x i32> @@ -540,103 +502,33 @@ } define i96 @v_mul_i96(i96 %num, i96 %den) { -; GFX7-LABEL: v_mul_i96: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mul_lo_u32 v7, v1, v3 -; GFX7-NEXT: v_mul_lo_u32 v8, v0, v4 -; GFX7-NEXT: v_mul_hi_u32 v9, v0, v3 -; GFX7-NEXT: v_mul_lo_u32 v2, v2, v3 -; GFX7-NEXT: v_mul_lo_u32 v5, v0, v5 -; GFX7-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GFX7-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; GFX7-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; GFX7-NEXT: v_mul_lo_u32 v9, v1, v4 -; GFX7-NEXT: v_mul_hi_u32 v1, v1, v3 -; GFX7-NEXT: v_mul_lo_u32 v6, v0, v3 -; GFX7-NEXT: v_mul_hi_u32 v0, v0, v4 -; GFX7-NEXT: v_add_i32_e32 v2, vcc, v2, v9 -; GFX7-NEXT: v_add_i32_e32 v2, vcc, v2, v5 -; GFX7-NEXT: v_add_i32_e32 v1, vcc, v2, v1 -; GFX7-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; GFX7-NEXT: v_add_i32_e32 v2, vcc, v0, v8 -; GFX7-NEXT: v_mov_b32_e32 v0, v6 -; GFX7-NEXT: v_mov_b32_e32 v1, v7 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_mul_i96: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mul_lo_u32 v7, v1, v3 -; GFX8-NEXT: v_mul_lo_u32 v8, v0, v4 -; GFX8-NEXT: v_mul_hi_u32 v9, v0, v3 -; GFX8-NEXT: v_mul_lo_u32 v2, v2, v3 -; GFX8-NEXT: v_mul_lo_u32 v5, v0, v5 -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v8 -; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v9 -; GFX8-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v8, vcc, v8, v9 -; GFX8-NEXT: v_mul_lo_u32 v9, v1, v4 -; GFX8-NEXT: v_mul_hi_u32 v1, v1, v3 -; GFX8-NEXT: v_mul_lo_u32 v6, v0, v3 -; GFX8-NEXT: v_mul_hi_u32 v0, v0, v4 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v9 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 -; GFX8-NEXT: v_add_u32_e32 v1, vcc, v2, v1 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v1, v0 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v0, v8 -; GFX8-NEXT: v_mov_b32_e32 v0, v6 -; GFX8-NEXT: v_mov_b32_e32 v1, v7 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_mul_i96: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mul_lo_u32 v7, v1, v3 -; GFX9-NEXT: v_mul_lo_u32 v8, v0, v4 -; GFX9-NEXT: v_mul_hi_u32 v9, v0, v3 -; GFX9-NEXT: v_mul_lo_u32 v2, v2, v3 -; GFX9-NEXT: v_mul_lo_u32 v10, v1, v4 -; GFX9-NEXT: v_mul_lo_u32 v5, v0, v5 -; GFX9-NEXT: v_mul_hi_u32 v1, v1, v3 -; GFX9-NEXT: v_mul_lo_u32 v6, v0, v3 -; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v8 -; GFX9-NEXT: v_mul_hi_u32 v0, v0, v4 -; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v9 -; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GFX9-NEXT: v_add_u32_e32 v2, v2, v10 -; GFX9-NEXT: v_add_u32_e32 v3, v8, v9 -; GFX9-NEXT: v_add3_u32 v1, v2, v5, v1 -; GFX9-NEXT: v_add3_u32 v2, v1, v0, v3 -; GFX9-NEXT: v_mov_b32_e32 v0, v6 -; GFX9-NEXT: v_mov_b32_e32 v1, v7 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: v_mul_i96: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v6, v0 +; GCN-NEXT: v_mov_b32_e32 v7, v1 +; GCN-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v5, 0 +; GCN-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v7, v4, v[0:1] +; GCN-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v3, 0 +; GCN-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v2, v3, v[8:9] +; GCN-NEXT: v_mov_b32_e32 v2, v8 +; GCN-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v6, v4, v[1:2] +; GCN-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v7, v3, v[1:2] +; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_mul_i96: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_mul_lo_u32 v6, v1, v3 -; GFX10-NEXT: v_mul_lo_u32 v7, v0, v4 -; GFX10-NEXT: v_mul_hi_u32 v8, v0, v3 +; GFX10-NEXT: v_mov_b32_e32 v6, v0 +; GFX10-NEXT: v_mov_b32_e32 v7, v1 ; GFX10-NEXT: v_mul_lo_u32 v2, v2, v3 -; GFX10-NEXT: v_mul_lo_u32 v9, v1, v4 -; GFX10-NEXT: v_mul_lo_u32 v5, v0, v5 -; GFX10-NEXT: v_mul_hi_u32 v4, v0, v4 -; GFX10-NEXT: v_mul_lo_u32 v0, v0, v3 -; GFX10-NEXT: v_add_co_u32 v6, s4, v6, v7 -; GFX10-NEXT: v_mul_hi_u32 v7, v1, v3 -; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, 1, s4 -; GFX10-NEXT: v_add_nc_u32_e32 v2, v2, v9 -; GFX10-NEXT: v_add_co_u32 v1, s4, v6, v8 -; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s4 -; GFX10-NEXT: v_add3_u32 v2, v2, v5, v7 -; GFX10-NEXT: v_add_nc_u32_e32 v3, v10, v6 -; GFX10-NEXT: v_add3_u32 v2, v2, v4, v3 +; GFX10-NEXT: v_mul_lo_u32 v5, v6, v5 +; GFX10-NEXT: v_mul_lo_u32 v8, v7, v4 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v6, v3, 0 +; GFX10-NEXT: v_add3_u32 v2, v5, v8, v2 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s4, v6, v4, v[1:2] +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s4, v7, v3, v[1:2] ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = mul i96 %num, %den ret i96 %result @@ -647,156 +539,130 @@ ; GFX7: ; %bb.0: ; GFX7-NEXT: v_mov_b32_e32 v0, s4 ; GFX7-NEXT: v_mul_hi_u32 v0, s0, v0 -; GFX7-NEXT: s_mul_i32 s9, s1, s4 -; GFX7-NEXT: s_mul_i32 s10, s0, s5 -; GFX7-NEXT: s_add_u32 s9, s9, s10 -; GFX7-NEXT: v_add_i32_e32 v0, vcc, s9, v0 -; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_cselect_b32 s10, 1, 0 -; GFX7-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX7-NEXT: v_mov_b32_e32 v1, s5 +; GFX7-NEXT: v_mul_hi_u32 v2, s1, v1 +; GFX7-NEXT: s_mul_i32 s10, s0, s6 +; GFX7-NEXT: v_readfirstlane_b32 s9, v0 +; GFX7-NEXT: v_mov_b32_e32 v0, s6 +; GFX7-NEXT: v_mul_hi_u32 v0, s0, v0 +; GFX7-NEXT: v_readfirstlane_b32 s13, v2 +; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mul_hi_u32 v2, v2, s4 -; GFX7-NEXT: v_add_i32_e32 v1, vcc, s10, v1 -; GFX7-NEXT: s_mul_i32 s9, s2, s4 -; GFX7-NEXT: s_mul_i32 s10, s1, s5 -; GFX7-NEXT: s_mul_i32 s11, s0, s6 -; GFX7-NEXT: v_mov_b32_e32 v3, s5 -; GFX7-NEXT: s_add_u32 s9, s9, s10 -; GFX7-NEXT: v_mul_hi_u32 v4, s0, v3 -; GFX7-NEXT: s_cselect_b32 s10, 1, 0 -; GFX7-NEXT: s_add_u32 s9, s9, s11 -; GFX7-NEXT: s_cselect_b32 s11, 1, 0 -; GFX7-NEXT: v_add_i32_e32 v2, vcc, s9, v2 -; GFX7-NEXT: s_add_i32 s10, s10, s11 -; GFX7-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v5, vcc, s10, v5 -; GFX7-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; GFX7-NEXT: v_add_i32_e32 v1, vcc, v2, v1 -; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v2, vcc, v4, v2 -; GFX7-NEXT: v_mov_b32_e32 v4, s2 -; GFX7-NEXT: s_mul_i32 s3, s3, s4 -; GFX7-NEXT: s_mul_i32 s5, s2, s5 -; GFX7-NEXT: v_mul_hi_u32 v4, v4, s4 -; GFX7-NEXT: v_mov_b32_e32 v5, s6 +; GFX7-NEXT: s_mul_i32 s12, s1, s5 +; GFX7-NEXT: v_readfirstlane_b32 s11, v0 +; GFX7-NEXT: s_add_u32 s10, s12, s10 +; GFX7-NEXT: v_mul_hi_u32 v1, s0, v1 +; GFX7-NEXT: v_mov_b32_e32 v0, s1 +; GFX7-NEXT: s_addc_u32 s11, s13, s11 +; GFX7-NEXT: s_mul_i32 s12, s2, s4 +; GFX7-NEXT: v_readfirstlane_b32 s13, v2 +; GFX7-NEXT: s_add_u32 s10, s12, s10 +; GFX7-NEXT: v_mul_hi_u32 v0, v0, s4 +; GFX7-NEXT: s_addc_u32 s11, s13, s11 +; GFX7-NEXT: s_mul_i32 s12, s0, s5 +; GFX7-NEXT: v_readfirstlane_b32 s13, v1 +; GFX7-NEXT: s_add_u32 s9, s12, s9 +; GFX7-NEXT: s_addc_u32 s10, s13, s10 +; GFX7-NEXT: s_mul_i32 s13, s1, s4 +; GFX7-NEXT: s_cselect_b32 s12, 1, 0 +; GFX7-NEXT: v_readfirstlane_b32 s14, v0 +; GFX7-NEXT: s_add_u32 s9, s13, s9 ; GFX7-NEXT: s_mul_i32 s8, s0, s4 -; GFX7-NEXT: s_mul_i32 s9, s1, s6 -; GFX7-NEXT: s_mul_i32 s7, s0, s7 -; GFX7-NEXT: v_mul_hi_u32 v3, s1, v3 -; GFX7-NEXT: v_mul_hi_u32 v5, s0, v5 -; GFX7-NEXT: s_add_i32 s0, s3, s5 -; GFX7-NEXT: s_add_i32 s0, s0, s9 -; GFX7-NEXT: s_add_i32 s0, s0, s7 -; GFX7-NEXT: v_add_i32_e32 v4, vcc, s0, v4 -; GFX7-NEXT: v_add_i32_e32 v3, vcc, v4, v3 -; GFX7-NEXT: v_add_i32_e32 v3, vcc, v3, v5 -; GFX7-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; GFX7-NEXT: v_readfirstlane_b32 s1, v0 -; GFX7-NEXT: v_readfirstlane_b32 s2, v1 -; GFX7-NEXT: v_readfirstlane_b32 s3, v2 +; GFX7-NEXT: s_addc_u32 s10, s14, s10 +; GFX7-NEXT: s_mul_i32 s0, s0, s7 +; GFX7-NEXT: s_addc_u32 s0, s11, s0 +; GFX7-NEXT: s_mul_i32 s1, s1, s6 +; GFX7-NEXT: s_cmp_lg_u32 s12, 0 +; GFX7-NEXT: s_addc_u32 s0, s0, s1 +; GFX7-NEXT: s_mul_i32 s2, s2, s5 +; GFX7-NEXT: s_add_u32 s0, s2, s0 +; GFX7-NEXT: s_mul_i32 s3, s3, s4 +; GFX7-NEXT: s_add_u32 s3, s3, s0 ; GFX7-NEXT: s_mov_b32 s0, s8 +; GFX7-NEXT: s_mov_b32 s1, s9 +; GFX7-NEXT: s_mov_b32 s2, s10 ; GFX7-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_mul_i128: ; GFX8: ; %bb.0: ; GFX8-NEXT: v_mov_b32_e32 v0, s4 ; GFX8-NEXT: v_mul_hi_u32 v0, s0, v0 -; GFX8-NEXT: s_mul_i32 s9, s1, s4 -; GFX8-NEXT: s_mul_i32 s10, s0, s5 -; GFX8-NEXT: s_add_u32 s9, s9, s10 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, s9, v0 -; GFX8-NEXT: v_mov_b32_e32 v2, s1 -; GFX8-NEXT: s_cselect_b32 s10, 1, 0 -; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX8-NEXT: v_mov_b32_e32 v1, s5 +; GFX8-NEXT: v_mul_hi_u32 v2, s1, v1 +; GFX8-NEXT: s_mul_i32 s10, s0, s6 +; GFX8-NEXT: v_readfirstlane_b32 s9, v0 +; GFX8-NEXT: v_mov_b32_e32 v0, s6 +; GFX8-NEXT: v_mul_hi_u32 v0, s0, v0 +; GFX8-NEXT: v_readfirstlane_b32 s13, v2 +; GFX8-NEXT: v_mov_b32_e32 v2, s2 ; GFX8-NEXT: v_mul_hi_u32 v2, v2, s4 -; GFX8-NEXT: v_add_u32_e32 v1, vcc, s10, v1 -; GFX8-NEXT: s_mul_i32 s9, s2, s4 -; GFX8-NEXT: s_mul_i32 s10, s1, s5 -; GFX8-NEXT: s_mul_i32 s11, s0, s6 -; GFX8-NEXT: v_mov_b32_e32 v3, s5 -; GFX8-NEXT: s_add_u32 s9, s9, s10 -; GFX8-NEXT: v_mul_hi_u32 v4, s0, v3 -; GFX8-NEXT: s_cselect_b32 s10, 1, 0 -; GFX8-NEXT: s_add_u32 s9, s9, s11 -; GFX8-NEXT: s_cselect_b32 s11, 1, 0 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, s9, v2 -; GFX8-NEXT: s_add_i32 s10, s10, s11 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v5, vcc, s10, v5 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v4 -; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v5, v4 -; GFX8-NEXT: v_add_u32_e32 v1, vcc, v2, v1 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v4, v2 -; GFX8-NEXT: v_mov_b32_e32 v4, s2 -; GFX8-NEXT: s_mul_i32 s3, s3, s4 -; GFX8-NEXT: s_mul_i32 s5, s2, s5 -; GFX8-NEXT: v_mul_hi_u32 v4, v4, s4 -; GFX8-NEXT: v_mov_b32_e32 v5, s6 +; GFX8-NEXT: s_mul_i32 s12, s1, s5 +; GFX8-NEXT: v_readfirstlane_b32 s11, v0 +; GFX8-NEXT: s_add_u32 s10, s12, s10 +; GFX8-NEXT: v_mul_hi_u32 v1, s0, v1 +; GFX8-NEXT: v_mov_b32_e32 v0, s1 +; GFX8-NEXT: s_addc_u32 s11, s13, s11 +; GFX8-NEXT: s_mul_i32 s12, s2, s4 +; GFX8-NEXT: v_readfirstlane_b32 s13, v2 +; GFX8-NEXT: s_add_u32 s10, s12, s10 +; GFX8-NEXT: v_mul_hi_u32 v0, v0, s4 +; GFX8-NEXT: s_addc_u32 s11, s13, s11 +; GFX8-NEXT: s_mul_i32 s12, s0, s5 +; GFX8-NEXT: v_readfirstlane_b32 s13, v1 +; GFX8-NEXT: s_add_u32 s9, s12, s9 +; GFX8-NEXT: s_addc_u32 s10, s13, s10 +; GFX8-NEXT: s_mul_i32 s13, s1, s4 +; GFX8-NEXT: s_cselect_b32 s12, 1, 0 +; GFX8-NEXT: v_readfirstlane_b32 s14, v0 +; GFX8-NEXT: s_add_u32 s9, s13, s9 ; GFX8-NEXT: s_mul_i32 s8, s0, s4 -; GFX8-NEXT: s_mul_i32 s9, s1, s6 -; GFX8-NEXT: s_mul_i32 s7, s0, s7 -; GFX8-NEXT: v_mul_hi_u32 v3, s1, v3 -; GFX8-NEXT: v_mul_hi_u32 v5, s0, v5 -; GFX8-NEXT: s_add_i32 s0, s3, s5 -; GFX8-NEXT: s_add_i32 s0, s0, s9 -; GFX8-NEXT: s_add_i32 s0, s0, s7 -; GFX8-NEXT: v_add_u32_e32 v4, vcc, s0, v4 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v4, v3 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v5 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 -; GFX8-NEXT: v_readfirstlane_b32 s1, v0 -; GFX8-NEXT: v_readfirstlane_b32 s2, v1 -; GFX8-NEXT: v_readfirstlane_b32 s3, v2 +; GFX8-NEXT: s_addc_u32 s10, s14, s10 +; GFX8-NEXT: s_mul_i32 s0, s0, s7 +; GFX8-NEXT: s_addc_u32 s0, s11, s0 +; GFX8-NEXT: s_mul_i32 s1, s1, s6 +; GFX8-NEXT: s_cmp_lg_u32 s12, 0 +; GFX8-NEXT: s_addc_u32 s0, s0, s1 +; GFX8-NEXT: s_mul_i32 s2, s2, s5 +; GFX8-NEXT: s_add_u32 s0, s2, s0 +; GFX8-NEXT: s_mul_i32 s3, s3, s4 +; GFX8-NEXT: s_add_u32 s3, s3, s0 ; GFX8-NEXT: s_mov_b32 s0, s8 +; GFX8-NEXT: s_mov_b32 s1, s9 +; GFX8-NEXT: s_mov_b32 s2, s10 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_mul_i128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mul_i32 s9, s1, s4 -; GFX9-NEXT: s_mul_i32 s10, s0, s5 -; GFX9-NEXT: s_mul_hi_u32 s11, s0, s4 -; GFX9-NEXT: s_add_u32 s9, s9, s10 -; GFX9-NEXT: s_cselect_b32 s10, 1, 0 -; GFX9-NEXT: s_add_u32 s9, s9, s11 -; GFX9-NEXT: s_cselect_b32 s11, 1, 0 -; GFX9-NEXT: s_add_i32 s10, s10, s11 -; GFX9-NEXT: s_mul_i32 s11, s2, s4 +; GFX9-NEXT: s_mul_i32 s10, s0, s6 ; GFX9-NEXT: s_mul_i32 s12, s1, s5 -; GFX9-NEXT: s_mul_i32 s13, s0, s6 -; GFX9-NEXT: s_add_u32 s11, s11, s12 +; GFX9-NEXT: s_mul_hi_u32 s11, s0, s6 +; GFX9-NEXT: s_mul_hi_u32 s13, s1, s5 +; GFX9-NEXT: s_add_u32 s10, s12, s10 +; GFX9-NEXT: s_addc_u32 s11, s13, s11 +; GFX9-NEXT: s_mul_i32 s12, s2, s4 +; GFX9-NEXT: s_mul_hi_u32 s13, s2, s4 +; GFX9-NEXT: s_add_u32 s10, s12, s10 +; GFX9-NEXT: s_mul_hi_u32 s9, s0, s4 +; GFX9-NEXT: s_addc_u32 s11, s13, s11 +; GFX9-NEXT: s_mul_i32 s12, s0, s5 +; GFX9-NEXT: s_mul_hi_u32 s13, s0, s5 +; GFX9-NEXT: s_add_u32 s9, s12, s9 +; GFX9-NEXT: s_addc_u32 s10, s13, s10 +; GFX9-NEXT: s_mul_i32 s13, s1, s4 ; GFX9-NEXT: s_cselect_b32 s12, 1, 0 -; GFX9-NEXT: s_add_u32 s11, s11, s13 -; GFX9-NEXT: s_cselect_b32 s13, 1, 0 ; GFX9-NEXT: s_mul_hi_u32 s14, s1, s4 -; GFX9-NEXT: s_add_i32 s12, s12, s13 -; GFX9-NEXT: s_add_u32 s11, s11, s14 -; GFX9-NEXT: s_cselect_b32 s13, 1, 0 -; GFX9-NEXT: s_mul_hi_u32 s15, s0, s5 -; GFX9-NEXT: s_add_i32 s12, s12, s13 -; GFX9-NEXT: s_add_u32 s11, s11, s15 -; GFX9-NEXT: s_cselect_b32 s13, 1, 0 -; GFX9-NEXT: s_add_i32 s12, s12, s13 -; GFX9-NEXT: s_add_u32 s10, s11, s10 -; GFX9-NEXT: s_cselect_b32 s11, 1, 0 -; GFX9-NEXT: s_add_i32 s12, s12, s11 -; GFX9-NEXT: s_mul_i32 s3, s3, s4 -; GFX9-NEXT: s_mul_i32 s11, s2, s5 -; GFX9-NEXT: s_mul_i32 s13, s1, s6 -; GFX9-NEXT: s_add_i32 s3, s3, s11 -; GFX9-NEXT: s_mul_i32 s7, s0, s7 -; GFX9-NEXT: s_add_i32 s3, s3, s13 -; GFX9-NEXT: s_mul_hi_u32 s2, s2, s4 -; GFX9-NEXT: s_add_i32 s3, s3, s7 -; GFX9-NEXT: s_mul_hi_u32 s1, s1, s5 -; GFX9-NEXT: s_add_i32 s2, s3, s2 +; GFX9-NEXT: s_add_u32 s9, s13, s9 ; GFX9-NEXT: s_mul_i32 s8, s0, s4 -; GFX9-NEXT: s_mul_hi_u32 s0, s0, s6 -; GFX9-NEXT: s_add_i32 s1, s2, s1 -; GFX9-NEXT: s_add_i32 s0, s1, s0 -; GFX9-NEXT: s_add_i32 s3, s0, s12 +; GFX9-NEXT: s_addc_u32 s10, s14, s10 +; GFX9-NEXT: s_mul_i32 s0, s0, s7 +; GFX9-NEXT: s_addc_u32 s0, s11, s0 +; GFX9-NEXT: s_mul_i32 s1, s1, s6 +; GFX9-NEXT: s_cmp_lg_u32 s12, 0 +; GFX9-NEXT: s_addc_u32 s0, s0, s1 +; GFX9-NEXT: s_mul_i32 s2, s2, s5 +; GFX9-NEXT: s_add_u32 s0, s2, s0 +; GFX9-NEXT: s_mul_i32 s3, s3, s4 +; GFX9-NEXT: s_add_u32 s3, s3, s0 ; GFX9-NEXT: s_mov_b32 s0, s8 ; GFX9-NEXT: s_mov_b32 s1, s9 ; GFX9-NEXT: s_mov_b32 s2, s10 @@ -804,50 +670,38 @@ ; ; GFX10-LABEL: s_mul_i128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_mul_i32 s8, s1, s4 -; GFX10-NEXT: s_mul_i32 s9, s0, s5 -; GFX10-NEXT: s_mul_hi_u32 s10, s0, s4 -; GFX10-NEXT: s_add_u32 s8, s8, s9 -; GFX10-NEXT: s_cselect_b32 s9, 1, 0 -; GFX10-NEXT: s_add_u32 s8, s8, s10 -; GFX10-NEXT: s_cselect_b32 s10, 1, 0 +; GFX10-NEXT: s_mul_i32 s9, s0, s6 ; GFX10-NEXT: s_mul_i32 s11, s1, s5 -; GFX10-NEXT: s_add_i32 s9, s9, s10 -; GFX10-NEXT: s_mul_i32 s10, s2, s4 -; GFX10-NEXT: s_mul_i32 s12, s0, s6 -; GFX10-NEXT: s_add_u32 s10, s10, s11 -; GFX10-NEXT: s_cselect_b32 s11, 1, 0 -; GFX10-NEXT: s_add_u32 s10, s10, s12 -; GFX10-NEXT: s_cselect_b32 s12, 1, 0 +; GFX10-NEXT: s_mul_hi_u32 s10, s0, s6 +; GFX10-NEXT: s_mul_hi_u32 s12, s1, s5 +; GFX10-NEXT: s_add_u32 s9, s11, s9 +; GFX10-NEXT: s_mul_i32 s11, s2, s4 +; GFX10-NEXT: s_addc_u32 s10, s12, s10 +; GFX10-NEXT: s_mul_hi_u32 s12, s2, s4 +; GFX10-NEXT: s_mul_hi_u32 s8, s0, s4 +; GFX10-NEXT: s_add_u32 s9, s11, s9 +; GFX10-NEXT: s_mul_i32 s11, s0, s5 +; GFX10-NEXT: s_addc_u32 s10, s12, s10 +; GFX10-NEXT: s_mul_hi_u32 s12, s0, s5 +; GFX10-NEXT: s_add_u32 s8, s11, s8 +; GFX10-NEXT: s_addc_u32 s9, s12, s9 +; GFX10-NEXT: s_mul_i32 s12, s1, s4 ; GFX10-NEXT: s_mul_hi_u32 s13, s1, s4 -; GFX10-NEXT: s_add_i32 s11, s11, s12 -; GFX10-NEXT: s_add_u32 s10, s10, s13 -; GFX10-NEXT: s_cselect_b32 s12, 1, 0 -; GFX10-NEXT: s_mul_hi_u32 s14, s0, s5 -; GFX10-NEXT: s_add_i32 s11, s11, s12 -; GFX10-NEXT: s_add_u32 s10, s10, s14 -; GFX10-NEXT: s_cselect_b32 s12, 1, 0 +; GFX10-NEXT: s_cselect_b32 s11, 1, 0 +; GFX10-NEXT: s_add_u32 s8, s12, s8 +; GFX10-NEXT: s_mul_i32 s12, s0, s7 +; GFX10-NEXT: s_addc_u32 s7, s13, s9 +; GFX10-NEXT: s_addc_u32 s9, s10, s12 +; GFX10-NEXT: s_mul_i32 s1, s1, s6 +; GFX10-NEXT: s_cmp_lg_u32 s11, 0 +; GFX10-NEXT: s_mul_i32 s2, s2, s5 +; GFX10-NEXT: s_addc_u32 s1, s9, s1 ; GFX10-NEXT: s_mul_i32 s3, s3, s4 -; GFX10-NEXT: s_add_i32 s11, s11, s12 -; GFX10-NEXT: s_mul_i32 s12, s2, s5 -; GFX10-NEXT: s_add_u32 s9, s10, s9 -; GFX10-NEXT: s_cselect_b32 s10, 1, 0 -; GFX10-NEXT: s_mul_i32 s13, s1, s6 -; GFX10-NEXT: s_add_i32 s3, s3, s12 -; GFX10-NEXT: s_mul_i32 s7, s0, s7 -; GFX10-NEXT: s_add_i32 s3, s3, s13 -; GFX10-NEXT: s_mul_hi_u32 s2, s2, s4 -; GFX10-NEXT: s_add_i32 s3, s3, s7 -; GFX10-NEXT: s_mul_hi_u32 s1, s1, s5 -; GFX10-NEXT: s_add_i32 s2, s3, s2 -; GFX10-NEXT: s_mul_hi_u32 s3, s0, s6 -; GFX10-NEXT: s_add_i32 s1, s2, s1 -; GFX10-NEXT: s_add_i32 s11, s11, s10 -; GFX10-NEXT: s_add_i32 s1, s1, s3 +; GFX10-NEXT: s_add_i32 s1, s1, s2 ; GFX10-NEXT: s_mul_i32 s0, s0, s4 -; GFX10-NEXT: s_add_i32 s3, s1, s11 +; GFX10-NEXT: s_add_i32 s3, s1, s3 ; GFX10-NEXT: s_mov_b32 s1, s8 -; GFX10-NEXT: s_mov_b32 s2, s9 +; GFX10-NEXT: s_mov_b32 s2, s7 ; GFX10-NEXT: ; return to shader part epilog %result = mul i128 %num, %den %cast = bitcast i128 %result to <4 x i32> @@ -858,190 +712,87 @@ ; GFX7-LABEL: v_mul_i128: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mul_lo_u32 v9, v1, v4 -; GFX7-NEXT: v_mul_lo_u32 v10, v0, v5 -; GFX7-NEXT: v_mul_hi_u32 v11, v0, v4 -; GFX7-NEXT: v_mul_lo_u32 v12, v1, v5 -; GFX7-NEXT: v_mul_lo_u32 v13, v0, v6 -; GFX7-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; GFX7-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; GFX7-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; GFX7-NEXT: v_mul_lo_u32 v11, v2, v4 -; GFX7-NEXT: v_mul_hi_u32 v14, v1, v4 -; GFX7-NEXT: v_mul_hi_u32 v15, v0, v5 -; GFX7-NEXT: v_mul_lo_u32 v3, v3, v4 -; GFX7-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; GFX7-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v11, vcc, v11, v13 -; GFX7-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; GFX7-NEXT: v_add_i32_e32 v11, vcc, v11, v14 -; GFX7-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; GFX7-NEXT: v_add_i32_e32 v11, vcc, v11, v15 -; GFX7-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; GFX7-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GFX7-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; GFX7-NEXT: v_mul_lo_u32 v12, v2, v5 -; GFX7-NEXT: v_mul_lo_u32 v13, v1, v6 -; GFX7-NEXT: v_mul_lo_u32 v7, v0, v7 -; GFX7-NEXT: v_mul_hi_u32 v2, v2, v4 -; GFX7-NEXT: v_mul_hi_u32 v1, v1, v5 -; GFX7-NEXT: v_add_i32_e32 v3, vcc, v3, v12 -; GFX7-NEXT: v_mul_lo_u32 v8, v0, v4 -; GFX7-NEXT: v_mul_hi_u32 v0, v0, v6 -; GFX7-NEXT: v_add_i32_e32 v3, vcc, v3, v13 -; GFX7-NEXT: v_add_i32_e32 v3, vcc, v3, v7 -; GFX7-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; GFX7-NEXT: v_add_i32_e32 v1, vcc, v2, v1 -; GFX7-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; GFX7-NEXT: v_add_i32_e32 v3, vcc, v0, v11 -; GFX7-NEXT: v_mov_b32_e32 v0, v8 -; GFX7-NEXT: v_mov_b32_e32 v1, v9 -; GFX7-NEXT: v_mov_b32_e32 v2, v10 +; GFX7-NEXT: v_mov_b32_e32 v8, v0 +; GFX7-NEXT: v_mov_b32_e32 v9, v1 +; GFX7-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v8, v6, 0 +; GFX7-NEXT: v_mov_b32_e32 v10, v2 +; GFX7-NEXT: v_mul_lo_u32 v7, v8, v7 +; GFX7-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v9, v5, v[0:1] +; GFX7-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v8, v4, 0 +; GFX7-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v10, v4, v[11:12] +; GFX7-NEXT: v_mul_lo_u32 v6, v9, v6 +; GFX7-NEXT: v_mov_b32_e32 v2, v11 +; GFX7-NEXT: v_mad_u64_u32 v[1:2], vcc, v8, v5, v[1:2] +; GFX7-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v9, v4, v[1:2] +; GFX7-NEXT: v_addc_u32_e64 v7, s[4:5], v12, v7, s[4:5] +; GFX7-NEXT: v_addc_u32_e32 v6, vcc, v7, v6, vcc +; GFX7-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v10, v5, v[6:7] +; GFX7-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v3, v4, v[5:6] ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_mul_i128: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mul_lo_u32 v9, v1, v4 -; GFX8-NEXT: v_mul_lo_u32 v10, v0, v5 -; GFX8-NEXT: v_mul_hi_u32 v11, v0, v4 -; GFX8-NEXT: v_mul_lo_u32 v12, v1, v5 -; GFX8-NEXT: v_mul_lo_u32 v13, v0, v6 -; GFX8-NEXT: v_add_u32_e32 v9, vcc, v9, v10 -; GFX8-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v9, vcc, v9, v11 -; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v10, vcc, v10, v11 -; GFX8-NEXT: v_mul_lo_u32 v11, v2, v4 -; GFX8-NEXT: v_mul_hi_u32 v14, v1, v4 -; GFX8-NEXT: v_mul_hi_u32 v15, v0, v5 -; GFX8-NEXT: v_mul_lo_u32 v3, v3, v4 -; GFX8-NEXT: v_add_u32_e32 v11, vcc, v11, v12 -; GFX8-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v11, vcc, v11, v13 -; GFX8-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v12, vcc, v12, v13 -; GFX8-NEXT: v_add_u32_e32 v11, vcc, v11, v14 -; GFX8-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v12, vcc, v12, v13 -; GFX8-NEXT: v_add_u32_e32 v11, vcc, v11, v15 -; GFX8-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v12, vcc, v12, v13 -; GFX8-NEXT: v_add_u32_e32 v10, vcc, v11, v10 -; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v11, vcc, v12, v11 -; GFX8-NEXT: v_mul_lo_u32 v12, v2, v5 -; GFX8-NEXT: v_mul_lo_u32 v13, v1, v6 -; GFX8-NEXT: v_mul_lo_u32 v7, v0, v7 -; GFX8-NEXT: v_mul_hi_u32 v2, v2, v4 -; GFX8-NEXT: v_mul_hi_u32 v1, v1, v5 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v12 -; GFX8-NEXT: v_mul_lo_u32 v8, v0, v4 -; GFX8-NEXT: v_mul_hi_u32 v0, v0, v6 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v13 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v7 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 -; GFX8-NEXT: v_add_u32_e32 v1, vcc, v2, v1 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v1, v0 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v0, v11 -; GFX8-NEXT: v_mov_b32_e32 v0, v8 -; GFX8-NEXT: v_mov_b32_e32 v1, v9 -; GFX8-NEXT: v_mov_b32_e32 v2, v10 +; GFX8-NEXT: v_mov_b32_e32 v8, v0 +; GFX8-NEXT: v_mov_b32_e32 v9, v1 +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v8, v6, 0 +; GFX8-NEXT: v_mov_b32_e32 v10, v2 +; GFX8-NEXT: v_mul_lo_u32 v7, v8, v7 +; GFX8-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v9, v5, v[0:1] +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v8, v4, 0 +; GFX8-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v10, v4, v[11:12] +; GFX8-NEXT: v_mul_lo_u32 v6, v9, v6 +; GFX8-NEXT: v_mov_b32_e32 v2, v11 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], vcc, v8, v5, v[1:2] +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v9, v4, v[1:2] +; GFX8-NEXT: v_addc_u32_e64 v7, s[4:5], v12, v7, s[4:5] +; GFX8-NEXT: v_addc_u32_e32 v6, vcc, v7, v6, vcc +; GFX8-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v10, v5, v[6:7] +; GFX8-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v3, v4, v[5:6] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_mul_i128: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mul_lo_u32 v9, v1, v4 -; GFX9-NEXT: v_mul_lo_u32 v10, v0, v5 -; GFX9-NEXT: v_mul_hi_u32 v11, v0, v4 -; GFX9-NEXT: v_mul_lo_u32 v12, v1, v5 -; GFX9-NEXT: v_mul_lo_u32 v13, v0, v6 -; GFX9-NEXT: v_add_co_u32_e32 v9, vcc, v9, v10 -; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v9, vcc, v9, v11 -; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GFX9-NEXT: v_add_u32_e32 v10, v10, v11 -; GFX9-NEXT: v_mul_lo_u32 v11, v2, v4 -; GFX9-NEXT: v_mul_hi_u32 v14, v1, v4 -; GFX9-NEXT: v_mul_hi_u32 v15, v0, v5 -; GFX9-NEXT: v_mul_lo_u32 v3, v3, v4 -; GFX9-NEXT: v_add_co_u32_e32 v11, vcc, v11, v12 -; GFX9-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v11, vcc, v11, v13 -; GFX9-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v11, vcc, v11, v14 -; GFX9-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v11, vcc, v11, v15 -; GFX9-NEXT: v_add3_u32 v12, v12, v13, v14 -; GFX9-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v10, vcc, v11, v10 -; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v11, v12, v13, v11 -; GFX9-NEXT: v_mul_lo_u32 v12, v2, v5 -; GFX9-NEXT: v_mul_lo_u32 v13, v1, v6 -; GFX9-NEXT: v_mul_lo_u32 v7, v0, v7 -; GFX9-NEXT: v_mul_hi_u32 v2, v2, v4 -; GFX9-NEXT: v_mul_hi_u32 v1, v1, v5 -; GFX9-NEXT: v_mul_lo_u32 v8, v0, v4 -; GFX9-NEXT: v_mul_hi_u32 v0, v0, v6 -; GFX9-NEXT: v_add_u32_e32 v3, v3, v12 -; GFX9-NEXT: v_add3_u32 v3, v3, v13, v7 -; GFX9-NEXT: v_add3_u32 v1, v3, v2, v1 -; GFX9-NEXT: v_add3_u32 v3, v1, v0, v11 -; GFX9-NEXT: v_mov_b32_e32 v0, v8 -; GFX9-NEXT: v_mov_b32_e32 v1, v9 -; GFX9-NEXT: v_mov_b32_e32 v2, v10 +; GFX9-NEXT: v_mov_b32_e32 v8, v0 +; GFX9-NEXT: v_mov_b32_e32 v9, v1 +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v8, v6, 0 +; GFX9-NEXT: v_mov_b32_e32 v10, v2 +; GFX9-NEXT: v_mul_lo_u32 v7, v8, v7 +; GFX9-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v9, v5, v[0:1] +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v8, v4, 0 +; GFX9-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v10, v4, v[11:12] +; GFX9-NEXT: v_mul_lo_u32 v6, v9, v6 +; GFX9-NEXT: v_mov_b32_e32 v2, v11 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], vcc, v8, v5, v[1:2] +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v9, v4, v[1:2] +; GFX9-NEXT: v_addc_co_u32_e64 v7, s[4:5], v12, v7, s[4:5] +; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, v7, v6, vcc +; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v10, v5, v[6:7] +; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v3, v4, v[5:6] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_mul_i128: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_mul_lo_u32 v8, v2, v4 -; GFX10-NEXT: v_mul_lo_u32 v9, v1, v5 -; GFX10-NEXT: v_mul_lo_u32 v10, v1, v4 -; GFX10-NEXT: v_mul_lo_u32 v11, v0, v5 -; GFX10-NEXT: v_mul_hi_u32 v12, v0, v4 -; GFX10-NEXT: v_mul_lo_u32 v13, v0, v6 -; GFX10-NEXT: v_mul_hi_u32 v15, v0, v5 +; GFX10-NEXT: v_mov_b32_e32 v8, v0 +; GFX10-NEXT: v_mov_b32_e32 v9, v1 +; GFX10-NEXT: v_mov_b32_e32 v10, v2 ; GFX10-NEXT: v_mul_lo_u32 v3, v3, v4 -; GFX10-NEXT: v_mul_lo_u32 v7, v0, v7 -; GFX10-NEXT: v_add_co_u32 v8, s4, v8, v9 -; GFX10-NEXT: v_add_co_u32 v9, s5, v10, v11 -; GFX10-NEXT: v_mul_hi_u32 v11, v1, v4 -; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, 1, s5 -; GFX10-NEXT: v_cndmask_b32_e64 v14, 0, 1, s4 -; GFX10-NEXT: v_add_co_u32 v13, s4, v8, v13 -; GFX10-NEXT: v_add_co_u32 v8, s5, v9, v12 -; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, 1, s5 -; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, 1, s4 -; GFX10-NEXT: v_add_co_u32 v11, s4, v13, v11 -; GFX10-NEXT: v_cndmask_b32_e64 v13, 0, 1, s4 -; GFX10-NEXT: v_add_nc_u32_e32 v9, v10, v9 -; GFX10-NEXT: v_mul_lo_u32 v10, v2, v5 -; GFX10-NEXT: v_add_co_u32 v11, s4, v11, v15 -; GFX10-NEXT: v_add3_u32 v12, v14, v12, v13 -; GFX10-NEXT: v_mul_lo_u32 v13, v1, v6 -; GFX10-NEXT: v_mul_hi_u32 v15, v2, v4 -; GFX10-NEXT: v_mul_hi_u32 v1, v1, v5 -; GFX10-NEXT: v_cndmask_b32_e64 v14, 0, 1, s4 -; GFX10-NEXT: v_add_nc_u32_e32 v3, v3, v10 -; GFX10-NEXT: v_add_co_u32 v2, s4, v11, v9 -; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s4 -; GFX10-NEXT: v_mul_hi_u32 v6, v0, v6 -; GFX10-NEXT: v_add3_u32 v3, v3, v13, v7 -; GFX10-NEXT: v_mul_lo_u32 v0, v0, v4 -; GFX10-NEXT: v_add3_u32 v4, v12, v14, v5 -; GFX10-NEXT: v_add3_u32 v1, v3, v15, v1 -; GFX10-NEXT: v_add3_u32 v3, v1, v6, v4 -; GFX10-NEXT: v_mov_b32_e32 v1, v8 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v8, v6, 0 +; GFX10-NEXT: v_mul_lo_u32 v7, v8, v7 +; GFX10-NEXT: v_mul_lo_u32 v6, v9, v6 +; GFX10-NEXT: v_mad_u64_u32 v[11:12], s4, v9, v5, v[0:1] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v8, v4, 0 +; GFX10-NEXT: v_mad_u64_u32 v[11:12], s4, v10, v4, v[11:12] +; GFX10-NEXT: v_mov_b32_e32 v2, v11 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], vcc_lo, v8, v5, v[1:2] +; GFX10-NEXT: v_mul_lo_u32 v5, v10, v5 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s4, v9, v4, v[1:2] +; GFX10-NEXT: v_add_co_ci_u32_e64 v7, s4, v12, v7, s4 +; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, v7, v6, vcc_lo +; GFX10-NEXT: v_add3_u32 v3, v4, v5, v3 ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = mul i128 %num, %den ret i128 %result @@ -1053,238 +804,220 @@ ; GFX7-NEXT: s_mov_b32 s16, s0 ; GFX7-NEXT: v_mov_b32_e32 v0, s8 ; GFX7-NEXT: v_mul_hi_u32 v0, s16, v0 -; GFX7-NEXT: s_mul_i32 s17, s1, s8 -; GFX7-NEXT: s_mul_i32 s18, s16, s9 -; GFX7-NEXT: s_add_u32 s17, s17, s18 -; GFX7-NEXT: v_add_i32_e32 v0, vcc, s17, v0 -; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_cselect_b32 s18, 1, 0 -; GFX7-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GFX7-NEXT: v_mul_hi_u32 v2, v2, s8 -; GFX7-NEXT: v_add_i32_e32 v1, vcc, s18, v1 -; GFX7-NEXT: s_mul_i32 s17, s2, s8 -; GFX7-NEXT: s_mul_i32 s18, s1, s9 -; GFX7-NEXT: s_mul_i32 s19, s16, s10 -; GFX7-NEXT: v_mov_b32_e32 v3, s9 -; GFX7-NEXT: s_add_u32 s17, s17, s18 -; GFX7-NEXT: v_mul_hi_u32 v4, s16, v3 -; GFX7-NEXT: s_cselect_b32 s18, 1, 0 -; GFX7-NEXT: s_add_u32 s17, s17, s19 -; GFX7-NEXT: s_cselect_b32 s19, 1, 0 -; GFX7-NEXT: v_add_i32_e32 v2, vcc, s17, v2 -; GFX7-NEXT: s_add_i32 s18, s18, s19 -; GFX7-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v5, vcc, s18, v5 -; GFX7-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; GFX7-NEXT: v_add_i32_e32 v1, vcc, v2, v1 -; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX7-NEXT: s_mul_i32 s17, s3, s8 -; GFX7-NEXT: s_mul_i32 s18, s2, s9 -; GFX7-NEXT: v_add_i32_e32 v2, vcc, v4, v2 -; GFX7-NEXT: s_mul_i32 s19, s1, s10 -; GFX7-NEXT: v_mov_b32_e32 v4, s2 -; GFX7-NEXT: s_add_u32 s17, s17, s18 -; GFX7-NEXT: v_mul_hi_u32 v5, v4, s8 -; GFX7-NEXT: s_cselect_b32 s18, 1, 0 -; GFX7-NEXT: s_add_u32 s17, s17, s19 -; GFX7-NEXT: s_cselect_b32 s19, 1, 0 -; GFX7-NEXT: s_mul_i32 s20, s16, s11 -; GFX7-NEXT: s_add_i32 s18, s18, s19 -; GFX7-NEXT: v_mul_hi_u32 v3, s1, v3 -; GFX7-NEXT: s_add_u32 s17, s17, s20 -; GFX7-NEXT: s_cselect_b32 s19, 1, 0 -; GFX7-NEXT: v_add_i32_e32 v5, vcc, s17, v5 -; GFX7-NEXT: v_mov_b32_e32 v6, s10 -; GFX7-NEXT: s_add_i32 s18, s18, s19 -; GFX7-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX7-NEXT: v_mul_hi_u32 v7, s16, v6 -; GFX7-NEXT: v_add_i32_e32 v8, vcc, s18, v8 -; GFX7-NEXT: v_add_i32_e32 v3, vcc, v5, v3 -; GFX7-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; GFX7-NEXT: v_add_i32_e32 v3, vcc, v3, v7 -; GFX7-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX7-NEXT: s_mul_i32 s17, s4, s8 -; GFX7-NEXT: s_mul_i32 s18, s3, s9 -; GFX7-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; GFX7-NEXT: s_mul_i32 s19, s2, s10 -; GFX7-NEXT: s_add_u32 s17, s17, s18 -; GFX7-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; GFX7-NEXT: s_cselect_b32 s18, 1, 0 -; GFX7-NEXT: s_add_u32 s17, s17, s19 -; GFX7-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX7-NEXT: s_cselect_b32 s19, 1, 0 -; GFX7-NEXT: v_add_i32_e32 v3, vcc, v5, v3 -; GFX7-NEXT: s_mul_i32 s20, s1, s11 -; GFX7-NEXT: v_mov_b32_e32 v5, s3 -; GFX7-NEXT: s_add_i32 s18, s18, s19 -; GFX7-NEXT: v_mul_hi_u32 v7, v5, s8 -; GFX7-NEXT: s_add_u32 s17, s17, s20 -; GFX7-NEXT: s_cselect_b32 s19, 1, 0 -; GFX7-NEXT: s_mul_i32 s21, s16, s12 -; GFX7-NEXT: s_add_i32 s18, s18, s19 -; GFX7-NEXT: v_mul_hi_u32 v4, v4, s9 -; GFX7-NEXT: s_add_u32 s17, s17, s21 -; GFX7-NEXT: s_cselect_b32 s19, 1, 0 -; GFX7-NEXT: v_add_i32_e32 v7, vcc, s17, v7 -; GFX7-NEXT: s_add_i32 s18, s18, s19 -; GFX7-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GFX7-NEXT: v_mul_hi_u32 v8, s1, v6 -; GFX7-NEXT: v_add_i32_e32 v11, vcc, s18, v11 -; GFX7-NEXT: v_add_i32_e32 v4, vcc, v7, v4 -; GFX7-NEXT: v_mov_b32_e32 v9, s11 -; GFX7-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX7-NEXT: v_mul_hi_u32 v10, s16, v9 -; GFX7-NEXT: v_add_i32_e32 v7, vcc, v11, v7 -; GFX7-NEXT: v_add_i32_e32 v4, vcc, v4, v8 -; GFX7-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX7-NEXT: s_mul_i32 s17, s5, s8 -; GFX7-NEXT: s_mul_i32 s18, s4, s9 -; GFX7-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GFX7-NEXT: s_mul_i32 s19, s3, s10 -; GFX7-NEXT: s_add_u32 s17, s17, s18 -; GFX7-NEXT: v_add_i32_e32 v4, vcc, v4, v10 -; GFX7-NEXT: s_cselect_b32 s18, 1, 0 -; GFX7-NEXT: s_add_u32 s17, s17, s19 -; GFX7-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX7-NEXT: s_cselect_b32 s19, 1, 0 -; GFX7-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GFX7-NEXT: s_mul_i32 s20, s2, s11 -; GFX7-NEXT: s_add_i32 s18, s18, s19 -; GFX7-NEXT: v_add_i32_e32 v3, vcc, v4, v3 -; GFX7-NEXT: s_add_u32 s17, s17, s20 -; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX7-NEXT: s_cselect_b32 s19, 1, 0 -; GFX7-NEXT: v_add_i32_e32 v4, vcc, v7, v4 -; GFX7-NEXT: s_mul_i32 s21, s1, s12 -; GFX7-NEXT: v_mov_b32_e32 v7, s4 -; GFX7-NEXT: s_add_i32 s18, s18, s19 -; GFX7-NEXT: v_mul_hi_u32 v8, v7, s8 -; GFX7-NEXT: s_add_u32 s17, s17, s21 -; GFX7-NEXT: s_cselect_b32 s19, 1, 0 -; GFX7-NEXT: s_mul_i32 s22, s16, s13 -; GFX7-NEXT: s_add_i32 s18, s18, s19 -; GFX7-NEXT: v_mul_hi_u32 v10, v5, s9 -; GFX7-NEXT: s_add_u32 s17, s17, s22 -; GFX7-NEXT: s_cselect_b32 s19, 1, 0 -; GFX7-NEXT: v_add_i32_e32 v8, vcc, s17, v8 -; GFX7-NEXT: s_add_i32 s18, s18, s19 -; GFX7-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GFX7-NEXT: v_mul_hi_u32 v6, s2, v6 -; GFX7-NEXT: v_add_i32_e32 v14, vcc, s18, v14 -; GFX7-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; GFX7-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GFX7-NEXT: v_mul_hi_u32 v11, s1, v9 -; GFX7-NEXT: v_add_i32_e32 v10, vcc, v14, v10 -; GFX7-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; GFX7-NEXT: v_mov_b32_e32 v12, s12 -; GFX7-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX7-NEXT: s_mul_i32 s17, s6, s8 -; GFX7-NEXT: s_mul_i32 s18, s5, s9 -; GFX7-NEXT: v_mul_hi_u32 v13, s16, v12 -; GFX7-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; GFX7-NEXT: s_mul_i32 s19, s4, s10 -; GFX7-NEXT: s_add_u32 s17, s17, s18 -; GFX7-NEXT: v_add_i32_e32 v6, vcc, v6, v11 -; GFX7-NEXT: s_cselect_b32 s18, 1, 0 -; GFX7-NEXT: s_add_u32 s17, s17, s19 -; GFX7-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GFX7-NEXT: s_cselect_b32 s19, 1, 0 -; GFX7-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; GFX7-NEXT: s_mul_i32 s20, s3, s11 -; GFX7-NEXT: s_add_i32 s18, s18, s19 -; GFX7-NEXT: v_add_i32_e32 v6, vcc, v6, v13 -; GFX7-NEXT: s_add_u32 s17, s17, s20 -; GFX7-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GFX7-NEXT: s_cselect_b32 s19, 1, 0 -; GFX7-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; GFX7-NEXT: s_mul_i32 s21, s2, s12 -; GFX7-NEXT: s_add_i32 s18, s18, s19 -; GFX7-NEXT: v_add_i32_e32 v4, vcc, v6, v4 -; GFX7-NEXT: s_add_u32 s17, s17, s21 -; GFX7-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX7-NEXT: s_cselect_b32 s19, 1, 0 -; GFX7-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; GFX7-NEXT: s_mul_i32 s22, s1, s13 -; GFX7-NEXT: v_mov_b32_e32 v8, s5 -; GFX7-NEXT: s_add_i32 s18, s18, s19 -; GFX7-NEXT: v_mul_hi_u32 v10, v8, s8 -; GFX7-NEXT: s_add_u32 s17, s17, s22 -; GFX7-NEXT: s_cselect_b32 s19, 1, 0 -; GFX7-NEXT: s_mul_i32 s23, s16, s14 -; GFX7-NEXT: s_add_i32 s18, s18, s19 -; GFX7-NEXT: v_mul_hi_u32 v11, v7, s9 -; GFX7-NEXT: s_add_u32 s17, s17, s23 -; GFX7-NEXT: s_cselect_b32 s19, 1, 0 -; GFX7-NEXT: v_add_i32_e32 v10, vcc, s17, v10 -; GFX7-NEXT: s_add_i32 s18, s18, s19 -; GFX7-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; GFX7-NEXT: v_mul_hi_u32 v5, v5, s10 -; GFX7-NEXT: v_add_i32_e32 v17, vcc, s18, v17 -; GFX7-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; GFX7-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GFX7-NEXT: v_mul_hi_u32 v13, s2, v9 -; GFX7-NEXT: v_add_i32_e32 v11, vcc, v17, v11 -; GFX7-NEXT: v_add_i32_e32 v5, vcc, v10, v5 -; GFX7-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GFX7-NEXT: v_mul_hi_u32 v14, s1, v12 -; GFX7-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GFX7-NEXT: v_add_i32_e32 v5, vcc, v5, v13 -; GFX7-NEXT: v_mov_b32_e32 v15, s13 -; GFX7-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GFX7-NEXT: v_mul_hi_u32 v16, s16, v15 -; GFX7-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; GFX7-NEXT: v_add_i32_e32 v5, vcc, v5, v14 -; GFX7-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; GFX7-NEXT: v_add_i32_e32 v5, vcc, v5, v16 -; GFX7-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; GFX7-NEXT: s_mul_i32 s7, s7, s8 -; GFX7-NEXT: s_mul_i32 s17, s6, s9 -; GFX7-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; GFX7-NEXT: s_mul_i32 s5, s5, s10 +; GFX7-NEXT: v_mov_b32_e32 v1, s9 +; GFX7-NEXT: v_mul_hi_u32 v2, s1, v1 +; GFX7-NEXT: v_mul_hi_u32 v1, s16, v1 +; GFX7-NEXT: v_readfirstlane_b32 s17, v0 +; GFX7-NEXT: v_mov_b32_e32 v0, s10 +; GFX7-NEXT: v_mul_hi_u32 v0, s16, v0 +; GFX7-NEXT: v_readfirstlane_b32 s21, v2 +; GFX7-NEXT: v_mov_b32_e32 v2, s2 +; GFX7-NEXT: v_readfirstlane_b32 s23, v1 +; GFX7-NEXT: v_readfirstlane_b32 s19, v0 +; GFX7-NEXT: v_mov_b32_e32 v0, s1 +; GFX7-NEXT: v_mul_hi_u32 v1, v0, s8 +; GFX7-NEXT: v_mul_hi_u32 v3, v2, s8 +; GFX7-NEXT: v_mov_b32_e32 v4, s11 +; GFX7-NEXT: s_mul_i32 s18, s16, s10 +; GFX7-NEXT: v_readfirstlane_b32 s24, v1 +; GFX7-NEXT: v_mov_b32_e32 v1, s12 +; GFX7-NEXT: v_readfirstlane_b32 s22, v3 +; GFX7-NEXT: v_mul_hi_u32 v3, s16, v1 +; GFX7-NEXT: s_mul_i32 s20, s1, s9 +; GFX7-NEXT: v_mul_hi_u32 v5, s1, v4 +; GFX7-NEXT: s_add_u32 s18, s20, s18 +; GFX7-NEXT: v_readfirstlane_b32 s25, v3 +; GFX7-NEXT: v_mul_hi_u32 v3, v2, s10 +; GFX7-NEXT: s_addc_u32 s19, s21, s19 +; GFX7-NEXT: s_mul_i32 s21, s2, s8 +; GFX7-NEXT: s_cselect_b32 s20, 1, 0 +; GFX7-NEXT: s_add_u32 s18, s21, s18 +; GFX7-NEXT: v_readfirstlane_b32 s28, v3 +; GFX7-NEXT: v_mov_b32_e32 v3, s3 +; GFX7-NEXT: s_addc_u32 s19, s22, s19 +; GFX7-NEXT: s_mul_i32 s22, s16, s9 +; GFX7-NEXT: v_readfirstlane_b32 s27, v5 +; GFX7-NEXT: v_mul_hi_u32 v5, v3, s9 +; GFX7-NEXT: s_cselect_b32 s21, 1, 0 +; GFX7-NEXT: s_add_u32 s17, s22, s17 +; GFX7-NEXT: s_addc_u32 s18, s23, s18 +; GFX7-NEXT: s_mul_i32 s23, s1, s8 +; GFX7-NEXT: s_cselect_b32 s22, 1, 0 +; GFX7-NEXT: s_add_u32 s17, s23, s17 +; GFX7-NEXT: s_addc_u32 s18, s24, s18 +; GFX7-NEXT: s_mul_i32 s24, s16, s12 +; GFX7-NEXT: s_mul_i32 s26, s1, s11 +; GFX7-NEXT: v_readfirstlane_b32 s29, v5 +; GFX7-NEXT: v_mov_b32_e32 v5, s4 +; GFX7-NEXT: s_cselect_b32 s23, 1, 0 +; GFX7-NEXT: s_add_u32 s24, s26, s24 +; GFX7-NEXT: v_mul_hi_u32 v6, v5, s8 +; GFX7-NEXT: s_addc_u32 s25, s27, s25 +; GFX7-NEXT: s_mul_i32 s27, s2, s10 +; GFX7-NEXT: s_cselect_b32 s26, 1, 0 +; GFX7-NEXT: s_add_u32 s24, s27, s24 +; GFX7-NEXT: v_mul_hi_u32 v0, v0, s10 +; GFX7-NEXT: s_addc_u32 s25, s28, s25 +; GFX7-NEXT: s_mul_i32 s28, s3, s9 +; GFX7-NEXT: s_cselect_b32 s27, 1, 0 +; GFX7-NEXT: s_add_u32 s24, s28, s24 +; GFX7-NEXT: v_readfirstlane_b32 s30, v6 +; GFX7-NEXT: v_mul_hi_u32 v6, s16, v4 +; GFX7-NEXT: s_addc_u32 s25, s29, s25 +; GFX7-NEXT: s_mul_i32 s29, s4, s8 +; GFX7-NEXT: s_cselect_b32 s28, 1, 0 +; GFX7-NEXT: s_add_u32 s24, s29, s24 +; GFX7-NEXT: v_readfirstlane_b32 s33, v0 +; GFX7-NEXT: v_mul_hi_u32 v0, v2, s9 +; GFX7-NEXT: s_addc_u32 s25, s30, s25 +; GFX7-NEXT: s_mul_i32 s30, s16, s11 +; GFX7-NEXT: s_cselect_b32 s29, 1, 0 +; GFX7-NEXT: v_readfirstlane_b32 s31, v6 +; GFX7-NEXT: s_add_u32 s19, s30, s19 +; GFX7-NEXT: s_addc_u32 s24, s31, s24 +; GFX7-NEXT: s_mul_i32 s31, s1, s10 +; GFX7-NEXT: s_cselect_b32 s30, 1, 0 +; GFX7-NEXT: s_add_u32 s19, s31, s19 +; GFX7-NEXT: v_readfirstlane_b32 s34, v0 +; GFX7-NEXT: v_mul_hi_u32 v0, v3, s8 +; GFX7-NEXT: s_addc_u32 s24, s33, s24 +; GFX7-NEXT: s_mul_i32 s33, s2, s9 +; GFX7-NEXT: s_cselect_b32 s31, 1, 0 +; GFX7-NEXT: s_add_u32 s19, s33, s19 +; GFX7-NEXT: s_addc_u32 s24, s34, s24 +; GFX7-NEXT: s_mul_i32 s34, s3, s8 +; GFX7-NEXT: s_cselect_b32 s33, 1, 0 +; GFX7-NEXT: v_readfirstlane_b32 s35, v0 +; GFX7-NEXT: s_add_u32 s19, s34, s19 +; GFX7-NEXT: v_mov_b32_e32 v0, s14 +; GFX7-NEXT: s_addc_u32 s24, s35, s24 +; GFX7-NEXT: v_mul_hi_u32 v0, s16, v0 +; GFX7-NEXT: s_cselect_b32 s34, 1, 0 +; GFX7-NEXT: s_cmp_lg_u32 s23, 0 +; GFX7-NEXT: s_addc_u32 s19, s22, s19 +; GFX7-NEXT: v_mov_b32_e32 v2, s13 +; GFX7-NEXT: s_cselect_b32 s22, 1, 0 +; GFX7-NEXT: s_cmp_lg_u32 s21, 0 +; GFX7-NEXT: v_mul_hi_u32 v6, s1, v2 +; GFX7-NEXT: s_addc_u32 s20, s20, 0 +; GFX7-NEXT: v_readfirstlane_b32 s23, v0 +; GFX7-NEXT: v_mul_hi_u32 v0, s2, v1 +; GFX7-NEXT: s_cmp_lg_u32 s22, 0 +; GFX7-NEXT: s_addc_u32 s20, s20, s24 +; GFX7-NEXT: s_mul_i32 s22, s16, s14 +; GFX7-NEXT: s_mul_i32 s24, s1, s13 +; GFX7-NEXT: s_cselect_b32 s21, 1, 0 +; GFX7-NEXT: v_readfirstlane_b32 s35, v6 +; GFX7-NEXT: s_add_u32 s22, s24, s22 +; GFX7-NEXT: s_addc_u32 s23, s35, s23 +; GFX7-NEXT: v_readfirstlane_b32 s35, v0 +; GFX7-NEXT: v_mul_hi_u32 v0, v3, s11 +; GFX7-NEXT: s_mul_i32 s24, s2, s12 +; GFX7-NEXT: s_add_u32 s22, s24, s22 +; GFX7-NEXT: s_addc_u32 s23, s35, s23 +; GFX7-NEXT: v_readfirstlane_b32 s35, v0 +; GFX7-NEXT: v_mul_hi_u32 v0, v5, s10 +; GFX7-NEXT: s_mul_i32 s24, s3, s11 +; GFX7-NEXT: s_add_u32 s22, s24, s22 +; GFX7-NEXT: s_addc_u32 s23, s35, s23 +; GFX7-NEXT: v_readfirstlane_b32 s35, v0 +; GFX7-NEXT: v_mov_b32_e32 v0, s5 +; GFX7-NEXT: v_mul_hi_u32 v6, v0, s9 +; GFX7-NEXT: s_mul_i32 s24, s4, s10 +; GFX7-NEXT: s_add_u32 s22, s24, s22 +; GFX7-NEXT: v_mul_hi_u32 v1, s1, v1 +; GFX7-NEXT: s_addc_u32 s23, s35, s23 +; GFX7-NEXT: v_readfirstlane_b32 s35, v6 +; GFX7-NEXT: v_mov_b32_e32 v6, s6 +; GFX7-NEXT: v_mul_hi_u32 v6, v6, s8 +; GFX7-NEXT: s_mul_i32 s24, s5, s9 +; GFX7-NEXT: s_add_u32 s22, s24, s22 +; GFX7-NEXT: v_mul_hi_u32 v2, s16, v2 +; GFX7-NEXT: v_readfirstlane_b32 s36, v1 +; GFX7-NEXT: v_mul_hi_u32 v1, s2, v4 +; GFX7-NEXT: s_addc_u32 s23, s35, s23 +; GFX7-NEXT: s_mul_i32 s24, s6, s8 +; GFX7-NEXT: v_readfirstlane_b32 s35, v6 +; GFX7-NEXT: s_add_u32 s22, s24, s22 +; GFX7-NEXT: s_addc_u32 s23, s35, s23 +; GFX7-NEXT: s_mul_i32 s24, s16, s13 +; GFX7-NEXT: v_readfirstlane_b32 s35, v2 +; GFX7-NEXT: s_add_u32 s24, s24, s25 +; GFX7-NEXT: v_readfirstlane_b32 s37, v1 +; GFX7-NEXT: v_mul_hi_u32 v1, v3, s10 +; GFX7-NEXT: s_addc_u32 s22, s35, s22 +; GFX7-NEXT: s_mul_i32 s35, s1, s12 +; GFX7-NEXT: s_cselect_b32 s25, 1, 0 +; GFX7-NEXT: s_add_u32 s24, s35, s24 +; GFX7-NEXT: s_addc_u32 s22, s36, s22 +; GFX7-NEXT: s_mul_i32 s36, s2, s11 +; GFX7-NEXT: s_cselect_b32 s35, 1, 0 +; GFX7-NEXT: s_add_u32 s24, s36, s24 +; GFX7-NEXT: v_readfirstlane_b32 s38, v1 +; GFX7-NEXT: v_mul_hi_u32 v1, v5, s9 +; GFX7-NEXT: s_addc_u32 s22, s37, s22 +; GFX7-NEXT: s_mul_i32 s37, s3, s10 +; GFX7-NEXT: s_cselect_b32 s36, 1, 0 +; GFX7-NEXT: s_add_u32 s24, s37, s24 +; GFX7-NEXT: v_mul_hi_u32 v0, v0, s8 +; GFX7-NEXT: s_addc_u32 s22, s38, s22 +; GFX7-NEXT: s_mul_i32 s38, s4, s9 +; GFX7-NEXT: s_cselect_b32 s37, 1, 0 +; GFX7-NEXT: v_readfirstlane_b32 s39, v1 +; GFX7-NEXT: s_add_u32 s24, s38, s24 +; GFX7-NEXT: s_addc_u32 s22, s39, s22 +; GFX7-NEXT: s_mul_i32 s39, s5, s8 +; GFX7-NEXT: s_cselect_b32 s38, 1, 0 +; GFX7-NEXT: v_readfirstlane_b32 s40, v0 +; GFX7-NEXT: s_add_u32 s24, s39, s24 +; GFX7-NEXT: s_addc_u32 s22, s40, s22 +; GFX7-NEXT: s_cselect_b32 s39, 1, 0 +; GFX7-NEXT: s_cmp_lg_u32 s31, 0 +; GFX7-NEXT: s_addc_u32 s30, s30, 0 +; GFX7-NEXT: s_cmp_lg_u32 s33, 0 +; GFX7-NEXT: s_addc_u32 s30, s30, 0 +; GFX7-NEXT: s_cmp_lg_u32 s34, 0 +; GFX7-NEXT: s_addc_u32 s30, s30, 0 +; GFX7-NEXT: s_cmp_lg_u32 s21, 0 +; GFX7-NEXT: s_addc_u32 s21, s30, s24 +; GFX7-NEXT: s_cselect_b32 s24, 1, 0 +; GFX7-NEXT: s_cmp_lg_u32 s27, 0 +; GFX7-NEXT: s_addc_u32 s26, s26, 0 +; GFX7-NEXT: s_cmp_lg_u32 s28, 0 +; GFX7-NEXT: s_addc_u32 s26, s26, 0 +; GFX7-NEXT: s_cmp_lg_u32 s29, 0 +; GFX7-NEXT: s_addc_u32 s26, s26, 0 +; GFX7-NEXT: s_cmp_lg_u32 s24, 0 +; GFX7-NEXT: s_addc_u32 s22, s26, s22 +; GFX7-NEXT: s_mul_i32 s16, s16, s15 +; GFX7-NEXT: s_addc_u32 s15, s23, s16 +; GFX7-NEXT: s_mul_i32 s1, s1, s14 +; GFX7-NEXT: s_cmp_lg_u32 s39, 0 +; GFX7-NEXT: s_addc_u32 s1, s15, s1 +; GFX7-NEXT: s_mul_i32 s2, s2, s13 +; GFX7-NEXT: s_cmp_lg_u32 s38, 0 +; GFX7-NEXT: s_addc_u32 s1, s1, s2 +; GFX7-NEXT: s_mul_i32 s3, s3, s12 +; GFX7-NEXT: s_cmp_lg_u32 s37, 0 +; GFX7-NEXT: s_addc_u32 s1, s1, s3 ; GFX7-NEXT: s_mul_i32 s4, s4, s11 -; GFX7-NEXT: s_mul_i32 s11, s3, s12 -; GFX7-NEXT: s_mul_i32 s12, s2, s13 -; GFX7-NEXT: s_mul_i32 s13, s1, s14 -; GFX7-NEXT: v_mul_hi_u32 v11, s2, v12 -; GFX7-NEXT: v_mul_hi_u32 v12, s1, v15 -; GFX7-NEXT: s_add_i32 s1, s7, s17 -; GFX7-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX7-NEXT: s_add_i32 s1, s1, s5 -; GFX7-NEXT: v_add_i32_e32 v6, vcc, v10, v6 -; GFX7-NEXT: v_mov_b32_e32 v10, s6 -; GFX7-NEXT: s_add_i32 s1, s1, s4 -; GFX7-NEXT: v_mul_hi_u32 v10, v10, s8 -; GFX7-NEXT: s_add_i32 s1, s1, s11 -; GFX7-NEXT: v_mul_hi_u32 v8, v8, s9 -; GFX7-NEXT: s_add_i32 s1, s1, s12 -; GFX7-NEXT: s_mul_i32 s15, s16, s15 -; GFX7-NEXT: v_mul_hi_u32 v7, v7, s10 -; GFX7-NEXT: s_add_i32 s1, s1, s13 -; GFX7-NEXT: v_mul_hi_u32 v9, s3, v9 -; GFX7-NEXT: s_add_i32 s1, s1, s15 -; GFX7-NEXT: v_add_i32_e32 v10, vcc, s1, v10 -; GFX7-NEXT: v_mov_b32_e32 v13, s14 -; GFX7-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; GFX7-NEXT: v_mul_hi_u32 v13, s16, v13 -; GFX7-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GFX7-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; GFX7-NEXT: v_add_i32_e32 v7, vcc, v7, v11 -; GFX7-NEXT: v_add_i32_e32 v7, vcc, v7, v12 -; GFX7-NEXT: v_add_i32_e32 v7, vcc, v7, v13 -; GFX7-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; GFX7-NEXT: s_cmp_lg_u32 s36, 0 +; GFX7-NEXT: s_addc_u32 s1, s1, s4 +; GFX7-NEXT: s_mul_i32 s5, s5, s10 +; GFX7-NEXT: s_cmp_lg_u32 s35, 0 +; GFX7-NEXT: s_addc_u32 s1, s1, s5 +; GFX7-NEXT: s_mul_i32 s6, s6, s9 +; GFX7-NEXT: s_cmp_lg_u32 s25, 0 +; GFX7-NEXT: s_addc_u32 s1, s1, s6 +; GFX7-NEXT: s_mul_i32 s7, s7, s8 ; GFX7-NEXT: s_mul_i32 s0, s0, s8 -; GFX7-NEXT: v_readfirstlane_b32 s1, v0 -; GFX7-NEXT: v_readfirstlane_b32 s2, v1 -; GFX7-NEXT: v_readfirstlane_b32 s3, v2 -; GFX7-NEXT: v_readfirstlane_b32 s4, v3 -; GFX7-NEXT: v_readfirstlane_b32 s5, v4 -; GFX7-NEXT: v_readfirstlane_b32 s6, v5 -; GFX7-NEXT: v_readfirstlane_b32 s7, v6 +; GFX7-NEXT: s_add_u32 s7, s7, s1 +; GFX7-NEXT: s_mov_b32 s1, s17 +; GFX7-NEXT: s_mov_b32 s2, s18 +; GFX7-NEXT: s_mov_b32 s3, s19 +; GFX7-NEXT: s_mov_b32 s4, s20 +; GFX7-NEXT: s_mov_b32 s5, s21 +; GFX7-NEXT: s_mov_b32 s6, s22 ; GFX7-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_mul_i256: @@ -1292,457 +1025,394 @@ ; GFX8-NEXT: s_mov_b32 s16, s0 ; GFX8-NEXT: v_mov_b32_e32 v0, s8 ; GFX8-NEXT: v_mul_hi_u32 v0, s16, v0 -; GFX8-NEXT: s_mul_i32 s17, s1, s8 -; GFX8-NEXT: s_mul_i32 s18, s16, s9 -; GFX8-NEXT: s_add_u32 s17, s17, s18 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, s17, v0 -; GFX8-NEXT: v_mov_b32_e32 v2, s1 -; GFX8-NEXT: s_cselect_b32 s18, 1, 0 -; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GFX8-NEXT: v_mul_hi_u32 v2, v2, s8 -; GFX8-NEXT: v_add_u32_e32 v1, vcc, s18, v1 -; GFX8-NEXT: s_mul_i32 s17, s2, s8 -; GFX8-NEXT: s_mul_i32 s18, s1, s9 -; GFX8-NEXT: s_mul_i32 s19, s16, s10 -; GFX8-NEXT: v_mov_b32_e32 v3, s9 -; GFX8-NEXT: s_add_u32 s17, s17, s18 -; GFX8-NEXT: v_mul_hi_u32 v4, s16, v3 -; GFX8-NEXT: s_cselect_b32 s18, 1, 0 -; GFX8-NEXT: s_add_u32 s17, s17, s19 -; GFX8-NEXT: s_cselect_b32 s19, 1, 0 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, s17, v2 -; GFX8-NEXT: s_add_i32 s18, s18, s19 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v5, vcc, s18, v5 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v4 -; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v5, v4 -; GFX8-NEXT: v_add_u32_e32 v1, vcc, v2, v1 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: s_mul_i32 s17, s3, s8 -; GFX8-NEXT: s_mul_i32 s18, s2, s9 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v4, v2 -; GFX8-NEXT: s_mul_i32 s19, s1, s10 -; GFX8-NEXT: v_mov_b32_e32 v4, s2 -; GFX8-NEXT: s_add_u32 s17, s17, s18 -; GFX8-NEXT: v_mul_hi_u32 v5, v4, s8 -; GFX8-NEXT: s_cselect_b32 s18, 1, 0 -; GFX8-NEXT: s_add_u32 s17, s17, s19 -; GFX8-NEXT: s_cselect_b32 s19, 1, 0 -; GFX8-NEXT: s_mul_i32 s20, s16, s11 -; GFX8-NEXT: s_add_i32 s18, s18, s19 -; GFX8-NEXT: v_mul_hi_u32 v3, s1, v3 -; GFX8-NEXT: s_add_u32 s17, s17, s20 -; GFX8-NEXT: s_cselect_b32 s19, 1, 0 -; GFX8-NEXT: v_add_u32_e32 v5, vcc, s17, v5 -; GFX8-NEXT: v_mov_b32_e32 v6, s10 -; GFX8-NEXT: s_add_i32 s18, s18, s19 -; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX8-NEXT: v_mul_hi_u32 v7, s16, v6 -; GFX8-NEXT: v_add_u32_e32 v8, vcc, s18, v8 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v5, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v5, vcc, v8, v5 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v7 -; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX8-NEXT: s_mul_i32 s17, s4, s8 -; GFX8-NEXT: s_mul_i32 s18, s3, s9 -; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v7 -; GFX8-NEXT: s_mul_i32 s19, s2, s10 -; GFX8-NEXT: s_add_u32 s17, s17, s18 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 -; GFX8-NEXT: s_cselect_b32 s18, 1, 0 -; GFX8-NEXT: s_add_u32 s17, s17, s19 -; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: s_cselect_b32 s19, 1, 0 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v5, v3 -; GFX8-NEXT: s_mul_i32 s20, s1, s11 -; GFX8-NEXT: v_mov_b32_e32 v5, s3 -; GFX8-NEXT: s_add_i32 s18, s18, s19 -; GFX8-NEXT: v_mul_hi_u32 v7, v5, s8 -; GFX8-NEXT: s_add_u32 s17, s17, s20 -; GFX8-NEXT: s_cselect_b32 s19, 1, 0 -; GFX8-NEXT: s_mul_i32 s21, s16, s12 -; GFX8-NEXT: s_add_i32 s18, s18, s19 -; GFX8-NEXT: v_mul_hi_u32 v4, v4, s9 -; GFX8-NEXT: s_add_u32 s17, s17, s21 -; GFX8-NEXT: s_cselect_b32 s19, 1, 0 -; GFX8-NEXT: v_add_u32_e32 v7, vcc, s17, v7 -; GFX8-NEXT: s_add_i32 s18, s18, s19 -; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GFX8-NEXT: v_mul_hi_u32 v8, s1, v6 -; GFX8-NEXT: v_add_u32_e32 v11, vcc, s18, v11 -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v7, v4 -; GFX8-NEXT: v_mov_b32_e32 v9, s11 -; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX8-NEXT: v_mul_hi_u32 v10, s16, v9 -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v11, v7 -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v8 -; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX8-NEXT: s_mul_i32 s17, s5, s8 -; GFX8-NEXT: s_mul_i32 s18, s4, s9 -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v8 -; GFX8-NEXT: s_mul_i32 s19, s3, s10 -; GFX8-NEXT: s_add_u32 s17, s17, s18 -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v10 -; GFX8-NEXT: s_cselect_b32 s18, 1, 0 -; GFX8-NEXT: s_add_u32 s17, s17, s19 -; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX8-NEXT: s_cselect_b32 s19, 1, 0 -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v8 -; GFX8-NEXT: s_mul_i32 s20, s2, s11 -; GFX8-NEXT: s_add_i32 s18, s18, s19 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v4, v3 -; GFX8-NEXT: s_add_u32 s17, s17, s20 -; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX8-NEXT: s_cselect_b32 s19, 1, 0 -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v7, v4 -; GFX8-NEXT: s_mul_i32 s21, s1, s12 -; GFX8-NEXT: v_mov_b32_e32 v7, s4 -; GFX8-NEXT: s_add_i32 s18, s18, s19 -; GFX8-NEXT: v_mul_hi_u32 v8, v7, s8 -; GFX8-NEXT: s_add_u32 s17, s17, s21 -; GFX8-NEXT: s_cselect_b32 s19, 1, 0 -; GFX8-NEXT: s_mul_i32 s22, s16, s13 -; GFX8-NEXT: s_add_i32 s18, s18, s19 -; GFX8-NEXT: v_mul_hi_u32 v10, v5, s9 -; GFX8-NEXT: s_add_u32 s17, s17, s22 -; GFX8-NEXT: s_cselect_b32 s19, 1, 0 -; GFX8-NEXT: v_add_u32_e32 v8, vcc, s17, v8 -; GFX8-NEXT: s_add_i32 s18, s18, s19 -; GFX8-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GFX8-NEXT: v_mul_hi_u32 v6, s2, v6 -; GFX8-NEXT: v_add_u32_e32 v14, vcc, s18, v14 -; GFX8-NEXT: v_add_u32_e32 v8, vcc, v8, v10 -; GFX8-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GFX8-NEXT: v_mul_hi_u32 v11, s1, v9 -; GFX8-NEXT: v_add_u32_e32 v10, vcc, v14, v10 -; GFX8-NEXT: v_add_u32_e32 v6, vcc, v8, v6 -; GFX8-NEXT: v_mov_b32_e32 v12, s12 -; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX8-NEXT: s_mul_i32 s17, s6, s8 -; GFX8-NEXT: s_mul_i32 s18, s5, s9 -; GFX8-NEXT: v_mul_hi_u32 v13, s16, v12 -; GFX8-NEXT: v_add_u32_e32 v8, vcc, v10, v8 -; GFX8-NEXT: s_mul_i32 s19, s4, s10 -; GFX8-NEXT: s_add_u32 s17, s17, s18 -; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v11 -; GFX8-NEXT: s_cselect_b32 s18, 1, 0 -; GFX8-NEXT: s_add_u32 s17, s17, s19 -; GFX8-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GFX8-NEXT: s_cselect_b32 s19, 1, 0 -; GFX8-NEXT: v_add_u32_e32 v8, vcc, v8, v10 -; GFX8-NEXT: s_mul_i32 s20, s3, s11 -; GFX8-NEXT: s_add_i32 s18, s18, s19 -; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v13 -; GFX8-NEXT: s_add_u32 s17, s17, s20 -; GFX8-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GFX8-NEXT: s_cselect_b32 s19, 1, 0 -; GFX8-NEXT: v_add_u32_e32 v8, vcc, v8, v10 -; GFX8-NEXT: s_mul_i32 s21, s2, s12 -; GFX8-NEXT: s_add_i32 s18, s18, s19 -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v6, v4 -; GFX8-NEXT: s_add_u32 s17, s17, s21 -; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX8-NEXT: s_cselect_b32 s19, 1, 0 -; GFX8-NEXT: v_add_u32_e32 v6, vcc, v8, v6 -; GFX8-NEXT: s_mul_i32 s22, s1, s13 -; GFX8-NEXT: v_mov_b32_e32 v8, s5 -; GFX8-NEXT: s_add_i32 s18, s18, s19 -; GFX8-NEXT: v_mul_hi_u32 v10, v8, s8 -; GFX8-NEXT: s_add_u32 s17, s17, s22 -; GFX8-NEXT: s_cselect_b32 s19, 1, 0 -; GFX8-NEXT: s_mul_i32 s23, s16, s14 -; GFX8-NEXT: s_add_i32 s18, s18, s19 -; GFX8-NEXT: v_mul_hi_u32 v11, v7, s9 -; GFX8-NEXT: s_add_u32 s17, s17, s23 -; GFX8-NEXT: s_cselect_b32 s19, 1, 0 -; GFX8-NEXT: v_add_u32_e32 v10, vcc, s17, v10 -; GFX8-NEXT: s_add_i32 s18, s18, s19 -; GFX8-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; GFX8-NEXT: v_mul_hi_u32 v5, v5, s10 -; GFX8-NEXT: v_add_u32_e32 v17, vcc, s18, v17 -; GFX8-NEXT: v_add_u32_e32 v10, vcc, v10, v11 -; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GFX8-NEXT: v_mul_hi_u32 v13, s2, v9 -; GFX8-NEXT: v_add_u32_e32 v11, vcc, v17, v11 -; GFX8-NEXT: v_add_u32_e32 v5, vcc, v10, v5 -; GFX8-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GFX8-NEXT: v_mul_hi_u32 v14, s1, v12 -; GFX8-NEXT: v_add_u32_e32 v10, vcc, v11, v10 -; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v13 -; GFX8-NEXT: v_mov_b32_e32 v15, s13 -; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GFX8-NEXT: v_mul_hi_u32 v16, s16, v15 -; GFX8-NEXT: v_add_u32_e32 v10, vcc, v10, v11 -; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v14 -; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v10, vcc, v10, v11 -; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v16 -; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v10, vcc, v10, v11 -; GFX8-NEXT: s_mul_i32 s7, s7, s8 -; GFX8-NEXT: s_mul_i32 s17, s6, s9 -; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v6 -; GFX8-NEXT: s_mul_i32 s5, s5, s10 +; GFX8-NEXT: v_mov_b32_e32 v1, s9 +; GFX8-NEXT: v_mul_hi_u32 v2, s1, v1 +; GFX8-NEXT: v_mul_hi_u32 v1, s16, v1 +; GFX8-NEXT: v_readfirstlane_b32 s17, v0 +; GFX8-NEXT: v_mov_b32_e32 v0, s10 +; GFX8-NEXT: v_mul_hi_u32 v0, s16, v0 +; GFX8-NEXT: v_readfirstlane_b32 s21, v2 +; GFX8-NEXT: v_mov_b32_e32 v2, s2 +; GFX8-NEXT: v_readfirstlane_b32 s23, v1 +; GFX8-NEXT: v_readfirstlane_b32 s19, v0 +; GFX8-NEXT: v_mov_b32_e32 v0, s1 +; GFX8-NEXT: v_mul_hi_u32 v1, v0, s8 +; GFX8-NEXT: v_mul_hi_u32 v3, v2, s8 +; GFX8-NEXT: v_mov_b32_e32 v4, s11 +; GFX8-NEXT: s_mul_i32 s18, s16, s10 +; GFX8-NEXT: v_readfirstlane_b32 s24, v1 +; GFX8-NEXT: v_mov_b32_e32 v1, s12 +; GFX8-NEXT: v_readfirstlane_b32 s22, v3 +; GFX8-NEXT: v_mul_hi_u32 v3, s16, v1 +; GFX8-NEXT: s_mul_i32 s20, s1, s9 +; GFX8-NEXT: v_mul_hi_u32 v5, s1, v4 +; GFX8-NEXT: s_add_u32 s18, s20, s18 +; GFX8-NEXT: v_readfirstlane_b32 s25, v3 +; GFX8-NEXT: v_mul_hi_u32 v3, v2, s10 +; GFX8-NEXT: s_addc_u32 s19, s21, s19 +; GFX8-NEXT: s_mul_i32 s21, s2, s8 +; GFX8-NEXT: s_cselect_b32 s20, 1, 0 +; GFX8-NEXT: s_add_u32 s18, s21, s18 +; GFX8-NEXT: v_readfirstlane_b32 s28, v3 +; GFX8-NEXT: v_mov_b32_e32 v3, s3 +; GFX8-NEXT: s_addc_u32 s19, s22, s19 +; GFX8-NEXT: s_mul_i32 s22, s16, s9 +; GFX8-NEXT: v_readfirstlane_b32 s27, v5 +; GFX8-NEXT: v_mul_hi_u32 v5, v3, s9 +; GFX8-NEXT: s_cselect_b32 s21, 1, 0 +; GFX8-NEXT: s_add_u32 s17, s22, s17 +; GFX8-NEXT: s_addc_u32 s18, s23, s18 +; GFX8-NEXT: s_mul_i32 s23, s1, s8 +; GFX8-NEXT: s_cselect_b32 s22, 1, 0 +; GFX8-NEXT: s_add_u32 s17, s23, s17 +; GFX8-NEXT: s_addc_u32 s18, s24, s18 +; GFX8-NEXT: s_mul_i32 s24, s16, s12 +; GFX8-NEXT: s_mul_i32 s26, s1, s11 +; GFX8-NEXT: v_readfirstlane_b32 s29, v5 +; GFX8-NEXT: v_mov_b32_e32 v5, s4 +; GFX8-NEXT: s_cselect_b32 s23, 1, 0 +; GFX8-NEXT: s_add_u32 s24, s26, s24 +; GFX8-NEXT: v_mul_hi_u32 v6, v5, s8 +; GFX8-NEXT: s_addc_u32 s25, s27, s25 +; GFX8-NEXT: s_mul_i32 s27, s2, s10 +; GFX8-NEXT: s_cselect_b32 s26, 1, 0 +; GFX8-NEXT: s_add_u32 s24, s27, s24 +; GFX8-NEXT: v_mul_hi_u32 v0, v0, s10 +; GFX8-NEXT: s_addc_u32 s25, s28, s25 +; GFX8-NEXT: s_mul_i32 s28, s3, s9 +; GFX8-NEXT: s_cselect_b32 s27, 1, 0 +; GFX8-NEXT: s_add_u32 s24, s28, s24 +; GFX8-NEXT: v_readfirstlane_b32 s30, v6 +; GFX8-NEXT: v_mul_hi_u32 v6, s16, v4 +; GFX8-NEXT: s_addc_u32 s25, s29, s25 +; GFX8-NEXT: s_mul_i32 s29, s4, s8 +; GFX8-NEXT: s_cselect_b32 s28, 1, 0 +; GFX8-NEXT: s_add_u32 s24, s29, s24 +; GFX8-NEXT: v_readfirstlane_b32 s33, v0 +; GFX8-NEXT: v_mul_hi_u32 v0, v2, s9 +; GFX8-NEXT: s_addc_u32 s25, s30, s25 +; GFX8-NEXT: s_mul_i32 s30, s16, s11 +; GFX8-NEXT: s_cselect_b32 s29, 1, 0 +; GFX8-NEXT: v_readfirstlane_b32 s31, v6 +; GFX8-NEXT: s_add_u32 s19, s30, s19 +; GFX8-NEXT: s_addc_u32 s24, s31, s24 +; GFX8-NEXT: s_mul_i32 s31, s1, s10 +; GFX8-NEXT: s_cselect_b32 s30, 1, 0 +; GFX8-NEXT: s_add_u32 s19, s31, s19 +; GFX8-NEXT: v_readfirstlane_b32 s34, v0 +; GFX8-NEXT: v_mul_hi_u32 v0, v3, s8 +; GFX8-NEXT: s_addc_u32 s24, s33, s24 +; GFX8-NEXT: s_mul_i32 s33, s2, s9 +; GFX8-NEXT: s_cselect_b32 s31, 1, 0 +; GFX8-NEXT: s_add_u32 s19, s33, s19 +; GFX8-NEXT: s_addc_u32 s24, s34, s24 +; GFX8-NEXT: s_mul_i32 s34, s3, s8 +; GFX8-NEXT: s_cselect_b32 s33, 1, 0 +; GFX8-NEXT: v_readfirstlane_b32 s35, v0 +; GFX8-NEXT: s_add_u32 s19, s34, s19 +; GFX8-NEXT: v_mov_b32_e32 v0, s14 +; GFX8-NEXT: s_addc_u32 s24, s35, s24 +; GFX8-NEXT: v_mul_hi_u32 v0, s16, v0 +; GFX8-NEXT: s_cselect_b32 s34, 1, 0 +; GFX8-NEXT: s_cmp_lg_u32 s23, 0 +; GFX8-NEXT: s_addc_u32 s19, s22, s19 +; GFX8-NEXT: v_mov_b32_e32 v2, s13 +; GFX8-NEXT: s_cselect_b32 s22, 1, 0 +; GFX8-NEXT: s_cmp_lg_u32 s21, 0 +; GFX8-NEXT: v_mul_hi_u32 v6, s1, v2 +; GFX8-NEXT: s_addc_u32 s20, s20, 0 +; GFX8-NEXT: v_readfirstlane_b32 s23, v0 +; GFX8-NEXT: v_mul_hi_u32 v0, s2, v1 +; GFX8-NEXT: s_cmp_lg_u32 s22, 0 +; GFX8-NEXT: s_addc_u32 s20, s20, s24 +; GFX8-NEXT: s_mul_i32 s22, s16, s14 +; GFX8-NEXT: s_mul_i32 s24, s1, s13 +; GFX8-NEXT: s_cselect_b32 s21, 1, 0 +; GFX8-NEXT: v_readfirstlane_b32 s35, v6 +; GFX8-NEXT: s_add_u32 s22, s24, s22 +; GFX8-NEXT: s_addc_u32 s23, s35, s23 +; GFX8-NEXT: v_readfirstlane_b32 s35, v0 +; GFX8-NEXT: v_mul_hi_u32 v0, v3, s11 +; GFX8-NEXT: s_mul_i32 s24, s2, s12 +; GFX8-NEXT: s_add_u32 s22, s24, s22 +; GFX8-NEXT: s_addc_u32 s23, s35, s23 +; GFX8-NEXT: v_readfirstlane_b32 s35, v0 +; GFX8-NEXT: v_mul_hi_u32 v0, v5, s10 +; GFX8-NEXT: s_mul_i32 s24, s3, s11 +; GFX8-NEXT: s_add_u32 s22, s24, s22 +; GFX8-NEXT: s_addc_u32 s23, s35, s23 +; GFX8-NEXT: v_readfirstlane_b32 s35, v0 +; GFX8-NEXT: v_mov_b32_e32 v0, s5 +; GFX8-NEXT: v_mul_hi_u32 v6, v0, s9 +; GFX8-NEXT: s_mul_i32 s24, s4, s10 +; GFX8-NEXT: s_add_u32 s22, s24, s22 +; GFX8-NEXT: v_mul_hi_u32 v1, s1, v1 +; GFX8-NEXT: s_addc_u32 s23, s35, s23 +; GFX8-NEXT: v_readfirstlane_b32 s35, v6 +; GFX8-NEXT: v_mov_b32_e32 v6, s6 +; GFX8-NEXT: v_mul_hi_u32 v6, v6, s8 +; GFX8-NEXT: s_mul_i32 s24, s5, s9 +; GFX8-NEXT: s_add_u32 s22, s24, s22 +; GFX8-NEXT: v_mul_hi_u32 v2, s16, v2 +; GFX8-NEXT: v_readfirstlane_b32 s36, v1 +; GFX8-NEXT: v_mul_hi_u32 v1, s2, v4 +; GFX8-NEXT: s_addc_u32 s23, s35, s23 +; GFX8-NEXT: s_mul_i32 s24, s6, s8 +; GFX8-NEXT: v_readfirstlane_b32 s35, v6 +; GFX8-NEXT: s_add_u32 s22, s24, s22 +; GFX8-NEXT: s_addc_u32 s23, s35, s23 +; GFX8-NEXT: s_mul_i32 s24, s16, s13 +; GFX8-NEXT: v_readfirstlane_b32 s35, v2 +; GFX8-NEXT: s_add_u32 s24, s24, s25 +; GFX8-NEXT: v_readfirstlane_b32 s37, v1 +; GFX8-NEXT: v_mul_hi_u32 v1, v3, s10 +; GFX8-NEXT: s_addc_u32 s22, s35, s22 +; GFX8-NEXT: s_mul_i32 s35, s1, s12 +; GFX8-NEXT: s_cselect_b32 s25, 1, 0 +; GFX8-NEXT: s_add_u32 s24, s35, s24 +; GFX8-NEXT: s_addc_u32 s22, s36, s22 +; GFX8-NEXT: s_mul_i32 s36, s2, s11 +; GFX8-NEXT: s_cselect_b32 s35, 1, 0 +; GFX8-NEXT: s_add_u32 s24, s36, s24 +; GFX8-NEXT: v_readfirstlane_b32 s38, v1 +; GFX8-NEXT: v_mul_hi_u32 v1, v5, s9 +; GFX8-NEXT: s_addc_u32 s22, s37, s22 +; GFX8-NEXT: s_mul_i32 s37, s3, s10 +; GFX8-NEXT: s_cselect_b32 s36, 1, 0 +; GFX8-NEXT: s_add_u32 s24, s37, s24 +; GFX8-NEXT: v_mul_hi_u32 v0, v0, s8 +; GFX8-NEXT: s_addc_u32 s22, s38, s22 +; GFX8-NEXT: s_mul_i32 s38, s4, s9 +; GFX8-NEXT: s_cselect_b32 s37, 1, 0 +; GFX8-NEXT: v_readfirstlane_b32 s39, v1 +; GFX8-NEXT: s_add_u32 s24, s38, s24 +; GFX8-NEXT: s_addc_u32 s22, s39, s22 +; GFX8-NEXT: s_mul_i32 s39, s5, s8 +; GFX8-NEXT: s_cselect_b32 s38, 1, 0 +; GFX8-NEXT: v_readfirstlane_b32 s40, v0 +; GFX8-NEXT: s_add_u32 s24, s39, s24 +; GFX8-NEXT: s_addc_u32 s22, s40, s22 +; GFX8-NEXT: s_cselect_b32 s39, 1, 0 +; GFX8-NEXT: s_cmp_lg_u32 s31, 0 +; GFX8-NEXT: s_addc_u32 s30, s30, 0 +; GFX8-NEXT: s_cmp_lg_u32 s33, 0 +; GFX8-NEXT: s_addc_u32 s30, s30, 0 +; GFX8-NEXT: s_cmp_lg_u32 s34, 0 +; GFX8-NEXT: s_addc_u32 s30, s30, 0 +; GFX8-NEXT: s_cmp_lg_u32 s21, 0 +; GFX8-NEXT: s_addc_u32 s21, s30, s24 +; GFX8-NEXT: s_cselect_b32 s24, 1, 0 +; GFX8-NEXT: s_cmp_lg_u32 s27, 0 +; GFX8-NEXT: s_addc_u32 s26, s26, 0 +; GFX8-NEXT: s_cmp_lg_u32 s28, 0 +; GFX8-NEXT: s_addc_u32 s26, s26, 0 +; GFX8-NEXT: s_cmp_lg_u32 s29, 0 +; GFX8-NEXT: s_addc_u32 s26, s26, 0 +; GFX8-NEXT: s_cmp_lg_u32 s24, 0 +; GFX8-NEXT: s_addc_u32 s22, s26, s22 +; GFX8-NEXT: s_mul_i32 s16, s16, s15 +; GFX8-NEXT: s_addc_u32 s15, s23, s16 +; GFX8-NEXT: s_mul_i32 s1, s1, s14 +; GFX8-NEXT: s_cmp_lg_u32 s39, 0 +; GFX8-NEXT: s_addc_u32 s1, s15, s1 +; GFX8-NEXT: s_mul_i32 s2, s2, s13 +; GFX8-NEXT: s_cmp_lg_u32 s38, 0 +; GFX8-NEXT: s_addc_u32 s1, s1, s2 +; GFX8-NEXT: s_mul_i32 s3, s3, s12 +; GFX8-NEXT: s_cmp_lg_u32 s37, 0 +; GFX8-NEXT: s_addc_u32 s1, s1, s3 ; GFX8-NEXT: s_mul_i32 s4, s4, s11 -; GFX8-NEXT: s_mul_i32 s11, s3, s12 -; GFX8-NEXT: s_mul_i32 s12, s2, s13 -; GFX8-NEXT: s_mul_i32 s13, s1, s14 -; GFX8-NEXT: v_mul_hi_u32 v11, s2, v12 -; GFX8-NEXT: v_mul_hi_u32 v12, s1, v15 -; GFX8-NEXT: s_add_i32 s1, s7, s17 -; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX8-NEXT: s_add_i32 s1, s1, s5 -; GFX8-NEXT: v_add_u32_e32 v6, vcc, v10, v6 -; GFX8-NEXT: v_mov_b32_e32 v10, s6 -; GFX8-NEXT: s_add_i32 s1, s1, s4 -; GFX8-NEXT: v_mul_hi_u32 v10, v10, s8 -; GFX8-NEXT: s_add_i32 s1, s1, s11 -; GFX8-NEXT: v_mul_hi_u32 v8, v8, s9 -; GFX8-NEXT: s_add_i32 s1, s1, s12 -; GFX8-NEXT: s_mul_i32 s15, s16, s15 -; GFX8-NEXT: v_mul_hi_u32 v7, v7, s10 -; GFX8-NEXT: s_add_i32 s1, s1, s13 -; GFX8-NEXT: v_mul_hi_u32 v9, s3, v9 -; GFX8-NEXT: s_add_i32 s1, s1, s15 -; GFX8-NEXT: v_add_u32_e32 v10, vcc, s1, v10 -; GFX8-NEXT: v_mov_b32_e32 v13, s14 -; GFX8-NEXT: v_add_u32_e32 v8, vcc, v10, v8 -; GFX8-NEXT: v_mul_hi_u32 v13, s16, v13 -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v8, v7 -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v9 -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v11 -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v12 -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v13 -; GFX8-NEXT: v_add_u32_e32 v6, vcc, v7, v6 +; GFX8-NEXT: s_cmp_lg_u32 s36, 0 +; GFX8-NEXT: s_addc_u32 s1, s1, s4 +; GFX8-NEXT: s_mul_i32 s5, s5, s10 +; GFX8-NEXT: s_cmp_lg_u32 s35, 0 +; GFX8-NEXT: s_addc_u32 s1, s1, s5 +; GFX8-NEXT: s_mul_i32 s6, s6, s9 +; GFX8-NEXT: s_cmp_lg_u32 s25, 0 +; GFX8-NEXT: s_addc_u32 s1, s1, s6 +; GFX8-NEXT: s_mul_i32 s7, s7, s8 ; GFX8-NEXT: s_mul_i32 s0, s0, s8 -; GFX8-NEXT: v_readfirstlane_b32 s1, v0 -; GFX8-NEXT: v_readfirstlane_b32 s2, v1 -; GFX8-NEXT: v_readfirstlane_b32 s3, v2 -; GFX8-NEXT: v_readfirstlane_b32 s4, v3 -; GFX8-NEXT: v_readfirstlane_b32 s5, v4 -; GFX8-NEXT: v_readfirstlane_b32 s6, v5 -; GFX8-NEXT: v_readfirstlane_b32 s7, v6 +; GFX8-NEXT: s_add_u32 s7, s7, s1 +; GFX8-NEXT: s_mov_b32 s1, s17 +; GFX8-NEXT: s_mov_b32 s2, s18 +; GFX8-NEXT: s_mov_b32 s3, s19 +; GFX8-NEXT: s_mov_b32 s4, s20 +; GFX8-NEXT: s_mov_b32 s5, s21 +; GFX8-NEXT: s_mov_b32 s6, s22 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_mul_i256: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s16, s0 -; GFX9-NEXT: s_mul_i32 s17, s1, s8 -; GFX9-NEXT: s_mul_i32 s18, s16, s9 -; GFX9-NEXT: s_mul_hi_u32 s19, s16, s8 -; GFX9-NEXT: s_add_u32 s17, s17, s18 -; GFX9-NEXT: s_cselect_b32 s18, 1, 0 -; GFX9-NEXT: s_add_u32 s17, s17, s19 -; GFX9-NEXT: s_cselect_b32 s19, 1, 0 -; GFX9-NEXT: s_add_i32 s18, s18, s19 -; GFX9-NEXT: s_mul_i32 s19, s2, s8 +; GFX9-NEXT: s_mul_i32 s18, s0, s10 ; GFX9-NEXT: s_mul_i32 s20, s1, s9 -; GFX9-NEXT: s_mul_i32 s21, s16, s10 -; GFX9-NEXT: s_add_u32 s19, s19, s20 +; GFX9-NEXT: s_mul_hi_u32 s19, s0, s10 +; GFX9-NEXT: s_mul_hi_u32 s21, s1, s9 +; GFX9-NEXT: s_add_u32 s18, s20, s18 +; GFX9-NEXT: s_addc_u32 s19, s21, s19 +; GFX9-NEXT: s_mul_i32 s21, s2, s8 ; GFX9-NEXT: s_cselect_b32 s20, 1, 0 -; GFX9-NEXT: s_add_u32 s19, s19, s21 +; GFX9-NEXT: s_mul_hi_u32 s22, s2, s8 +; GFX9-NEXT: s_add_u32 s18, s21, s18 +; GFX9-NEXT: s_mul_hi_u32 s17, s0, s8 +; GFX9-NEXT: s_addc_u32 s19, s22, s19 +; GFX9-NEXT: s_mul_i32 s22, s0, s9 ; GFX9-NEXT: s_cselect_b32 s21, 1, 0 -; GFX9-NEXT: s_mul_hi_u32 s22, s1, s8 -; GFX9-NEXT: s_add_i32 s20, s20, s21 -; GFX9-NEXT: s_add_u32 s19, s19, s22 -; GFX9-NEXT: s_cselect_b32 s21, 1, 0 -; GFX9-NEXT: s_mul_hi_u32 s23, s16, s9 -; GFX9-NEXT: s_add_i32 s20, s20, s21 -; GFX9-NEXT: s_add_u32 s19, s19, s23 -; GFX9-NEXT: s_cselect_b32 s21, 1, 0 -; GFX9-NEXT: s_add_i32 s20, s20, s21 -; GFX9-NEXT: s_add_u32 s18, s19, s18 -; GFX9-NEXT: s_cselect_b32 s19, 1, 0 -; GFX9-NEXT: s_add_i32 s20, s20, s19 -; GFX9-NEXT: s_mul_i32 s19, s3, s8 -; GFX9-NEXT: s_mul_i32 s21, s2, s9 -; GFX9-NEXT: s_mul_i32 s22, s1, s10 -; GFX9-NEXT: s_add_u32 s19, s19, s21 -; GFX9-NEXT: s_cselect_b32 s21, 1, 0 -; GFX9-NEXT: s_add_u32 s19, s19, s22 -; GFX9-NEXT: s_cselect_b32 s22, 1, 0 -; GFX9-NEXT: s_mul_i32 s23, s16, s11 -; GFX9-NEXT: s_add_i32 s21, s21, s22 -; GFX9-NEXT: s_add_u32 s19, s19, s23 -; GFX9-NEXT: s_cselect_b32 s22, 1, 0 -; GFX9-NEXT: s_mul_hi_u32 s24, s2, s8 -; GFX9-NEXT: s_add_i32 s21, s21, s22 -; GFX9-NEXT: s_add_u32 s19, s19, s24 +; GFX9-NEXT: s_mul_hi_u32 s23, s0, s9 +; GFX9-NEXT: s_add_u32 s17, s22, s17 +; GFX9-NEXT: s_addc_u32 s18, s23, s18 +; GFX9-NEXT: s_mul_i32 s23, s1, s8 ; GFX9-NEXT: s_cselect_b32 s22, 1, 0 -; GFX9-NEXT: s_mul_hi_u32 s25, s1, s9 -; GFX9-NEXT: s_add_i32 s21, s21, s22 -; GFX9-NEXT: s_add_u32 s19, s19, s25 -; GFX9-NEXT: s_cselect_b32 s22, 1, 0 -; GFX9-NEXT: s_mul_hi_u32 s26, s16, s10 -; GFX9-NEXT: s_add_i32 s21, s21, s22 -; GFX9-NEXT: s_add_u32 s19, s19, s26 -; GFX9-NEXT: s_cselect_b32 s22, 1, 0 -; GFX9-NEXT: s_add_i32 s21, s21, s22 -; GFX9-NEXT: s_add_u32 s19, s19, s20 -; GFX9-NEXT: s_cselect_b32 s20, 1, 0 -; GFX9-NEXT: s_add_i32 s21, s21, s20 -; GFX9-NEXT: s_mul_i32 s20, s4, s8 -; GFX9-NEXT: s_mul_i32 s22, s3, s9 -; GFX9-NEXT: s_mul_i32 s23, s2, s10 -; GFX9-NEXT: s_add_u32 s20, s20, s22 -; GFX9-NEXT: s_cselect_b32 s22, 1, 0 -; GFX9-NEXT: s_add_u32 s20, s20, s23 -; GFX9-NEXT: s_cselect_b32 s23, 1, 0 -; GFX9-NEXT: s_mul_i32 s24, s1, s11 -; GFX9-NEXT: s_add_i32 s22, s22, s23 -; GFX9-NEXT: s_add_u32 s20, s20, s24 -; GFX9-NEXT: s_cselect_b32 s23, 1, 0 -; GFX9-NEXT: s_mul_i32 s25, s16, s12 -; GFX9-NEXT: s_add_i32 s22, s22, s23 -; GFX9-NEXT: s_add_u32 s20, s20, s25 -; GFX9-NEXT: s_cselect_b32 s23, 1, 0 -; GFX9-NEXT: s_mul_hi_u32 s26, s3, s8 -; GFX9-NEXT: s_add_i32 s22, s22, s23 -; GFX9-NEXT: s_add_u32 s20, s20, s26 -; GFX9-NEXT: s_cselect_b32 s23, 1, 0 -; GFX9-NEXT: s_mul_hi_u32 s27, s2, s9 -; GFX9-NEXT: s_add_i32 s22, s22, s23 -; GFX9-NEXT: s_add_u32 s20, s20, s27 -; GFX9-NEXT: s_cselect_b32 s23, 1, 0 -; GFX9-NEXT: s_mul_hi_u32 s28, s1, s10 -; GFX9-NEXT: s_add_i32 s22, s22, s23 -; GFX9-NEXT: s_add_u32 s20, s20, s28 -; GFX9-NEXT: s_cselect_b32 s23, 1, 0 -; GFX9-NEXT: s_mul_hi_u32 s29, s16, s11 -; GFX9-NEXT: s_add_i32 s22, s22, s23 -; GFX9-NEXT: s_add_u32 s20, s20, s29 -; GFX9-NEXT: s_cselect_b32 s23, 1, 0 -; GFX9-NEXT: s_add_i32 s22, s22, s23 -; GFX9-NEXT: s_add_u32 s20, s20, s21 -; GFX9-NEXT: s_cselect_b32 s21, 1, 0 -; GFX9-NEXT: s_add_i32 s22, s22, s21 -; GFX9-NEXT: s_mul_i32 s21, s5, s8 -; GFX9-NEXT: s_mul_i32 s23, s4, s9 -; GFX9-NEXT: s_mul_i32 s24, s3, s10 -; GFX9-NEXT: s_add_u32 s21, s21, s23 +; GFX9-NEXT: s_mul_hi_u32 s24, s1, s8 +; GFX9-NEXT: s_add_u32 s17, s23, s17 +; GFX9-NEXT: s_addc_u32 s18, s24, s18 +; GFX9-NEXT: s_mul_i32 s24, s0, s12 +; GFX9-NEXT: s_mul_i32 s26, s1, s11 ; GFX9-NEXT: s_cselect_b32 s23, 1, 0 -; GFX9-NEXT: s_add_u32 s21, s21, s24 -; GFX9-NEXT: s_cselect_b32 s24, 1, 0 -; GFX9-NEXT: s_mul_i32 s25, s2, s11 -; GFX9-NEXT: s_add_i32 s23, s23, s24 -; GFX9-NEXT: s_add_u32 s21, s21, s25 -; GFX9-NEXT: s_cselect_b32 s24, 1, 0 -; GFX9-NEXT: s_mul_i32 s26, s1, s12 -; GFX9-NEXT: s_add_i32 s23, s23, s24 -; GFX9-NEXT: s_add_u32 s21, s21, s26 -; GFX9-NEXT: s_cselect_b32 s24, 1, 0 -; GFX9-NEXT: s_mul_i32 s27, s16, s13 -; GFX9-NEXT: s_add_i32 s23, s23, s24 -; GFX9-NEXT: s_add_u32 s21, s21, s27 -; GFX9-NEXT: s_cselect_b32 s24, 1, 0 -; GFX9-NEXT: s_mul_hi_u32 s28, s4, s8 -; GFX9-NEXT: s_add_i32 s23, s23, s24 -; GFX9-NEXT: s_add_u32 s21, s21, s28 -; GFX9-NEXT: s_cselect_b32 s24, 1, 0 +; GFX9-NEXT: s_mul_hi_u32 s25, s0, s12 +; GFX9-NEXT: s_mul_hi_u32 s27, s1, s11 +; GFX9-NEXT: s_add_u32 s24, s26, s24 +; GFX9-NEXT: s_addc_u32 s25, s27, s25 +; GFX9-NEXT: s_mul_i32 s27, s2, s10 +; GFX9-NEXT: s_cselect_b32 s26, 1, 0 +; GFX9-NEXT: s_mul_hi_u32 s28, s2, s10 +; GFX9-NEXT: s_add_u32 s24, s27, s24 +; GFX9-NEXT: s_addc_u32 s25, s28, s25 +; GFX9-NEXT: s_mul_i32 s28, s3, s9 +; GFX9-NEXT: s_cselect_b32 s27, 1, 0 ; GFX9-NEXT: s_mul_hi_u32 s29, s3, s9 -; GFX9-NEXT: s_add_i32 s23, s23, s24 -; GFX9-NEXT: s_add_u32 s21, s21, s29 -; GFX9-NEXT: s_cselect_b32 s24, 1, 0 -; GFX9-NEXT: s_mul_hi_u32 s30, s2, s10 -; GFX9-NEXT: s_add_i32 s23, s23, s24 -; GFX9-NEXT: s_add_u32 s21, s21, s30 -; GFX9-NEXT: s_cselect_b32 s24, 1, 0 -; GFX9-NEXT: s_mul_hi_u32 s31, s1, s11 -; GFX9-NEXT: s_add_i32 s23, s23, s24 -; GFX9-NEXT: s_add_u32 s21, s21, s31 -; GFX9-NEXT: s_cselect_b32 s24, 1, 0 -; GFX9-NEXT: s_mul_hi_u32 s33, s16, s12 -; GFX9-NEXT: s_add_i32 s23, s23, s24 -; GFX9-NEXT: s_add_u32 s21, s21, s33 -; GFX9-NEXT: s_cselect_b32 s24, 1, 0 -; GFX9-NEXT: s_add_i32 s23, s23, s24 -; GFX9-NEXT: s_add_u32 s21, s21, s22 +; GFX9-NEXT: s_add_u32 s24, s28, s24 +; GFX9-NEXT: s_addc_u32 s25, s29, s25 +; GFX9-NEXT: s_mul_i32 s29, s4, s8 +; GFX9-NEXT: s_cselect_b32 s28, 1, 0 +; GFX9-NEXT: s_mul_hi_u32 s30, s4, s8 +; GFX9-NEXT: s_add_u32 s24, s29, s24 +; GFX9-NEXT: s_addc_u32 s25, s30, s25 +; GFX9-NEXT: s_mul_i32 s30, s0, s11 +; GFX9-NEXT: s_cselect_b32 s29, 1, 0 +; GFX9-NEXT: s_mul_hi_u32 s31, s0, s11 +; GFX9-NEXT: s_add_u32 s19, s30, s19 +; GFX9-NEXT: s_addc_u32 s24, s31, s24 +; GFX9-NEXT: s_mul_i32 s31, s1, s10 +; GFX9-NEXT: s_cselect_b32 s30, 1, 0 +; GFX9-NEXT: s_mul_hi_u32 s33, s1, s10 +; GFX9-NEXT: s_add_u32 s19, s31, s19 +; GFX9-NEXT: s_addc_u32 s24, s33, s24 +; GFX9-NEXT: s_mul_i32 s33, s2, s9 +; GFX9-NEXT: s_cselect_b32 s31, 1, 0 +; GFX9-NEXT: s_mul_hi_u32 s34, s2, s9 +; GFX9-NEXT: s_add_u32 s19, s33, s19 +; GFX9-NEXT: s_addc_u32 s24, s34, s24 +; GFX9-NEXT: s_mul_i32 s34, s3, s8 +; GFX9-NEXT: s_cselect_b32 s33, 1, 0 +; GFX9-NEXT: s_mul_hi_u32 s35, s3, s8 +; GFX9-NEXT: s_add_u32 s19, s34, s19 +; GFX9-NEXT: s_addc_u32 s24, s35, s24 +; GFX9-NEXT: s_cselect_b32 s34, 1, 0 +; GFX9-NEXT: s_cmp_lg_u32 s23, 0 +; GFX9-NEXT: s_addc_u32 s19, s22, s19 ; GFX9-NEXT: s_cselect_b32 s22, 1, 0 -; GFX9-NEXT: s_add_i32 s23, s23, s22 -; GFX9-NEXT: s_mul_i32 s22, s6, s8 +; GFX9-NEXT: s_cmp_lg_u32 s21, 0 +; GFX9-NEXT: s_addc_u32 s20, s20, 0 +; GFX9-NEXT: s_cmp_lg_u32 s22, 0 +; GFX9-NEXT: s_addc_u32 s20, s20, s24 +; GFX9-NEXT: s_mul_i32 s22, s0, s14 +; GFX9-NEXT: s_mul_i32 s24, s1, s13 +; GFX9-NEXT: s_cselect_b32 s21, 1, 0 +; GFX9-NEXT: s_mul_hi_u32 s23, s0, s14 +; GFX9-NEXT: s_mul_hi_u32 s35, s1, s13 +; GFX9-NEXT: s_add_u32 s22, s24, s22 +; GFX9-NEXT: s_addc_u32 s23, s35, s23 +; GFX9-NEXT: s_mul_i32 s24, s2, s12 +; GFX9-NEXT: s_mul_hi_u32 s35, s2, s12 +; GFX9-NEXT: s_add_u32 s22, s24, s22 +; GFX9-NEXT: s_addc_u32 s23, s35, s23 +; GFX9-NEXT: s_mul_i32 s24, s3, s11 +; GFX9-NEXT: s_mul_hi_u32 s35, s3, s11 +; GFX9-NEXT: s_add_u32 s22, s24, s22 +; GFX9-NEXT: s_addc_u32 s23, s35, s23 +; GFX9-NEXT: s_mul_i32 s24, s4, s10 +; GFX9-NEXT: s_mul_hi_u32 s35, s4, s10 +; GFX9-NEXT: s_add_u32 s22, s24, s22 +; GFX9-NEXT: s_addc_u32 s23, s35, s23 ; GFX9-NEXT: s_mul_i32 s24, s5, s9 -; GFX9-NEXT: s_mul_i32 s25, s4, s10 -; GFX9-NEXT: s_add_u32 s22, s22, s24 -; GFX9-NEXT: s_cselect_b32 s24, 1, 0 -; GFX9-NEXT: s_add_u32 s22, s22, s25 -; GFX9-NEXT: s_cselect_b32 s25, 1, 0 -; GFX9-NEXT: s_mul_i32 s26, s3, s11 -; GFX9-NEXT: s_add_i32 s24, s24, s25 -; GFX9-NEXT: s_add_u32 s22, s22, s26 -; GFX9-NEXT: s_cselect_b32 s25, 1, 0 -; GFX9-NEXT: s_mul_i32 s27, s2, s12 -; GFX9-NEXT: s_add_i32 s24, s24, s25 -; GFX9-NEXT: s_add_u32 s22, s22, s27 -; GFX9-NEXT: s_cselect_b32 s25, 1, 0 -; GFX9-NEXT: s_mul_i32 s28, s1, s13 -; GFX9-NEXT: s_add_i32 s24, s24, s25 -; GFX9-NEXT: s_add_u32 s22, s22, s28 -; GFX9-NEXT: s_cselect_b32 s25, 1, 0 -; GFX9-NEXT: s_mul_i32 s29, s16, s14 -; GFX9-NEXT: s_add_i32 s24, s24, s25 -; GFX9-NEXT: s_add_u32 s22, s22, s29 -; GFX9-NEXT: s_cselect_b32 s25, 1, 0 -; GFX9-NEXT: s_mul_hi_u32 s30, s5, s8 -; GFX9-NEXT: s_add_i32 s24, s24, s25 -; GFX9-NEXT: s_add_u32 s22, s22, s30 -; GFX9-NEXT: s_cselect_b32 s25, 1, 0 -; GFX9-NEXT: s_mul_hi_u32 s31, s4, s9 -; GFX9-NEXT: s_add_i32 s24, s24, s25 -; GFX9-NEXT: s_add_u32 s22, s22, s31 -; GFX9-NEXT: s_cselect_b32 s25, 1, 0 -; GFX9-NEXT: s_mul_hi_u32 s33, s3, s10 -; GFX9-NEXT: s_add_i32 s24, s24, s25 -; GFX9-NEXT: s_add_u32 s22, s22, s33 -; GFX9-NEXT: s_cselect_b32 s25, 1, 0 -; GFX9-NEXT: s_mul_hi_u32 s34, s2, s11 -; GFX9-NEXT: s_add_i32 s24, s24, s25 -; GFX9-NEXT: s_add_u32 s22, s22, s34 -; GFX9-NEXT: s_cselect_b32 s25, 1, 0 -; GFX9-NEXT: s_mul_hi_u32 s35, s1, s12 -; GFX9-NEXT: s_add_i32 s24, s24, s25 -; GFX9-NEXT: s_add_u32 s22, s22, s35 -; GFX9-NEXT: s_cselect_b32 s25, 1, 0 -; GFX9-NEXT: s_mul_hi_u32 s36, s16, s13 -; GFX9-NEXT: s_add_i32 s24, s24, s25 -; GFX9-NEXT: s_add_u32 s22, s22, s36 +; GFX9-NEXT: s_mul_hi_u32 s35, s5, s9 +; GFX9-NEXT: s_add_u32 s22, s24, s22 +; GFX9-NEXT: s_addc_u32 s23, s35, s23 +; GFX9-NEXT: s_mul_i32 s24, s6, s8 +; GFX9-NEXT: s_mul_hi_u32 s35, s6, s8 +; GFX9-NEXT: s_add_u32 s22, s24, s22 +; GFX9-NEXT: s_addc_u32 s23, s35, s23 +; GFX9-NEXT: s_mul_i32 s24, s0, s13 +; GFX9-NEXT: s_mul_hi_u32 s35, s0, s13 +; GFX9-NEXT: s_add_u32 s24, s24, s25 +; GFX9-NEXT: s_addc_u32 s22, s35, s22 +; GFX9-NEXT: s_mul_i32 s35, s1, s12 ; GFX9-NEXT: s_cselect_b32 s25, 1, 0 -; GFX9-NEXT: s_add_i32 s24, s24, s25 -; GFX9-NEXT: s_add_u32 s22, s22, s23 -; GFX9-NEXT: s_cselect_b32 s23, 1, 0 -; GFX9-NEXT: s_add_i32 s24, s24, s23 +; GFX9-NEXT: s_mul_hi_u32 s36, s1, s12 +; GFX9-NEXT: s_add_u32 s24, s35, s24 +; GFX9-NEXT: s_addc_u32 s22, s36, s22 +; GFX9-NEXT: s_mul_i32 s36, s2, s11 +; GFX9-NEXT: s_cselect_b32 s35, 1, 0 +; GFX9-NEXT: s_mul_hi_u32 s37, s2, s11 +; GFX9-NEXT: s_add_u32 s24, s36, s24 +; GFX9-NEXT: s_addc_u32 s22, s37, s22 +; GFX9-NEXT: s_mul_i32 s37, s3, s10 +; GFX9-NEXT: s_cselect_b32 s36, 1, 0 +; GFX9-NEXT: s_mul_hi_u32 s38, s3, s10 +; GFX9-NEXT: s_add_u32 s24, s37, s24 +; GFX9-NEXT: s_addc_u32 s22, s38, s22 +; GFX9-NEXT: s_mul_i32 s38, s4, s9 +; GFX9-NEXT: s_cselect_b32 s37, 1, 0 +; GFX9-NEXT: s_mul_hi_u32 s39, s4, s9 +; GFX9-NEXT: s_add_u32 s24, s38, s24 +; GFX9-NEXT: s_addc_u32 s22, s39, s22 +; GFX9-NEXT: s_mul_i32 s39, s5, s8 +; GFX9-NEXT: s_cselect_b32 s38, 1, 0 +; GFX9-NEXT: s_mul_hi_u32 s40, s5, s8 +; GFX9-NEXT: s_add_u32 s24, s39, s24 +; GFX9-NEXT: s_addc_u32 s22, s40, s22 +; GFX9-NEXT: s_cselect_b32 s39, 1, 0 +; GFX9-NEXT: s_cmp_lg_u32 s31, 0 +; GFX9-NEXT: s_addc_u32 s30, s30, 0 +; GFX9-NEXT: s_cmp_lg_u32 s33, 0 +; GFX9-NEXT: s_addc_u32 s30, s30, 0 +; GFX9-NEXT: s_cmp_lg_u32 s34, 0 +; GFX9-NEXT: s_addc_u32 s30, s30, 0 +; GFX9-NEXT: s_cmp_lg_u32 s21, 0 +; GFX9-NEXT: s_addc_u32 s21, s30, s24 +; GFX9-NEXT: s_cselect_b32 s24, 1, 0 +; GFX9-NEXT: s_cmp_lg_u32 s27, 0 +; GFX9-NEXT: s_addc_u32 s26, s26, 0 +; GFX9-NEXT: s_cmp_lg_u32 s28, 0 +; GFX9-NEXT: s_addc_u32 s26, s26, 0 +; GFX9-NEXT: s_cmp_lg_u32 s29, 0 +; GFX9-NEXT: s_addc_u32 s26, s26, 0 +; GFX9-NEXT: s_cmp_lg_u32 s24, 0 +; GFX9-NEXT: s_mul_i32 s16, s0, s8 +; GFX9-NEXT: s_addc_u32 s22, s26, s22 +; GFX9-NEXT: s_mul_i32 s0, s0, s15 +; GFX9-NEXT: s_addc_u32 s0, s23, s0 +; GFX9-NEXT: s_mul_i32 s1, s1, s14 +; GFX9-NEXT: s_cmp_lg_u32 s39, 0 +; GFX9-NEXT: s_addc_u32 s0, s0, s1 +; GFX9-NEXT: s_mul_i32 s2, s2, s13 +; GFX9-NEXT: s_cmp_lg_u32 s38, 0 +; GFX9-NEXT: s_addc_u32 s0, s0, s2 +; GFX9-NEXT: s_mul_i32 s3, s3, s12 +; GFX9-NEXT: s_cmp_lg_u32 s37, 0 +; GFX9-NEXT: s_addc_u32 s0, s0, s3 +; GFX9-NEXT: s_mul_i32 s4, s4, s11 +; GFX9-NEXT: s_cmp_lg_u32 s36, 0 +; GFX9-NEXT: s_addc_u32 s0, s0, s4 +; GFX9-NEXT: s_mul_i32 s5, s5, s10 +; GFX9-NEXT: s_cmp_lg_u32 s35, 0 +; GFX9-NEXT: s_addc_u32 s0, s0, s5 +; GFX9-NEXT: s_mul_i32 s6, s6, s9 +; GFX9-NEXT: s_cmp_lg_u32 s25, 0 +; GFX9-NEXT: s_addc_u32 s0, s0, s6 ; GFX9-NEXT: s_mul_i32 s7, s7, s8 -; GFX9-NEXT: s_mul_i32 s23, s6, s9 -; GFX9-NEXT: s_mul_i32 s25, s5, s10 -; GFX9-NEXT: s_add_i32 s7, s7, s23 -; GFX9-NEXT: s_mul_i32 s26, s4, s11 -; GFX9-NEXT: s_add_i32 s7, s7, s25 -; GFX9-NEXT: s_mul_i32 s27, s3, s12 -; GFX9-NEXT: s_add_i32 s7, s7, s26 -; GFX9-NEXT: s_mul_i32 s28, s2, s13 -; GFX9-NEXT: s_add_i32 s7, s7, s27 -; GFX9-NEXT: s_mul_i32 s29, s1, s14 -; GFX9-NEXT: s_add_i32 s7, s7, s28 -; GFX9-NEXT: s_mul_i32 s15, s16, s15 -; GFX9-NEXT: s_add_i32 s7, s7, s29 -; GFX9-NEXT: s_mul_hi_u32 s6, s6, s8 -; GFX9-NEXT: s_add_i32 s7, s7, s15 -; GFX9-NEXT: s_mul_hi_u32 s5, s5, s9 -; GFX9-NEXT: s_add_i32 s6, s7, s6 -; GFX9-NEXT: s_mul_hi_u32 s4, s4, s10 -; GFX9-NEXT: s_add_i32 s5, s6, s5 -; GFX9-NEXT: s_mul_hi_u32 s3, s3, s11 -; GFX9-NEXT: s_add_i32 s4, s5, s4 -; GFX9-NEXT: s_mul_hi_u32 s2, s2, s12 -; GFX9-NEXT: s_add_i32 s3, s4, s3 -; GFX9-NEXT: s_mul_hi_u32 s1, s1, s13 -; GFX9-NEXT: s_add_i32 s2, s3, s2 -; GFX9-NEXT: s_mul_i32 s0, s0, s8 -; GFX9-NEXT: s_mul_hi_u32 s8, s16, s14 -; GFX9-NEXT: s_add_i32 s1, s2, s1 -; GFX9-NEXT: s_add_i32 s1, s1, s8 -; GFX9-NEXT: s_add_i32 s7, s1, s24 +; GFX9-NEXT: s_add_u32 s7, s7, s0 +; GFX9-NEXT: s_mov_b32 s0, s16 ; GFX9-NEXT: s_mov_b32 s1, s17 ; GFX9-NEXT: s_mov_b32 s2, s18 ; GFX9-NEXT: s_mov_b32 s3, s19 @@ -1753,226 +1423,181 @@ ; ; GFX10-LABEL: s_mul_i256: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_mul_i32 s16, s1, s8 -; GFX10-NEXT: s_mul_i32 s17, s0, s9 -; GFX10-NEXT: s_mul_hi_u32 s18, s0, s8 -; GFX10-NEXT: s_add_u32 s16, s16, s17 -; GFX10-NEXT: s_cselect_b32 s17, 1, 0 -; GFX10-NEXT: s_add_u32 s16, s16, s18 -; GFX10-NEXT: s_cselect_b32 s18, 1, 0 +; GFX10-NEXT: s_mul_i32 s17, s0, s10 ; GFX10-NEXT: s_mul_i32 s19, s1, s9 -; GFX10-NEXT: s_add_i32 s17, s17, s18 -; GFX10-NEXT: s_mul_i32 s18, s2, s8 -; GFX10-NEXT: s_mul_i32 s20, s0, s10 -; GFX10-NEXT: s_add_u32 s18, s18, s19 +; GFX10-NEXT: s_mul_hi_u32 s18, s0, s10 +; GFX10-NEXT: s_mul_hi_u32 s20, s1, s9 +; GFX10-NEXT: s_add_u32 s17, s19, s17 +; GFX10-NEXT: s_addc_u32 s18, s20, s18 +; GFX10-NEXT: s_mul_i32 s20, s2, s8 +; GFX10-NEXT: s_mul_hi_u32 s21, s2, s8 ; GFX10-NEXT: s_cselect_b32 s19, 1, 0 -; GFX10-NEXT: s_add_u32 s18, s18, s20 -; GFX10-NEXT: s_cselect_b32 s20, 1, 0 -; GFX10-NEXT: s_mul_hi_u32 s21, s1, s8 -; GFX10-NEXT: s_add_i32 s19, s19, s20 -; GFX10-NEXT: s_add_u32 s18, s18, s21 -; GFX10-NEXT: s_cselect_b32 s20, 1, 0 +; GFX10-NEXT: s_add_u32 s17, s20, s17 +; GFX10-NEXT: s_mul_hi_u32 s16, s0, s8 +; GFX10-NEXT: s_addc_u32 s18, s21, s18 +; GFX10-NEXT: s_mul_i32 s21, s0, s9 ; GFX10-NEXT: s_mul_hi_u32 s22, s0, s9 -; GFX10-NEXT: s_add_i32 s19, s19, s20 -; GFX10-NEXT: s_add_u32 s18, s18, s22 -; GFX10-NEXT: s_cselect_b32 s20, 1, 0 -; GFX10-NEXT: s_mul_i32 s21, s1, s10 -; GFX10-NEXT: s_add_i32 s19, s19, s20 -; GFX10-NEXT: s_add_u32 s17, s18, s17 -; GFX10-NEXT: s_cselect_b32 s18, 1, 0 -; GFX10-NEXT: s_mul_i32 s20, s2, s9 -; GFX10-NEXT: s_add_i32 s19, s19, s18 -; GFX10-NEXT: s_mul_i32 s18, s3, s8 -; GFX10-NEXT: s_mul_i32 s22, s0, s11 -; GFX10-NEXT: s_add_u32 s18, s18, s20 ; GFX10-NEXT: s_cselect_b32 s20, 1, 0 -; GFX10-NEXT: s_add_u32 s18, s18, s21 -; GFX10-NEXT: s_cselect_b32 s21, 1, 0 -; GFX10-NEXT: s_mul_hi_u32 s23, s2, s8 -; GFX10-NEXT: s_add_i32 s20, s20, s21 -; GFX10-NEXT: s_add_u32 s18, s18, s22 -; GFX10-NEXT: s_cselect_b32 s21, 1, 0 -; GFX10-NEXT: s_mul_hi_u32 s24, s1, s9 -; GFX10-NEXT: s_add_i32 s20, s20, s21 -; GFX10-NEXT: s_add_u32 s18, s18, s23 -; GFX10-NEXT: s_cselect_b32 s21, 1, 0 -; GFX10-NEXT: s_mul_hi_u32 s25, s0, s10 -; GFX10-NEXT: s_add_i32 s20, s20, s21 -; GFX10-NEXT: s_add_u32 s18, s18, s24 -; GFX10-NEXT: s_cselect_b32 s21, 1, 0 -; GFX10-NEXT: s_mul_i32 s22, s2, s10 -; GFX10-NEXT: s_add_i32 s20, s20, s21 -; GFX10-NEXT: s_add_u32 s18, s18, s25 -; GFX10-NEXT: s_cselect_b32 s21, 1, 0 -; GFX10-NEXT: s_mul_i32 s23, s1, s11 -; GFX10-NEXT: s_add_i32 s20, s20, s21 -; GFX10-NEXT: s_add_u32 s18, s18, s19 -; GFX10-NEXT: s_cselect_b32 s19, 1, 0 -; GFX10-NEXT: s_mul_i32 s21, s3, s9 -; GFX10-NEXT: s_add_i32 s20, s20, s19 -; GFX10-NEXT: s_mul_i32 s19, s4, s8 -; GFX10-NEXT: s_mul_i32 s24, s0, s12 -; GFX10-NEXT: s_add_u32 s19, s19, s21 +; GFX10-NEXT: s_add_u32 s16, s21, s16 +; GFX10-NEXT: s_addc_u32 s17, s22, s17 +; GFX10-NEXT: s_mul_i32 s22, s1, s8 +; GFX10-NEXT: s_mul_hi_u32 s23, s1, s8 ; GFX10-NEXT: s_cselect_b32 s21, 1, 0 -; GFX10-NEXT: s_add_u32 s19, s19, s22 -; GFX10-NEXT: s_cselect_b32 s22, 1, 0 -; GFX10-NEXT: s_mul_hi_u32 s25, s3, s8 -; GFX10-NEXT: s_add_i32 s21, s21, s22 -; GFX10-NEXT: s_add_u32 s19, s19, s23 -; GFX10-NEXT: s_cselect_b32 s22, 1, 0 -; GFX10-NEXT: s_mul_hi_u32 s26, s2, s9 -; GFX10-NEXT: s_add_i32 s21, s21, s22 -; GFX10-NEXT: s_add_u32 s19, s19, s24 -; GFX10-NEXT: s_cselect_b32 s22, 1, 0 -; GFX10-NEXT: s_mul_hi_u32 s27, s1, s10 -; GFX10-NEXT: s_add_i32 s21, s21, s22 -; GFX10-NEXT: s_add_u32 s19, s19, s25 -; GFX10-NEXT: s_cselect_b32 s22, 1, 0 -; GFX10-NEXT: s_mul_hi_u32 s28, s0, s11 -; GFX10-NEXT: s_add_i32 s21, s21, s22 -; GFX10-NEXT: s_add_u32 s19, s19, s26 -; GFX10-NEXT: s_cselect_b32 s22, 1, 0 -; GFX10-NEXT: s_mul_i32 s23, s3, s10 -; GFX10-NEXT: s_add_i32 s21, s21, s22 -; GFX10-NEXT: s_add_u32 s19, s19, s27 +; GFX10-NEXT: s_add_u32 s16, s22, s16 +; GFX10-NEXT: s_addc_u32 s17, s23, s17 +; GFX10-NEXT: s_mul_i32 s23, s0, s12 +; GFX10-NEXT: s_mul_i32 s25, s1, s11 +; GFX10-NEXT: s_mul_hi_u32 s24, s0, s12 +; GFX10-NEXT: s_mul_hi_u32 s26, s1, s11 ; GFX10-NEXT: s_cselect_b32 s22, 1, 0 -; GFX10-NEXT: s_mul_i32 s24, s2, s11 -; GFX10-NEXT: s_add_i32 s21, s21, s22 -; GFX10-NEXT: s_add_u32 s19, s19, s28 -; GFX10-NEXT: s_cselect_b32 s22, 1, 0 -; GFX10-NEXT: s_mul_i32 s25, s1, s12 -; GFX10-NEXT: s_add_i32 s21, s21, s22 -; GFX10-NEXT: s_add_u32 s19, s19, s20 -; GFX10-NEXT: s_cselect_b32 s20, 1, 0 -; GFX10-NEXT: s_mul_i32 s22, s4, s9 -; GFX10-NEXT: s_add_i32 s21, s21, s20 -; GFX10-NEXT: s_mul_i32 s20, s5, s8 -; GFX10-NEXT: s_mul_i32 s26, s0, s13 -; GFX10-NEXT: s_add_u32 s20, s20, s22 -; GFX10-NEXT: s_cselect_b32 s22, 1, 0 -; GFX10-NEXT: s_add_u32 s20, s20, s23 -; GFX10-NEXT: s_cselect_b32 s23, 1, 0 -; GFX10-NEXT: s_mul_hi_u32 s27, s4, s8 -; GFX10-NEXT: s_add_i32 s22, s22, s23 -; GFX10-NEXT: s_add_u32 s20, s20, s24 -; GFX10-NEXT: s_cselect_b32 s23, 1, 0 +; GFX10-NEXT: s_add_u32 s23, s25, s23 +; GFX10-NEXT: s_addc_u32 s24, s26, s24 +; GFX10-NEXT: s_mul_i32 s26, s2, s10 +; GFX10-NEXT: s_mul_hi_u32 s27, s2, s10 +; GFX10-NEXT: s_cselect_b32 s25, 1, 0 +; GFX10-NEXT: s_add_u32 s23, s26, s23 +; GFX10-NEXT: s_addc_u32 s24, s27, s24 +; GFX10-NEXT: s_mul_i32 s27, s3, s9 ; GFX10-NEXT: s_mul_hi_u32 s28, s3, s9 -; GFX10-NEXT: s_add_i32 s22, s22, s23 -; GFX10-NEXT: s_add_u32 s20, s20, s25 -; GFX10-NEXT: s_cselect_b32 s23, 1, 0 -; GFX10-NEXT: s_mul_hi_u32 s29, s2, s10 -; GFX10-NEXT: s_add_i32 s22, s22, s23 -; GFX10-NEXT: s_add_u32 s20, s20, s26 -; GFX10-NEXT: s_cselect_b32 s23, 1, 0 -; GFX10-NEXT: s_mul_hi_u32 s30, s1, s11 -; GFX10-NEXT: s_add_i32 s22, s22, s23 -; GFX10-NEXT: s_add_u32 s20, s20, s27 -; GFX10-NEXT: s_cselect_b32 s23, 1, 0 -; GFX10-NEXT: s_mul_hi_u32 s31, s0, s12 -; GFX10-NEXT: s_add_i32 s22, s22, s23 -; GFX10-NEXT: s_add_u32 s20, s20, s28 -; GFX10-NEXT: s_cselect_b32 s23, 1, 0 -; GFX10-NEXT: s_mul_i32 s24, s4, s10 -; GFX10-NEXT: s_add_i32 s22, s22, s23 -; GFX10-NEXT: s_add_u32 s20, s20, s29 -; GFX10-NEXT: s_cselect_b32 s23, 1, 0 -; GFX10-NEXT: s_mul_i32 s25, s3, s11 -; GFX10-NEXT: s_add_i32 s22, s22, s23 -; GFX10-NEXT: s_add_u32 s20, s20, s30 -; GFX10-NEXT: s_cselect_b32 s23, 1, 0 -; GFX10-NEXT: s_mul_i32 s26, s2, s12 -; GFX10-NEXT: s_add_i32 s22, s22, s23 -; GFX10-NEXT: s_add_u32 s20, s20, s31 -; GFX10-NEXT: s_cselect_b32 s23, 1, 0 -; GFX10-NEXT: s_mul_i32 s27, s1, s13 -; GFX10-NEXT: s_add_i32 s22, s22, s23 -; GFX10-NEXT: s_add_u32 s20, s20, s21 +; GFX10-NEXT: s_cselect_b32 s26, 1, 0 +; GFX10-NEXT: s_add_u32 s23, s27, s23 +; GFX10-NEXT: s_addc_u32 s24, s28, s24 +; GFX10-NEXT: s_mul_i32 s28, s4, s8 +; GFX10-NEXT: s_mul_hi_u32 s29, s4, s8 +; GFX10-NEXT: s_cselect_b32 s27, 1, 0 +; GFX10-NEXT: s_add_u32 s23, s28, s23 +; GFX10-NEXT: s_addc_u32 s24, s29, s24 +; GFX10-NEXT: s_mul_i32 s29, s0, s11 +; GFX10-NEXT: s_mul_hi_u32 s30, s0, s11 +; GFX10-NEXT: s_cselect_b32 s28, 1, 0 +; GFX10-NEXT: s_add_u32 s18, s29, s18 +; GFX10-NEXT: s_addc_u32 s23, s30, s23 +; GFX10-NEXT: s_mul_i32 s30, s1, s10 +; GFX10-NEXT: s_mul_hi_u32 s31, s1, s10 +; GFX10-NEXT: s_cselect_b32 s29, 1, 0 +; GFX10-NEXT: s_add_u32 s18, s30, s18 +; GFX10-NEXT: s_addc_u32 s23, s31, s23 +; GFX10-NEXT: s_mul_i32 s31, s2, s9 +; GFX10-NEXT: s_mul_hi_u32 s33, s2, s9 +; GFX10-NEXT: s_cselect_b32 s30, 1, 0 +; GFX10-NEXT: s_add_u32 s18, s31, s18 +; GFX10-NEXT: s_addc_u32 s23, s33, s23 +; GFX10-NEXT: s_mul_i32 s33, s3, s8 +; GFX10-NEXT: s_mul_hi_u32 s34, s3, s8 +; GFX10-NEXT: s_cselect_b32 s31, 1, 0 +; GFX10-NEXT: s_add_u32 s18, s33, s18 +; GFX10-NEXT: s_addc_u32 s23, s34, s23 +; GFX10-NEXT: s_cselect_b32 s33, 1, 0 +; GFX10-NEXT: s_cmp_lg_u32 s22, 0 +; GFX10-NEXT: s_mul_hi_u32 s22, s0, s14 +; GFX10-NEXT: s_addc_u32 s18, s21, s18 ; GFX10-NEXT: s_cselect_b32 s21, 1, 0 +; GFX10-NEXT: s_cmp_lg_u32 s20, 0 +; GFX10-NEXT: s_mul_hi_u32 s34, s1, s13 +; GFX10-NEXT: s_addc_u32 s19, s19, 0 +; GFX10-NEXT: s_cmp_lg_u32 s21, 0 +; GFX10-NEXT: s_mul_i32 s21, s0, s14 +; GFX10-NEXT: s_addc_u32 s19, s19, s23 +; GFX10-NEXT: s_mul_i32 s23, s1, s13 +; GFX10-NEXT: s_cselect_b32 s20, 1, 0 +; GFX10-NEXT: s_add_u32 s21, s23, s21 +; GFX10-NEXT: s_mul_i32 s23, s2, s12 +; GFX10-NEXT: s_addc_u32 s22, s34, s22 +; GFX10-NEXT: s_mul_hi_u32 s34, s2, s12 +; GFX10-NEXT: s_add_u32 s21, s23, s21 +; GFX10-NEXT: s_mul_i32 s23, s3, s11 +; GFX10-NEXT: s_addc_u32 s22, s34, s22 +; GFX10-NEXT: s_mul_hi_u32 s34, s3, s11 +; GFX10-NEXT: s_add_u32 s21, s23, s21 +; GFX10-NEXT: s_mul_i32 s23, s4, s10 +; GFX10-NEXT: s_addc_u32 s22, s34, s22 +; GFX10-NEXT: s_mul_hi_u32 s34, s4, s10 +; GFX10-NEXT: s_add_u32 s21, s23, s21 ; GFX10-NEXT: s_mul_i32 s23, s5, s9 -; GFX10-NEXT: s_add_i32 s22, s22, s21 -; GFX10-NEXT: s_mul_i32 s21, s6, s8 -; GFX10-NEXT: s_mul_i32 s28, s0, s14 -; GFX10-NEXT: s_add_u32 s21, s21, s23 -; GFX10-NEXT: s_cselect_b32 s23, 1, 0 -; GFX10-NEXT: s_add_u32 s21, s21, s24 -; GFX10-NEXT: s_cselect_b32 s24, 1, 0 -; GFX10-NEXT: s_mul_hi_u32 s29, s5, s8 -; GFX10-NEXT: s_add_i32 s23, s23, s24 -; GFX10-NEXT: s_add_u32 s21, s21, s25 -; GFX10-NEXT: s_cselect_b32 s24, 1, 0 -; GFX10-NEXT: s_mul_hi_u32 s30, s4, s9 -; GFX10-NEXT: s_add_i32 s23, s23, s24 -; GFX10-NEXT: s_add_u32 s21, s21, s26 -; GFX10-NEXT: s_cselect_b32 s24, 1, 0 -; GFX10-NEXT: s_mul_hi_u32 s31, s3, s10 -; GFX10-NEXT: s_add_i32 s23, s23, s24 -; GFX10-NEXT: s_add_u32 s21, s21, s27 -; GFX10-NEXT: s_cselect_b32 s24, 1, 0 -; GFX10-NEXT: s_mul_hi_u32 s33, s2, s11 -; GFX10-NEXT: s_add_i32 s23, s23, s24 -; GFX10-NEXT: s_add_u32 s21, s21, s28 -; GFX10-NEXT: s_cselect_b32 s24, 1, 0 -; GFX10-NEXT: s_mul_hi_u32 s34, s1, s12 -; GFX10-NEXT: s_add_i32 s23, s23, s24 -; GFX10-NEXT: s_add_u32 s21, s21, s29 -; GFX10-NEXT: s_cselect_b32 s24, 1, 0 -; GFX10-NEXT: s_mul_hi_u32 s35, s0, s13 -; GFX10-NEXT: s_add_i32 s23, s23, s24 -; GFX10-NEXT: s_add_u32 s21, s21, s30 +; GFX10-NEXT: s_addc_u32 s22, s34, s22 +; GFX10-NEXT: s_mul_hi_u32 s34, s5, s9 +; GFX10-NEXT: s_add_u32 s21, s23, s21 +; GFX10-NEXT: s_mul_i32 s23, s6, s8 +; GFX10-NEXT: s_addc_u32 s22, s34, s22 +; GFX10-NEXT: s_mul_hi_u32 s34, s6, s8 +; GFX10-NEXT: s_add_u32 s21, s23, s21 +; GFX10-NEXT: s_mul_i32 s23, s0, s13 +; GFX10-NEXT: s_addc_u32 s22, s34, s22 +; GFX10-NEXT: s_mul_hi_u32 s34, s0, s13 +; GFX10-NEXT: s_add_u32 s23, s23, s24 +; GFX10-NEXT: s_addc_u32 s21, s34, s21 +; GFX10-NEXT: s_mul_i32 s34, s1, s12 +; GFX10-NEXT: s_mul_hi_u32 s35, s1, s12 ; GFX10-NEXT: s_cselect_b32 s24, 1, 0 +; GFX10-NEXT: s_add_u32 s23, s34, s23 +; GFX10-NEXT: s_addc_u32 s21, s35, s21 +; GFX10-NEXT: s_mul_i32 s35, s2, s11 +; GFX10-NEXT: s_mul_hi_u32 s36, s2, s11 +; GFX10-NEXT: s_cselect_b32 s34, 1, 0 +; GFX10-NEXT: s_add_u32 s23, s35, s23 +; GFX10-NEXT: s_addc_u32 s21, s36, s21 +; GFX10-NEXT: s_mul_i32 s36, s3, s10 +; GFX10-NEXT: s_mul_hi_u32 s37, s3, s10 +; GFX10-NEXT: s_cselect_b32 s35, 1, 0 +; GFX10-NEXT: s_add_u32 s23, s36, s23 +; GFX10-NEXT: s_addc_u32 s21, s37, s21 +; GFX10-NEXT: s_mul_i32 s37, s4, s9 +; GFX10-NEXT: s_mul_hi_u32 s38, s4, s9 +; GFX10-NEXT: s_cselect_b32 s36, 1, 0 +; GFX10-NEXT: s_add_u32 s23, s37, s23 +; GFX10-NEXT: s_addc_u32 s21, s38, s21 +; GFX10-NEXT: s_mul_i32 s38, s5, s8 +; GFX10-NEXT: s_mul_hi_u32 s39, s5, s8 +; GFX10-NEXT: s_cselect_b32 s37, 1, 0 +; GFX10-NEXT: s_add_u32 s23, s38, s23 +; GFX10-NEXT: s_addc_u32 s21, s39, s21 +; GFX10-NEXT: s_cselect_b32 s38, 1, 0 +; GFX10-NEXT: s_cmp_lg_u32 s30, 0 +; GFX10-NEXT: s_mul_i32 s1, s1, s14 +; GFX10-NEXT: s_addc_u32 s29, s29, 0 +; GFX10-NEXT: s_cmp_lg_u32 s31, 0 +; GFX10-NEXT: s_mul_i32 s2, s2, s13 +; GFX10-NEXT: s_addc_u32 s29, s29, 0 +; GFX10-NEXT: s_cmp_lg_u32 s33, 0 +; GFX10-NEXT: s_mul_i32 s3, s3, s12 +; GFX10-NEXT: s_addc_u32 s29, s29, 0 +; GFX10-NEXT: s_cmp_lg_u32 s20, 0 +; GFX10-NEXT: s_mul_i32 s4, s4, s11 +; GFX10-NEXT: s_addc_u32 s20, s29, s23 +; GFX10-NEXT: s_cselect_b32 s23, 1, 0 +; GFX10-NEXT: s_cmp_lg_u32 s26, 0 +; GFX10-NEXT: s_mul_i32 s26, s0, s15 +; GFX10-NEXT: s_addc_u32 s25, s25, 0 +; GFX10-NEXT: s_cmp_lg_u32 s27, 0 +; GFX10-NEXT: s_mul_i32 s5, s5, s10 +; GFX10-NEXT: s_addc_u32 s25, s25, 0 +; GFX10-NEXT: s_cmp_lg_u32 s28, 0 +; GFX10-NEXT: s_mul_i32 s6, s6, s9 +; GFX10-NEXT: s_addc_u32 s25, s25, 0 +; GFX10-NEXT: s_cmp_lg_u32 s23, 0 ; GFX10-NEXT: s_mul_i32 s7, s7, s8 -; GFX10-NEXT: s_add_i32 s23, s23, s24 -; GFX10-NEXT: s_add_u32 s21, s21, s31 -; GFX10-NEXT: s_cselect_b32 s24, 1, 0 -; GFX10-NEXT: s_mul_i32 s25, s5, s10 -; GFX10-NEXT: s_add_i32 s23, s23, s24 -; GFX10-NEXT: s_add_u32 s21, s21, s33 -; GFX10-NEXT: s_cselect_b32 s24, 1, 0 -; GFX10-NEXT: s_mul_i32 s15, s0, s15 -; GFX10-NEXT: s_add_i32 s23, s23, s24 -; GFX10-NEXT: s_add_u32 s21, s21, s34 -; GFX10-NEXT: s_cselect_b32 s24, 1, 0 -; GFX10-NEXT: s_mul_hi_u32 s5, s5, s9 -; GFX10-NEXT: s_add_i32 s23, s23, s24 -; GFX10-NEXT: s_add_u32 s21, s21, s35 -; GFX10-NEXT: s_cselect_b32 s24, 1, 0 -; GFX10-NEXT: s_add_i32 s23, s23, s24 -; GFX10-NEXT: s_mul_i32 s24, s6, s9 -; GFX10-NEXT: s_add_u32 s21, s21, s22 -; GFX10-NEXT: s_cselect_b32 s22, 1, 0 -; GFX10-NEXT: s_add_i32 s7, s7, s24 -; GFX10-NEXT: s_mul_i32 s24, s4, s11 -; GFX10-NEXT: s_add_i32 s7, s7, s25 -; GFX10-NEXT: s_mul_i32 s25, s3, s12 -; GFX10-NEXT: s_add_i32 s7, s7, s24 -; GFX10-NEXT: s_mul_i32 s24, s2, s13 -; GFX10-NEXT: s_add_i32 s7, s7, s25 -; GFX10-NEXT: s_mul_i32 s25, s1, s14 -; GFX10-NEXT: s_add_i32 s7, s7, s24 -; GFX10-NEXT: s_mul_hi_u32 s6, s6, s8 -; GFX10-NEXT: s_add_i32 s7, s7, s25 -; GFX10-NEXT: s_mul_hi_u32 s4, s4, s10 -; GFX10-NEXT: s_add_i32 s7, s7, s15 -; GFX10-NEXT: s_mul_hi_u32 s3, s3, s11 -; GFX10-NEXT: s_add_i32 s6, s7, s6 -; GFX10-NEXT: s_mul_hi_u32 s2, s2, s12 -; GFX10-NEXT: s_add_i32 s5, s6, s5 -; GFX10-NEXT: s_mul_hi_u32 s1, s1, s13 -; GFX10-NEXT: s_add_i32 s4, s5, s4 -; GFX10-NEXT: s_add_i32 s23, s23, s22 -; GFX10-NEXT: s_add_i32 s3, s4, s3 -; GFX10-NEXT: s_mov_b32 s4, s19 -; GFX10-NEXT: s_add_i32 s2, s3, s2 -; GFX10-NEXT: s_mul_hi_u32 s3, s0, s14 -; GFX10-NEXT: s_add_i32 s1, s2, s1 +; GFX10-NEXT: s_addc_u32 s15, s25, s21 +; GFX10-NEXT: s_addc_u32 s21, s22, s26 +; GFX10-NEXT: s_cmp_lg_u32 s38, 0 ; GFX10-NEXT: s_mul_i32 s0, s0, s8 -; GFX10-NEXT: s_add_i32 s1, s1, s3 +; GFX10-NEXT: s_addc_u32 s1, s21, s1 +; GFX10-NEXT: s_cmp_lg_u32 s37, 0 +; GFX10-NEXT: s_addc_u32 s1, s1, s2 +; GFX10-NEXT: s_cmp_lg_u32 s36, 0 ; GFX10-NEXT: s_mov_b32 s2, s17 -; GFX10-NEXT: s_add_i32 s7, s1, s23 -; GFX10-NEXT: s_mov_b32 s1, s16 +; GFX10-NEXT: s_addc_u32 s1, s1, s3 +; GFX10-NEXT: s_cmp_lg_u32 s35, 0 ; GFX10-NEXT: s_mov_b32 s3, s18 +; GFX10-NEXT: s_addc_u32 s1, s1, s4 +; GFX10-NEXT: s_cmp_lg_u32 s34, 0 +; GFX10-NEXT: s_mov_b32 s4, s19 +; GFX10-NEXT: s_addc_u32 s1, s1, s5 +; GFX10-NEXT: s_cmp_lg_u32 s24, 0 ; GFX10-NEXT: s_mov_b32 s5, s20 -; GFX10-NEXT: s_mov_b32 s6, s21 +; GFX10-NEXT: s_addc_u32 s1, s1, s6 +; GFX10-NEXT: s_mov_b32 s6, s15 +; GFX10-NEXT: s_add_i32 s7, s1, s7 +; GFX10-NEXT: s_mov_b32 s1, s16 ; GFX10-NEXT: ; return to shader part epilog %result = mul i256 %num, %den %cast = bitcast i256 %result to <8 x i32> @@ -1983,850 +1608,283 @@ ; GFX7-LABEL: v_mul_i256: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mul_lo_u32 v16, v1, v8 -; GFX7-NEXT: v_mul_lo_u32 v17, v0, v9 -; GFX7-NEXT: v_mul_hi_u32 v18, v0, v8 -; GFX7-NEXT: v_mul_lo_u32 v19, v2, v8 -; GFX7-NEXT: v_mul_lo_u32 v20, v1, v9 -; GFX7-NEXT: v_add_i32_e32 v16, vcc, v16, v17 -; GFX7-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v16, vcc, v16, v18 -; GFX7-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v17, vcc, v17, v18 -; GFX7-NEXT: v_mul_lo_u32 v18, v0, v10 -; GFX7-NEXT: v_add_i32_e32 v19, vcc, v19, v20 -; GFX7-NEXT: v_mul_hi_u32 v21, v1, v8 -; GFX7-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v18, vcc, v19, v18 -; GFX7-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v19, vcc, v20, v19 -; GFX7-NEXT: v_add_i32_e32 v18, vcc, v18, v21 -; GFX7-NEXT: v_mul_hi_u32 v21, v0, v9 -; GFX7-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v19, vcc, v19, v20 -; GFX7-NEXT: v_add_i32_e32 v18, vcc, v18, v21 -; GFX7-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v19, vcc, v19, v20 -; GFX7-NEXT: v_add_i32_e32 v17, vcc, v18, v17 -; GFX7-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GFX7-NEXT: v_mul_lo_u32 v20, v3, v8 -; GFX7-NEXT: v_mul_lo_u32 v21, v2, v9 -; GFX7-NEXT: v_add_i32_e32 v18, vcc, v19, v18 -; GFX7-NEXT: v_mul_lo_u32 v19, v1, v10 -; GFX7-NEXT: v_add_i32_e32 v20, vcc, v20, v21 -; GFX7-NEXT: v_mul_lo_u32 v22, v0, v11 -; GFX7-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v19, vcc, v20, v19 -; GFX7-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v20, vcc, v21, v20 -; GFX7-NEXT: v_add_i32_e32 v19, vcc, v19, v22 -; GFX7-NEXT: v_mul_hi_u32 v22, v2, v8 -; GFX7-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v20, vcc, v20, v21 -; GFX7-NEXT: v_add_i32_e32 v19, vcc, v19, v22 -; GFX7-NEXT: v_mul_hi_u32 v22, v1, v9 -; GFX7-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v20, vcc, v20, v21 -; GFX7-NEXT: v_add_i32_e32 v19, vcc, v19, v22 -; GFX7-NEXT: v_mul_hi_u32 v22, v0, v10 -; GFX7-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v20, vcc, v20, v21 -; GFX7-NEXT: v_add_i32_e32 v19, vcc, v19, v22 -; GFX7-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v20, vcc, v20, v21 -; GFX7-NEXT: v_add_i32_e32 v18, vcc, v19, v18 -; GFX7-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc -; GFX7-NEXT: v_mul_lo_u32 v21, v4, v8 -; GFX7-NEXT: v_mul_lo_u32 v22, v3, v9 -; GFX7-NEXT: v_add_i32_e32 v19, vcc, v20, v19 -; GFX7-NEXT: v_mul_lo_u32 v20, v2, v10 -; GFX7-NEXT: v_add_i32_e32 v21, vcc, v21, v22 -; GFX7-NEXT: v_mul_lo_u32 v23, v1, v11 -; GFX7-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v20, vcc, v21, v20 -; GFX7-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v21, vcc, v22, v21 -; GFX7-NEXT: v_add_i32_e32 v20, vcc, v20, v23 -; GFX7-NEXT: v_mul_lo_u32 v23, v0, v12 -; GFX7-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v21, vcc, v21, v22 -; GFX7-NEXT: v_add_i32_e32 v20, vcc, v20, v23 -; GFX7-NEXT: v_mul_hi_u32 v23, v3, v8 -; GFX7-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v21, vcc, v21, v22 -; GFX7-NEXT: v_add_i32_e32 v20, vcc, v20, v23 -; GFX7-NEXT: v_mul_hi_u32 v23, v2, v9 -; GFX7-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v21, vcc, v21, v22 -; GFX7-NEXT: v_add_i32_e32 v20, vcc, v20, v23 -; GFX7-NEXT: v_mul_hi_u32 v23, v1, v10 -; GFX7-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v21, vcc, v21, v22 -; GFX7-NEXT: v_add_i32_e32 v20, vcc, v20, v23 -; GFX7-NEXT: v_mul_hi_u32 v23, v0, v11 -; GFX7-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v21, vcc, v21, v22 -; GFX7-NEXT: v_add_i32_e32 v20, vcc, v20, v23 -; GFX7-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v21, vcc, v21, v22 -; GFX7-NEXT: v_add_i32_e32 v19, vcc, v20, v19 -; GFX7-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GFX7-NEXT: v_mul_lo_u32 v22, v5, v8 -; GFX7-NEXT: v_mul_lo_u32 v23, v4, v9 -; GFX7-NEXT: v_add_i32_e32 v20, vcc, v21, v20 -; GFX7-NEXT: v_mul_lo_u32 v21, v3, v10 -; GFX7-NEXT: v_add_i32_e32 v22, vcc, v22, v23 -; GFX7-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v21, vcc, v22, v21 -; GFX7-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v22, vcc, v23, v22 -; GFX7-NEXT: v_mul_lo_u32 v23, v2, v11 -; GFX7-NEXT: v_mul_lo_u32 v7, v7, v8 -; GFX7-NEXT: v_mul_lo_u32 v15, v0, v15 -; GFX7-NEXT: v_add_i32_e32 v21, vcc, v21, v23 -; GFX7-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v22, vcc, v22, v23 -; GFX7-NEXT: v_mul_lo_u32 v23, v1, v12 -; GFX7-NEXT: v_add_i32_e32 v21, vcc, v21, v23 -; GFX7-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v22, vcc, v22, v23 -; GFX7-NEXT: v_mul_lo_u32 v23, v0, v13 -; GFX7-NEXT: v_add_i32_e32 v21, vcc, v21, v23 -; GFX7-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v22, vcc, v22, v23 -; GFX7-NEXT: v_mul_hi_u32 v23, v4, v8 -; GFX7-NEXT: v_add_i32_e32 v21, vcc, v21, v23 -; GFX7-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v22, vcc, v22, v23 -; GFX7-NEXT: v_mul_hi_u32 v23, v3, v9 -; GFX7-NEXT: v_add_i32_e32 v21, vcc, v21, v23 -; GFX7-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v22, vcc, v22, v23 -; GFX7-NEXT: v_mul_hi_u32 v23, v2, v10 -; GFX7-NEXT: v_add_i32_e32 v21, vcc, v21, v23 -; GFX7-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v22, vcc, v22, v23 -; GFX7-NEXT: v_mul_hi_u32 v23, v1, v11 -; GFX7-NEXT: v_add_i32_e32 v21, vcc, v21, v23 -; GFX7-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v22, vcc, v22, v23 -; GFX7-NEXT: v_mul_hi_u32 v23, v0, v12 -; GFX7-NEXT: v_add_i32_e32 v21, vcc, v21, v23 -; GFX7-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v22, vcc, v22, v23 -; GFX7-NEXT: v_add_i32_e32 v20, vcc, v21, v20 -; GFX7-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v21, vcc, v22, v21 -; GFX7-NEXT: v_mul_lo_u32 v22, v6, v8 -; GFX7-NEXT: v_mul_lo_u32 v23, v5, v9 -; GFX7-NEXT: v_add_i32_e32 v22, vcc, v22, v23 -; GFX7-NEXT: v_mul_lo_u32 v23, v4, v10 -; GFX7-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v22, vcc, v22, v23 -; GFX7-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v23, vcc, v24, v23 -; GFX7-NEXT: v_mul_lo_u32 v24, v3, v11 -; GFX7-NEXT: v_add_i32_e32 v22, vcc, v22, v24 -; GFX7-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v23, vcc, v23, v24 -; GFX7-NEXT: v_mul_lo_u32 v24, v2, v12 -; GFX7-NEXT: v_add_i32_e32 v22, vcc, v22, v24 -; GFX7-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v23, vcc, v23, v24 -; GFX7-NEXT: v_mul_lo_u32 v24, v1, v13 -; GFX7-NEXT: v_add_i32_e32 v22, vcc, v22, v24 -; GFX7-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v23, vcc, v23, v24 -; GFX7-NEXT: v_mul_lo_u32 v24, v0, v14 -; GFX7-NEXT: v_add_i32_e32 v22, vcc, v22, v24 -; GFX7-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v23, vcc, v23, v24 -; GFX7-NEXT: v_mul_hi_u32 v24, v5, v8 -; GFX7-NEXT: v_add_i32_e32 v22, vcc, v22, v24 -; GFX7-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v23, vcc, v23, v24 -; GFX7-NEXT: v_mul_hi_u32 v24, v4, v9 -; GFX7-NEXT: v_add_i32_e32 v22, vcc, v22, v24 -; GFX7-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v23, vcc, v23, v24 -; GFX7-NEXT: v_mul_hi_u32 v24, v3, v10 -; GFX7-NEXT: v_add_i32_e32 v22, vcc, v22, v24 -; GFX7-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v23, vcc, v23, v24 -; GFX7-NEXT: v_mul_hi_u32 v24, v2, v11 -; GFX7-NEXT: v_add_i32_e32 v22, vcc, v22, v24 -; GFX7-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v23, vcc, v23, v24 -; GFX7-NEXT: v_mul_hi_u32 v24, v1, v12 -; GFX7-NEXT: v_add_i32_e32 v22, vcc, v22, v24 -; GFX7-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v23, vcc, v23, v24 -; GFX7-NEXT: v_mul_hi_u32 v24, v0, v13 -; GFX7-NEXT: v_add_i32_e32 v22, vcc, v22, v24 -; GFX7-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v23, vcc, v23, v24 -; GFX7-NEXT: v_add_i32_e32 v21, vcc, v22, v21 -; GFX7-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc -; GFX7-NEXT: v_add_i32_e32 v23, vcc, v23, v22 -; GFX7-NEXT: v_mul_lo_u32 v22, v0, v8 -; GFX7-NEXT: v_mul_hi_u32 v8, v6, v8 -; GFX7-NEXT: v_mul_lo_u32 v6, v6, v9 -; GFX7-NEXT: v_mul_hi_u32 v9, v5, v9 -; GFX7-NEXT: v_mul_lo_u32 v5, v5, v10 -; GFX7-NEXT: v_mul_hi_u32 v10, v4, v10 -; GFX7-NEXT: v_mul_lo_u32 v4, v4, v11 -; GFX7-NEXT: v_mul_hi_u32 v11, v3, v11 -; GFX7-NEXT: v_mul_lo_u32 v3, v3, v12 -; GFX7-NEXT: v_mul_hi_u32 v12, v2, v12 -; GFX7-NEXT: v_mul_lo_u32 v2, v2, v13 -; GFX7-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; GFX7-NEXT: v_mul_hi_u32 v13, v1, v13 -; GFX7-NEXT: v_mul_lo_u32 v1, v1, v14 -; GFX7-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; GFX7-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; GFX7-NEXT: v_add_i32_e32 v3, vcc, v4, v3 -; GFX7-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; GFX7-NEXT: v_add_i32_e32 v1, vcc, v2, v1 -; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v15 -; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v8 -; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v9 -; GFX7-NEXT: v_mul_hi_u32 v0, v0, v14 -; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v10 -; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v11 -; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v12 -; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v13 -; GFX7-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; GFX7-NEXT: v_add_i32_e32 v7, vcc, v0, v23 -; GFX7-NEXT: v_mov_b32_e32 v0, v22 -; GFX7-NEXT: v_mov_b32_e32 v1, v16 -; GFX7-NEXT: v_mov_b32_e32 v2, v17 -; GFX7-NEXT: v_mov_b32_e32 v3, v18 -; GFX7-NEXT: v_mov_b32_e32 v4, v19 -; GFX7-NEXT: v_mov_b32_e32 v5, v20 -; GFX7-NEXT: v_mov_b32_e32 v6, v21 +; GFX7-NEXT: v_mov_b32_e32 v16, v0 +; GFX7-NEXT: v_mov_b32_e32 v17, v1 +; GFX7-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v16, v14, 0 +; GFX7-NEXT: v_mad_u64_u32 v[18:19], s[4:5], v16, v12, 0 +; GFX7-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v17, v13, v[0:1] +; GFX7-NEXT: v_mul_lo_u32 v27, v3, v12 +; GFX7-NEXT: v_mul_lo_u32 v26, v5, v10 +; GFX7-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, v12, v[0:1] +; GFX7-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v11, v[0:1] +; GFX7-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v10, v[0:1] +; GFX7-NEXT: v_mad_u64_u32 v[18:19], s[4:5], v17, v11, v[18:19] +; GFX7-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[4:5] +; GFX7-NEXT: v_mad_u64_u32 v[0:1], s[6:7], v5, v9, v[0:1] +; GFX7-NEXT: v_mad_u64_u32 v[18:19], vcc, v2, v10, v[18:19] +; GFX7-NEXT: v_addc_u32_e32 v20, vcc, 0, v20, vcc +; GFX7-NEXT: v_mad_u64_u32 v[18:19], vcc, v3, v9, v[18:19] +; GFX7-NEXT: v_addc_u32_e32 v22, vcc, 0, v20, vcc +; GFX7-NEXT: v_mad_u64_u32 v[18:19], vcc, v4, v8, v[18:19] +; GFX7-NEXT: v_addc_u32_e32 v24, vcc, 0, v22, vcc +; GFX7-NEXT: v_mad_u64_u32 v[22:23], s[4:5], v16, v10, 0 +; GFX7-NEXT: v_mad_u64_u32 v[20:21], s[4:5], v6, v8, v[0:1] +; GFX7-NEXT: v_mad_u64_u32 v[22:23], s[4:5], v17, v9, v[22:23] +; GFX7-NEXT: v_mov_b32_e32 v1, v18 +; GFX7-NEXT: v_mov_b32_e32 v18, v19 +; GFX7-NEXT: v_mov_b32_e32 v19, v20 +; GFX7-NEXT: v_mad_u64_u32 v[18:19], vcc, v16, v13, v[18:19] +; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7-NEXT: v_mad_u64_u32 v[22:23], s[4:5], v2, v8, v[22:23] +; GFX7-NEXT: v_addc_u32_e64 v25, s[4:5], 0, v0, s[4:5] +; GFX7-NEXT: v_mad_u64_u32 v[18:19], s[4:5], v17, v12, v[18:19] +; GFX7-NEXT: v_mov_b32_e32 v0, v23 +; GFX7-NEXT: v_mul_lo_u32 v23, v4, v11 +; GFX7-NEXT: v_mad_u64_u32 v[18:19], s[6:7], v2, v11, v[18:19] +; GFX7-NEXT: v_mad_u64_u32 v[0:1], s[12:13], v16, v11, v[0:1] +; GFX7-NEXT: v_mad_u64_u32 v[11:12], s[8:9], v3, v10, v[18:19] +; GFX7-NEXT: v_mul_lo_u32 v13, v2, v13 +; GFX7-NEXT: v_mul_lo_u32 v20, v6, v9 +; GFX7-NEXT: v_mad_u64_u32 v[11:12], s[10:11], v4, v9, v[11:12] +; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[12:13] +; GFX7-NEXT: v_mad_u64_u32 v[0:1], s[12:13], v17, v10, v[0:1] +; GFX7-NEXT: v_addc_u32_e64 v4, s[12:13], 0, v4, s[12:13] +; GFX7-NEXT: v_mad_u64_u32 v[18:19], s[12:13], v2, v9, v[0:1] +; GFX7-NEXT: v_mad_u64_u32 v[0:1], s[14:15], v16, v8, 0 +; GFX7-NEXT: v_mov_b32_e32 v2, v22 +; GFX7-NEXT: v_addc_u32_e64 v10, s[12:13], 0, v4, s[12:13] +; GFX7-NEXT: v_mad_u64_u32 v[1:2], s[16:17], v16, v9, v[1:2] +; GFX7-NEXT: v_mad_u64_u32 v[3:4], s[12:13], v3, v8, v[18:19] +; GFX7-NEXT: v_mad_u64_u32 v[5:6], s[14:15], v5, v8, v[11:12] +; GFX7-NEXT: v_addc_u32_e64 v10, s[12:13], 0, v10, s[12:13] +; GFX7-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[16:17] +; GFX7-NEXT: v_mad_u64_u32 v[1:2], s[12:13], v17, v8, v[1:2] +; GFX7-NEXT: v_addc_u32_e64 v3, s[12:13], v11, v3, s[12:13] +; GFX7-NEXT: v_mul_lo_u32 v11, v16, v15 +; GFX7-NEXT: v_mul_lo_u32 v9, v17, v14 +; GFX7-NEXT: v_addc_u32_e64 v4, s[12:13], v25, v4, s[12:13] +; GFX7-NEXT: v_addc_u32_e64 v5, s[12:13], v10, v5, s[12:13] +; GFX7-NEXT: v_addc_u32_e64 v6, s[12:13], v24, v6, s[12:13] +; GFX7-NEXT: v_addc_u32_e64 v10, s[12:13], v21, v11, s[12:13] +; GFX7-NEXT: v_addc_u32_e64 v9, s[12:13], v10, v9, s[14:15] +; GFX7-NEXT: v_addc_u32_e64 v9, s[10:11], v9, v13, s[10:11] +; GFX7-NEXT: v_addc_u32_e64 v9, s[8:9], v9, v27, s[8:9] +; GFX7-NEXT: v_addc_u32_e64 v9, s[6:7], v9, v23, s[6:7] +; GFX7-NEXT: v_addc_u32_e64 v9, s[4:5], v9, v26, s[4:5] +; GFX7-NEXT: v_addc_u32_e32 v9, vcc, v9, v20, vcc +; GFX7-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v7, v8, v[9:10] ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_mul_i256: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mul_lo_u32 v16, v1, v8 -; GFX8-NEXT: v_mul_lo_u32 v17, v0, v9 -; GFX8-NEXT: v_mul_hi_u32 v18, v0, v8 -; GFX8-NEXT: v_mul_lo_u32 v19, v2, v8 -; GFX8-NEXT: v_mul_lo_u32 v20, v1, v9 -; GFX8-NEXT: v_add_u32_e32 v16, vcc, v16, v17 -; GFX8-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v16, vcc, v16, v18 -; GFX8-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v17, vcc, v17, v18 -; GFX8-NEXT: v_mul_lo_u32 v18, v0, v10 -; GFX8-NEXT: v_add_u32_e32 v19, vcc, v19, v20 -; GFX8-NEXT: v_mul_hi_u32 v21, v1, v8 -; GFX8-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v18, vcc, v19, v18 -; GFX8-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v19, vcc, v20, v19 -; GFX8-NEXT: v_add_u32_e32 v18, vcc, v18, v21 -; GFX8-NEXT: v_mul_hi_u32 v21, v0, v9 -; GFX8-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v19, vcc, v19, v20 -; GFX8-NEXT: v_add_u32_e32 v18, vcc, v18, v21 -; GFX8-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v19, vcc, v19, v20 -; GFX8-NEXT: v_add_u32_e32 v17, vcc, v18, v17 -; GFX8-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v20, v3, v8 -; GFX8-NEXT: v_mul_lo_u32 v21, v2, v9 -; GFX8-NEXT: v_add_u32_e32 v18, vcc, v19, v18 -; GFX8-NEXT: v_mul_lo_u32 v19, v1, v10 -; GFX8-NEXT: v_add_u32_e32 v20, vcc, v20, v21 -; GFX8-NEXT: v_mul_lo_u32 v22, v0, v11 -; GFX8-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v19, vcc, v20, v19 -; GFX8-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v20, vcc, v21, v20 -; GFX8-NEXT: v_add_u32_e32 v19, vcc, v19, v22 -; GFX8-NEXT: v_mul_hi_u32 v22, v2, v8 -; GFX8-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v20, vcc, v20, v21 -; GFX8-NEXT: v_add_u32_e32 v19, vcc, v19, v22 -; GFX8-NEXT: v_mul_hi_u32 v22, v1, v9 -; GFX8-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v20, vcc, v20, v21 -; GFX8-NEXT: v_add_u32_e32 v19, vcc, v19, v22 -; GFX8-NEXT: v_mul_hi_u32 v22, v0, v10 -; GFX8-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v20, vcc, v20, v21 -; GFX8-NEXT: v_add_u32_e32 v19, vcc, v19, v22 -; GFX8-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v20, vcc, v20, v21 -; GFX8-NEXT: v_add_u32_e32 v18, vcc, v19, v18 -; GFX8-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v21, v4, v8 -; GFX8-NEXT: v_mul_lo_u32 v22, v3, v9 -; GFX8-NEXT: v_add_u32_e32 v19, vcc, v20, v19 -; GFX8-NEXT: v_mul_lo_u32 v20, v2, v10 -; GFX8-NEXT: v_add_u32_e32 v21, vcc, v21, v22 -; GFX8-NEXT: v_mul_lo_u32 v23, v1, v11 -; GFX8-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v20, vcc, v21, v20 -; GFX8-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v21, vcc, v22, v21 -; GFX8-NEXT: v_add_u32_e32 v20, vcc, v20, v23 -; GFX8-NEXT: v_mul_lo_u32 v23, v0, v12 -; GFX8-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v21, vcc, v21, v22 -; GFX8-NEXT: v_add_u32_e32 v20, vcc, v20, v23 -; GFX8-NEXT: v_mul_hi_u32 v23, v3, v8 -; GFX8-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v21, vcc, v21, v22 -; GFX8-NEXT: v_add_u32_e32 v20, vcc, v20, v23 -; GFX8-NEXT: v_mul_hi_u32 v23, v2, v9 -; GFX8-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v21, vcc, v21, v22 -; GFX8-NEXT: v_add_u32_e32 v20, vcc, v20, v23 -; GFX8-NEXT: v_mul_hi_u32 v23, v1, v10 -; GFX8-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v21, vcc, v21, v22 -; GFX8-NEXT: v_add_u32_e32 v20, vcc, v20, v23 -; GFX8-NEXT: v_mul_hi_u32 v23, v0, v11 -; GFX8-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v21, vcc, v21, v22 -; GFX8-NEXT: v_add_u32_e32 v20, vcc, v20, v23 -; GFX8-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v21, vcc, v21, v22 -; GFX8-NEXT: v_add_u32_e32 v19, vcc, v20, v19 -; GFX8-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v22, v5, v8 -; GFX8-NEXT: v_mul_lo_u32 v23, v4, v9 -; GFX8-NEXT: v_add_u32_e32 v20, vcc, v21, v20 -; GFX8-NEXT: v_mul_lo_u32 v21, v3, v10 -; GFX8-NEXT: v_add_u32_e32 v22, vcc, v22, v23 -; GFX8-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v21, vcc, v22, v21 -; GFX8-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v22, vcc, v23, v22 -; GFX8-NEXT: v_mul_lo_u32 v23, v2, v11 -; GFX8-NEXT: v_mul_lo_u32 v7, v7, v8 -; GFX8-NEXT: v_mul_lo_u32 v15, v0, v15 -; GFX8-NEXT: v_add_u32_e32 v21, vcc, v21, v23 -; GFX8-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v22, vcc, v22, v23 -; GFX8-NEXT: v_mul_lo_u32 v23, v1, v12 -; GFX8-NEXT: v_add_u32_e32 v21, vcc, v21, v23 -; GFX8-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v22, vcc, v22, v23 -; GFX8-NEXT: v_mul_lo_u32 v23, v0, v13 -; GFX8-NEXT: v_add_u32_e32 v21, vcc, v21, v23 -; GFX8-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v22, vcc, v22, v23 -; GFX8-NEXT: v_mul_hi_u32 v23, v4, v8 -; GFX8-NEXT: v_add_u32_e32 v21, vcc, v21, v23 -; GFX8-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v22, vcc, v22, v23 -; GFX8-NEXT: v_mul_hi_u32 v23, v3, v9 -; GFX8-NEXT: v_add_u32_e32 v21, vcc, v21, v23 -; GFX8-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v22, vcc, v22, v23 -; GFX8-NEXT: v_mul_hi_u32 v23, v2, v10 -; GFX8-NEXT: v_add_u32_e32 v21, vcc, v21, v23 -; GFX8-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v22, vcc, v22, v23 -; GFX8-NEXT: v_mul_hi_u32 v23, v1, v11 -; GFX8-NEXT: v_add_u32_e32 v21, vcc, v21, v23 -; GFX8-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v22, vcc, v22, v23 -; GFX8-NEXT: v_mul_hi_u32 v23, v0, v12 -; GFX8-NEXT: v_add_u32_e32 v21, vcc, v21, v23 -; GFX8-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v22, vcc, v22, v23 -; GFX8-NEXT: v_add_u32_e32 v20, vcc, v21, v20 -; GFX8-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v21, vcc, v22, v21 -; GFX8-NEXT: v_mul_lo_u32 v22, v6, v8 -; GFX8-NEXT: v_mul_lo_u32 v23, v5, v9 -; GFX8-NEXT: v_add_u32_e32 v22, vcc, v22, v23 -; GFX8-NEXT: v_mul_lo_u32 v23, v4, v10 -; GFX8-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v22, vcc, v22, v23 -; GFX8-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v23, vcc, v24, v23 -; GFX8-NEXT: v_mul_lo_u32 v24, v3, v11 -; GFX8-NEXT: v_add_u32_e32 v22, vcc, v22, v24 -; GFX8-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v23, vcc, v23, v24 -; GFX8-NEXT: v_mul_lo_u32 v24, v2, v12 -; GFX8-NEXT: v_add_u32_e32 v22, vcc, v22, v24 -; GFX8-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v23, vcc, v23, v24 -; GFX8-NEXT: v_mul_lo_u32 v24, v1, v13 -; GFX8-NEXT: v_add_u32_e32 v22, vcc, v22, v24 -; GFX8-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v23, vcc, v23, v24 -; GFX8-NEXT: v_mul_lo_u32 v24, v0, v14 -; GFX8-NEXT: v_add_u32_e32 v22, vcc, v22, v24 -; GFX8-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v23, vcc, v23, v24 -; GFX8-NEXT: v_mul_hi_u32 v24, v5, v8 -; GFX8-NEXT: v_add_u32_e32 v22, vcc, v22, v24 -; GFX8-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v23, vcc, v23, v24 -; GFX8-NEXT: v_mul_hi_u32 v24, v4, v9 -; GFX8-NEXT: v_add_u32_e32 v22, vcc, v22, v24 -; GFX8-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v23, vcc, v23, v24 -; GFX8-NEXT: v_mul_hi_u32 v24, v3, v10 -; GFX8-NEXT: v_add_u32_e32 v22, vcc, v22, v24 -; GFX8-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v23, vcc, v23, v24 -; GFX8-NEXT: v_mul_hi_u32 v24, v2, v11 -; GFX8-NEXT: v_add_u32_e32 v22, vcc, v22, v24 -; GFX8-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v23, vcc, v23, v24 -; GFX8-NEXT: v_mul_hi_u32 v24, v1, v12 -; GFX8-NEXT: v_add_u32_e32 v22, vcc, v22, v24 -; GFX8-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v23, vcc, v23, v24 -; GFX8-NEXT: v_mul_hi_u32 v24, v0, v13 -; GFX8-NEXT: v_add_u32_e32 v22, vcc, v22, v24 -; GFX8-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v23, vcc, v23, v24 -; GFX8-NEXT: v_add_u32_e32 v21, vcc, v22, v21 -; GFX8-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v23, vcc, v23, v22 -; GFX8-NEXT: v_mul_lo_u32 v22, v0, v8 -; GFX8-NEXT: v_mul_hi_u32 v8, v6, v8 -; GFX8-NEXT: v_mul_lo_u32 v6, v6, v9 -; GFX8-NEXT: v_mul_hi_u32 v9, v5, v9 -; GFX8-NEXT: v_mul_lo_u32 v5, v5, v10 -; GFX8-NEXT: v_mul_hi_u32 v10, v4, v10 -; GFX8-NEXT: v_mul_lo_u32 v4, v4, v11 -; GFX8-NEXT: v_mul_hi_u32 v11, v3, v11 -; GFX8-NEXT: v_mul_lo_u32 v3, v3, v12 -; GFX8-NEXT: v_mul_hi_u32 v12, v2, v12 -; GFX8-NEXT: v_mul_lo_u32 v2, v2, v13 -; GFX8-NEXT: v_add_u32_e32 v6, vcc, v7, v6 -; GFX8-NEXT: v_mul_hi_u32 v13, v1, v13 -; GFX8-NEXT: v_mul_lo_u32 v1, v1, v14 -; GFX8-NEXT: v_add_u32_e32 v5, vcc, v6, v5 -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v5, v4 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v4, v3 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 -; GFX8-NEXT: v_add_u32_e32 v1, vcc, v2, v1 -; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v15 -; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v8 -; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v9 -; GFX8-NEXT: v_mul_hi_u32 v0, v0, v14 -; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v10 -; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v11 -; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v12 -; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v13 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v1, v0 -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v0, v23 -; GFX8-NEXT: v_mov_b32_e32 v0, v22 -; GFX8-NEXT: v_mov_b32_e32 v1, v16 -; GFX8-NEXT: v_mov_b32_e32 v2, v17 -; GFX8-NEXT: v_mov_b32_e32 v3, v18 -; GFX8-NEXT: v_mov_b32_e32 v4, v19 -; GFX8-NEXT: v_mov_b32_e32 v5, v20 -; GFX8-NEXT: v_mov_b32_e32 v6, v21 +; GFX8-NEXT: v_mov_b32_e32 v16, v0 +; GFX8-NEXT: v_mov_b32_e32 v17, v1 +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v16, v14, 0 +; GFX8-NEXT: v_mad_u64_u32 v[18:19], s[4:5], v16, v12, 0 +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v17, v13, v[0:1] +; GFX8-NEXT: v_mul_lo_u32 v27, v3, v12 +; GFX8-NEXT: v_mul_lo_u32 v26, v5, v10 +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, v12, v[0:1] +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v11, v[0:1] +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v10, v[0:1] +; GFX8-NEXT: v_mad_u64_u32 v[18:19], s[4:5], v17, v11, v[18:19] +; GFX8-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[4:5] +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[6:7], v5, v9, v[0:1] +; GFX8-NEXT: v_mad_u64_u32 v[18:19], vcc, v2, v10, v[18:19] +; GFX8-NEXT: v_addc_u32_e32 v20, vcc, 0, v20, vcc +; GFX8-NEXT: v_mad_u64_u32 v[18:19], vcc, v3, v9, v[18:19] +; GFX8-NEXT: v_addc_u32_e32 v22, vcc, 0, v20, vcc +; GFX8-NEXT: v_mad_u64_u32 v[18:19], vcc, v4, v8, v[18:19] +; GFX8-NEXT: v_addc_u32_e32 v24, vcc, 0, v22, vcc +; GFX8-NEXT: v_mad_u64_u32 v[22:23], s[4:5], v16, v10, 0 +; GFX8-NEXT: v_mad_u64_u32 v[20:21], s[4:5], v6, v8, v[0:1] +; GFX8-NEXT: v_mad_u64_u32 v[22:23], s[4:5], v17, v9, v[22:23] +; GFX8-NEXT: v_mov_b32_e32 v1, v18 +; GFX8-NEXT: v_mov_b32_e32 v18, v19 +; GFX8-NEXT: v_mov_b32_e32 v19, v20 +; GFX8-NEXT: v_mad_u64_u32 v[18:19], vcc, v16, v13, v[18:19] +; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX8-NEXT: v_mad_u64_u32 v[22:23], s[4:5], v2, v8, v[22:23] +; GFX8-NEXT: v_addc_u32_e64 v25, s[4:5], 0, v0, s[4:5] +; GFX8-NEXT: v_mad_u64_u32 v[18:19], s[4:5], v17, v12, v[18:19] +; GFX8-NEXT: v_mov_b32_e32 v0, v23 +; GFX8-NEXT: v_mul_lo_u32 v23, v4, v11 +; GFX8-NEXT: v_mad_u64_u32 v[18:19], s[6:7], v2, v11, v[18:19] +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[12:13], v16, v11, v[0:1] +; GFX8-NEXT: v_mad_u64_u32 v[11:12], s[8:9], v3, v10, v[18:19] +; GFX8-NEXT: v_mul_lo_u32 v13, v2, v13 +; GFX8-NEXT: v_mul_lo_u32 v20, v6, v9 +; GFX8-NEXT: v_mad_u64_u32 v[11:12], s[10:11], v4, v9, v[11:12] +; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[12:13] +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[12:13], v17, v10, v[0:1] +; GFX8-NEXT: v_addc_u32_e64 v4, s[12:13], 0, v4, s[12:13] +; GFX8-NEXT: v_mad_u64_u32 v[18:19], s[12:13], v2, v9, v[0:1] +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[14:15], v16, v8, 0 +; GFX8-NEXT: v_mov_b32_e32 v2, v22 +; GFX8-NEXT: v_addc_u32_e64 v10, s[12:13], 0, v4, s[12:13] +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[16:17], v16, v9, v[1:2] +; GFX8-NEXT: v_mad_u64_u32 v[3:4], s[12:13], v3, v8, v[18:19] +; GFX8-NEXT: v_mad_u64_u32 v[5:6], s[14:15], v5, v8, v[11:12] +; GFX8-NEXT: v_addc_u32_e64 v10, s[12:13], 0, v10, s[12:13] +; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[16:17] +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[12:13], v17, v8, v[1:2] +; GFX8-NEXT: v_addc_u32_e64 v3, s[12:13], v11, v3, s[12:13] +; GFX8-NEXT: v_mul_lo_u32 v11, v16, v15 +; GFX8-NEXT: v_mul_lo_u32 v9, v17, v14 +; GFX8-NEXT: v_addc_u32_e64 v4, s[12:13], v25, v4, s[12:13] +; GFX8-NEXT: v_addc_u32_e64 v5, s[12:13], v10, v5, s[12:13] +; GFX8-NEXT: v_addc_u32_e64 v6, s[12:13], v24, v6, s[12:13] +; GFX8-NEXT: v_addc_u32_e64 v10, s[12:13], v21, v11, s[12:13] +; GFX8-NEXT: v_addc_u32_e64 v9, s[12:13], v10, v9, s[14:15] +; GFX8-NEXT: v_addc_u32_e64 v9, s[10:11], v9, v13, s[10:11] +; GFX8-NEXT: v_addc_u32_e64 v9, s[8:9], v9, v27, s[8:9] +; GFX8-NEXT: v_addc_u32_e64 v9, s[6:7], v9, v23, s[6:7] +; GFX8-NEXT: v_addc_u32_e64 v9, s[4:5], v9, v26, s[4:5] +; GFX8-NEXT: v_addc_u32_e32 v9, vcc, v9, v20, vcc +; GFX8-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v7, v8, v[9:10] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_mul_i256: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mul_lo_u32 v16, v2, v8 -; GFX9-NEXT: v_mul_lo_u32 v17, v1, v9 -; GFX9-NEXT: v_mul_lo_u32 v18, v0, v10 -; GFX9-NEXT: v_mul_hi_u32 v19, v1, v8 -; GFX9-NEXT: v_mul_lo_u32 v20, v1, v8 -; GFX9-NEXT: v_add_co_u32_e32 v16, vcc, v16, v17 -; GFX9-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v16, vcc, v16, v18 -; GFX9-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v19, vcc, v16, v19 -; GFX9-NEXT: v_mul_lo_u32 v21, v0, v9 -; GFX9-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v18, v17, v18, v16 -; GFX9-NEXT: v_mul_hi_u32 v16, v0, v8 -; GFX9-NEXT: v_add_co_u32_e32 v17, vcc, v20, v21 -; GFX9-NEXT: v_mul_hi_u32 v21, v0, v9 -; GFX9-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v16, vcc, v17, v16 -; GFX9-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; GFX9-NEXT: v_add_u32_e32 v17, v20, v17 -; GFX9-NEXT: v_add_co_u32_e32 v19, vcc, v19, v21 -; GFX9-NEXT: v_mul_lo_u32 v21, v3, v8 -; GFX9-NEXT: v_mul_lo_u32 v22, v2, v9 -; GFX9-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v17, vcc, v19, v17 -; GFX9-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v18, v18, v20, v19 -; GFX9-NEXT: v_mul_lo_u32 v19, v1, v10 -; GFX9-NEXT: v_add_co_u32_e32 v20, vcc, v21, v22 -; GFX9-NEXT: v_mul_lo_u32 v22, v0, v11 -; GFX9-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v19, vcc, v20, v19 -; GFX9-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v19, vcc, v19, v22 -; GFX9-NEXT: v_mul_hi_u32 v23, v2, v8 -; GFX9-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v20, v21, v20, v22 -; GFX9-NEXT: v_mul_hi_u32 v21, v1, v9 -; GFX9-NEXT: v_add_co_u32_e32 v19, vcc, v19, v23 -; GFX9-NEXT: v_mul_hi_u32 v23, v0, v10 -; GFX9-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v19, vcc, v19, v21 -; GFX9-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v20, v20, v22, v21 -; GFX9-NEXT: v_add_co_u32_e32 v19, vcc, v19, v23 -; GFX9-NEXT: v_mul_lo_u32 v22, v4, v8 -; GFX9-NEXT: v_mul_lo_u32 v23, v3, v9 -; GFX9-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v18, vcc, v19, v18 -; GFX9-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v19, v20, v21, v19 -; GFX9-NEXT: v_mul_lo_u32 v20, v2, v10 -; GFX9-NEXT: v_add_co_u32_e32 v21, vcc, v22, v23 -; GFX9-NEXT: v_mul_lo_u32 v23, v1, v11 -; GFX9-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v20, vcc, v21, v20 -; GFX9-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v20, vcc, v20, v23 -; GFX9-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v21, v22, v21, v23 -; GFX9-NEXT: v_mul_lo_u32 v22, v0, v12 -; GFX9-NEXT: v_mul_hi_u32 v23, v3, v8 -; GFX9-NEXT: v_mul_lo_u32 v7, v7, v8 -; GFX9-NEXT: v_add_co_u32_e32 v20, vcc, v20, v22 -; GFX9-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v20, vcc, v20, v23 -; GFX9-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v21, v21, v22, v23 -; GFX9-NEXT: v_mul_hi_u32 v22, v2, v9 -; GFX9-NEXT: v_mul_hi_u32 v23, v1, v10 -; GFX9-NEXT: v_add_co_u32_e32 v20, vcc, v20, v22 -; GFX9-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v20, vcc, v20, v23 -; GFX9-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v21, v21, v22, v23 -; GFX9-NEXT: v_mul_hi_u32 v22, v0, v11 -; GFX9-NEXT: v_mul_lo_u32 v23, v3, v10 -; GFX9-NEXT: v_add_co_u32_e32 v20, vcc, v20, v22 -; GFX9-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v19, vcc, v20, v19 -; GFX9-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v20, v21, v22, v20 -; GFX9-NEXT: v_mul_lo_u32 v21, v5, v8 -; GFX9-NEXT: v_mul_lo_u32 v22, v4, v9 -; GFX9-NEXT: v_add_co_u32_e32 v21, vcc, v21, v22 -; GFX9-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v21, vcc, v21, v23 -; GFX9-NEXT: v_mul_lo_u32 v23, v2, v11 -; GFX9-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v21, vcc, v21, v23 -; GFX9-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v22, v22, v24, v23 -; GFX9-NEXT: v_mul_lo_u32 v23, v1, v12 -; GFX9-NEXT: v_add_co_u32_e32 v21, vcc, v21, v23 -; GFX9-NEXT: v_mul_lo_u32 v23, v0, v13 -; GFX9-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v21, vcc, v21, v23 -; GFX9-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v22, v22, v24, v23 -; GFX9-NEXT: v_mul_hi_u32 v23, v4, v8 -; GFX9-NEXT: v_add_co_u32_e32 v21, vcc, v21, v23 -; GFX9-NEXT: v_mul_hi_u32 v23, v3, v9 -; GFX9-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v21, vcc, v21, v23 -; GFX9-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v22, v22, v24, v23 -; GFX9-NEXT: v_mul_hi_u32 v23, v2, v10 -; GFX9-NEXT: v_add_co_u32_e32 v21, vcc, v21, v23 -; GFX9-NEXT: v_mul_hi_u32 v23, v1, v11 -; GFX9-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v21, vcc, v21, v23 -; GFX9-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v22, v22, v24, v23 -; GFX9-NEXT: v_mul_hi_u32 v23, v0, v12 -; GFX9-NEXT: v_add_co_u32_e32 v21, vcc, v21, v23 -; GFX9-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v20, vcc, v21, v20 -; GFX9-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v21, v22, v23, v21 -; GFX9-NEXT: v_mul_lo_u32 v22, v6, v8 -; GFX9-NEXT: v_mul_lo_u32 v23, v5, v9 -; GFX9-NEXT: v_add_co_u32_e32 v22, vcc, v22, v23 -; GFX9-NEXT: v_mul_lo_u32 v23, v4, v10 -; GFX9-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v22, vcc, v22, v23 -; GFX9-NEXT: v_mul_lo_u32 v23, v3, v11 -; GFX9-NEXT: v_cndmask_b32_e64 v25, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v22, vcc, v22, v23 -; GFX9-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v23, v24, v25, v23 -; GFX9-NEXT: v_mul_lo_u32 v24, v2, v12 -; GFX9-NEXT: v_add_co_u32_e32 v22, vcc, v22, v24 -; GFX9-NEXT: v_mul_lo_u32 v24, v1, v13 -; GFX9-NEXT: v_cndmask_b32_e64 v25, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v22, vcc, v22, v24 -; GFX9-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v23, v23, v25, v24 -; GFX9-NEXT: v_mul_lo_u32 v24, v0, v14 -; GFX9-NEXT: v_add_co_u32_e32 v22, vcc, v22, v24 -; GFX9-NEXT: v_mul_hi_u32 v24, v5, v8 -; GFX9-NEXT: v_cndmask_b32_e64 v25, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v22, vcc, v22, v24 -; GFX9-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v23, v23, v25, v24 -; GFX9-NEXT: v_mul_hi_u32 v24, v4, v9 -; GFX9-NEXT: v_add_co_u32_e32 v22, vcc, v22, v24 -; GFX9-NEXT: v_mul_hi_u32 v24, v3, v10 -; GFX9-NEXT: v_cndmask_b32_e64 v25, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v22, vcc, v22, v24 -; GFX9-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v23, v23, v25, v24 -; GFX9-NEXT: v_mul_hi_u32 v24, v2, v11 -; GFX9-NEXT: v_add_co_u32_e32 v22, vcc, v22, v24 -; GFX9-NEXT: v_mul_hi_u32 v24, v1, v12 -; GFX9-NEXT: v_cndmask_b32_e64 v25, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v22, vcc, v22, v24 -; GFX9-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v23, v23, v25, v24 -; GFX9-NEXT: v_mul_hi_u32 v24, v0, v13 -; GFX9-NEXT: v_add_co_u32_e32 v22, vcc, v22, v24 -; GFX9-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v21, vcc, v22, v21 -; GFX9-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v22, v23, v24, v22 -; GFX9-NEXT: v_mul_lo_u32 v23, v6, v9 -; GFX9-NEXT: v_mul_lo_u32 v24, v4, v11 -; GFX9-NEXT: v_mul_hi_u32 v4, v4, v10 -; GFX9-NEXT: v_mul_hi_u32 v6, v6, v8 -; GFX9-NEXT: v_add_u32_e32 v7, v7, v23 -; GFX9-NEXT: v_mul_lo_u32 v23, v5, v10 -; GFX9-NEXT: v_mul_hi_u32 v5, v5, v9 -; GFX9-NEXT: v_mul_hi_u32 v9, v3, v11 -; GFX9-NEXT: v_mul_lo_u32 v3, v3, v12 -; GFX9-NEXT: v_mul_hi_u32 v10, v2, v12 -; GFX9-NEXT: v_mul_lo_u32 v2, v2, v13 -; GFX9-NEXT: v_mul_hi_u32 v11, v1, v13 -; GFX9-NEXT: v_mul_lo_u32 v12, v1, v14 -; GFX9-NEXT: v_mul_lo_u32 v13, v0, v15 -; GFX9-NEXT: v_add3_u32 v7, v7, v23, v24 -; GFX9-NEXT: v_add3_u32 v2, v7, v3, v2 -; GFX9-NEXT: v_mul_lo_u32 v1, v0, v8 -; GFX9-NEXT: v_mul_hi_u32 v0, v0, v14 -; GFX9-NEXT: v_add3_u32 v2, v2, v12, v13 -; GFX9-NEXT: v_add3_u32 v2, v2, v6, v5 -; GFX9-NEXT: v_add3_u32 v2, v2, v4, v9 -; GFX9-NEXT: v_add3_u32 v2, v2, v10, v11 -; GFX9-NEXT: v_add3_u32 v7, v2, v0, v22 -; GFX9-NEXT: v_mov_b32_e32 v0, v1 -; GFX9-NEXT: v_mov_b32_e32 v1, v16 -; GFX9-NEXT: v_mov_b32_e32 v2, v17 -; GFX9-NEXT: v_mov_b32_e32 v3, v18 -; GFX9-NEXT: v_mov_b32_e32 v4, v19 -; GFX9-NEXT: v_mov_b32_e32 v5, v20 -; GFX9-NEXT: v_mov_b32_e32 v6, v21 +; GFX9-NEXT: v_mov_b32_e32 v16, v0 +; GFX9-NEXT: v_mov_b32_e32 v17, v1 +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v16, v14, 0 +; GFX9-NEXT: v_mad_u64_u32 v[18:19], s[4:5], v16, v12, 0 +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v17, v13, v[0:1] +; GFX9-NEXT: v_mul_lo_u32 v27, v3, v12 +; GFX9-NEXT: v_mul_lo_u32 v26, v5, v10 +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, v12, v[0:1] +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v11, v[0:1] +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v10, v[0:1] +; GFX9-NEXT: v_mad_u64_u32 v[18:19], s[4:5], v17, v11, v[18:19] +; GFX9-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[4:5] +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[6:7], v5, v9, v[0:1] +; GFX9-NEXT: v_mad_u64_u32 v[18:19], vcc, v2, v10, v[18:19] +; GFX9-NEXT: v_addc_co_u32_e32 v20, vcc, 0, v20, vcc +; GFX9-NEXT: v_mad_u64_u32 v[18:19], vcc, v3, v9, v[18:19] +; GFX9-NEXT: v_addc_co_u32_e32 v22, vcc, 0, v20, vcc +; GFX9-NEXT: v_mad_u64_u32 v[18:19], vcc, v4, v8, v[18:19] +; GFX9-NEXT: v_addc_co_u32_e32 v24, vcc, 0, v22, vcc +; GFX9-NEXT: v_mad_u64_u32 v[22:23], s[4:5], v16, v10, 0 +; GFX9-NEXT: v_mad_u64_u32 v[20:21], s[4:5], v6, v8, v[0:1] +; GFX9-NEXT: v_mad_u64_u32 v[22:23], s[4:5], v17, v9, v[22:23] +; GFX9-NEXT: v_mov_b32_e32 v1, v18 +; GFX9-NEXT: v_mov_b32_e32 v18, v19 +; GFX9-NEXT: v_mov_b32_e32 v19, v20 +; GFX9-NEXT: v_mad_u64_u32 v[18:19], vcc, v16, v13, v[18:19] +; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX9-NEXT: v_mad_u64_u32 v[22:23], s[4:5], v2, v8, v[22:23] +; GFX9-NEXT: v_addc_co_u32_e64 v25, s[4:5], 0, v0, s[4:5] +; GFX9-NEXT: v_mad_u64_u32 v[18:19], s[4:5], v17, v12, v[18:19] +; GFX9-NEXT: v_mov_b32_e32 v0, v23 +; GFX9-NEXT: v_mul_lo_u32 v23, v4, v11 +; GFX9-NEXT: v_mad_u64_u32 v[18:19], s[6:7], v2, v11, v[18:19] +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[12:13], v16, v11, v[0:1] +; GFX9-NEXT: v_mad_u64_u32 v[11:12], s[8:9], v3, v10, v[18:19] +; GFX9-NEXT: v_mul_lo_u32 v13, v2, v13 +; GFX9-NEXT: v_mul_lo_u32 v20, v6, v9 +; GFX9-NEXT: v_mad_u64_u32 v[11:12], s[10:11], v4, v9, v[11:12] +; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[12:13] +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[12:13], v17, v10, v[0:1] +; GFX9-NEXT: v_addc_co_u32_e64 v4, s[12:13], 0, v4, s[12:13] +; GFX9-NEXT: v_mad_u64_u32 v[18:19], s[12:13], v2, v9, v[0:1] +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[14:15], v16, v8, 0 +; GFX9-NEXT: v_mov_b32_e32 v2, v22 +; GFX9-NEXT: v_addc_co_u32_e64 v10, s[12:13], 0, v4, s[12:13] +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[16:17], v16, v9, v[1:2] +; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[12:13], v3, v8, v[18:19] +; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[14:15], v5, v8, v[11:12] +; GFX9-NEXT: v_addc_co_u32_e64 v10, s[12:13], 0, v10, s[12:13] +; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[16:17] +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[12:13], v17, v8, v[1:2] +; GFX9-NEXT: v_addc_co_u32_e64 v3, s[12:13], v11, v3, s[12:13] +; GFX9-NEXT: v_mul_lo_u32 v11, v16, v15 +; GFX9-NEXT: v_mul_lo_u32 v9, v17, v14 +; GFX9-NEXT: v_addc_co_u32_e64 v4, s[12:13], v25, v4, s[12:13] +; GFX9-NEXT: v_addc_co_u32_e64 v5, s[12:13], v10, v5, s[12:13] +; GFX9-NEXT: v_addc_co_u32_e64 v6, s[12:13], v24, v6, s[12:13] +; GFX9-NEXT: v_addc_co_u32_e64 v10, s[12:13], v21, v11, s[12:13] +; GFX9-NEXT: v_addc_co_u32_e64 v9, s[12:13], v10, v9, s[14:15] +; GFX9-NEXT: v_addc_co_u32_e64 v9, s[10:11], v9, v13, s[10:11] +; GFX9-NEXT: v_addc_co_u32_e64 v9, s[8:9], v9, v27, s[8:9] +; GFX9-NEXT: v_addc_co_u32_e64 v9, s[6:7], v9, v23, s[6:7] +; GFX9-NEXT: v_addc_co_u32_e64 v9, s[4:5], v9, v26, s[4:5] +; GFX9-NEXT: v_addc_co_u32_e32 v9, vcc, v9, v20, vcc +; GFX9-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v7, v8, v[9:10] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_mul_i256: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_mul_lo_u32 v16, v1, v8 -; GFX10-NEXT: v_mul_lo_u32 v17, v0, v9 -; GFX10-NEXT: v_mul_hi_u32 v18, v0, v8 -; GFX10-NEXT: v_mul_lo_u32 v19, v2, v8 -; GFX10-NEXT: v_mul_lo_u32 v20, v1, v9 -; GFX10-NEXT: v_mul_hi_u32 v21, v1, v8 -; GFX10-NEXT: v_mul_lo_u32 v22, v3, v8 -; GFX10-NEXT: v_mul_lo_u32 v25, v1, v10 -; GFX10-NEXT: v_mul_hi_u32 v23, v0, v9 -; GFX10-NEXT: v_add_co_u32 v16, s4, v16, v17 -; GFX10-NEXT: v_cndmask_b32_e64 v17, 0, 1, s4 -; GFX10-NEXT: v_mul_hi_u32 v27, v0, v10 -; GFX10-NEXT: v_mul_hi_u32 v29, v3, v9 -; GFX10-NEXT: v_add_co_u32 v16, s4, v16, v18 -; GFX10-NEXT: v_cndmask_b32_e64 v18, 0, 1, s4 -; GFX10-NEXT: v_add_co_u32 v19, s4, v19, v20 -; GFX10-NEXT: v_mul_lo_u32 v20, v2, v9 -; GFX10-NEXT: v_cndmask_b32_e64 v24, 0, 1, s4 -; GFX10-NEXT: v_add_nc_u32_e32 v17, v17, v18 -; GFX10-NEXT: v_mul_lo_u32 v18, v0, v10 -; GFX10-NEXT: v_mul_hi_u32 v31, v4, v9 +; GFX10-NEXT: v_mov_b32_e32 v16, v0 +; GFX10-NEXT: v_mov_b32_e32 v17, v1 +; GFX10-NEXT: v_mul_lo_u32 v27, v6, v9 +; GFX10-NEXT: v_mul_lo_u32 v28, v5, v10 ; GFX10-NEXT: v_mul_lo_u32 v7, v7, v8 -; GFX10-NEXT: v_mul_lo_u32 v15, v0, v15 -; GFX10-NEXT: v_add_co_u32 v18, s4, v19, v18 -; GFX10-NEXT: v_cndmask_b32_e64 v19, 0, 1, s4 -; GFX10-NEXT: v_add_co_u32 v20, s4, v22, v20 -; GFX10-NEXT: v_add_co_u32 v18, s5, v18, v21 -; GFX10-NEXT: v_cndmask_b32_e64 v21, 0, 1, s5 -; GFX10-NEXT: v_mul_lo_u32 v22, v0, v11 -; GFX10-NEXT: v_cndmask_b32_e64 v26, 0, 1, s4 -; GFX10-NEXT: v_add_co_u32 v20, s4, v20, v25 -; GFX10-NEXT: v_add3_u32 v19, v24, v19, v21 -; GFX10-NEXT: v_mul_hi_u32 v21, v2, v8 -; GFX10-NEXT: v_cndmask_b32_e64 v24, 0, 1, s4 -; GFX10-NEXT: v_add_co_u32 v18, s5, v18, v23 -; GFX10-NEXT: v_add_co_u32 v20, s4, v20, v22 -; GFX10-NEXT: v_mul_hi_u32 v23, v1, v9 -; GFX10-NEXT: v_cndmask_b32_e64 v25, 0, 1, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v22, 0, 1, s5 -; GFX10-NEXT: v_add_co_u32 v20, s4, v20, v21 -; GFX10-NEXT: v_add_co_u32 v17, s5, v18, v17 -; GFX10-NEXT: v_add3_u32 v21, v26, v24, v25 -; GFX10-NEXT: v_cndmask_b32_e64 v24, 0, 1, s4 -; GFX10-NEXT: v_add_co_u32 v20, s4, v20, v23 -; GFX10-NEXT: v_cndmask_b32_e64 v18, 0, 1, s5 -; GFX10-NEXT: v_cndmask_b32_e64 v23, 0, 1, s4 -; GFX10-NEXT: v_mul_lo_u32 v25, v4, v8 -; GFX10-NEXT: v_mul_lo_u32 v26, v3, v9 -; GFX10-NEXT: v_add_co_u32 v20, s5, v20, v27 -; GFX10-NEXT: v_add3_u32 v18, v19, v22, v18 -; GFX10-NEXT: v_add3_u32 v19, v21, v24, v23 -; GFX10-NEXT: v_mul_lo_u32 v21, v2, v10 -; GFX10-NEXT: v_mul_lo_u32 v24, v1, v11 -; GFX10-NEXT: v_cndmask_b32_e64 v23, 0, 1, s5 -; GFX10-NEXT: v_add_co_u32 v22, s4, v25, v26 -; GFX10-NEXT: v_cndmask_b32_e64 v25, 0, 1, s4 -; GFX10-NEXT: v_mul_hi_u32 v26, v3, v8 -; GFX10-NEXT: v_add_co_u32 v21, s4, v22, v21 -; GFX10-NEXT: v_mul_lo_u32 v22, v0, v12 -; GFX10-NEXT: v_cndmask_b32_e64 v27, 0, 1, s4 -; GFX10-NEXT: v_add_co_u32 v21, s4, v21, v24 -; GFX10-NEXT: v_cndmask_b32_e64 v24, 0, 1, s4 -; GFX10-NEXT: v_add_co_u32 v18, s4, v20, v18 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v16, v14, 0 +; GFX10-NEXT: v_mad_u64_u32 v[18:19], s4, v16, v12, 0 +; GFX10-NEXT: v_mul_lo_u32 v30, v17, v14 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v17, v13, v[0:1] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v2, v12, v[0:1] +; GFX10-NEXT: v_mad_u64_u32 v[18:19], s4, v17, v11, v[18:19] ; GFX10-NEXT: v_cndmask_b32_e64 v20, 0, 1, s4 -; GFX10-NEXT: v_add_co_u32 v21, s4, v21, v22 -; GFX10-NEXT: v_mul_hi_u32 v22, v2, v9 -; GFX10-NEXT: v_add3_u32 v24, v25, v27, v24 -; GFX10-NEXT: v_cndmask_b32_e64 v25, 0, 1, s4 -; GFX10-NEXT: v_add_co_u32 v21, s4, v21, v26 -; GFX10-NEXT: v_cndmask_b32_e64 v26, 0, 1, s4 -; GFX10-NEXT: v_add3_u32 v19, v19, v23, v20 -; GFX10-NEXT: v_mul_hi_u32 v20, v1, v10 -; GFX10-NEXT: v_add_co_u32 v21, s4, v21, v22 -; GFX10-NEXT: v_add3_u32 v23, v24, v25, v26 -; GFX10-NEXT: v_mul_lo_u32 v22, v5, v8 -; GFX10-NEXT: v_mul_lo_u32 v24, v4, v9 -; GFX10-NEXT: v_cndmask_b32_e64 v25, 0, 1, s4 -; GFX10-NEXT: v_mul_lo_u32 v26, v3, v10 -; GFX10-NEXT: v_add_co_u32 v20, s4, v21, v20 -; GFX10-NEXT: v_cndmask_b32_e64 v21, 0, 1, s4 -; GFX10-NEXT: v_mul_hi_u32 v27, v0, v11 -; GFX10-NEXT: v_add_co_u32 v22, s4, v22, v24 -; GFX10-NEXT: v_add3_u32 v21, v23, v25, v21 -; GFX10-NEXT: v_mul_lo_u32 v23, v2, v11 -; GFX10-NEXT: v_cndmask_b32_e64 v24, 0, 1, s4 -; GFX10-NEXT: v_add_co_u32 v22, s4, v22, v26 -; GFX10-NEXT: v_mul_lo_u32 v26, v1, v12 -; GFX10-NEXT: v_add_co_u32 v20, s5, v20, v27 -; GFX10-NEXT: v_cndmask_b32_e64 v27, 0, 1, s4 -; GFX10-NEXT: v_add_co_u32 v22, s4, v22, v23 -; GFX10-NEXT: v_mul_lo_u32 v23, v0, v13 -; GFX10-NEXT: v_cndmask_b32_e64 v28, 0, 1, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v25, 0, 1, s5 -; GFX10-NEXT: v_add_co_u32 v22, s4, v22, v26 -; GFX10-NEXT: v_mul_hi_u32 v26, v4, v8 -; GFX10-NEXT: v_add_co_u32 v19, s5, v20, v19 -; GFX10-NEXT: v_cndmask_b32_e64 v20, 0, 1, s5 -; GFX10-NEXT: v_cndmask_b32_e64 v30, 0, 1, s4 -; GFX10-NEXT: v_add_co_u32 v22, s4, v22, v23 -; GFX10-NEXT: v_add3_u32 v23, v24, v27, v28 -; GFX10-NEXT: v_cndmask_b32_e64 v24, 0, 1, s4 -; GFX10-NEXT: v_add3_u32 v20, v21, v25, v20 -; GFX10-NEXT: v_add_co_u32 v21, s4, v22, v26 -; GFX10-NEXT: v_mul_hi_u32 v22, v2, v10 -; GFX10-NEXT: v_add3_u32 v23, v23, v30, v24 -; GFX10-NEXT: v_cndmask_b32_e64 v24, 0, 1, s4 -; GFX10-NEXT: v_add_co_u32 v21, s4, v21, v29 -; GFX10-NEXT: v_cndmask_b32_e64 v25, 0, 1, s4 -; GFX10-NEXT: v_mul_hi_u32 v26, v1, v11 -; GFX10-NEXT: v_mul_lo_u32 v27, v6, v8 -; GFX10-NEXT: v_mul_lo_u32 v28, v5, v9 -; GFX10-NEXT: v_add_co_u32 v21, s4, v21, v22 -; GFX10-NEXT: v_add3_u32 v23, v23, v24, v25 -; GFX10-NEXT: v_mul_lo_u32 v24, v4, v10 -; GFX10-NEXT: v_cndmask_b32_e64 v22, 0, 1, s4 -; GFX10-NEXT: v_add_co_u32 v21, s5, v21, v26 -; GFX10-NEXT: v_add_co_u32 v25, s4, v27, v28 -; GFX10-NEXT: v_cndmask_b32_e64 v26, 0, 1, s5 -; GFX10-NEXT: v_mul_lo_u32 v27, v3, v11 -; GFX10-NEXT: v_cndmask_b32_e64 v28, 0, 1, s4 -; GFX10-NEXT: v_add_co_u32 v24, s4, v25, v24 -; GFX10-NEXT: v_mul_hi_u32 v29, v0, v12 -; GFX10-NEXT: v_add3_u32 v22, v23, v22, v26 -; GFX10-NEXT: v_mul_lo_u32 v23, v2, v12 -; GFX10-NEXT: v_cndmask_b32_e64 v25, 0, 1, s4 -; GFX10-NEXT: v_add_co_u32 v24, s4, v24, v27 -; GFX10-NEXT: v_mul_lo_u32 v27, v1, v13 -; GFX10-NEXT: v_add_co_u32 v21, s5, v21, v29 -; GFX10-NEXT: v_cndmask_b32_e64 v29, 0, 1, s4 -; GFX10-NEXT: v_add_co_u32 v23, s4, v24, v23 -; GFX10-NEXT: v_mul_lo_u32 v24, v0, v14 -; GFX10-NEXT: v_cndmask_b32_e64 v30, 0, 1, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v26, 0, 1, s5 -; GFX10-NEXT: v_add_co_u32 v23, s4, v23, v27 -; GFX10-NEXT: v_mul_hi_u32 v27, v5, v8 -; GFX10-NEXT: v_cndmask_b32_e64 v32, 0, 1, s4 -; GFX10-NEXT: v_add_co_u32 v20, s5, v21, v20 -; GFX10-NEXT: v_add_co_u32 v23, s4, v23, v24 -; GFX10-NEXT: v_add3_u32 v24, v28, v25, v29 -; GFX10-NEXT: v_cndmask_b32_e64 v28, 0, 1, s4 -; GFX10-NEXT: v_mul_hi_u32 v25, v3, v10 -; GFX10-NEXT: v_add_co_u32 v23, s4, v23, v27 -; GFX10-NEXT: v_cndmask_b32_e64 v21, 0, 1, s5 -; GFX10-NEXT: v_add3_u32 v24, v24, v30, v32 -; GFX10-NEXT: v_cndmask_b32_e64 v27, 0, 1, s4 -; GFX10-NEXT: v_add_co_u32 v23, s4, v23, v31 -; GFX10-NEXT: v_add3_u32 v21, v22, v26, v21 -; GFX10-NEXT: v_mul_hi_u32 v26, v2, v11 -; GFX10-NEXT: v_add3_u32 v22, v24, v28, v27 -; GFX10-NEXT: v_cndmask_b32_e64 v24, 0, 1, s4 -; GFX10-NEXT: v_add_co_u32 v23, s4, v23, v25 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s5, v3, v11, v[0:1] +; GFX10-NEXT: v_mad_u64_u32 v[18:19], vcc_lo, v2, v10, v[18:19] +; GFX10-NEXT: v_add_co_ci_u32_e32 v22, vcc_lo, 0, v20, vcc_lo +; GFX10-NEXT: v_mad_u64_u32 v[20:21], s4, v16, v10, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v4, v10, v[0:1] +; GFX10-NEXT: v_mad_u64_u32 v[18:19], vcc_lo, v3, v9, v[18:19] +; GFX10-NEXT: v_add_co_ci_u32_e32 v24, vcc_lo, 0, v22, vcc_lo +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v5, v9, v[0:1] +; GFX10-NEXT: v_mad_u64_u32 v[18:19], vcc_lo, v4, v8, v[18:19] +; GFX10-NEXT: v_add_co_ci_u32_e32 v26, vcc_lo, 0, v24, vcc_lo +; GFX10-NEXT: v_mad_u64_u32 v[22:23], s4, v6, v8, v[0:1] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v17, v9, v[20:21] ; GFX10-NEXT: v_cndmask_b32_e64 v25, 0, 1, s4 -; GFX10-NEXT: v_mul_lo_u32 v28, v6, v9 -; GFX10-NEXT: v_mul_lo_u32 v29, v3, v12 -; GFX10-NEXT: v_mul_hi_u32 v27, v1, v12 -; GFX10-NEXT: v_add_co_u32 v23, s4, v23, v26 -; GFX10-NEXT: v_add3_u32 v22, v22, v24, v25 -; GFX10-NEXT: v_mul_lo_u32 v24, v5, v10 -; GFX10-NEXT: v_mul_lo_u32 v25, v4, v11 -; GFX10-NEXT: v_add_nc_u32_e32 v7, v7, v28 -; GFX10-NEXT: v_mul_lo_u32 v28, v2, v13 -; GFX10-NEXT: v_mul_hi_u32 v6, v6, v8 -; GFX10-NEXT: v_mul_hi_u32 v5, v5, v9 -; GFX10-NEXT: v_cndmask_b32_e64 v26, 0, 1, s4 -; GFX10-NEXT: v_add_co_u32 v23, s4, v23, v27 -; GFX10-NEXT: v_add3_u32 v7, v7, v24, v25 -; GFX10-NEXT: v_mul_lo_u32 v24, v1, v14 -; GFX10-NEXT: v_mul_hi_u32 v25, v0, v13 -; GFX10-NEXT: v_mul_hi_u32 v4, v4, v10 -; GFX10-NEXT: v_mul_hi_u32 v3, v3, v11 -; GFX10-NEXT: v_add3_u32 v7, v7, v29, v28 -; GFX10-NEXT: v_cndmask_b32_e64 v27, 0, 1, s4 -; GFX10-NEXT: v_mul_hi_u32 v2, v2, v12 -; GFX10-NEXT: v_mul_hi_u32 v1, v1, v13 -; GFX10-NEXT: v_add3_u32 v7, v7, v24, v15 -; GFX10-NEXT: v_add_co_u32 v9, s4, v23, v25 -; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, 1, s4 -; GFX10-NEXT: v_add3_u32 v22, v22, v26, v27 -; GFX10-NEXT: v_add3_u32 v5, v7, v6, v5 -; GFX10-NEXT: v_add_co_u32 v6, s4, v9, v21 -; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s4 -; GFX10-NEXT: v_add3_u32 v3, v5, v4, v3 -; GFX10-NEXT: v_mul_hi_u32 v4, v0, v14 -; GFX10-NEXT: v_mul_lo_u32 v0, v0, v8 -; GFX10-NEXT: v_add3_u32 v5, v22, v10, v7 -; GFX10-NEXT: v_add3_u32 v1, v3, v2, v1 -; GFX10-NEXT: v_mov_b32_e32 v2, v17 -; GFX10-NEXT: v_mov_b32_e32 v3, v18 -; GFX10-NEXT: v_add3_u32 v7, v1, v4, v5 -; GFX10-NEXT: v_mov_b32_e32 v1, v16 -; GFX10-NEXT: v_mov_b32_e32 v4, v19 -; GFX10-NEXT: v_mov_b32_e32 v5, v20 +; GFX10-NEXT: v_mov_b32_e32 v20, v22 +; GFX10-NEXT: v_mad_u64_u32 v[21:22], vcc_lo, v2, v8, v[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e32 v29, vcc_lo, 0, v25, vcc_lo +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v16, v13, v[19:20] +; GFX10-NEXT: v_mov_b32_e32 v20, v18 +; GFX10-NEXT: v_mov_b32_e32 v19, v22 +; GFX10-NEXT: v_mul_lo_u32 v22, v16, v15 +; GFX10-NEXT: v_mad_u64_u32 v[24:25], vcc_lo, v17, v12, v[0:1] +; GFX10-NEXT: v_mad_u64_u32 v[14:15], s6, v16, v11, v[19:20] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s5, v16, v8, 0 +; GFX10-NEXT: v_mul_lo_u32 v20, v4, v11 +; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s6 +; GFX10-NEXT: v_mad_u64_u32 v[18:19], s5, v2, v11, v[24:25] +; GFX10-NEXT: v_mul_lo_u32 v25, v3, v12 +; GFX10-NEXT: v_mad_u64_u32 v[11:12], s6, v17, v10, v[14:15] +; GFX10-NEXT: v_add_co_ci_u32_e64 v6, s6, 0, v6, s6 +; GFX10-NEXT: v_mul_lo_u32 v24, v2, v13 +; GFX10-NEXT: v_mad_u64_u32 v[18:19], s7, v3, v10, v[18:19] +; GFX10-NEXT: v_mov_b32_e32 v13, v1 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s6, v2, v9, v[11:12] +; GFX10-NEXT: v_mov_b32_e32 v14, v21 +; GFX10-NEXT: v_add_co_ci_u32_e64 v6, s6, 0, v6, s6 +; GFX10-NEXT: v_mad_u64_u32 v[10:11], s6, v4, v9, v[18:19] +; GFX10-NEXT: v_mad_u64_u32 v[12:13], s8, v16, v9, v[13:14] +; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, 1, s8 +; GFX10-NEXT: v_mad_u64_u32 v[3:4], s8, v3, v8, v[1:2] +; GFX10-NEXT: v_add_co_ci_u32_e64 v14, s8, 0, v6, s8 +; GFX10-NEXT: v_mad_u64_u32 v[5:6], s8, v5, v8, v[10:11] +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s9, v17, v8, v[12:13] +; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s9, v9, v3, s9 +; GFX10-NEXT: v_add_co_ci_u32_e64 v4, s9, v29, v4, s9 +; GFX10-NEXT: v_add_co_ci_u32_e64 v5, s9, v14, v5, s9 +; GFX10-NEXT: v_add_co_ci_u32_e64 v6, s9, v26, v6, s9 +; GFX10-NEXT: v_add_co_ci_u32_e64 v9, s9, v23, v22, s9 +; GFX10-NEXT: v_add_co_ci_u32_e64 v9, s8, v9, v30, s8 +; GFX10-NEXT: v_add_co_ci_u32_e64 v9, s6, v9, v24, s6 +; GFX10-NEXT: v_add_co_ci_u32_e64 v9, s6, v9, v25, s7 +; GFX10-NEXT: v_add_co_ci_u32_e64 v9, s5, v9, v20, s5 +; GFX10-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v9, v28, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v8, vcc_lo, v9, v27, s4 +; GFX10-NEXT: v_add_nc_u32_e32 v7, v8, v7 ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = mul i256 %num, %den ret i256 %result diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mad_64_32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mad_64_32.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mad_64_32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mad_64_32.mir @@ -9,57 +9,49 @@ body: | bb.0: - liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 + liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3 ; ; ; GFX8-LABEL: name: mad_u64_u32_sss ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX8-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:vgpr_32(s32) = G_UMULH [[COPY4]], [[COPY5]] + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:vgpr_32(s32) = G_UMULH [[COPY3]], [[COPY4]] ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UMULH]](s32), implicit $exec - ; GFX8-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) + ; GFX8-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) ; GFX8-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]] ; GFX8-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[V_READFIRSTLANE_B32_]], [[UV1]], [[UADDO1]] - ; GFX8-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX8-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[UADDE1]](s32) ; GFX9MI-LABEL: name: mad_u64_u32_sss ; GFX9MI: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX9MI-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 ; GFX9MI-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] ; GFX9MI-NEXT: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]] - ; GFX9MI-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) + ; GFX9MI-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) ; GFX9MI-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]] ; GFX9MI-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[UMULH]], [[UV1]], [[UADDO1]] - ; GFX9MI-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX9MI-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) ; GFX9MI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[UADDE1]](s32) ; GFX10-LABEL: name: mad_u64_u32_sss ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 ; GFX10-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] ; GFX10-NEXT: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]] - ; GFX10-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) + ; GFX10-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) ; GFX10-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]] ; GFX10-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[UMULH]], [[UV1]], [[UADDO1]] - ; GFX10-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX10-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[UADDE1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = COPY $sgpr3 - %4:_(s64) = G_MERGE_VALUES %2, %3 - %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4 + %2:_(s64) = COPY $sgpr2_sgpr3 + %3:_(s64), %4:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %2 ... --- @@ -68,56 +60,48 @@ body: | bb.0: - liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1 ; ; ; GFX8-LABEL: name: mad_u64_u32_ssv ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY4]], [[COPY5]] - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[MUL]](s32) - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UMULH]](s32) - ; GFX8-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY6]], [[UV]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY7]], [[UV1]], [[UADDO1]] - ; GFX8-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vcc(s1) = COPY [[UADDE1]](s1) + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY3]], [[COPY4]] + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[MUL]](s32) + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UMULH]](s32) + ; GFX8-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY5]], [[UV]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY6]], [[UV1]], [[UADDO1]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vcc(s1) = COPY [[UADDE1]](s1) ; GFX9MI-LABEL: name: mad_u64_u32_ssv ; GFX9MI: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX9MI-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9MI-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9MI-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX9MI-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY4]](s32), [[COPY5]], [[MV]] + ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; GFX9MI-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; GFX9MI-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY3]](s32), [[COPY4]], [[COPY2]] ; GFX10-LABEL: name: mad_u64_u32_ssv ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] ; GFX10-NEXT: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]] - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[MUL]](s32) - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UMULH]](s32) - ; GFX10-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; GFX10-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY4]], [[UV]] - ; GFX10-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY5]], [[UV1]], [[UADDO1]] - ; GFX10-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:vcc(s1) = COPY [[UADDE1]](s1) + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[MUL]](s32) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[UMULH]](s32) + ; GFX10-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY3]], [[UV]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY4]], [[UV1]], [[UADDO1]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vcc(s1) = COPY [[UADDE1]](s1) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $vgpr0 - %3:_(s32) = COPY $vgpr1 - %4:_(s64) = G_MERGE_VALUES %2, %3 - %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4 + %2:_(s64) = COPY $vgpr0_vgpr1 + %3:_(s64), %4:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %2 ... --- @@ -126,42 +110,34 @@ body: | bb.0: - liveins: $sgpr0, $vgpr0, $sgpr1, $sgpr2 + liveins: $sgpr0, $vgpr1, $sgpr2_sgpr3 ; ; ; GFX8-LABEL: name: mad_u64_u32_svs ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX8-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(s64) = COPY [[MV]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY4]](s32), [[COPY1]], [[COPY5]] + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s64) = COPY [[COPY2]](s64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY3]](s32), [[COPY1]], [[COPY4]] ; GFX9MI-LABEL: name: mad_u64_u32_svs ; GFX9MI: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX9MI-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9MI-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9MI-NEXT: [[COPY5:%[0-9]+]]:vgpr(s64) = COPY [[MV]](s64) - ; GFX9MI-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY4]](s32), [[COPY1]], [[COPY5]] + ; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 + ; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; GFX9MI-NEXT: [[COPY4:%[0-9]+]]:vgpr(s64) = COPY [[COPY2]](s64) + ; GFX9MI-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY3]](s32), [[COPY1]], [[COPY4]] ; GFX10-LABEL: name: mad_u64_u32_svs ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr(s64) = COPY [[MV]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY4]](s32), [[COPY1]], [[COPY5]] + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr(s64) = COPY [[COPY2]](s64) + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY3]](s32), [[COPY1]], [[COPY4]] %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $sgpr1 - %3:_(s32) = COPY $sgpr2 - %4:_(s64) = G_MERGE_VALUES %2, %3 - %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4 + %1:_(s32) = COPY $vgpr1 + %2:_(s64) = COPY $sgpr2_sgpr3 + %3:_(s64), %4:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %2 ... --- @@ -170,39 +146,31 @@ body: | bb.0: - liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2 + liveins: $sgpr0, $vgpr1, $vgpr2_vgpr3 ; ; ; GFX8-LABEL: name: mad_u64_u32_svv ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX8-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY4]](s32), [[COPY1]], [[MV]] + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY3]](s32), [[COPY1]], [[COPY2]] ; GFX9MI-LABEL: name: mad_u64_u32_svv ; GFX9MI: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX9MI-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9MI-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9MI-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY4]](s32), [[COPY1]], [[MV]] + ; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; GFX9MI-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY3]](s32), [[COPY1]], [[COPY2]] ; GFX10-LABEL: name: mad_u64_u32_svv ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY4]](s32), [[COPY1]], [[MV]] + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY3]](s32), [[COPY1]], [[COPY2]] %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = COPY $vgpr1 - %3:_(s32) = COPY $vgpr2 - %4:_(s64) = G_MERGE_VALUES %2, %3 - %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4 + %1:_(s32) = COPY $vgpr1 + %2:_(s64) = COPY $vgpr2_vgpr3 + %3:_(s64), %4:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %2 ... --- @@ -211,42 +179,34 @@ body: | bb.0: - liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2 + liveins: $vgpr0, $sgpr1, $sgpr2_sgpr3 ; ; ; GFX8-LABEL: name: mad_u64_u32_vss ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX8-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(s64) = COPY [[MV]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY4]], [[COPY5]] + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s64) = COPY [[COPY2]](s64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY3]], [[COPY4]] ; GFX9MI-LABEL: name: mad_u64_u32_vss ; GFX9MI: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX9MI-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9MI-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX9MI-NEXT: [[COPY5:%[0-9]+]]:vgpr(s64) = COPY [[MV]](s64) - ; GFX9MI-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY4]], [[COPY5]] + ; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 + ; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; GFX9MI-NEXT: [[COPY4:%[0-9]+]]:vgpr(s64) = COPY [[COPY2]](s64) + ; GFX9MI-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY3]], [[COPY4]] ; GFX10-LABEL: name: mad_u64_u32_vss ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr(s64) = COPY [[MV]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY4]], [[COPY5]] + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr(s64) = COPY [[COPY2]](s64) + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY3]], [[COPY4]] %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = COPY $sgpr1 - %3:_(s32) = COPY $sgpr2 - %4:_(s64) = G_MERGE_VALUES %2, %3 - %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4 + %1:_(s32) = COPY $sgpr1 + %2:_(s64) = COPY $sgpr2_sgpr3 + %3:_(s64), %4:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %2 ... --- @@ -255,39 +215,31 @@ body: | bb.0: - liveins: $vgpr0, $sgpr0, $vgpr1, $vgpr2 + liveins: $vgpr0, $sgpr1, $vgpr2_vgpr3 ; ; ; GFX8-LABEL: name: mad_u64_u32_vsv ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX8-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY4]], [[MV]] + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY3]], [[COPY2]] ; GFX9MI-LABEL: name: mad_u64_u32_vsv ; GFX9MI: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX9MI-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9MI-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX9MI-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY4]], [[MV]] + ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; GFX9MI-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY3]], [[COPY2]] ; GFX10-LABEL: name: mad_u64_u32_vsv ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY4]], [[MV]] + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY3]], [[COPY2]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $vgpr1 - %3:_(s32) = COPY $vgpr2 - %4:_(s64) = G_MERGE_VALUES %2, %3 - %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4 + %2:_(s64) = COPY $vgpr2_vgpr3 + %3:_(s64), %4:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %2 ... --- @@ -296,39 +248,31 @@ body: | bb.0: - liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1 + liveins: $vgpr0, $vgpr1, $sgpr2_sgpr3 ; ; ; GFX8-LABEL: name: mad_u64_u32_vvs ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX8-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s64) = COPY [[MV]](s64) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY1]], [[COPY4]] + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY2]](s64) + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY1]], [[COPY3]] ; GFX9MI-LABEL: name: mad_u64_u32_vvs ; GFX9MI: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX9MI-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9MI-NEXT: [[COPY4:%[0-9]+]]:vgpr(s64) = COPY [[MV]](s64) - ; GFX9MI-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY1]], [[COPY4]] + ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 + ; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY2]](s64) + ; GFX9MI-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY1]], [[COPY3]] ; GFX10-LABEL: name: mad_u64_u32_vvs ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr(s64) = COPY [[MV]](s64) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY1]], [[COPY4]] + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY2]](s64) + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY1]], [[COPY3]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $sgpr1 - %3:_(s32) = COPY $sgpr2 - %4:_(s64) = G_MERGE_VALUES %2, %3 - %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4 + %2:_(s64) = COPY $sgpr2_sgpr3 + %3:_(s64), %4:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %2 ... --- @@ -337,36 +281,28 @@ body: | bb.0: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3 ; ; ; GFX8-LABEL: name: mad_u64_u32_vvv ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GFX8-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY1]], [[MV]] + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY1]], [[COPY2]] ; GFX9MI-LABEL: name: mad_u64_u32_vvv ; GFX9MI: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GFX9MI-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9MI-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY1]], [[MV]] + ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GFX9MI-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY1]], [[COPY2]] ; GFX10-LABEL: name: mad_u64_u32_vvv ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY1]], [[MV]] + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY1]], [[COPY2]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s64) = G_MERGE_VALUES %2, %3 - %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4 + %2:_(s64) = COPY $vgpr2_vgpr3 + %3:_(s64), %4:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %2 ... --- @@ -375,72 +311,64 @@ body: | bb.0: - liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 + liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3 ; ; ; GFX8-LABEL: name: mad_i64_i32_sss ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX8-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX8-NEXT: [[SMULH:%[0-9]+]]:vgpr_32(s32) = G_SMULH [[COPY4]], [[COPY5]] + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; GFX8-NEXT: [[SMULH:%[0-9]+]]:vgpr_32(s32) = G_SMULH [[COPY3]], [[COPY4]] ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[SMULH]](s32), implicit $exec ; GFX8-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[V_READFIRSTLANE_B32_]](s32), [[C]] - ; GFX8-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) + ; GFX8-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[UV1]](s32), [[C]] ; GFX8-NEXT: [[XOR:%[0-9]+]]:sgpr(s32) = G_XOR [[ICMP]], [[ICMP1]] ; GFX8-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]] ; GFX8-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[V_READFIRSTLANE_B32_]], [[UV1]], [[UADDO1]] ; GFX8-NEXT: [[XOR1:%[0-9]+]]:sgpr(s32) = G_XOR [[XOR]], [[UADDE1]] - ; GFX8-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX8-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[XOR1]](s32) ; GFX9MI-LABEL: name: mad_i64_i32_sss ; GFX9MI: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX9MI-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 ; GFX9MI-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] ; GFX9MI-NEXT: [[SMULH:%[0-9]+]]:sgpr(s32) = G_SMULH [[COPY]], [[COPY1]] ; GFX9MI-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GFX9MI-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SMULH]](s32), [[C]] - ; GFX9MI-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) + ; GFX9MI-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) ; GFX9MI-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[UV1]](s32), [[C]] ; GFX9MI-NEXT: [[XOR:%[0-9]+]]:sgpr(s32) = G_XOR [[ICMP]], [[ICMP1]] ; GFX9MI-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]] ; GFX9MI-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[SMULH]], [[UV1]], [[UADDO1]] ; GFX9MI-NEXT: [[XOR1:%[0-9]+]]:sgpr(s32) = G_XOR [[XOR]], [[UADDE1]] - ; GFX9MI-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX9MI-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) ; GFX9MI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[XOR1]](s32) ; GFX10-LABEL: name: mad_i64_i32_sss ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 ; GFX10-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] ; GFX10-NEXT: [[SMULH:%[0-9]+]]:sgpr(s32) = G_SMULH [[COPY]], [[COPY1]] ; GFX10-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SMULH]](s32), [[C]] - ; GFX10-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) + ; GFX10-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[UV1]](s32), [[C]] ; GFX10-NEXT: [[XOR:%[0-9]+]]:sgpr(s32) = G_XOR [[ICMP]], [[ICMP1]] ; GFX10-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]] ; GFX10-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[SMULH]], [[UV1]], [[UADDO1]] ; GFX10-NEXT: [[XOR1:%[0-9]+]]:sgpr(s32) = G_XOR [[XOR]], [[UADDE1]] - ; GFX10-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX10-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[XOR1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $sgpr2 - %3:_(s32) = COPY $sgpr3 - %4:_(s64) = G_MERGE_VALUES %2, %3 - %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_I64_I32 %0, %1, %4 + %2:_(s64) = COPY $sgpr2_sgpr3 + %3:_(s64), %4:_(s1) = G_AMDGPU_MAD_I64_I32 %0, %1, %2 ... --- @@ -449,67 +377,59 @@ body: | bb.0: - liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + liveins: $sgpr0, $sgpr1, $vgpr2_vgpr3 ; ; ; GFX8-LABEL: name: mad_i64_i32_ssv ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX8-NEXT: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY4]], [[COPY5]] + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; GFX8-NEXT: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY3]], [[COPY4]] ; GFX8-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[SMULH]](s32), [[C]] - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[MUL]](s32) - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[SMULH]](s32) - ; GFX8-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[MUL]](s32) + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[SMULH]](s32) + ; GFX8-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[UV1]](s32), [[C]] ; GFX8-NEXT: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[ICMP]], [[ICMP1]] - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY6]], [[UV]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY7]], [[UV1]], [[UADDO1]] + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY5]], [[UV]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY6]], [[UV1]], [[UADDO1]] ; GFX8-NEXT: [[XOR1:%[0-9]+]]:vcc(s1) = G_XOR [[XOR]], [[UADDE1]] - ; GFX8-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vcc(s1) = COPY [[XOR1]](s1) + ; GFX8-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vcc(s1) = COPY [[XOR1]](s1) ; GFX9MI-LABEL: name: mad_i64_i32_ssv ; GFX9MI: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX9MI-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9MI-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9MI-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX9MI-NEXT: [[AMDGPU_MAD_I64_I32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_I64_I32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_I64_I32 [[COPY4]](s32), [[COPY5]], [[MV]] + ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; GFX9MI-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; GFX9MI-NEXT: [[AMDGPU_MAD_I64_I32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_I64_I32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_I64_I32 [[COPY3]](s32), [[COPY4]], [[COPY2]] ; GFX10-LABEL: name: mad_i64_i32_ssv ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 ; GFX10-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] ; GFX10-NEXT: [[SMULH:%[0-9]+]]:sgpr(s32) = G_SMULH [[COPY]], [[COPY1]] ; GFX10-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SMULH]](s32), [[C]] ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:vcc(s1) = G_TRUNC [[ICMP]](s32) - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[MUL]](s32) - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[SMULH]](s32) - ; GFX10-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[MUL]](s32) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[SMULH]](s32) + ; GFX10-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[UV1]](s32), [[C]] ; GFX10-NEXT: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[TRUNC]], [[ICMP1]] - ; GFX10-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY4]], [[UV]] - ; GFX10-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY5]], [[UV1]], [[UADDO1]] + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY3]], [[UV]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY4]], [[UV1]], [[UADDO1]] ; GFX10-NEXT: [[XOR1:%[0-9]+]]:vcc(s1) = G_XOR [[XOR]], [[UADDE1]] - ; GFX10-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:vcc(s1) = COPY [[XOR1]](s1) + ; GFX10-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vcc(s1) = COPY [[XOR1]](s1) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 - %2:_(s32) = COPY $vgpr0 - %3:_(s32) = COPY $vgpr1 - %4:_(s64) = G_MERGE_VALUES %2, %3 - %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_I64_I32 %0, %1, %4 + %2:_(s64) = COPY $vgpr2_vgpr3 + %3:_(s64), %4:_(s1) = G_AMDGPU_MAD_I64_I32 %0, %1, %2 ... --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll @@ -21,139 +21,130 @@ ; CHECK-NEXT: v_ashrrev_i32_e32 v0, 31, v3 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v0 ; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v3, v0, vcc -; CHECK-NEXT: v_xor_b32_e32 v1, v1, v0 -; CHECK-NEXT: v_xor_b32_e32 v2, v2, v0 -; CHECK-NEXT: v_cvt_f32_u32_e32 v3, v1 -; CHECK-NEXT: v_cvt_f32_u32_e32 v6, v2 -; CHECK-NEXT: v_ashrrev_i32_e32 v7, 31, v5 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7 -; CHECK-NEXT: v_mac_f32_e32 v3, 0x4f800000, v6 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3 -; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v5, v7, vcc -; CHECK-NEXT: v_sub_i32_e32 v8, vcc, 0, v1 -; CHECK-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3 -; CHECK-NEXT: v_mul_f32_e32 v6, 0x2f800000, v3 -; CHECK-NEXT: v_trunc_f32_e32 v6, v6 -; CHECK-NEXT: v_mac_f32_e32 v3, 0xcf800000, v6 -; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 -; CHECK-NEXT: v_cvt_u32_f32_e32 v6, v6 -; CHECK-NEXT: v_subb_u32_e32 v9, vcc, 0, v2, vcc -; CHECK-NEXT: v_mul_lo_u32 v10, v9, v3 -; CHECK-NEXT: v_mul_lo_u32 v11, v8, v6 -; CHECK-NEXT: v_mul_hi_u32 v13, v8, v3 -; CHECK-NEXT: v_mul_lo_u32 v12, v8, v3 -; CHECK-NEXT: v_xor_b32_e32 v4, v4, v7 -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v13 -; CHECK-NEXT: v_mul_lo_u32 v11, v6, v12 -; CHECK-NEXT: v_mul_lo_u32 v13, v3, v10 -; CHECK-NEXT: v_mul_hi_u32 v14, v3, v12 -; CHECK-NEXT: v_mul_hi_u32 v12, v6, v12 -; CHECK-NEXT: v_xor_b32_e32 v5, v5, v7 -; CHECK-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; CHECK-NEXT: v_xor_b32_e32 v6, v1, v0 +; CHECK-NEXT: v_xor_b32_e32 v7, v2, v0 +; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v6 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, v7 +; CHECK-NEXT: v_sub_i32_e32 v9, vcc, 0, v6 +; CHECK-NEXT: v_subb_u32_e32 v10, vcc, 0, v7, vcc +; CHECK-NEXT: v_mac_f32_e32 v1, 0x4f800000, v2 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 +; CHECK-NEXT: v_mul_f32_e32 v2, 0x2f800000, v1 +; CHECK-NEXT: v_trunc_f32_e32 v3, v2 +; CHECK-NEXT: v_mac_f32_e32 v1, 0xcf800000, v3 +; CHECK-NEXT: v_cvt_u32_f32_e32 v8, v1 +; CHECK-NEXT: v_cvt_u32_f32_e32 v11, v3 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v9, v8, 0 +; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v9, v11, v[2:3] +; CHECK-NEXT: v_mul_hi_u32 v12, v8, v1 +; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v8, v[2:3] +; CHECK-NEXT: v_mul_lo_u32 v3, v11, v1 +; CHECK-NEXT: v_mul_hi_u32 v1, v11, v1 +; CHECK-NEXT: v_mul_lo_u32 v13, v8, v2 +; CHECK-NEXT: v_mul_lo_u32 v14, v11, v2 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v13 ; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v11, vcc, v11, v14 -; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v14, v6, v10 -; CHECK-NEXT: v_add_i32_e32 v11, vcc, v13, v11 -; CHECK-NEXT: v_mul_hi_u32 v13, v3, v10 -; CHECK-NEXT: v_add_i32_e32 v12, vcc, v14, v12 -; CHECK-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v12, vcc, v12, v13 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v12 +; CHECK-NEXT: v_mul_hi_u32 v12, v8, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v13, v3 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v14, v1 ; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; CHECK-NEXT: v_mul_hi_u32 v10, v6, v10 -; CHECK-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v12 ; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v11 -; CHECK-NEXT: v_addc_u32_e32 v6, vcc, v6, v10, vcc -; CHECK-NEXT: v_mul_lo_u32 v9, v9, v3 -; CHECK-NEXT: v_mul_lo_u32 v10, v8, v6 -; CHECK-NEXT: v_mul_lo_u32 v11, v8, v3 -; CHECK-NEXT: v_mul_hi_u32 v8, v8, v3 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CHECK-NEXT: v_mul_lo_u32 v9, v6, v11 -; CHECK-NEXT: v_mul_lo_u32 v10, v3, v8 -; CHECK-NEXT: v_mul_hi_u32 v12, v3, v11 -; CHECK-NEXT: v_mul_hi_u32 v11, v6, v11 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CHECK-NEXT: v_mul_hi_u32 v2, v11, v2 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v12, v3 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v1 +; CHECK-NEXT: v_addc_u32_e32 v11, vcc, v11, v2, vcc +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v9, v8, 0 +; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v9, v11, v[2:3] +; CHECK-NEXT: v_ashrrev_i32_e32 v9, 31, v5 +; CHECK-NEXT: v_mul_hi_u32 v12, v8, v1 +; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v8, v[2:3] +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v9 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v5, v9, vcc +; CHECK-NEXT: v_xor_b32_e32 v5, v3, v9 +; CHECK-NEXT: v_mul_lo_u32 v3, v11, v1 +; CHECK-NEXT: v_mul_lo_u32 v10, v8, v2 +; CHECK-NEXT: v_mul_hi_u32 v1, v11, v1 +; CHECK-NEXT: v_xor_b32_e32 v4, v4, v9 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v10 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v12, v6, v8 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CHECK-NEXT: v_mul_hi_u32 v10, v3, v8 -; CHECK-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v12 +; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; CHECK-NEXT: v_mul_lo_u32 v12, v11, v2 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v10, v3 +; CHECK-NEXT: v_mul_hi_u32 v10, v8, v2 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v12, v1 ; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CHECK-NEXT: v_mul_hi_u32 v8, v6, v8 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v10 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v9 -; CHECK-NEXT: v_addc_u32_e32 v6, vcc, v6, v8, vcc -; CHECK-NEXT: v_mul_lo_u32 v8, v5, v3 -; CHECK-NEXT: v_mul_lo_u32 v9, v4, v6 -; CHECK-NEXT: v_mul_hi_u32 v10, v4, v3 -; CHECK-NEXT: v_mul_hi_u32 v3, v5, v3 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v10, v5, v6 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CHECK-NEXT: v_mul_hi_u32 v9, v4, v6 +; CHECK-NEXT: v_add_i32_e32 v10, vcc, v12, v10 +; CHECK-NEXT: v_mul_hi_u32 v2, v11, v2 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v10, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CHECK-NEXT: v_mul_hi_u32 v6, v5, v6 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v8, v1 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v11, v2, vcc +; CHECK-NEXT: v_mul_lo_u32 v3, v4, v1 +; CHECK-NEXT: v_mul_lo_u32 v8, v5, v2 +; CHECK-NEXT: v_mul_hi_u32 v10, v5, v1 +; CHECK-NEXT: v_mul_hi_u32 v1, v4, v1 +; CHECK-NEXT: v_mul_hi_u32 v11, v4, v2 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v8 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; CHECK-NEXT: v_mul_lo_u32 v8, v2, v3 -; CHECK-NEXT: v_mul_lo_u32 v9, v1, v6 -; CHECK-NEXT: v_mul_hi_u32 v11, v1, v3 -; CHECK-NEXT: v_mul_lo_u32 v10, v1, v3 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v4, v10 -; CHECK-NEXT: v_subb_u32_e64 v9, s[4:5], v5, v8, vcc -; CHECK-NEXT: v_sub_i32_e64 v5, s[4:5], v5, v8 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v2 -; CHECK-NEXT: v_subb_u32_e32 v5, vcc, v5, v2, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v1 -; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v4, v1 -; CHECK-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v9, v2 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, 1, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v8, v8, v10, s[4:5] -; CHECK-NEXT: v_addc_u32_e32 v10, vcc, 0, v6, vcc -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v5, v2 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; CHECK-NEXT: v_mul_lo_u32 v10, v4, v2 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v8, v3 +; CHECK-NEXT: v_mul_hi_u32 v8, v5, v2 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v10, v1 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CHECK-NEXT: v_add_i32_e32 v10, vcc, v1, v3 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v6, v10, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v8, v3 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v11, v3 +; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v8, v[2:3] +; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v5, v1 +; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v10, v[2:3] +; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v4, v2, vcc +; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], v4, v2 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v7 +; CHECK-NEXT: v_subb_u32_e32 v2, vcc, v2, v7, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v6 +; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v1, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v7 +; CHECK-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v2, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v3, v4, v5, s[4:5] +; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v10 +; CHECK-NEXT: v_addc_u32_e32 v5, vcc, 0, v8, vcc +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v2, v7 ; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v4, v1 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v5, v2 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v2, v7 ; CHECK-NEXT: v_cndmask_b32_e32 v1, v11, v1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v9 -; CHECK-NEXT: v_addc_u32_e32 v4, vcc, 0, v10, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v4 +; CHECK-NEXT: v_addc_u32_e32 v6, vcc, 0, v5, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v9, v2, vcc -; CHECK-NEXT: v_cndmask_b32_e32 v2, v10, v4, vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc -; CHECK-NEXT: v_xor_b32_e32 v3, v7, v0 -; CHECK-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v10, v1, vcc +; CHECK-NEXT: v_xor_b32_e32 v3, v9, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc ; CHECK-NEXT: v_xor_b32_e32 v0, v1, v3 ; CHECK-NEXT: v_xor_b32_e32 v1, v2, v3 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 @@ -217,127 +208,118 @@ ; CHECK-NEXT: v_cvt_f32_u32_e32 v1, s11 ; CHECK-NEXT: s_mov_b32 s7, s6 ; CHECK-NEXT: s_xor_b64 s[12:13], s[0:1], s[6:7] -; CHECK-NEXT: s_sub_u32 s0, 0, s10 +; CHECK-NEXT: s_sub_u32 s3, 0, s10 ; CHECK-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; CHECK-NEXT: s_subb_u32 s1, 0, s11 +; CHECK-NEXT: s_subb_u32 s5, 0, s11 ; CHECK-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; CHECK-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 -; CHECK-NEXT: v_trunc_f32_e32 v1, v1 -; CHECK-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 -; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 -; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0 -; CHECK-NEXT: v_mul_lo_u32 v2, s0, v1 -; CHECK-NEXT: v_mul_lo_u32 v3, s1, v0 -; CHECK-NEXT: v_mul_hi_u32 v5, s0, v0 -; CHECK-NEXT: v_mul_lo_u32 v4, s0, v0 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; CHECK-NEXT: v_trunc_f32_e32 v2, v1 +; CHECK-NEXT: v_mac_f32_e32 v0, 0xcf800000, v2 +; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v0 +; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v2 +; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s3, v3, 0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v4, v[1:2] +; CHECK-NEXT: v_mul_hi_u32 v5, v3, v0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s5, v3, v[1:2] +; CHECK-NEXT: v_mul_lo_u32 v2, v4, v0 +; CHECK-NEXT: v_mul_hi_u32 v0, v4, v0 +; CHECK-NEXT: v_mul_lo_u32 v6, v3, v1 +; CHECK-NEXT: v_mul_lo_u32 v7, v4, v1 +; CHECK-NEXT: v_mul_hi_u32 v8, v3, v1 +; CHECK-NEXT: v_mul_hi_u32 v1, v4, v1 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 -; CHECK-NEXT: v_mul_lo_u32 v3, v1, v4 -; CHECK-NEXT: v_mul_lo_u32 v5, v0, v2 -; CHECK-NEXT: v_mul_hi_u32 v6, v0, v4 -; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v6, v2 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v6, v1, v2 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v5, v3 -; CHECK-NEXT: v_mul_hi_u32 v5, v0, v2 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v8 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v3 -; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc -; CHECK-NEXT: v_mul_lo_u32 v2, s1, v0 -; CHECK-NEXT: v_mul_lo_u32 v3, s0, v1 -; CHECK-NEXT: v_mul_hi_u32 v5, s0, v0 -; CHECK-NEXT: v_mul_lo_u32 v4, s0, v0 -; CHECK-NEXT: v_mov_b32_e32 v6, s11 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v0 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v1, vcc +; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s3, v3, 0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v4, v[1:2] +; CHECK-NEXT: v_mul_hi_u32 v6, v3, v0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s5, v3, v[1:2] +; CHECK-NEXT: v_mul_lo_u32 v2, v4, v0 +; CHECK-NEXT: v_mul_hi_u32 v0, v4, v0 +; CHECK-NEXT: v_mul_lo_u32 v5, v3, v1 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 -; CHECK-NEXT: v_mul_lo_u32 v3, v1, v4 -; CHECK-NEXT: v_mul_lo_u32 v5, v0, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, v0, v4 -; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v5 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v7, v1, v2 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v5, v3 -; CHECK-NEXT: v_mul_hi_u32 v5, v0, v2 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v7, v4 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: v_mul_lo_u32 v6, v4, v1 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; CHECK-NEXT: v_mul_hi_u32 v5, v3, v1 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v6, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v5 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v3 -; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; CHECK-NEXT: v_mul_hi_u32 v1, v4, v1 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v3, v0 +; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc ; CHECK-NEXT: v_mul_lo_u32 v2, s13, v0 ; CHECK-NEXT: v_mul_lo_u32 v3, s12, v1 -; CHECK-NEXT: v_mul_hi_u32 v5, s12, v0 +; CHECK-NEXT: v_mul_hi_u32 v4, s12, v0 ; CHECK-NEXT: v_mul_hi_u32 v0, s13, v0 -; CHECK-NEXT: v_mov_b32_e32 v4, s13 +; CHECK-NEXT: v_mul_hi_u32 v5, s13, v1 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3 ; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v5, s13, v1 +; CHECK-NEXT: v_mul_lo_u32 v4, s13, v1 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; CHECK-NEXT: v_mul_hi_u32 v3, s12, v1 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v5, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v4, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v3 ; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v5, v3 -; CHECK-NEXT: v_mul_hi_u32 v1, s13, v1 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v0, v2 +; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s10, v4, 0 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 -; CHECK-NEXT: v_mul_lo_u32 v2, s11, v0 -; CHECK-NEXT: v_mul_lo_u32 v1, s10, v1 -; CHECK-NEXT: v_mul_hi_u32 v5, s10, v0 -; CHECK-NEXT: v_mul_lo_u32 v3, s10, v0 -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v1 -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v5 -; CHECK-NEXT: v_sub_i32_e32 v2, vcc, s12, v3 -; CHECK-NEXT: v_subb_u32_e64 v3, s[0:1], v4, v1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s10, v2, v[1:2] +; CHECK-NEXT: v_mov_b32_e32 v5, s13 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, s12, v0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s11, v4, v[1:2] +; CHECK-NEXT: v_mov_b32_e32 v3, s11 +; CHECK-NEXT: v_subb_u32_e64 v2, s[0:1], v5, v1, vcc ; CHECK-NEXT: v_sub_i32_e64 v1, s[0:1], s13, v1 -; CHECK-NEXT: v_cmp_le_u32_e64 s[0:1], s11, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[0:1] -; CHECK-NEXT: v_cmp_le_u32_e64 s[0:1], s10, v2 -; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v6, vcc +; CHECK-NEXT: v_cmp_le_u32_e64 s[0:1], s11, v2 +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[0:1] -; CHECK-NEXT: v_cmp_eq_u32_e64 s[0:1], s11, v3 -; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, s10, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v3, v4, v5, s[0:1] +; CHECK-NEXT: v_cmp_le_u32_e64 s[0:1], s10, v0 +; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s10, v0 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v0 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[0:1] +; CHECK-NEXT: v_cmp_eq_u32_e64 s[0:1], s11, v2 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s11, v1 +; CHECK-NEXT: v_cndmask_b32_e64 v2, v5, v6, s[0:1] ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc -; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s10, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s10, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s11, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v4 -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, 1, v3 +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc ; CHECK-NEXT: s_xor_b64 s[0:1], s[6:7], s[8:9] ; CHECK-NEXT: v_xor_b32_e32 v0, s0, v0 ; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s0, v0 @@ -392,280 +374,264 @@ ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: v_ashrrev_i32_e32 v8, 31, v5 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 -; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v8, vcc -; GISEL-NEXT: v_xor_b32_e32 v4, v4, v8 -; GISEL-NEXT: v_xor_b32_e32 v5, v5, v8 -; GISEL-NEXT: v_cvt_f32_u32_e32 v9, v4 -; GISEL-NEXT: v_cvt_f32_u32_e32 v10, v5 -; GISEL-NEXT: v_ashrrev_i32_e32 v11, 31, v1 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v11 +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v5, v8, vcc +; GISEL-NEXT: v_xor_b32_e32 v5, v4, v8 +; GISEL-NEXT: v_xor_b32_e32 v4, v9, v8 +; GISEL-NEXT: v_cvt_f32_u32_e32 v9, v5 +; GISEL-NEXT: v_cvt_f32_u32_e32 v10, v4 +; GISEL-NEXT: v_sub_i32_e32 v13, vcc, 0, v5 +; GISEL-NEXT: v_subb_u32_e32 v14, vcc, 0, v4, vcc ; GISEL-NEXT: v_mac_f32_e32 v9, 0x4f800000, v10 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v9, v9 -; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v11, vcc -; GISEL-NEXT: v_sub_i32_e32 v12, vcc, 0, v4 ; GISEL-NEXT: v_mul_f32_e32 v9, 0x5f7ffffc, v9 ; GISEL-NEXT: v_mul_f32_e32 v10, 0x2f800000, v9 -; GISEL-NEXT: v_trunc_f32_e32 v10, v10 -; GISEL-NEXT: v_mac_f32_e32 v9, 0xcf800000, v10 -; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v9 -; GISEL-NEXT: v_cvt_u32_f32_e32 v10, v10 -; GISEL-NEXT: v_subb_u32_e32 v13, vcc, 0, v5, vcc -; GISEL-NEXT: v_mul_lo_u32 v14, v13, v9 -; GISEL-NEXT: v_mul_lo_u32 v15, v12, v10 +; GISEL-NEXT: v_trunc_f32_e32 v11, v10 +; GISEL-NEXT: v_mac_f32_e32 v9, 0xcf800000, v11 +; GISEL-NEXT: v_cvt_u32_f32_e32 v12, v9 +; GISEL-NEXT: v_cvt_u32_f32_e32 v15, v11 +; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v13, v12, 0 +; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v13, v15, v[10:11] ; GISEL-NEXT: v_mul_hi_u32 v17, v12, v9 -; GISEL-NEXT: v_mul_lo_u32 v16, v12, v9 -; GISEL-NEXT: v_xor_b32_e32 v0, v0, v11 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17 -; GISEL-NEXT: v_mul_lo_u32 v15, v10, v16 -; GISEL-NEXT: v_mul_lo_u32 v17, v9, v14 -; GISEL-NEXT: v_mul_hi_u32 v18, v9, v16 -; GISEL-NEXT: v_mul_hi_u32 v16, v10, v16 -; GISEL-NEXT: v_xor_b32_e32 v1, v1, v11 -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v18, v10, v14 -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v17, v15 -; GISEL-NEXT: v_mul_hi_u32 v17, v9, v14 -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v18, v16 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v17 +; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v12, v[10:11] +; GISEL-NEXT: v_mul_lo_u32 v11, v15, v9 +; GISEL-NEXT: v_mul_hi_u32 v9, v15, v9 +; GISEL-NEXT: v_mul_lo_u32 v16, v12, v10 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v17, v15, v10 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v16, v11 +; GISEL-NEXT: v_mul_hi_u32 v16, v12, v10 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v17, v9 ; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v18, v17 -; GISEL-NEXT: v_mul_hi_u32 v14, v10, v14 -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v16 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v16, vcc, v17, v16 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15 -; GISEL-NEXT: v_addc_u32_e32 v10, vcc, v10, v14, vcc -; GISEL-NEXT: v_mul_lo_u32 v13, v13, v9 -; GISEL-NEXT: v_mul_lo_u32 v14, v12, v10 -; GISEL-NEXT: v_mul_lo_u32 v15, v12, v9 -; GISEL-NEXT: v_mul_hi_u32 v12, v12, v9 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; GISEL-NEXT: v_mul_lo_u32 v13, v10, v15 -; GISEL-NEXT: v_mul_lo_u32 v14, v9, v12 -; GISEL-NEXT: v_mul_hi_u32 v16, v9, v15 -; GISEL-NEXT: v_mul_hi_u32 v15, v10, v15 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v16 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v16, v10, v12 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; GISEL-NEXT: v_mul_hi_u32 v14, v9, v12 -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15 -; GISEL-NEXT: v_mul_hi_u32 v12, v10, v12 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13 -; GISEL-NEXT: v_addc_u32_e32 v10, vcc, v10, v12, vcc -; GISEL-NEXT: v_mul_lo_u32 v12, v1, v9 -; GISEL-NEXT: v_mul_lo_u32 v13, v0, v10 -; GISEL-NEXT: v_mul_hi_u32 v14, v0, v9 -; GISEL-NEXT: v_mul_hi_u32 v9, v1, v9 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; GISEL-NEXT: v_mul_hi_u32 v10, v15, v10 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v16, v11 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v12, v9 +; GISEL-NEXT: v_addc_u32_e32 v15, vcc, v15, v10, vcc +; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v13, v16, 0 +; GISEL-NEXT: v_mov_b32_e32 v9, v11 +; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v13, v15, v[9:10] +; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9 +; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v14, v16, v[11:12] +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v9, vcc +; GISEL-NEXT: v_xor_b32_e32 v13, v0, v9 +; GISEL-NEXT: v_mul_lo_u32 v0, v15, v10 +; GISEL-NEXT: v_mul_lo_u32 v12, v16, v11 +; GISEL-NEXT: v_xor_b32_e32 v14, v1, v9 +; GISEL-NEXT: v_mul_hi_u32 v1, v16, v10 +; GISEL-NEXT: v_mul_hi_u32 v10, v15, v10 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v12 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v14, v1, v10 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; GISEL-NEXT: v_mul_hi_u32 v13, v0, v10 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v14, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; GISEL-NEXT: v_mul_hi_u32 v10, v1, v10 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v12 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v1, v15, v11 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v12, v0 +; GISEL-NEXT: v_mul_hi_u32 v12, v16, v11 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v12 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; GISEL-NEXT: v_mul_lo_u32 v12, v5, v9 -; GISEL-NEXT: v_mul_lo_u32 v13, v4, v10 -; GISEL-NEXT: v_mul_hi_u32 v15, v4, v9 -; GISEL-NEXT: v_mul_lo_u32 v14, v4, v9 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15 -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v14 -; GISEL-NEXT: v_subb_u32_e64 v13, s[4:5], v1, v12, vcc -; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v12 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v13, v5 -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4 -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 -; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v13, v5 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, 1, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v12, v12, v14, s[4:5] -; GISEL-NEXT: v_addc_u32_e32 v14, vcc, 0, v10, vcc -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, -1, vcc -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v5 -; GISEL-NEXT: v_cndmask_b32_e32 v0, v15, v0, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v13 -; GISEL-NEXT: v_addc_u32_e32 v4, vcc, 0, v14, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; GISEL-NEXT: v_cndmask_b32_e32 v0, v13, v1, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v1, v14, v4, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 -; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v7 -; GISEL-NEXT: v_cndmask_b32_e32 v0, v9, v0, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v1, v10, v1, vcc -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v4 -; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v4, vcc -; GISEL-NEXT: v_xor_b32_e32 v6, v6, v4 -; GISEL-NEXT: v_xor_b32_e32 v7, v7, v4 -; GISEL-NEXT: v_xor_b32_e32 v5, v11, v8 -; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v6 -; GISEL-NEXT: v_cvt_f32_u32_e32 v9, v7 -; GISEL-NEXT: v_ashrrev_i32_e32 v10, 31, v3 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v10 -; GISEL-NEXT: v_mac_f32_e32 v8, 0x4f800000, v9 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v8, v8 -; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v10, vcc -; GISEL-NEXT: v_sub_i32_e32 v11, vcc, 0, v6 -; GISEL-NEXT: v_mul_f32_e32 v8, 0x5f7ffffc, v8 -; GISEL-NEXT: v_mul_f32_e32 v9, 0x2f800000, v8 -; GISEL-NEXT: v_trunc_f32_e32 v9, v9 -; GISEL-NEXT: v_mac_f32_e32 v8, 0xcf800000, v9 -; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8 -; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v9 -; GISEL-NEXT: v_subb_u32_e32 v12, vcc, 0, v7, vcc -; GISEL-NEXT: v_mul_lo_u32 v13, v12, v8 -; GISEL-NEXT: v_mul_lo_u32 v14, v11, v9 -; GISEL-NEXT: v_mul_hi_u32 v16, v11, v8 -; GISEL-NEXT: v_mul_lo_u32 v15, v11, v8 -; GISEL-NEXT: v_xor_b32_e32 v0, v0, v5 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v16 -; GISEL-NEXT: v_mul_lo_u32 v14, v9, v15 -; GISEL-NEXT: v_mul_lo_u32 v16, v8, v13 -; GISEL-NEXT: v_mul_hi_u32 v17, v8, v15 -; GISEL-NEXT: v_mul_hi_u32 v15, v9, v15 -; GISEL-NEXT: v_xor_b32_e32 v2, v2, v10 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v17, v9, v13 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v16, v14 -; GISEL-NEXT: v_mul_hi_u32 v16, v8, v13 -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v17, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v17, v16 -; GISEL-NEXT: v_mul_hi_u32 v13, v9, v13 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v14 -; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v13, vcc -; GISEL-NEXT: v_mul_lo_u32 v12, v12, v8 -; GISEL-NEXT: v_mul_lo_u32 v13, v11, v9 -; GISEL-NEXT: v_mul_lo_u32 v14, v11, v8 -; GISEL-NEXT: v_mul_hi_u32 v11, v11, v8 -; GISEL-NEXT: v_xor_b32_e32 v3, v3, v10 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; GISEL-NEXT: v_mul_lo_u32 v12, v9, v14 -; GISEL-NEXT: v_mul_lo_u32 v13, v8, v11 -; GISEL-NEXT: v_mul_hi_u32 v15, v8, v14 -; GISEL-NEXT: v_mul_hi_u32 v14, v9, v14 -; GISEL-NEXT: v_xor_b32_e32 v1, v1, v5 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15 +; GISEL-NEXT: v_mul_hi_u32 v11, v15, v11 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v11, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v16, v0 +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v15, v1, vcc +; GISEL-NEXT: v_mul_lo_u32 v10, v14, v0 +; GISEL-NEXT: v_mul_lo_u32 v11, v13, v1 +; GISEL-NEXT: v_mul_hi_u32 v12, v13, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v14, v0 +; GISEL-NEXT: v_xor_b32_e32 v8, v9, v8 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v12, v14, v1 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; GISEL-NEXT: v_mul_hi_u32 v11, v13, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v12, v0 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v15, v9, v11 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; GISEL-NEXT: v_mul_hi_u32 v13, v8, v11 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; GISEL-NEXT: v_mul_hi_u32 v11, v9, v11 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v0, v10 +; GISEL-NEXT: v_mul_hi_u32 v12, v14, v1 +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v15, 0 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v12, v10 +; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v5, v16, v[1:2] +; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v4, v15, v[10:11] +; GISEL-NEXT: v_ashrrev_i32_e32 v10, 31, v7 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v6, v10 +; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v7, v10, vcc +; GISEL-NEXT: v_xor_b32_e32 v7, v1, v10 +; GISEL-NEXT: v_xor_b32_e32 v12, v6, v10 +; GISEL-NEXT: v_cvt_f32_u32_e32 v1, v7 +; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v12 +; GISEL-NEXT: v_sub_i32_e32 v13, vcc, v13, v0 +; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v14, v11 +; GISEL-NEXT: v_mac_f32_e32 v1, 0x4f800000, v6 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; GISEL-NEXT: v_subb_u32_e64 v17, s[4:5], v14, v11, vcc +; GISEL-NEXT: v_subb_u32_e32 v6, vcc, v0, v4, vcc +; GISEL-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v1 +; GISEL-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 +; GISEL-NEXT: v_trunc_f32_e32 v11, v1 +; GISEL-NEXT: v_mac_f32_e32 v0, 0xcf800000, v11 +; GISEL-NEXT: v_cvt_u32_f32_e32 v14, v0 +; GISEL-NEXT: v_sub_i32_e32 v18, vcc, 0, v7 +; GISEL-NEXT: v_cvt_u32_f32_e32 v11, v11 +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[6:7], v18, v14, 0 +; GISEL-NEXT: v_subb_u32_e32 v19, vcc, 0, v12, vcc +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v13, v5 +; GISEL-NEXT: v_sub_i32_e32 v13, vcc, v13, v5 +; GISEL-NEXT: v_subbrev_u32_e32 v20, vcc, 0, v6, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v13, v5 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[6:7], v18, v11, v[1:2] +; GISEL-NEXT: v_mul_lo_u32 v1, v11, v0 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v20, v4 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[6:7], v19, v14, v[5:6] +; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v17, v4 +; GISEL-NEXT: v_mul_lo_u32 v6, v14, v5 +; GISEL-NEXT: v_add_i32_e64 v1, s[6:7], v1, v6 +; GISEL-NEXT: v_mul_hi_u32 v6, v14, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[6:7] +; GISEL-NEXT: v_mul_hi_u32 v0, v11, v0 +; GISEL-NEXT: v_add_i32_e64 v1, s[6:7], v1, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[8:9] +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v20, v4 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v17, v4 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc +; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, -1, s[8:9] +; GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v16, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v4, v20, v21, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v20, vcc, 1, v6 +; GISEL-NEXT: v_addc_u32_e32 v21, vcc, 0, v17, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v20, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v17, v17, v21, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1 +; GISEL-NEXT: v_mul_lo_u32 v13, v11, v5 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v13, v0 +; GISEL-NEXT: v_mul_hi_u32 v13, v14, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc +; GISEL-NEXT: v_mul_hi_u32 v5, v11, v5 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v13 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 -; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v11, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v3, v8 -; GISEL-NEXT: v_mul_lo_u32 v12, v2, v9 -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc -; GISEL-NEXT: v_mul_hi_u32 v5, v2, v8 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v11, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v3, v9 -; GISEL-NEXT: v_mul_hi_u32 v8, v3, v8 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v12, v5 -; GISEL-NEXT: v_mul_hi_u32 v12, v2, v9 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v8 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v20, v13 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v5, v1 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v0 +; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v11, v1, vcc +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v18, v13, 0 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; GISEL-NEXT: v_cndmask_b32_e32 v6, v15, v6, vcc +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v18, v11, v[1:2] +; GISEL-NEXT: v_xor_b32_e32 v1, v6, v8 +; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v3 +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v19, v13, v[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v14, v16, v17, vcc +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v6 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc +; GISEL-NEXT: v_xor_b32_e32 v5, v2, v6 +; GISEL-NEXT: v_mul_lo_u32 v2, v11, v0 +; GISEL-NEXT: v_mul_lo_u32 v9, v13, v4 +; GISEL-NEXT: v_xor_b32_e32 v15, v3, v6 +; GISEL-NEXT: v_mul_hi_u32 v3, v13, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v11, v0 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v11, v4 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v9, v2 +; GISEL-NEXT: v_mul_hi_u32 v9, v13, v4 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v9 +; GISEL-NEXT: v_mul_hi_u32 v4, v11, v4 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v4, v2 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v13, v0 +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v11, v2, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v15, v0 +; GISEL-NEXT: v_mul_lo_u32 v4, v5, v2 +; GISEL-NEXT: v_mul_hi_u32 v11, v5, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v15, v0 +; GISEL-NEXT: v_mul_hi_u32 v13, v15, v2 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v11, v15, v2 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v4, v3 +; GISEL-NEXT: v_mul_hi_u32 v4, v5, v2 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; GISEL-NEXT: v_mul_hi_u32 v9, v3, v9 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v8 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_mul_lo_u32 v9, v7, v5 -; GISEL-NEXT: v_mul_lo_u32 v11, v6, v8 -; GISEL-NEXT: v_mul_hi_u32 v13, v6, v5 -; GISEL-NEXT: v_mul_lo_u32 v12, v6, v5 -; GISEL-NEXT: v_xor_b32_e32 v4, v10, v4 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13 -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v12 -; GISEL-NEXT: v_subb_u32_e64 v11, s[4:5], v3, v9, vcc -; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v9 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v11, v7 -; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v6 -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v6 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v11, v4 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v0, v3 +; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v11, 0 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v0 +; GISEL-NEXT: v_mov_b32_e32 v0, v3 +; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v7, v13, v[0:1] +; GISEL-NEXT: v_xor_b32_e32 v9, v14, v8 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v8 +; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v12, v11, v[3:4] +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v9, v8, vcc +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v5, v2 +; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v15, v3, vcc +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v15, v3 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v12 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v12, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v7 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v4, v12 ; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v11, v7 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, 1, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v9, v9, v12, s[4:5] -; GISEL-NEXT: v_addc_u32_e32 v12, vcc, 0, v8, vcc -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v3, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v4, v5, v8, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v11 +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v13, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v3, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v13, v2, vcc -; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v11 -; GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v12, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v12 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc +; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5 +; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v11, v3, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v3, v12, v6, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v2, v5, v3, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v8, v7, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v11, v2, vcc +; GISEL-NEXT: v_xor_b32_e32 v4, v6, v10 +; GISEL-NEXT: v_cndmask_b32_e32 v3, v13, v3, vcc ; GISEL-NEXT: v_xor_b32_e32 v2, v2, v4 ; GISEL-NEXT: v_xor_b32_e32 v3, v3, v4 ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v4 @@ -690,139 +656,130 @@ ; CGP-NEXT: v_ashrrev_i32_e32 v0, 31, v5 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v4, v0 ; CGP-NEXT: v_addc_u32_e32 v2, vcc, v5, v0, vcc -; CGP-NEXT: v_xor_b32_e32 v1, v1, v0 -; CGP-NEXT: v_xor_b32_e32 v2, v2, v0 -; CGP-NEXT: v_cvt_f32_u32_e32 v3, v1 -; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2 -; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v11 -; CGP-NEXT: v_mac_f32_e32 v3, 0x4f800000, v4 -; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v5 -; CGP-NEXT: v_addc_u32_e32 v10, vcc, v11, v5, vcc -; CGP-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3 -; CGP-NEXT: v_mul_f32_e32 v11, 0x2f800000, v3 -; CGP-NEXT: v_trunc_f32_e32 v11, v11 -; CGP-NEXT: v_mac_f32_e32 v3, 0xcf800000, v11 -; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3 -; CGP-NEXT: v_cvt_u32_f32_e32 v11, v11 -; CGP-NEXT: v_sub_i32_e32 v12, vcc, 0, v1 -; CGP-NEXT: v_subb_u32_e32 v13, vcc, 0, v2, vcc -; CGP-NEXT: v_mul_lo_u32 v14, v13, v3 -; CGP-NEXT: v_mul_lo_u32 v15, v12, v11 -; CGP-NEXT: v_mul_hi_u32 v17, v12, v3 -; CGP-NEXT: v_mul_lo_u32 v16, v12, v3 -; CGP-NEXT: v_xor_b32_e32 v4, v4, v5 -; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15 -; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v17 -; CGP-NEXT: v_mul_lo_u32 v15, v11, v16 -; CGP-NEXT: v_mul_lo_u32 v17, v3, v14 -; CGP-NEXT: v_mul_hi_u32 v18, v3, v16 -; CGP-NEXT: v_mul_hi_u32 v16, v11, v16 -; CGP-NEXT: v_xor_b32_e32 v10, v10, v5 -; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v17 +; CGP-NEXT: v_xor_b32_e32 v4, v1, v0 +; CGP-NEXT: v_xor_b32_e32 v5, v2, v0 +; CGP-NEXT: v_cvt_f32_u32_e32 v1, v4 +; CGP-NEXT: v_cvt_f32_u32_e32 v2, v5 +; CGP-NEXT: v_sub_i32_e32 v13, vcc, 0, v4 +; CGP-NEXT: v_subb_u32_e32 v14, vcc, 0, v5, vcc +; CGP-NEXT: v_mac_f32_e32 v1, 0x4f800000, v2 +; CGP-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; CGP-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 +; CGP-NEXT: v_mul_f32_e32 v2, 0x2f800000, v1 +; CGP-NEXT: v_trunc_f32_e32 v3, v2 +; CGP-NEXT: v_mac_f32_e32 v1, 0xcf800000, v3 +; CGP-NEXT: v_cvt_u32_f32_e32 v12, v1 +; CGP-NEXT: v_cvt_u32_f32_e32 v15, v3 +; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v13, v12, 0 +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v13, v15, v[2:3] +; CGP-NEXT: v_mul_hi_u32 v16, v12, v1 +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v14, v12, v[2:3] +; CGP-NEXT: v_mul_lo_u32 v3, v15, v1 +; CGP-NEXT: v_mul_hi_u32 v1, v15, v1 +; CGP-NEXT: v_mul_lo_u32 v17, v12, v2 +; CGP-NEXT: v_mul_lo_u32 v18, v15, v2 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v17 ; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v18 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v18, v11, v14 -; CGP-NEXT: v_add_i32_e32 v15, vcc, v17, v15 -; CGP-NEXT: v_mul_hi_u32 v17, v3, v14 -; CGP-NEXT: v_add_i32_e32 v16, vcc, v18, v16 -; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v16, vcc, v16, v17 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v16 +; CGP-NEXT: v_mul_hi_u32 v16, v12, v2 +; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, v17, v3 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v18, v1 ; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v17, vcc, v18, v17 -; CGP-NEXT: v_mul_hi_u32 v14, v11, v14 -; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v16 ; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v16, vcc, v17, v16 -; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v16 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v15 -; CGP-NEXT: v_addc_u32_e32 v11, vcc, v11, v14, vcc -; CGP-NEXT: v_mul_lo_u32 v13, v13, v3 -; CGP-NEXT: v_mul_lo_u32 v14, v12, v11 -; CGP-NEXT: v_mul_lo_u32 v15, v12, v3 -; CGP-NEXT: v_mul_hi_u32 v12, v12, v3 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CGP-NEXT: v_mul_lo_u32 v13, v11, v15 -; CGP-NEXT: v_mul_lo_u32 v14, v3, v12 -; CGP-NEXT: v_mul_hi_u32 v16, v3, v15 -; CGP-NEXT: v_mul_hi_u32 v15, v11, v15 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 +; CGP-NEXT: v_mul_hi_u32 v2, v15, v2 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v3 +; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, v16, v3 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v1 +; CGP-NEXT: v_addc_u32_e32 v15, vcc, v15, v2, vcc +; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v13, v12, 0 +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v13, v15, v[2:3] +; CGP-NEXT: v_ashrrev_i32_e32 v13, 31, v11 +; CGP-NEXT: v_mul_hi_u32 v16, v12, v1 +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v14, v12, v[2:3] +; CGP-NEXT: v_add_i32_e32 v3, vcc, v10, v13 +; CGP-NEXT: v_addc_u32_e32 v10, vcc, v11, v13, vcc +; CGP-NEXT: v_xor_b32_e32 v11, v3, v13 +; CGP-NEXT: v_mul_lo_u32 v3, v15, v1 +; CGP-NEXT: v_mul_lo_u32 v14, v12, v2 +; CGP-NEXT: v_mul_hi_u32 v1, v15, v1 +; CGP-NEXT: v_xor_b32_e32 v10, v10, v13 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v14 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v16 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v16, v11, v12 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; CGP-NEXT: v_mul_hi_u32 v14, v3, v12 -; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v16 +; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v16, v15, v2 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v14, v3 +; CGP-NEXT: v_mul_hi_u32 v14, v12, v2 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v16, v1 ; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15 -; CGP-NEXT: v_mul_hi_u32 v12, v11, v12 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v14 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v13 -; CGP-NEXT: v_addc_u32_e32 v11, vcc, v11, v12, vcc -; CGP-NEXT: v_mul_lo_u32 v12, v10, v3 -; CGP-NEXT: v_mul_lo_u32 v13, v4, v11 -; CGP-NEXT: v_mul_hi_u32 v14, v4, v3 -; CGP-NEXT: v_mul_hi_u32 v3, v10, v3 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v14, v10, v11 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CGP-NEXT: v_mul_hi_u32 v13, v4, v11 +; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v14 +; CGP-NEXT: v_mul_hi_u32 v2, v15, v2 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v3 +; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v3, vcc, v14, v3 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v13 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; CGP-NEXT: v_mul_hi_u32 v11, v10, v11 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v12, v1 +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v15, v2, vcc +; CGP-NEXT: v_mul_lo_u32 v3, v10, v1 +; CGP-NEXT: v_mul_lo_u32 v12, v11, v2 +; CGP-NEXT: v_mul_hi_u32 v14, v11, v1 +; CGP-NEXT: v_mul_hi_u32 v1, v10, v1 +; CGP-NEXT: v_mul_hi_u32 v15, v10, v2 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; CGP-NEXT: v_mul_lo_u32 v12, v2, v3 -; CGP-NEXT: v_mul_lo_u32 v13, v1, v11 -; CGP-NEXT: v_mul_hi_u32 v15, v1, v3 -; CGP-NEXT: v_mul_lo_u32 v14, v1, v3 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v15 -; CGP-NEXT: v_sub_i32_e32 v4, vcc, v4, v14 -; CGP-NEXT: v_subb_u32_e64 v13, s[4:5], v10, v12, vcc -; CGP-NEXT: v_sub_i32_e64 v10, s[4:5], v10, v12 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v13, v2 -; CGP-NEXT: v_subb_u32_e32 v10, vcc, v10, v2, vcc -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v1 -; CGP-NEXT: v_sub_i32_e32 v4, vcc, v4, v1 -; CGP-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v10, vcc -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v13, v2 -; CGP-NEXT: v_add_i32_e32 v13, vcc, 1, v3 -; CGP-NEXT: v_cndmask_b32_e64 v12, v12, v14, s[4:5] -; CGP-NEXT: v_addc_u32_e32 v14, vcc, 0, v11, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v10, v2 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v14 +; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v14, v10, v2 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v12, v3 +; CGP-NEXT: v_mul_hi_u32 v12, v11, v2 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v14, v1 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v12 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12 +; CGP-NEXT: v_add_i32_e32 v14, vcc, v1, v3 +; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v4, v14, 0 +; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, v12, v3 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v3 +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v12, v[2:3] +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v11, v1 +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v14, v[2:3] +; CGP-NEXT: v_subb_u32_e64 v3, s[4:5], v10, v2, vcc +; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v10, v2 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v5 +; CGP-NEXT: v_subb_u32_e32 v2, vcc, v2, v5, vcc +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v4 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v4 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v5 +; CGP-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v2, vcc +; CGP-NEXT: v_cndmask_b32_e64 v3, v10, v11, s[4:5] +; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v14 +; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v12, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, -1, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v1 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4 ; CGP-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v10, v2 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v2, v5 ; CGP-NEXT: v_cndmask_b32_e32 v1, v15, v1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v13 -; CGP-NEXT: v_addc_u32_e32 v4, vcc, 0, v14, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v10 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, 0, v11, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 -; CGP-NEXT: v_cndmask_b32_e32 v1, v13, v2, vcc -; CGP-NEXT: v_cndmask_b32_e32 v2, v14, v4, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 -; CGP-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc -; CGP-NEXT: v_xor_b32_e32 v3, v5, v0 -; CGP-NEXT: v_cndmask_b32_e32 v2, v11, v2, vcc +; CGP-NEXT: v_cndmask_b32_e32 v1, v10, v2, vcc +; CGP-NEXT: v_cndmask_b32_e32 v2, v11, v4, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; CGP-NEXT: v_cndmask_b32_e32 v1, v14, v1, vcc +; CGP-NEXT: v_xor_b32_e32 v3, v13, v0 +; CGP-NEXT: v_cndmask_b32_e32 v2, v12, v2, vcc ; CGP-NEXT: v_xor_b32_e32 v0, v1, v3 ; CGP-NEXT: v_xor_b32_e32 v1, v2, v3 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 @@ -867,139 +824,130 @@ ; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v7 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v6, v2 ; CGP-NEXT: v_addc_u32_e32 v4, vcc, v7, v2, vcc -; CGP-NEXT: v_xor_b32_e32 v3, v3, v2 -; CGP-NEXT: v_xor_b32_e32 v4, v4, v2 -; CGP-NEXT: v_cvt_f32_u32_e32 v5, v3 -; CGP-NEXT: v_cvt_f32_u32_e32 v6, v4 -; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v9 -; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6 -; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v5 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v7 -; CGP-NEXT: v_addc_u32_e32 v8, vcc, v9, v7, vcc -; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 -; CGP-NEXT: v_mul_f32_e32 v9, 0x2f800000, v5 -; CGP-NEXT: v_trunc_f32_e32 v9, v9 -; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v9 -; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 -; CGP-NEXT: v_cvt_u32_f32_e32 v9, v9 -; CGP-NEXT: v_sub_i32_e32 v10, vcc, 0, v3 -; CGP-NEXT: v_subb_u32_e32 v11, vcc, 0, v4, vcc -; CGP-NEXT: v_mul_lo_u32 v12, v11, v5 -; CGP-NEXT: v_mul_lo_u32 v13, v10, v9 -; CGP-NEXT: v_mul_hi_u32 v15, v10, v5 -; CGP-NEXT: v_mul_lo_u32 v14, v10, v5 -; CGP-NEXT: v_xor_b32_e32 v6, v6, v7 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v15 -; CGP-NEXT: v_mul_lo_u32 v13, v9, v14 -; CGP-NEXT: v_mul_lo_u32 v15, v5, v12 -; CGP-NEXT: v_mul_hi_u32 v16, v5, v14 -; CGP-NEXT: v_mul_hi_u32 v14, v9, v14 -; CGP-NEXT: v_xor_b32_e32 v8, v8, v7 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15 +; CGP-NEXT: v_xor_b32_e32 v6, v3, v2 +; CGP-NEXT: v_xor_b32_e32 v7, v4, v2 +; CGP-NEXT: v_cvt_f32_u32_e32 v3, v6 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, v7 +; CGP-NEXT: v_sub_i32_e32 v11, vcc, 0, v6 +; CGP-NEXT: v_subb_u32_e32 v12, vcc, 0, v7, vcc +; CGP-NEXT: v_mac_f32_e32 v3, 0x4f800000, v4 +; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; CGP-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3 +; CGP-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3 +; CGP-NEXT: v_trunc_f32_e32 v5, v4 +; CGP-NEXT: v_mac_f32_e32 v3, 0xcf800000, v5 +; CGP-NEXT: v_cvt_u32_f32_e32 v10, v3 +; CGP-NEXT: v_cvt_u32_f32_e32 v13, v5 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v11, v10, 0 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v11, v13, v[4:5] +; CGP-NEXT: v_mul_hi_u32 v14, v10, v3 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v12, v10, v[4:5] +; CGP-NEXT: v_mul_lo_u32 v5, v13, v3 +; CGP-NEXT: v_mul_hi_u32 v3, v13, v3 +; CGP-NEXT: v_mul_lo_u32 v15, v10, v4 +; CGP-NEXT: v_mul_lo_u32 v16, v13, v4 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v15 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v16 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v16, v9, v12 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v13 -; CGP-NEXT: v_mul_hi_u32 v15, v5, v12 -; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v14 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v14 +; CGP-NEXT: v_mul_hi_u32 v14, v10, v4 +; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v15, v5 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v16, v3 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15 -; CGP-NEXT: v_mul_hi_u32 v12, v9, v12 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v14 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v13 -; CGP-NEXT: v_addc_u32_e32 v9, vcc, v9, v12, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v11, v5 -; CGP-NEXT: v_mul_lo_u32 v12, v10, v9 -; CGP-NEXT: v_mul_lo_u32 v13, v10, v5 -; CGP-NEXT: v_mul_hi_u32 v10, v10, v5 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_mul_lo_u32 v11, v9, v13 -; CGP-NEXT: v_mul_lo_u32 v12, v5, v10 -; CGP-NEXT: v_mul_hi_u32 v14, v5, v13 -; CGP-NEXT: v_mul_hi_u32 v13, v9, v13 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; CGP-NEXT: v_mul_hi_u32 v4, v13, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v5 +; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v14, v5 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v3 +; CGP-NEXT: v_addc_u32_e32 v13, vcc, v13, v4, vcc +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v11, v10, 0 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v11, v13, v[4:5] +; CGP-NEXT: v_ashrrev_i32_e32 v11, 31, v9 +; CGP-NEXT: v_mul_hi_u32 v14, v10, v3 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v12, v10, v[4:5] +; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v11 +; CGP-NEXT: v_addc_u32_e32 v8, vcc, v9, v11, vcc +; CGP-NEXT: v_xor_b32_e32 v9, v5, v11 +; CGP-NEXT: v_mul_lo_u32 v5, v13, v3 +; CGP-NEXT: v_mul_lo_u32 v12, v10, v4 +; CGP-NEXT: v_mul_hi_u32 v3, v13, v3 +; CGP-NEXT: v_xor_b32_e32 v8, v8, v11 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v14 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v14, v9, v10 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_hi_u32 v12, v5, v10 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v14 +; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v14, v13, v4 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v12, v5 +; CGP-NEXT: v_mul_hi_u32 v12, v10, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v14, v3 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; CGP-NEXT: v_mul_hi_u32 v10, v9, v10 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v11 -; CGP-NEXT: v_addc_u32_e32 v9, vcc, v9, v10, vcc -; CGP-NEXT: v_mul_lo_u32 v10, v8, v5 -; CGP-NEXT: v_mul_lo_u32 v11, v6, v9 -; CGP-NEXT: v_mul_hi_u32 v12, v6, v5 -; CGP-NEXT: v_mul_hi_u32 v5, v8, v5 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v12, v8, v9 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_mul_hi_u32 v11, v6, v9 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12 +; CGP-NEXT: v_mul_hi_u32 v4, v13, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v5 +; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v5, vcc, v12, v5 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_hi_u32 v9, v8, v9 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v10, v3 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v13, v4, vcc +; CGP-NEXT: v_mul_lo_u32 v5, v8, v3 +; CGP-NEXT: v_mul_lo_u32 v10, v9, v4 +; CGP-NEXT: v_mul_hi_u32 v12, v9, v3 +; CGP-NEXT: v_mul_hi_u32 v3, v8, v3 +; CGP-NEXT: v_mul_hi_u32 v13, v8, v4 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v10 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_mul_lo_u32 v10, v4, v5 -; CGP-NEXT: v_mul_lo_u32 v11, v3, v9 -; CGP-NEXT: v_mul_hi_u32 v13, v3, v5 -; CGP-NEXT: v_mul_lo_u32 v12, v3, v5 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 -; CGP-NEXT: v_sub_i32_e32 v6, vcc, v6, v12 -; CGP-NEXT: v_subb_u32_e64 v11, s[4:5], v8, v10, vcc -; CGP-NEXT: v_sub_i32_e64 v8, s[4:5], v8, v10 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v11, v4 -; CGP-NEXT: v_subb_u32_e32 v8, vcc, v8, v4, vcc -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v3 -; CGP-NEXT: v_sub_i32_e32 v6, vcc, v6, v3 -; CGP-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v8, vcc -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v11, v4 -; CGP-NEXT: v_add_i32_e32 v11, vcc, 1, v5 -; CGP-NEXT: v_cndmask_b32_e64 v10, v10, v12, s[4:5] -; CGP-NEXT: v_addc_u32_e32 v12, vcc, 0, v9, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v4 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v12 +; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v12, v8, v4 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v5 +; CGP-NEXT: v_mul_hi_u32 v10, v9, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v12, v3 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v3, v5 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v12, 0 +; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v5 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v13, v5 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v10, v[4:5] +; CGP-NEXT: v_sub_i32_e32 v3, vcc, v9, v3 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v7, v12, v[4:5] +; CGP-NEXT: v_subb_u32_e64 v5, s[4:5], v8, v4, vcc +; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v8, v4 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v7 +; CGP-NEXT: v_subb_u32_e32 v4, vcc, v4, v7, vcc +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v6 +; CGP-NEXT: v_sub_i32_e32 v3, vcc, v3, v6 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v5, v7 +; CGP-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc +; CGP-NEXT: v_cndmask_b32_e64 v5, v8, v9, s[4:5] +; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v12 +; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v10, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v7 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v6, v3 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v6 ; CGP-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v8, v4 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 ; CGP-NEXT: v_cndmask_b32_e32 v3, v13, v3, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v11 -; CGP-NEXT: v_addc_u32_e32 v6, vcc, 0, v12, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v8 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, 0, v9, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; CGP-NEXT: v_cndmask_b32_e32 v3, v11, v4, vcc -; CGP-NEXT: v_cndmask_b32_e32 v4, v12, v6, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 -; CGP-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc -; CGP-NEXT: v_xor_b32_e32 v5, v7, v2 -; CGP-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v8, v4, vcc +; CGP-NEXT: v_cndmask_b32_e32 v4, v9, v6, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 +; CGP-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc +; CGP-NEXT: v_xor_b32_e32 v5, v11, v2 +; CGP-NEXT: v_cndmask_b32_e32 v4, v10, v4, vcc ; CGP-NEXT: v_xor_b32_e32 v2, v3, v5 ; CGP-NEXT: v_xor_b32_e32 v3, v4, v5 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v5 @@ -1042,141 +990,133 @@ ; CHECK-LABEL: v_sdiv_i64_pow2k_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_movk_i32 s4, 0x1000 -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s4 +; CHECK-NEXT: s_movk_i32 s6, 0x1000 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s6 ; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0 -; CHECK-NEXT: s_movk_i32 s5, 0xf000 -; CHECK-NEXT: s_bfe_i32 s6, -1, 0x10000 +; CHECK-NEXT: s_movk_i32 s7, 0xf000 ; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v3 ; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 -; CHECK-NEXT: v_mul_f32_e32 v4, 0x2f800000, v2 -; CHECK-NEXT: v_trunc_f32_e32 v4, v4 +; CHECK-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 +; CHECK-NEXT: v_trunc_f32_e32 v4, v3 ; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v4 -; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 -; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 -; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc -; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4 -; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s5, v2 -; CHECK-NEXT: v_xor_b32_e32 v0, v0, v3 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; CHECK-NEXT: v_mul_lo_u32 v6, v4, v7 -; CHECK-NEXT: v_mul_lo_u32 v8, v2, v5 -; CHECK-NEXT: v_mul_hi_u32 v9, v2, v7 -; CHECK-NEXT: v_mul_hi_u32 v7, v4, v7 -; CHECK-NEXT: v_xor_b32_e32 v1, v1, v3 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v9, v4, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_mul_hi_u32 v8, v2, v5 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CHECK-NEXT: v_cvt_u32_f32_e32 v5, v2 +; CHECK-NEXT: v_mov_b32_e32 v2, 0xfffff000 +; CHECK-NEXT: v_cvt_u32_f32_e32 v6, v4 +; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, v5, 0 +; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], s7, v6, v[3:4] +; CHECK-NEXT: v_mul_hi_u32 v7, v5, v2 +; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], -1, v5, v[3:4] +; CHECK-NEXT: v_mul_lo_u32 v4, v6, v2 +; CHECK-NEXT: v_mul_hi_u32 v2, v6, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, v5, v3 +; CHECK-NEXT: v_mul_lo_u32 v9, v6, v3 +; CHECK-NEXT: v_mul_hi_u32 v10, v5, v3 +; CHECK-NEXT: v_mul_hi_u32 v3, v6, v3 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v8 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v8, v4 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v9, v2 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc -; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s5, v2 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; CHECK-NEXT: v_mul_lo_u32 v6, v4, v7 -; CHECK-NEXT: v_mul_lo_u32 v8, v2, v5 -; CHECK-NEXT: v_mul_hi_u32 v9, v2, v7 -; CHECK-NEXT: v_mul_hi_u32 v7, v4, v7 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v10 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v9, v4, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_mul_hi_u32 v8, v2, v5 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v7, v4 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v2 +; CHECK-NEXT: v_addc_u32_e32 v6, vcc, v6, v3, vcc +; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], s7, v5, 0 +; CHECK-NEXT: v_ashrrev_i32_e32 v7, 31, v1 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v7 +; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], s7, v6, v[3:4] +; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v7, vcc +; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], -1, v5, v[3:4] +; CHECK-NEXT: v_xor_b32_e32 v4, v0, v7 +; CHECK-NEXT: v_mul_lo_u32 v0, v6, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, v5, v3 +; CHECK-NEXT: v_xor_b32_e32 v9, v1, v7 +; CHECK-NEXT: v_mul_hi_u32 v1, v5, v2 +; CHECK-NEXT: v_mul_hi_u32 v2, v6, v2 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v8 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc -; CHECK-NEXT: v_mul_lo_u32 v5, v1, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, v0, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, v0, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 -; CHECK-NEXT: v_mov_b32_e32 v7, 0x1000 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v8, v1, v4 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CHECK-NEXT: v_mul_hi_u32 v6, v0, v4 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v8, v2 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; CHECK-NEXT: v_mul_lo_u32 v1, v6, v3 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v8, v0 +; CHECK-NEXT: v_mul_hi_u32 v8, v5, v3 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v8 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v8 +; CHECK-NEXT: v_mul_hi_u32 v3, v6, v3 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v1, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v1 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v3, v1 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v5, v0 +; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v6, v1, vcc +; CHECK-NEXT: v_mul_lo_u32 v2, v9, v0 +; CHECK-NEXT: v_mul_lo_u32 v3, v4, v1 +; CHECK-NEXT: v_mul_hi_u32 v5, v4, v0 +; CHECK-NEXT: v_mul_hi_u32 v0, v9, v0 +; CHECK-NEXT: v_mul_hi_u32 v6, v9, v1 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: v_mul_lo_u32 v5, v9, v1 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; CHECK-NEXT: v_mul_hi_u32 v3, v4, v1 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v5, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CHECK-NEXT: v_mul_lo_u32 v5, 0, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4 -; CHECK-NEXT: v_mul_hi_u32 v9, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, s4, v2 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v8 -; CHECK-NEXT: v_subb_u32_e64 v6, s[4:5], v1, v5, vcc -; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v5 -; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v7 -; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v7 -; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] -; CHECK-NEXT: v_mov_b32_e32 v8, s6 -; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v6 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, 1, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[4:5] -; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v4, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v5, v3 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v0, v2 +; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v5, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v6, v2 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], s6, v3, v[1:2] +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v4, v0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], 0, v5, v[1:2] +; CHECK-NEXT: v_mov_b32_e32 v6, 0x1000 +; CHECK-NEXT: s_bfe_i32 s6, -1, 0x10000 +; CHECK-NEXT: v_subb_u32_e64 v2, s[4:5], v9, v1, vcc +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v9, v1 +; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v6 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] +; CHECK-NEXT: v_mov_b32_e32 v8, s6 +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v2 +; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v2, v8, v4, s[4:5] +; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v5 +; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v3, vcc ; CHECK-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v7 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc -; CHECK-NEXT: v_mov_b32_e32 v7, s4 +; CHECK-NEXT: v_mov_b32_e32 v6, s4 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc -; CHECK-NEXT: v_add_i32_e32 v1, vcc, 1, v6 -; CHECK-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, 1, v4 +; CHECK-NEXT: v_addc_u32_e32 v6, vcc, 0, v8, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v1, vcc -; CHECK-NEXT: v_cndmask_b32_e32 v1, v8, v7, vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc -; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc -; CHECK-NEXT: v_xor_b32_e32 v0, v0, v3 -; CHECK-NEXT: v_xor_b32_e32 v1, v1, v3 -; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 -; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v8, v6, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; CHECK-NEXT: v_xor_b32_e32 v0, v0, v7 +; CHECK-NEXT: v_xor_b32_e32 v1, v1, v7 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v7 +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v7, vcc ; CHECK-NEXT: s_setpc_b64 s[30:31] %result = sdiv i64 %num, 4096 ret i64 %result @@ -1187,284 +1127,268 @@ ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: s_movk_i32 s10, 0x1000 -; GISEL-NEXT: s_mov_b32 s6, 0 +; GISEL-NEXT: s_mov_b32 s8, 0 ; GISEL-NEXT: s_add_u32 s4, s10, 0 -; GISEL-NEXT: s_mov_b32 s7, s6 +; GISEL-NEXT: s_mov_b32 s9, s8 ; GISEL-NEXT: s_addc_u32 s5, 0, 0 -; GISEL-NEXT: s_xor_b64 s[8:9], s[4:5], s[6:7] -; GISEL-NEXT: v_cvt_f32_u32_e32 v5, s8 -; GISEL-NEXT: v_cvt_f32_u32_e32 v6, s9 -; GISEL-NEXT: s_sub_u32 s4, 0, s8 -; GISEL-NEXT: s_subb_u32 s5, 0, s9 -; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v1 -; GISEL-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc -; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 -; GISEL-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5 -; GISEL-NEXT: v_trunc_f32_e32 v6, v6 -; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v6 -; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 -; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 -; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 -; GISEL-NEXT: v_xor_b32_e32 v1, v1, v4 -; GISEL-NEXT: v_mul_lo_u32 v7, s5, v5 -; GISEL-NEXT: v_mul_lo_u32 v8, s4, v6 -; GISEL-NEXT: v_mul_hi_u32 v10, s4, v5 -; GISEL-NEXT: v_mul_lo_u32 v9, s4, v5 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; GISEL-NEXT: v_mul_lo_u32 v8, v6, v9 -; GISEL-NEXT: v_mul_lo_u32 v10, v5, v7 -; GISEL-NEXT: v_mul_hi_u32 v11, v5, v9 -; GISEL-NEXT: v_mul_hi_u32 v9, v6, v9 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; GISEL-NEXT: s_xor_b64 s[6:7], s[4:5], s[8:9] +; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s6 +; GISEL-NEXT: v_cvt_f32_u32_e32 v5, s7 +; GISEL-NEXT: s_sub_u32 s11, 0, s6 +; GISEL-NEXT: s_subb_u32 s12, 0, s7 +; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 +; GISEL-NEXT: v_trunc_f32_e32 v6, v5 +; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6 +; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v4 +; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v6 +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], s11, v7, 0 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s11, v8, v[5:6] +; GISEL-NEXT: v_mul_hi_u32 v9, v7, v4 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s12, v7, v[5:6] +; GISEL-NEXT: v_mul_lo_u32 v6, v8, v4 +; GISEL-NEXT: v_mul_hi_u32 v4, v8, v4 +; GISEL-NEXT: v_mul_lo_u32 v10, v7, v5 +; GISEL-NEXT: v_mul_lo_u32 v11, v8, v5 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; GISEL-NEXT: v_mul_hi_u32 v10, v5, v7 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9 +; GISEL-NEXT: v_mul_hi_u32 v9, v7, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v10, v6 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v11, v4 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, s5, v5 -; GISEL-NEXT: v_mul_lo_u32 v8, s4, v6 -; GISEL-NEXT: v_mul_hi_u32 v10, s4, v5 -; GISEL-NEXT: v_mul_lo_u32 v9, s4, v5 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; GISEL-NEXT: v_mul_lo_u32 v8, v6, v9 -; GISEL-NEXT: v_mul_lo_u32 v10, v5, v7 -; GISEL-NEXT: v_mul_hi_u32 v11, v5, v9 -; GISEL-NEXT: v_mul_hi_u32 v9, v6, v9 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; GISEL-NEXT: v_mul_hi_u32 v10, v5, v7 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; GISEL-NEXT: v_mul_hi_u32 v5, v8, v5 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v6 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v7, v4 +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v5, vcc +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s11, v9, 0 +; GISEL-NEXT: v_mov_b32_e32 v4, v6 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s11, v8, v[4:5] +; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s12, v9, v[6:7] +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc +; GISEL-NEXT: v_xor_b32_e32 v7, v0, v4 +; GISEL-NEXT: v_mul_lo_u32 v0, v8, v5 +; GISEL-NEXT: v_mul_lo_u32 v10, v9, v6 +; GISEL-NEXT: v_xor_b32_e32 v11, v1, v4 +; GISEL-NEXT: v_mul_hi_u32 v1, v9, v5 +; GISEL-NEXT: v_mul_hi_u32 v5, v8, v5 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v10 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, v1, v5 -; GISEL-NEXT: v_mul_lo_u32 v8, v0, v6 -; GISEL-NEXT: v_mul_hi_u32 v10, v0, v5 -; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5 -; GISEL-NEXT: v_mov_b32_e32 v9, s9 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v10, v1, v6 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GISEL-NEXT: v_mul_hi_u32 v8, v0, v6 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v10, v5 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v1, v8, v6 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0 +; GISEL-NEXT: v_mul_hi_u32 v10, v9, v6 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v10 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10 +; GISEL-NEXT: v_mul_hi_u32 v6, v8, v6 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v5, v1 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v6, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0 +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v8, v1, vcc +; GISEL-NEXT: v_mul_lo_u32 v5, v11, v0 +; GISEL-NEXT: v_mul_lo_u32 v6, v7, v1 +; GISEL-NEXT: v_mul_hi_u32 v8, v7, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v11, v0 +; GISEL-NEXT: v_mul_hi_u32 v9, v11, v1 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v8, v11, v1 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; GISEL-NEXT: v_mul_hi_u32 v6, v7, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0 ; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; GISEL-NEXT: v_mul_hi_u32 v6, v1, v6 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; GISEL-NEXT: v_mul_lo_u32 v7, s9, v5 -; GISEL-NEXT: v_mul_lo_u32 v8, s8, v6 -; GISEL-NEXT: v_mul_hi_u32 v11, s8, v5 -; GISEL-NEXT: v_mul_lo_u32 v10, s8, v5 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v11 -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 -; GISEL-NEXT: v_subb_u32_e64 v8, s[4:5], v1, v7, vcc -; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v7 -; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s9, v8 -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v9, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v0, v5 +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v8, 0 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v5 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v9, v[1:2] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v7, v0 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s7, v8, v[5:6] +; GISEL-NEXT: v_mov_b32_e32 v1, s7 +; GISEL-NEXT: v_subb_u32_e64 v6, s[4:5], v11, v5, vcc +; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v11, v5 +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v6 ; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v0 -; GISEL-NEXT: v_subrev_i32_e32 v0, vcc, s8, v0 -; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], s9, v8 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v5 -; GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v6, vcc -; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s9, v1 +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], s7, v6 ; GISEL-NEXT: v_cndmask_b32_e64 v7, v7, v10, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc -; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, s9, v1 -; GISEL-NEXT: v_cndmask_b32_e32 v0, v10, v0, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v8 -; GISEL-NEXT: v_addc_u32_e32 v10, vcc, 0, v9, vcc ; GISEL-NEXT: s_add_u32 s4, s10, 0 -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 ; GISEL-NEXT: s_addc_u32 s5, 0, 0 -; GISEL-NEXT: v_cndmask_b32_e32 v0, v8, v1, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v1, v9, v10, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 -; GISEL-NEXT: s_xor_b64 s[6:7], s[4:5], s[6:7] -; GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc -; GISEL-NEXT: v_cvt_f32_u32_e32 v5, s6 -; GISEL-NEXT: v_cvt_f32_u32_e32 v6, s7 -; GISEL-NEXT: s_sub_u32 s4, 0, s6 -; GISEL-NEXT: s_subb_u32 s5, 0, s7 -; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 +; GISEL-NEXT: s_xor_b64 s[8:9], s[4:5], s[8:9] +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v5, v1, vcc +; GISEL-NEXT: v_cvt_f32_u32_e32 v5, s8 +; GISEL-NEXT: v_cvt_f32_u32_e32 v6, s9 +; GISEL-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0 +; GISEL-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v1, vcc ; GISEL-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5 -; GISEL-NEXT: v_xor_b32_e32 v1, v1, v4 -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 -; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 -; GISEL-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v1, v5 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, 1, v8 +; GISEL-NEXT: v_addc_u32_e32 v12, vcc, 0, v9, vcc +; GISEL-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 +; GISEL-NEXT: v_mul_f32_e32 v6, 0x2f800000, v1 ; GISEL-NEXT: v_trunc_f32_e32 v6, v6 -; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v6 -; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 -; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, s5, v5 -; GISEL-NEXT: v_mul_lo_u32 v8, s4, v6 -; GISEL-NEXT: v_mul_hi_u32 v10, s4, v5 -; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v3 -; GISEL-NEXT: v_mul_lo_u32 v9, s4, v5 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v4, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; GISEL-NEXT: v_mul_lo_u32 v8, v6, v9 -; GISEL-NEXT: v_mul_lo_u32 v10, v5, v7 -; GISEL-NEXT: v_mul_hi_u32 v11, v5, v9 -; GISEL-NEXT: v_mul_hi_u32 v9, v6, v9 -; GISEL-NEXT: v_xor_b32_e32 v2, v2, v4 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; GISEL-NEXT: v_mul_hi_u32 v10, v5, v7 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, s5, v5 -; GISEL-NEXT: v_mul_lo_u32 v8, s4, v6 -; GISEL-NEXT: v_mul_hi_u32 v10, s4, v5 -; GISEL-NEXT: v_mul_lo_u32 v9, s4, v5 -; GISEL-NEXT: v_xor_b32_e32 v3, v3, v4 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; GISEL-NEXT: v_mul_lo_u32 v8, v6, v9 -; GISEL-NEXT: v_mul_lo_u32 v10, v5, v7 -; GISEL-NEXT: v_mul_hi_u32 v11, v5, v9 -; GISEL-NEXT: v_mul_hi_u32 v9, v6, v9 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; GISEL-NEXT: v_mul_hi_u32 v10, v5, v7 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; GISEL-NEXT: v_mac_f32_e32 v1, 0xcf800000, v6 +; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v1 +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s7, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 +; GISEL-NEXT: s_sub_u32 s6, 0, s8 +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v13, 0 +; GISEL-NEXT: v_cvt_u32_f32_e32 v15, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, s7, v10 +; GISEL-NEXT: v_cndmask_b32_e32 v10, v5, v14, vcc +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v15, v[1:2] +; GISEL-NEXT: s_subb_u32 s7, 0, s9 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v11 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s7, v13, v[5:6] +; GISEL-NEXT: v_addc_u32_e32 v14, vcc, 0, v12, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 +; GISEL-NEXT: v_cndmask_b32_e32 v6, v11, v1, vcc +; GISEL-NEXT: v_mul_lo_u32 v1, v15, v0 +; GISEL-NEXT: v_mul_lo_u32 v10, v13, v5 +; GISEL-NEXT: v_cndmask_b32_e32 v11, v12, v14, vcc +; GISEL-NEXT: v_mul_hi_u32 v12, v13, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v15, v0 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v10 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, v3, v5 -; GISEL-NEXT: v_mul_lo_u32 v8, v2, v6 -; GISEL-NEXT: v_mul_hi_u32 v10, v2, v5 -; GISEL-NEXT: v_mul_hi_u32 v5, v3, v5 -; GISEL-NEXT: v_mov_b32_e32 v9, s7 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v10, v3, v6 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GISEL-NEXT: v_mul_hi_u32 v8, v2, v6 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v10, v5 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v12, v15, v5 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1 +; GISEL-NEXT: v_mul_hi_u32 v10, v13, v5 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v12, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v10 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; GISEL-NEXT: v_mul_hi_u32 v6, v3, v6 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; GISEL-NEXT: v_mul_lo_u32 v7, s7, v5 -; GISEL-NEXT: v_mul_lo_u32 v8, s6, v6 -; GISEL-NEXT: v_mul_hi_u32 v11, s6, v5 -; GISEL-NEXT: v_mul_lo_u32 v10, s6, v5 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v11 -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 -; GISEL-NEXT: v_subb_u32_e64 v8, s[4:5], v3, v7, vcc -; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v7 -; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v8 -; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v9, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v2 -; GISEL-NEXT: v_subrev_i32_e32 v2, vcc, s6, v2 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v12, v10 +; GISEL-NEXT: v_mul_hi_u32 v5, v15, v5 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v5, v1 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v0 +; GISEL-NEXT: v_addc_u32_e32 v12, vcc, v15, v1, vcc +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v10, 0 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 +; GISEL-NEXT: v_cndmask_b32_e32 v7, v8, v6, vcc +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v12, v[1:2] +; GISEL-NEXT: v_xor_b32_e32 v1, v7, v4 +; GISEL-NEXT: v_ashrrev_i32_e32 v7, 31, v3 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s7, v10, v[5:6] +; GISEL-NEXT: v_cndmask_b32_e32 v8, v9, v11, vcc +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v7 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc +; GISEL-NEXT: v_xor_b32_e32 v9, v2, v7 +; GISEL-NEXT: v_mul_lo_u32 v2, v12, v0 +; GISEL-NEXT: v_mul_lo_u32 v6, v10, v5 +; GISEL-NEXT: v_xor_b32_e32 v11, v3, v7 +; GISEL-NEXT: v_mul_hi_u32 v3, v10, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v12, v5 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2 +; GISEL-NEXT: v_mul_hi_u32 v6, v10, v5 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6 +; GISEL-NEXT: v_mul_hi_u32 v5, v12, v5 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0 +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v12, v2, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v11, v0 +; GISEL-NEXT: v_mul_lo_u32 v5, v9, v2 +; GISEL-NEXT: v_mul_hi_u32 v6, v9, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v11, v0 +; GISEL-NEXT: v_xor_b32_e32 v8, v8, v4 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v6, v11, v2 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v5, v3 +; GISEL-NEXT: v_mul_hi_u32 v5, v9, v2 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v0, v3 +; GISEL-NEXT: v_mul_hi_u32 v6, v11, v2 +; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], s8, v10, 0 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v5, v0 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v6, v0 +; GISEL-NEXT: v_mov_b32_e32 v0, v3 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s8, v12, v[0:1] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v4 +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v8, v4, vcc +; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], s9, v10, v[5:6] +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v9, v2 +; GISEL-NEXT: v_mov_b32_e32 v8, s9 +; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v11, v3, vcc +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v11, v3 +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s9, v4 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v8, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v2 +; GISEL-NEXT: v_subrev_i32_e32 v2, vcc, s8, v2 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], s9, v4 ; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], s7, v8 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v5 -; GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v6, vcc -; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s7, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v7, v7, v10, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc -; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s6, v2 +; GISEL-NEXT: v_cndmask_b32_e64 v4, v5, v6, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v10 +; GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v12, vcc +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s9, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v2 ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, s7, v3 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc -; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v8 -; GISEL-NEXT: v_addc_u32_e32 v10, vcc, 0, v9, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, s9, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc +; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5 +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v6, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v8, v3, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v3, v9, v10, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc -; GISEL-NEXT: v_xor_b32_e32 v2, v2, v4 -; GISEL-NEXT: v_xor_b32_e32 v3, v3, v4 -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v4 -; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v4, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v2, v5, v3, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v6, v8, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v7 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v7 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v7 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_sdiv_v2i64_pow2k_denom: @@ -1476,267 +1400,252 @@ ; CGP-NEXT: s_movk_i32 s7, 0xf000 ; CGP-NEXT: s_bfe_i32 s8, -1, 0x10000 ; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 -; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v4 -; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 -; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5 -; CGP-NEXT: v_trunc_f32_e32 v6, v6 -; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v6 -; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 -; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 -; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc -; CGP-NEXT: v_mul_lo_u32 v7, -1, v5 -; CGP-NEXT: v_mul_lo_u32 v8, s7, v6 -; CGP-NEXT: v_mul_hi_u32 v10, s7, v5 -; CGP-NEXT: v_mul_lo_u32 v9, s7, v5 -; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; CGP-NEXT: v_mul_lo_u32 v8, v6, v9 -; CGP-NEXT: v_mul_lo_u32 v10, v5, v7 -; CGP-NEXT: v_mul_hi_u32 v11, v5, v9 -; CGP-NEXT: v_mul_hi_u32 v9, v6, v9 -; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v6, v7 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; CGP-NEXT: v_mul_hi_u32 v10, v5, v7 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; CGP-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 +; CGP-NEXT: v_trunc_f32_e32 v6, v5 +; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v7, v4 +; CGP-NEXT: v_mov_b32_e32 v4, 0xfffff000 +; CGP-NEXT: v_cvt_u32_f32_e32 v8, v6 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v4, v7, 0 +; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s7, v8, v[5:6] +; CGP-NEXT: v_mul_hi_u32 v9, v7, v4 +; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], -1, v7, v[5:6] +; CGP-NEXT: v_mul_lo_u32 v6, v8, v4 +; CGP-NEXT: v_mul_hi_u32 v4, v8, v4 +; CGP-NEXT: v_mul_lo_u32 v10, v7, v5 +; CGP-NEXT: v_mul_lo_u32 v11, v8, v5 +; CGP-NEXT: v_mul_hi_u32 v12, v7, v5 +; CGP-NEXT: v_mul_hi_u32 v5, v8, v5 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v9 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v10, v6 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v11, v4 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc -; CGP-NEXT: v_mul_lo_u32 v7, -1, v5 -; CGP-NEXT: v_mul_lo_u32 v8, s7, v6 -; CGP-NEXT: v_mul_hi_u32 v10, s7, v5 -; CGP-NEXT: v_mul_lo_u32 v9, s7, v5 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; CGP-NEXT: v_mul_lo_u32 v8, v6, v9 -; CGP-NEXT: v_mul_lo_u32 v10, v5, v7 -; CGP-NEXT: v_mul_hi_u32 v11, v5, v9 -; CGP-NEXT: v_mul_hi_u32 v9, v6, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v12 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v6, v7 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; CGP-NEXT: v_mul_hi_u32 v10, v5, v7 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v5, v8 -; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc -; CGP-NEXT: v_mul_lo_u32 v7, v1, v8 -; CGP-NEXT: v_mul_lo_u32 v9, v0, v6 -; CGP-NEXT: v_mul_hi_u32 v10, v0, v8 -; CGP-NEXT: v_mul_hi_u32 v8, v1, v8 -; CGP-NEXT: v_mov_b32_e32 v5, 0x1000 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v7, v4 +; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v5, vcc +; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s7, v9, 0 +; CGP-NEXT: v_mov_b32_e32 v4, v6 +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v8, v[4:5] +; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], -1, v9, v[6:7] +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc +; CGP-NEXT: v_xor_b32_e32 v10, v0, v4 +; CGP-NEXT: v_mul_lo_u32 v0, v8, v5 +; CGP-NEXT: v_mul_lo_u32 v7, v9, v6 +; CGP-NEXT: v_xor_b32_e32 v11, v1, v4 +; CGP-NEXT: v_mul_hi_u32 v1, v9, v5 +; CGP-NEXT: v_mul_hi_u32 v5, v8, v5 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v7 ; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v10, v1, v6 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7 -; CGP-NEXT: v_mul_hi_u32 v9, v0, v6 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_mul_hi_u32 v6, v1, v6 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; CGP-NEXT: v_mul_lo_u32 v8, 0, v7 -; CGP-NEXT: v_mul_lo_u32 v9, s6, v6 -; CGP-NEXT: v_mul_hi_u32 v11, s6, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v7 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 -; CGP-NEXT: v_subb_u32_e64 v9, s[4:5], v1, v8, vcc -; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v8 -; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; CGP-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v1, v8, v6 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v7, v0 +; CGP-NEXT: v_mul_hi_u32 v7, v9, v6 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v7 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CGP-NEXT: v_mul_hi_u32 v6, v8, v6 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v1, v0 +; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v5, v1 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v6, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v9, v0 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v8, v1, vcc +; CGP-NEXT: v_mul_lo_u32 v5, v11, v0 +; CGP-NEXT: v_mul_lo_u32 v6, v10, v1 +; CGP-NEXT: v_mul_hi_u32 v7, v10, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v11, v0 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v7, v11, v1 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; CGP-NEXT: v_mul_hi_u32 v6, v10, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v7, v0 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v6 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v0, v5 +; CGP-NEXT: v_mul_hi_u32 v7, v11, v1 +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v8, 0 +; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v7, v5 +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v9, v[1:2] +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v10, v0 +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], 0, v8, v[6:7] +; CGP-NEXT: v_mov_b32_e32 v5, 0x1000 +; CGP-NEXT: v_mov_b32_e32 v10, s8 +; CGP-NEXT: v_subb_u32_e64 v1, s[4:5], v11, v6, vcc +; CGP-NEXT: v_sub_i32_e64 v6, s[4:5], v11, v6 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1 +; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v6, vcc +; CGP-NEXT: v_cvt_f32_u32_e32 v6, v5 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 -; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] -; CGP-NEXT: v_mov_b32_e32 v10, s8 -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 -; CGP-NEXT: v_add_i32_e32 v9, vcc, 1, v7 -; CGP-NEXT: v_cndmask_b32_e64 v8, v10, v8, s[4:5] -; CGP-NEXT: v_addc_u32_e32 v10, vcc, 0, v6, vcc +; CGP-NEXT: v_cndmask_b32_e64 v10, v10, v7, s[4:5] +; CGP-NEXT: v_subbrev_u32_e32 v7, vcc, 0, v1, vcc +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v1, 0 +; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v1 +; CGP-NEXT: v_rcp_iflag_f32_e32 v1, v6 +; CGP-NEXT: v_add_i32_e32 v11, vcc, 1, v8 +; CGP-NEXT: v_addc_u32_e32 v12, vcc, 0, v9, vcc +; CGP-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 +; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v1 +; CGP-NEXT: v_trunc_f32_e32 v6, v6 +; CGP-NEXT: v_mac_f32_e32 v1, 0xcf800000, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v13, v1 ; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5 -; CGP-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc -; CGP-NEXT: v_mov_b32_e32 v11, s4 -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CGP-NEXT: v_cndmask_b32_e32 v0, v11, v0, vcc -; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v9 -; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v10, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; CGP-NEXT: v_cndmask_b32_e32 v0, v9, v1, vcc -; CGP-NEXT: v_cvt_f32_u32_e32 v9, v5 -; CGP-NEXT: v_cndmask_b32_e32 v1, v10, v11, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 -; CGP-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc -; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 -; CGP-NEXT: v_mac_f32_e32 v9, 0x4f800000, v6 -; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v9 -; CGP-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc -; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v3 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v7 -; CGP-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 -; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6 -; CGP-NEXT: v_trunc_f32_e32 v8, v8 -; CGP-NEXT: v_mac_f32_e32 v6, 0xcf800000, v8 -; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 -; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 -; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc -; CGP-NEXT: v_mul_lo_u32 v9, -1, v6 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s7, v6 -; CGP-NEXT: v_mul_lo_u32 v11, s7, v6 -; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v12, v6, v9 -; CGP-NEXT: v_mul_hi_u32 v13, v6, v11 -; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_xor_b32_e32 v2, v2, v7 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v13, v8, v9 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_mul_hi_u32 v12, v6, v9 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CGP-NEXT: v_mul_hi_u32 v9, v8, v9 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_mov_b32_e32 v15, s4 +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s7, v13, 0 +; CGP-NEXT: v_cvt_u32_f32_e32 v16, v6 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 +; CGP-NEXT: v_cndmask_b32_e32 v14, v15, v14, vcc +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v16, v[1:2] +; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v11 +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], -1, v13, v[6:7] +; CGP-NEXT: v_addc_u32_e32 v15, vcc, 0, v12, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 +; CGP-NEXT: v_cndmask_b32_e32 v7, v11, v1, vcc +; CGP-NEXT: v_mul_lo_u32 v1, v16, v0 +; CGP-NEXT: v_mul_lo_u32 v11, v13, v6 +; CGP-NEXT: v_mul_hi_u32 v14, v13, v0 +; CGP-NEXT: v_cndmask_b32_e32 v12, v12, v15, vcc +; CGP-NEXT: v_mul_hi_u32 v0, v16, v0 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v11 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 -; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc -; CGP-NEXT: v_mul_lo_u32 v9, -1, v6 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s7, v6 -; CGP-NEXT: v_mul_lo_u32 v11, s7, v6 -; CGP-NEXT: v_xor_b32_e32 v3, v3, v7 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v12, v6, v9 -; CGP-NEXT: v_mul_hi_u32 v13, v6, v11 -; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v13, v8, v9 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_mul_hi_u32 v12, v6, v9 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CGP-NEXT: v_mul_hi_u32 v9, v8, v9 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v14 +; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v14, v16, v6 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v11, v1 +; CGP-NEXT: v_mul_hi_u32 v11, v13, v6 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v14, v0 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v11 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 -; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc -; CGP-NEXT: v_mul_lo_u32 v9, v3, v6 -; CGP-NEXT: v_mul_lo_u32 v10, v2, v8 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc -; CGP-NEXT: v_mul_hi_u32 v4, v2, v6 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v9, v3, v8 -; CGP-NEXT: v_mul_hi_u32 v6, v3, v6 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 -; CGP-NEXT: v_mul_hi_u32 v10, v2, v8 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_mul_hi_u32 v8, v3, v8 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v14, v11 +; CGP-NEXT: v_mul_hi_u32 v6, v16, v6 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v11, v1 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v6, v1 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v0 +; CGP-NEXT: v_addc_u32_e32 v13, vcc, v16, v1, vcc +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s7, v11, 0 +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 +; CGP-NEXT: v_cndmask_b32_e32 v8, v8, v7, vcc +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v13, v[1:2] +; CGP-NEXT: v_xor_b32_e32 v1, v8, v4 +; CGP-NEXT: v_ashrrev_i32_e32 v8, 31, v3 +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], -1, v11, v[6:7] +; CGP-NEXT: v_cndmask_b32_e32 v9, v9, v12, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v8 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v8, vcc +; CGP-NEXT: v_xor_b32_e32 v10, v2, v8 +; CGP-NEXT: v_mul_lo_u32 v2, v13, v0 +; CGP-NEXT: v_mul_lo_u32 v7, v11, v6 +; CGP-NEXT: v_xor_b32_e32 v12, v3, v8 +; CGP-NEXT: v_mul_hi_u32 v3, v11, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v13, v0 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v7 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v3, v13, v6 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v7, v2 +; CGP-NEXT: v_mul_hi_u32 v7, v11, v6 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v3, v0 +; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v7 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v7 +; CGP-NEXT: v_mul_hi_u32 v6, v13, v6 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v6, v2 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v11, v0 +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v13, v2, vcc +; CGP-NEXT: v_mul_lo_u32 v3, v12, v0 +; CGP-NEXT: v_mul_lo_u32 v6, v10, v2 +; CGP-NEXT: v_mul_hi_u32 v7, v10, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v12, v0 +; CGP-NEXT: v_xor_b32_e32 v9, v9, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v6 ; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CGP-NEXT: v_mul_lo_u32 v8, 0, v4 -; CGP-NEXT: v_mul_lo_u32 v9, s6, v6 -; CGP-NEXT: v_mul_hi_u32 v11, s6, v4 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v4 -; CGP-NEXT: s_bfe_i32 s6, -1, 0x10000 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 -; CGP-NEXT: v_subb_u32_e64 v9, s[4:5], v3, v8, vcc -; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v8 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v7 +; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v7, v12, v2 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v6, v3 +; CGP-NEXT: v_mul_hi_u32 v6, v10, v2 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v7, v0 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v6 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v0, v3 +; CGP-NEXT: v_mul_hi_u32 v7, v12, v2 +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], s6, v11, 0 +; CGP-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v6, v0 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v7, v0 +; CGP-NEXT: v_mov_b32_e32 v0, v3 +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[0:1] +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v1, v4 +; CGP-NEXT: v_subb_u32_e32 v1, vcc, v9, v4, vcc +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], 0, v11, v[6:7] +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v10, v2 +; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v12, v3, vcc +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v12, v3 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: s_bfe_i32 s6, -1, 0x10000 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v5 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] +; CGP-NEXT: v_mov_b32_e32 v7, s6 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] -; CGP-NEXT: v_mov_b32_e32 v10, s6 -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 -; CGP-NEXT: v_add_i32_e32 v9, vcc, 1, v4 -; CGP-NEXT: v_cndmask_b32_e64 v8, v10, v8, s[4:5] -; CGP-NEXT: v_addc_u32_e32 v10, vcc, 0, v6, vcc +; CGP-NEXT: v_cndmask_b32_e64 v4, v7, v6, s[4:5] +; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v11 +; CGP-NEXT: v_addc_u32_e32 v7, vcc, 0, v13, vcc ; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5 ; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc ; CGP-NEXT: v_mov_b32_e32 v5, s4 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; CGP-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc -; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v9 -; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v10, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v6 +; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v7, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; CGP-NEXT: v_cndmask_b32_e32 v2, v9, v3, vcc -; CGP-NEXT: v_cndmask_b32_e32 v3, v10, v5, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 -; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc -; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc -; CGP-NEXT: v_xor_b32_e32 v2, v2, v7 -; CGP-NEXT: v_xor_b32_e32 v3, v3, v7 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v7 -; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc +; CGP-NEXT: v_cndmask_b32_e32 v2, v6, v3, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v7, v5, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; CGP-NEXT: v_cndmask_b32_e32 v2, v11, v2, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v13, v3, vcc +; CGP-NEXT: v_xor_b32_e32 v2, v2, v8 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v8 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v8 +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v8, vcc ; CGP-NEXT: s_setpc_b64 s[30:31] %result = sdiv <2 x i64> %num, ret <2 x i64> %result @@ -1746,429 +1655,405 @@ ; CHECK-LABEL: v_sdiv_i64_oddk_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_mov_b32 s4, 0x12d8fb -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s4 +; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s6 ; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0 -; CHECK-NEXT: s_mov_b32 s5, 0xffed2705 -; CHECK-NEXT: s_bfe_i32 s6, -1, 0x10000 +; CHECK-NEXT: s_mov_b32 s7, 0xffed2705 ; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v3 ; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 -; CHECK-NEXT: v_mul_f32_e32 v4, 0x2f800000, v2 -; CHECK-NEXT: v_trunc_f32_e32 v4, v4 +; CHECK-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 +; CHECK-NEXT: v_trunc_f32_e32 v4, v3 ; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v4 -; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 -; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 -; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc -; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4 -; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s5, v2 -; CHECK-NEXT: v_xor_b32_e32 v0, v0, v3 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; CHECK-NEXT: v_mul_lo_u32 v6, v4, v7 -; CHECK-NEXT: v_mul_lo_u32 v8, v2, v5 -; CHECK-NEXT: v_mul_hi_u32 v9, v2, v7 -; CHECK-NEXT: v_mul_hi_u32 v7, v4, v7 -; CHECK-NEXT: v_xor_b32_e32 v1, v1, v3 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v9, v4, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_mul_hi_u32 v8, v2, v5 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CHECK-NEXT: v_cvt_u32_f32_e32 v5, v2 +; CHECK-NEXT: v_mov_b32_e32 v2, 0xffed2705 +; CHECK-NEXT: v_cvt_u32_f32_e32 v6, v4 +; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, v5, 0 +; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], s7, v6, v[3:4] +; CHECK-NEXT: v_mul_hi_u32 v7, v5, v2 +; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], -1, v5, v[3:4] +; CHECK-NEXT: v_mul_lo_u32 v4, v6, v2 +; CHECK-NEXT: v_mul_hi_u32 v2, v6, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, v5, v3 +; CHECK-NEXT: v_mul_lo_u32 v9, v6, v3 +; CHECK-NEXT: v_mul_hi_u32 v10, v5, v3 +; CHECK-NEXT: v_mul_hi_u32 v3, v6, v3 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v8 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v8, v4 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v9, v2 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc -; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s5, v2 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; CHECK-NEXT: v_mul_lo_u32 v6, v4, v7 -; CHECK-NEXT: v_mul_lo_u32 v8, v2, v5 -; CHECK-NEXT: v_mul_hi_u32 v9, v2, v7 -; CHECK-NEXT: v_mul_hi_u32 v7, v4, v7 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v10 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v9, v4, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_mul_hi_u32 v8, v2, v5 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v7, v4 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v2 +; CHECK-NEXT: v_addc_u32_e32 v6, vcc, v6, v3, vcc +; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], s7, v5, 0 +; CHECK-NEXT: v_ashrrev_i32_e32 v7, 31, v1 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v7 +; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], s7, v6, v[3:4] +; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v7, vcc +; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], -1, v5, v[3:4] +; CHECK-NEXT: v_xor_b32_e32 v4, v0, v7 +; CHECK-NEXT: v_mul_lo_u32 v0, v6, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, v5, v3 +; CHECK-NEXT: v_xor_b32_e32 v9, v1, v7 +; CHECK-NEXT: v_mul_hi_u32 v1, v5, v2 +; CHECK-NEXT: v_mul_hi_u32 v2, v6, v2 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v8 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc -; CHECK-NEXT: v_mul_lo_u32 v5, v1, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, v0, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, v0, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 -; CHECK-NEXT: v_mov_b32_e32 v7, 0x12d8fb -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v8, v1, v4 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CHECK-NEXT: v_mul_hi_u32 v6, v0, v4 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v8, v2 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; CHECK-NEXT: v_mul_lo_u32 v1, v6, v3 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v8, v0 +; CHECK-NEXT: v_mul_hi_u32 v8, v5, v3 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v8 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v8 +; CHECK-NEXT: v_mul_hi_u32 v3, v6, v3 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v1, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v1 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v3, v1 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v5, v0 +; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v6, v1, vcc +; CHECK-NEXT: v_mul_lo_u32 v2, v9, v0 +; CHECK-NEXT: v_mul_lo_u32 v3, v4, v1 +; CHECK-NEXT: v_mul_hi_u32 v5, v4, v0 +; CHECK-NEXT: v_mul_hi_u32 v0, v9, v0 +; CHECK-NEXT: v_mul_hi_u32 v6, v9, v1 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: v_mul_lo_u32 v5, v9, v1 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; CHECK-NEXT: v_mul_hi_u32 v3, v4, v1 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v5, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CHECK-NEXT: v_mul_lo_u32 v5, 0, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4 -; CHECK-NEXT: v_mul_hi_u32 v9, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, s4, v2 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v8 -; CHECK-NEXT: v_subb_u32_e64 v6, s[4:5], v1, v5, vcc -; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v5 -; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v7 -; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v7 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v5, v3 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v0, v2 +; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v5, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v6, v2 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], s6, v3, v[1:2] +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v4, v0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], 0, v5, v[1:2] +; CHECK-NEXT: v_mov_b32_e32 v6, 0x12d8fb +; CHECK-NEXT: s_bfe_i32 s6, -1, 0x10000 +; CHECK-NEXT: v_subb_u32_e64 v2, s[4:5], v9, v1, vcc +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v9, v1 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v6 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] ; CHECK-NEXT: v_mov_b32_e32 v8, s6 -; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v6 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, 1, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[4:5] -; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v4, vcc +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v2 +; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v2, v8, v4, s[4:5] +; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v5 +; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v3, vcc ; CHECK-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v7 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc -; CHECK-NEXT: v_mov_b32_e32 v7, s4 +; CHECK-NEXT: v_mov_b32_e32 v6, s4 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc -; CHECK-NEXT: v_add_i32_e32 v1, vcc, 1, v6 -; CHECK-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, 1, v4 +; CHECK-NEXT: v_addc_u32_e32 v6, vcc, 0, v8, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v1, vcc -; CHECK-NEXT: v_cndmask_b32_e32 v1, v8, v7, vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc -; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc -; CHECK-NEXT: v_xor_b32_e32 v0, v0, v3 -; CHECK-NEXT: v_xor_b32_e32 v1, v1, v3 -; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 -; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v8, v6, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; CHECK-NEXT: v_xor_b32_e32 v0, v0, v7 +; CHECK-NEXT: v_xor_b32_e32 v1, v1, v7 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v7 +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v7, vcc ; CHECK-NEXT: s_setpc_b64 s[30:31] %result = sdiv i64 %num, 1235195 ret i64 %result } define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { -; GISEL-LABEL: v_sdiv_v2i64_oddk_denom: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_mov_b32 s10, 0x12d8fb -; GISEL-NEXT: s_mov_b32 s6, 0 -; GISEL-NEXT: s_add_u32 s4, s10, 0 -; GISEL-NEXT: s_mov_b32 s7, s6 -; GISEL-NEXT: s_addc_u32 s5, 0, 0 -; GISEL-NEXT: s_xor_b64 s[8:9], s[4:5], s[6:7] -; GISEL-NEXT: v_cvt_f32_u32_e32 v5, s8 -; GISEL-NEXT: v_cvt_f32_u32_e32 v6, s9 -; GISEL-NEXT: s_sub_u32 s4, 0, s8 -; GISEL-NEXT: s_subb_u32 s5, 0, s9 -; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v1 -; GISEL-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc -; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 -; GISEL-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5 -; GISEL-NEXT: v_trunc_f32_e32 v6, v6 -; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v6 -; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 -; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 -; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 -; GISEL-NEXT: v_xor_b32_e32 v1, v1, v4 -; GISEL-NEXT: v_mul_lo_u32 v7, s5, v5 -; GISEL-NEXT: v_mul_lo_u32 v8, s4, v6 -; GISEL-NEXT: v_mul_hi_u32 v10, s4, v5 -; GISEL-NEXT: v_mul_lo_u32 v9, s4, v5 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; GISEL-NEXT: v_mul_lo_u32 v8, v6, v9 -; GISEL-NEXT: v_mul_lo_u32 v10, v5, v7 -; GISEL-NEXT: v_mul_hi_u32 v11, v5, v9 -; GISEL-NEXT: v_mul_hi_u32 v9, v6, v9 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; GISEL-LABEL: v_sdiv_v2i64_oddk_denom: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_mov_b32 s10, 0x12d8fb +; GISEL-NEXT: s_mov_b32 s8, 0 +; GISEL-NEXT: s_add_u32 s4, s10, 0 +; GISEL-NEXT: s_mov_b32 s9, s8 +; GISEL-NEXT: s_addc_u32 s5, 0, 0 +; GISEL-NEXT: s_xor_b64 s[6:7], s[4:5], s[8:9] +; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s6 +; GISEL-NEXT: v_cvt_f32_u32_e32 v5, s7 +; GISEL-NEXT: s_sub_u32 s11, 0, s6 +; GISEL-NEXT: s_subb_u32 s12, 0, s7 +; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 +; GISEL-NEXT: v_trunc_f32_e32 v6, v5 +; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6 +; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v4 +; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v6 +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], s11, v7, 0 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s11, v8, v[5:6] +; GISEL-NEXT: v_mul_hi_u32 v9, v7, v4 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s12, v7, v[5:6] +; GISEL-NEXT: v_mul_lo_u32 v6, v8, v4 +; GISEL-NEXT: v_mul_hi_u32 v4, v8, v4 +; GISEL-NEXT: v_mul_lo_u32 v10, v7, v5 +; GISEL-NEXT: v_mul_lo_u32 v11, v8, v5 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; GISEL-NEXT: v_mul_hi_u32 v10, v5, v7 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9 +; GISEL-NEXT: v_mul_hi_u32 v9, v7, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v10, v6 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v11, v4 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, s5, v5 -; GISEL-NEXT: v_mul_lo_u32 v8, s4, v6 -; GISEL-NEXT: v_mul_hi_u32 v10, s4, v5 -; GISEL-NEXT: v_mul_lo_u32 v9, s4, v5 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; GISEL-NEXT: v_mul_lo_u32 v8, v6, v9 -; GISEL-NEXT: v_mul_lo_u32 v10, v5, v7 -; GISEL-NEXT: v_mul_hi_u32 v11, v5, v9 -; GISEL-NEXT: v_mul_hi_u32 v9, v6, v9 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; GISEL-NEXT: v_mul_hi_u32 v10, v5, v7 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; GISEL-NEXT: v_mul_hi_u32 v5, v8, v5 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v6 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v7, v4 +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v5, vcc +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s11, v9, 0 +; GISEL-NEXT: v_mov_b32_e32 v4, v6 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s11, v8, v[4:5] +; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s12, v9, v[6:7] +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc +; GISEL-NEXT: v_xor_b32_e32 v7, v0, v4 +; GISEL-NEXT: v_mul_lo_u32 v0, v8, v5 +; GISEL-NEXT: v_mul_lo_u32 v10, v9, v6 +; GISEL-NEXT: v_xor_b32_e32 v11, v1, v4 +; GISEL-NEXT: v_mul_hi_u32 v1, v9, v5 +; GISEL-NEXT: v_mul_hi_u32 v5, v8, v5 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v10 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, v1, v5 -; GISEL-NEXT: v_mul_lo_u32 v8, v0, v6 -; GISEL-NEXT: v_mul_hi_u32 v10, v0, v5 -; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5 -; GISEL-NEXT: v_mov_b32_e32 v9, s9 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v10, v1, v6 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GISEL-NEXT: v_mul_hi_u32 v8, v0, v6 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v10, v5 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v1, v8, v6 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0 +; GISEL-NEXT: v_mul_hi_u32 v10, v9, v6 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v10 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10 +; GISEL-NEXT: v_mul_hi_u32 v6, v8, v6 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v5, v1 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v6, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0 +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v8, v1, vcc +; GISEL-NEXT: v_mul_lo_u32 v5, v11, v0 +; GISEL-NEXT: v_mul_lo_u32 v6, v7, v1 +; GISEL-NEXT: v_mul_hi_u32 v8, v7, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v11, v0 +; GISEL-NEXT: v_mul_hi_u32 v9, v11, v1 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v8, v11, v1 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; GISEL-NEXT: v_mul_hi_u32 v6, v7, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0 ; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; GISEL-NEXT: v_mul_hi_u32 v6, v1, v6 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; GISEL-NEXT: v_mul_lo_u32 v7, s9, v5 -; GISEL-NEXT: v_mul_lo_u32 v8, s8, v6 -; GISEL-NEXT: v_mul_hi_u32 v11, s8, v5 -; GISEL-NEXT: v_mul_lo_u32 v10, s8, v5 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v11 -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 -; GISEL-NEXT: v_subb_u32_e64 v8, s[4:5], v1, v7, vcc -; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v7 -; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s9, v8 -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v9, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v0, v5 +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v8, 0 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v5 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v9, v[1:2] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v7, v0 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s7, v8, v[5:6] +; GISEL-NEXT: v_mov_b32_e32 v1, s7 +; GISEL-NEXT: v_subb_u32_e64 v6, s[4:5], v11, v5, vcc +; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v11, v5 +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v6 ; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v0 -; GISEL-NEXT: v_subrev_i32_e32 v0, vcc, s8, v0 -; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], s9, v8 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v5 -; GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v6, vcc -; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s9, v1 +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], s7, v6 ; GISEL-NEXT: v_cndmask_b32_e64 v7, v7, v10, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc -; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, s9, v1 -; GISEL-NEXT: v_cndmask_b32_e32 v0, v10, v0, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v8 -; GISEL-NEXT: v_addc_u32_e32 v10, vcc, 0, v9, vcc ; GISEL-NEXT: s_add_u32 s4, s10, 0 -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 ; GISEL-NEXT: s_addc_u32 s5, 0, 0 -; GISEL-NEXT: v_cndmask_b32_e32 v0, v8, v1, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v1, v9, v10, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 -; GISEL-NEXT: s_xor_b64 s[6:7], s[4:5], s[6:7] -; GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc -; GISEL-NEXT: v_cvt_f32_u32_e32 v5, s6 -; GISEL-NEXT: v_cvt_f32_u32_e32 v6, s7 -; GISEL-NEXT: s_sub_u32 s4, 0, s6 -; GISEL-NEXT: s_subb_u32 s5, 0, s7 -; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 +; GISEL-NEXT: s_xor_b64 s[8:9], s[4:5], s[8:9] +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v5, v1, vcc +; GISEL-NEXT: v_cvt_f32_u32_e32 v5, s8 +; GISEL-NEXT: v_cvt_f32_u32_e32 v6, s9 +; GISEL-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0 +; GISEL-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v1, vcc ; GISEL-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5 -; GISEL-NEXT: v_xor_b32_e32 v1, v1, v4 -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 -; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 -; GISEL-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v1, v5 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, 1, v8 +; GISEL-NEXT: v_addc_u32_e32 v12, vcc, 0, v9, vcc +; GISEL-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 +; GISEL-NEXT: v_mul_f32_e32 v6, 0x2f800000, v1 ; GISEL-NEXT: v_trunc_f32_e32 v6, v6 -; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v6 -; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 -; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, s5, v5 -; GISEL-NEXT: v_mul_lo_u32 v8, s4, v6 -; GISEL-NEXT: v_mul_hi_u32 v10, s4, v5 -; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v3 -; GISEL-NEXT: v_mul_lo_u32 v9, s4, v5 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v4, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; GISEL-NEXT: v_mul_lo_u32 v8, v6, v9 -; GISEL-NEXT: v_mul_lo_u32 v10, v5, v7 -; GISEL-NEXT: v_mul_hi_u32 v11, v5, v9 -; GISEL-NEXT: v_mul_hi_u32 v9, v6, v9 -; GISEL-NEXT: v_xor_b32_e32 v2, v2, v4 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; GISEL-NEXT: v_mul_hi_u32 v10, v5, v7 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, s5, v5 -; GISEL-NEXT: v_mul_lo_u32 v8, s4, v6 -; GISEL-NEXT: v_mul_hi_u32 v10, s4, v5 -; GISEL-NEXT: v_mul_lo_u32 v9, s4, v5 -; GISEL-NEXT: v_xor_b32_e32 v3, v3, v4 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; GISEL-NEXT: v_mul_lo_u32 v8, v6, v9 -; GISEL-NEXT: v_mul_lo_u32 v10, v5, v7 -; GISEL-NEXT: v_mul_hi_u32 v11, v5, v9 -; GISEL-NEXT: v_mul_hi_u32 v9, v6, v9 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; GISEL-NEXT: v_mul_hi_u32 v10, v5, v7 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; GISEL-NEXT: v_mac_f32_e32 v1, 0xcf800000, v6 +; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v1 +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s7, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 +; GISEL-NEXT: s_sub_u32 s6, 0, s8 +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v13, 0 +; GISEL-NEXT: v_cvt_u32_f32_e32 v15, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, s7, v10 +; GISEL-NEXT: v_cndmask_b32_e32 v10, v5, v14, vcc +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v15, v[1:2] +; GISEL-NEXT: s_subb_u32 s7, 0, s9 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v11 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s7, v13, v[5:6] +; GISEL-NEXT: v_addc_u32_e32 v14, vcc, 0, v12, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 +; GISEL-NEXT: v_cndmask_b32_e32 v6, v11, v1, vcc +; GISEL-NEXT: v_mul_lo_u32 v1, v15, v0 +; GISEL-NEXT: v_mul_lo_u32 v10, v13, v5 +; GISEL-NEXT: v_cndmask_b32_e32 v11, v12, v14, vcc +; GISEL-NEXT: v_mul_hi_u32 v12, v13, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v15, v0 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v10 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, v3, v5 -; GISEL-NEXT: v_mul_lo_u32 v8, v2, v6 -; GISEL-NEXT: v_mul_hi_u32 v10, v2, v5 -; GISEL-NEXT: v_mul_hi_u32 v5, v3, v5 -; GISEL-NEXT: v_mov_b32_e32 v9, s7 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v10, v3, v6 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GISEL-NEXT: v_mul_hi_u32 v8, v2, v6 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v10, v5 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v12, v15, v5 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1 +; GISEL-NEXT: v_mul_hi_u32 v10, v13, v5 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v12, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v10 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; GISEL-NEXT: v_mul_hi_u32 v6, v3, v6 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; GISEL-NEXT: v_mul_lo_u32 v7, s7, v5 -; GISEL-NEXT: v_mul_lo_u32 v8, s6, v6 -; GISEL-NEXT: v_mul_hi_u32 v11, s6, v5 -; GISEL-NEXT: v_mul_lo_u32 v10, s6, v5 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v11 -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 -; GISEL-NEXT: v_subb_u32_e64 v8, s[4:5], v3, v7, vcc -; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v7 -; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v8 -; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v9, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v2 -; GISEL-NEXT: v_subrev_i32_e32 v2, vcc, s6, v2 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v12, v10 +; GISEL-NEXT: v_mul_hi_u32 v5, v15, v5 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v5, v1 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v0 +; GISEL-NEXT: v_addc_u32_e32 v12, vcc, v15, v1, vcc +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v10, 0 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 +; GISEL-NEXT: v_cndmask_b32_e32 v7, v8, v6, vcc +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v12, v[1:2] +; GISEL-NEXT: v_xor_b32_e32 v1, v7, v4 +; GISEL-NEXT: v_ashrrev_i32_e32 v7, 31, v3 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s7, v10, v[5:6] +; GISEL-NEXT: v_cndmask_b32_e32 v8, v9, v11, vcc +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v7 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc +; GISEL-NEXT: v_xor_b32_e32 v9, v2, v7 +; GISEL-NEXT: v_mul_lo_u32 v2, v12, v0 +; GISEL-NEXT: v_mul_lo_u32 v6, v10, v5 +; GISEL-NEXT: v_xor_b32_e32 v11, v3, v7 +; GISEL-NEXT: v_mul_hi_u32 v3, v10, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v12, v5 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2 +; GISEL-NEXT: v_mul_hi_u32 v6, v10, v5 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6 +; GISEL-NEXT: v_mul_hi_u32 v5, v12, v5 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0 +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v12, v2, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v11, v0 +; GISEL-NEXT: v_mul_lo_u32 v5, v9, v2 +; GISEL-NEXT: v_mul_hi_u32 v6, v9, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v11, v0 +; GISEL-NEXT: v_xor_b32_e32 v8, v8, v4 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v6, v11, v2 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v5, v3 +; GISEL-NEXT: v_mul_hi_u32 v5, v9, v2 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v0, v3 +; GISEL-NEXT: v_mul_hi_u32 v6, v11, v2 +; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], s8, v10, 0 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v5, v0 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v6, v0 +; GISEL-NEXT: v_mov_b32_e32 v0, v3 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s8, v12, v[0:1] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v4 +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v8, v4, vcc +; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], s9, v10, v[5:6] +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v9, v2 +; GISEL-NEXT: v_mov_b32_e32 v8, s9 +; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v11, v3, vcc +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v11, v3 +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s9, v4 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v8, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v2 +; GISEL-NEXT: v_subrev_i32_e32 v2, vcc, s8, v2 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], s9, v4 ; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], s7, v8 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v5 -; GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v6, vcc -; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s7, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v7, v7, v10, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc -; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s6, v2 +; GISEL-NEXT: v_cndmask_b32_e64 v4, v5, v6, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v10 +; GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v12, vcc +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s9, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v2 ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, s7, v3 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc -; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v8 -; GISEL-NEXT: v_addc_u32_e32 v10, vcc, 0, v9, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, s9, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc +; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5 +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v6, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v8, v3, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v3, v9, v10, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc -; GISEL-NEXT: v_xor_b32_e32 v2, v2, v4 -; GISEL-NEXT: v_xor_b32_e32 v3, v3, v4 -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v4 -; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v4, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v2, v5, v3, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v6, v8, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v7 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v7 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v7 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_sdiv_v2i64_oddk_denom: @@ -2180,267 +2065,252 @@ ; CGP-NEXT: s_mov_b32 s7, 0xffed2705 ; CGP-NEXT: s_bfe_i32 s8, -1, 0x10000 ; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 -; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v4 -; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 -; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5 -; CGP-NEXT: v_trunc_f32_e32 v6, v6 -; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v6 -; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 -; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 -; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc -; CGP-NEXT: v_mul_lo_u32 v7, -1, v5 -; CGP-NEXT: v_mul_lo_u32 v8, s7, v6 -; CGP-NEXT: v_mul_hi_u32 v10, s7, v5 -; CGP-NEXT: v_mul_lo_u32 v9, s7, v5 -; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; CGP-NEXT: v_mul_lo_u32 v8, v6, v9 -; CGP-NEXT: v_mul_lo_u32 v10, v5, v7 -; CGP-NEXT: v_mul_hi_u32 v11, v5, v9 -; CGP-NEXT: v_mul_hi_u32 v9, v6, v9 -; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v6, v7 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; CGP-NEXT: v_mul_hi_u32 v10, v5, v7 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; CGP-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 +; CGP-NEXT: v_trunc_f32_e32 v6, v5 +; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v7, v4 +; CGP-NEXT: v_mov_b32_e32 v4, 0xffed2705 +; CGP-NEXT: v_cvt_u32_f32_e32 v8, v6 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v4, v7, 0 +; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s7, v8, v[5:6] +; CGP-NEXT: v_mul_hi_u32 v9, v7, v4 +; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], -1, v7, v[5:6] +; CGP-NEXT: v_mul_lo_u32 v6, v8, v4 +; CGP-NEXT: v_mul_hi_u32 v4, v8, v4 +; CGP-NEXT: v_mul_lo_u32 v10, v7, v5 +; CGP-NEXT: v_mul_lo_u32 v11, v8, v5 +; CGP-NEXT: v_mul_hi_u32 v12, v7, v5 +; CGP-NEXT: v_mul_hi_u32 v5, v8, v5 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v9 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v10, v6 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v11, v4 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc -; CGP-NEXT: v_mul_lo_u32 v7, -1, v5 -; CGP-NEXT: v_mul_lo_u32 v8, s7, v6 -; CGP-NEXT: v_mul_hi_u32 v10, s7, v5 -; CGP-NEXT: v_mul_lo_u32 v9, s7, v5 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; CGP-NEXT: v_mul_lo_u32 v8, v6, v9 -; CGP-NEXT: v_mul_lo_u32 v10, v5, v7 -; CGP-NEXT: v_mul_hi_u32 v11, v5, v9 -; CGP-NEXT: v_mul_hi_u32 v9, v6, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v12 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v6, v7 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; CGP-NEXT: v_mul_hi_u32 v10, v5, v7 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v5, v8 -; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc -; CGP-NEXT: v_mul_lo_u32 v7, v1, v8 -; CGP-NEXT: v_mul_lo_u32 v9, v0, v6 -; CGP-NEXT: v_mul_hi_u32 v10, v0, v8 -; CGP-NEXT: v_mul_hi_u32 v8, v1, v8 -; CGP-NEXT: v_mov_b32_e32 v5, 0x12d8fb -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v7, v4 +; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v5, vcc +; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s7, v9, 0 +; CGP-NEXT: v_mov_b32_e32 v4, v6 +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v8, v[4:5] +; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], -1, v9, v[6:7] +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc +; CGP-NEXT: v_xor_b32_e32 v10, v0, v4 +; CGP-NEXT: v_mul_lo_u32 v0, v8, v5 +; CGP-NEXT: v_mul_lo_u32 v7, v9, v6 +; CGP-NEXT: v_xor_b32_e32 v11, v1, v4 +; CGP-NEXT: v_mul_hi_u32 v1, v9, v5 +; CGP-NEXT: v_mul_hi_u32 v5, v8, v5 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v7 ; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v10, v1, v6 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7 -; CGP-NEXT: v_mul_hi_u32 v9, v0, v6 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_mul_hi_u32 v6, v1, v6 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; CGP-NEXT: v_mul_lo_u32 v8, 0, v7 -; CGP-NEXT: v_mul_lo_u32 v9, s6, v6 -; CGP-NEXT: v_mul_hi_u32 v11, s6, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v7 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 -; CGP-NEXT: v_subb_u32_e64 v9, s[4:5], v1, v8, vcc -; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v8 -; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; CGP-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v1, v8, v6 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v7, v0 +; CGP-NEXT: v_mul_hi_u32 v7, v9, v6 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v7 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CGP-NEXT: v_mul_hi_u32 v6, v8, v6 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v1, v0 +; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v5, v1 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v6, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v9, v0 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v8, v1, vcc +; CGP-NEXT: v_mul_lo_u32 v5, v11, v0 +; CGP-NEXT: v_mul_lo_u32 v6, v10, v1 +; CGP-NEXT: v_mul_hi_u32 v7, v10, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v11, v0 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v7, v11, v1 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; CGP-NEXT: v_mul_hi_u32 v6, v10, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v7, v0 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v6 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v0, v5 +; CGP-NEXT: v_mul_hi_u32 v7, v11, v1 +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v8, 0 +; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v7, v5 +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v9, v[1:2] +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v10, v0 +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], 0, v8, v[6:7] +; CGP-NEXT: v_mov_b32_e32 v5, 0x12d8fb +; CGP-NEXT: v_mov_b32_e32 v10, s8 +; CGP-NEXT: v_subb_u32_e64 v1, s[4:5], v11, v6, vcc +; CGP-NEXT: v_sub_i32_e64 v6, s[4:5], v11, v6 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1 +; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v6, vcc +; CGP-NEXT: v_cvt_f32_u32_e32 v6, v5 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 -; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] -; CGP-NEXT: v_mov_b32_e32 v10, s8 -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 -; CGP-NEXT: v_add_i32_e32 v9, vcc, 1, v7 -; CGP-NEXT: v_cndmask_b32_e64 v8, v10, v8, s[4:5] -; CGP-NEXT: v_addc_u32_e32 v10, vcc, 0, v6, vcc +; CGP-NEXT: v_cndmask_b32_e64 v10, v10, v7, s[4:5] +; CGP-NEXT: v_subbrev_u32_e32 v7, vcc, 0, v1, vcc +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v1, 0 +; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v1 +; CGP-NEXT: v_rcp_iflag_f32_e32 v1, v6 +; CGP-NEXT: v_add_i32_e32 v11, vcc, 1, v8 +; CGP-NEXT: v_addc_u32_e32 v12, vcc, 0, v9, vcc +; CGP-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 +; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v1 +; CGP-NEXT: v_trunc_f32_e32 v6, v6 +; CGP-NEXT: v_mac_f32_e32 v1, 0xcf800000, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v13, v1 ; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5 -; CGP-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc -; CGP-NEXT: v_mov_b32_e32 v11, s4 -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CGP-NEXT: v_cndmask_b32_e32 v0, v11, v0, vcc -; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v9 -; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v10, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; CGP-NEXT: v_cndmask_b32_e32 v0, v9, v1, vcc -; CGP-NEXT: v_cvt_f32_u32_e32 v9, v5 -; CGP-NEXT: v_cndmask_b32_e32 v1, v10, v11, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 -; CGP-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc -; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 -; CGP-NEXT: v_mac_f32_e32 v9, 0x4f800000, v6 -; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v9 -; CGP-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc -; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v3 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v7 -; CGP-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 -; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6 -; CGP-NEXT: v_trunc_f32_e32 v8, v8 -; CGP-NEXT: v_mac_f32_e32 v6, 0xcf800000, v8 -; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 -; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 -; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc -; CGP-NEXT: v_mul_lo_u32 v9, -1, v6 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s7, v6 -; CGP-NEXT: v_mul_lo_u32 v11, s7, v6 -; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v12, v6, v9 -; CGP-NEXT: v_mul_hi_u32 v13, v6, v11 -; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_xor_b32_e32 v2, v2, v7 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v13, v8, v9 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_mul_hi_u32 v12, v6, v9 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CGP-NEXT: v_mul_hi_u32 v9, v8, v9 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_mov_b32_e32 v15, s4 +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s7, v13, 0 +; CGP-NEXT: v_cvt_u32_f32_e32 v16, v6 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 +; CGP-NEXT: v_cndmask_b32_e32 v14, v15, v14, vcc +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v16, v[1:2] +; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v11 +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], -1, v13, v[6:7] +; CGP-NEXT: v_addc_u32_e32 v15, vcc, 0, v12, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 +; CGP-NEXT: v_cndmask_b32_e32 v7, v11, v1, vcc +; CGP-NEXT: v_mul_lo_u32 v1, v16, v0 +; CGP-NEXT: v_mul_lo_u32 v11, v13, v6 +; CGP-NEXT: v_mul_hi_u32 v14, v13, v0 +; CGP-NEXT: v_cndmask_b32_e32 v12, v12, v15, vcc +; CGP-NEXT: v_mul_hi_u32 v0, v16, v0 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v11 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 -; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc -; CGP-NEXT: v_mul_lo_u32 v9, -1, v6 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s7, v6 -; CGP-NEXT: v_mul_lo_u32 v11, s7, v6 -; CGP-NEXT: v_xor_b32_e32 v3, v3, v7 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v12, v6, v9 -; CGP-NEXT: v_mul_hi_u32 v13, v6, v11 -; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v13, v8, v9 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_mul_hi_u32 v12, v6, v9 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CGP-NEXT: v_mul_hi_u32 v9, v8, v9 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v14 +; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v14, v16, v6 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v11, v1 +; CGP-NEXT: v_mul_hi_u32 v11, v13, v6 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v14, v0 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v11 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 -; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc -; CGP-NEXT: v_mul_lo_u32 v9, v3, v6 -; CGP-NEXT: v_mul_lo_u32 v10, v2, v8 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc -; CGP-NEXT: v_mul_hi_u32 v4, v2, v6 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v9, v3, v8 -; CGP-NEXT: v_mul_hi_u32 v6, v3, v6 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 -; CGP-NEXT: v_mul_hi_u32 v10, v2, v8 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_mul_hi_u32 v8, v3, v8 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v14, v11 +; CGP-NEXT: v_mul_hi_u32 v6, v16, v6 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v11, v1 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v6, v1 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v0 +; CGP-NEXT: v_addc_u32_e32 v13, vcc, v16, v1, vcc +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s7, v11, 0 +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 +; CGP-NEXT: v_cndmask_b32_e32 v8, v8, v7, vcc +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v13, v[1:2] +; CGP-NEXT: v_xor_b32_e32 v1, v8, v4 +; CGP-NEXT: v_ashrrev_i32_e32 v8, 31, v3 +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], -1, v11, v[6:7] +; CGP-NEXT: v_cndmask_b32_e32 v9, v9, v12, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v8 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v8, vcc +; CGP-NEXT: v_xor_b32_e32 v10, v2, v8 +; CGP-NEXT: v_mul_lo_u32 v2, v13, v0 +; CGP-NEXT: v_mul_lo_u32 v7, v11, v6 +; CGP-NEXT: v_xor_b32_e32 v12, v3, v8 +; CGP-NEXT: v_mul_hi_u32 v3, v11, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v13, v0 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v7 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v3, v13, v6 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v7, v2 +; CGP-NEXT: v_mul_hi_u32 v7, v11, v6 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v3, v0 +; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v7 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v7 +; CGP-NEXT: v_mul_hi_u32 v6, v13, v6 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v6, v2 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v11, v0 +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v13, v2, vcc +; CGP-NEXT: v_mul_lo_u32 v3, v12, v0 +; CGP-NEXT: v_mul_lo_u32 v6, v10, v2 +; CGP-NEXT: v_mul_hi_u32 v7, v10, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v12, v0 +; CGP-NEXT: v_xor_b32_e32 v9, v9, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v6 ; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CGP-NEXT: v_mul_lo_u32 v8, 0, v4 -; CGP-NEXT: v_mul_lo_u32 v9, s6, v6 -; CGP-NEXT: v_mul_hi_u32 v11, s6, v4 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v4 -; CGP-NEXT: s_bfe_i32 s6, -1, 0x10000 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 -; CGP-NEXT: v_subb_u32_e64 v9, s[4:5], v3, v8, vcc -; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v8 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v7 +; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v7, v12, v2 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v6, v3 +; CGP-NEXT: v_mul_hi_u32 v6, v10, v2 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v7, v0 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v6 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v0, v3 +; CGP-NEXT: v_mul_hi_u32 v7, v12, v2 +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], s6, v11, 0 +; CGP-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v6, v0 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v7, v0 +; CGP-NEXT: v_mov_b32_e32 v0, v3 +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[0:1] +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v1, v4 +; CGP-NEXT: v_subb_u32_e32 v1, vcc, v9, v4, vcc +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], 0, v11, v[6:7] +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v10, v2 +; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v12, v3, vcc +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v12, v3 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: s_bfe_i32 s6, -1, 0x10000 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v5 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] +; CGP-NEXT: v_mov_b32_e32 v7, s6 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] -; CGP-NEXT: v_mov_b32_e32 v10, s6 -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 -; CGP-NEXT: v_add_i32_e32 v9, vcc, 1, v4 -; CGP-NEXT: v_cndmask_b32_e64 v8, v10, v8, s[4:5] -; CGP-NEXT: v_addc_u32_e32 v10, vcc, 0, v6, vcc +; CGP-NEXT: v_cndmask_b32_e64 v4, v7, v6, s[4:5] +; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v11 +; CGP-NEXT: v_addc_u32_e32 v7, vcc, 0, v13, vcc ; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5 ; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc ; CGP-NEXT: v_mov_b32_e32 v5, s4 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; CGP-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc -; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v9 -; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v10, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v6 +; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v7, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; CGP-NEXT: v_cndmask_b32_e32 v2, v9, v3, vcc -; CGP-NEXT: v_cndmask_b32_e32 v3, v10, v5, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 -; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc -; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc -; CGP-NEXT: v_xor_b32_e32 v2, v2, v7 -; CGP-NEXT: v_xor_b32_e32 v3, v3, v7 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v7 -; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc +; CGP-NEXT: v_cndmask_b32_e32 v2, v6, v3, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v7, v5, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; CGP-NEXT: v_cndmask_b32_e32 v2, v11, v2, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v13, v3, vcc +; CGP-NEXT: v_xor_b32_e32 v2, v2, v8 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v8 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v8 +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v8, vcc ; CGP-NEXT: s_setpc_b64 s[30:31] %result = sdiv <2 x i64> %num, ret <2 x i64> %result @@ -2465,139 +2335,130 @@ ; CHECK-NEXT: v_ashrrev_i32_e32 v0, 31, v6 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v5, v0 ; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v6, v0, vcc -; CHECK-NEXT: v_xor_b32_e32 v1, v1, v0 -; CHECK-NEXT: v_xor_b32_e32 v2, v2, v0 -; CHECK-NEXT: v_cvt_f32_u32_e32 v5, v1 -; CHECK-NEXT: v_cvt_f32_u32_e32 v6, v2 -; CHECK-NEXT: v_ashrrev_i32_e32 v7, 31, v4 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v7 -; CHECK-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v5, v5 -; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v7, vcc -; CHECK-NEXT: v_sub_i32_e32 v8, vcc, 0, v1 -; CHECK-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 -; CHECK-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5 -; CHECK-NEXT: v_trunc_f32_e32 v6, v6 -; CHECK-NEXT: v_mac_f32_e32 v5, 0xcf800000, v6 -; CHECK-NEXT: v_cvt_u32_f32_e32 v5, v5 -; CHECK-NEXT: v_cvt_u32_f32_e32 v6, v6 -; CHECK-NEXT: v_subb_u32_e32 v9, vcc, 0, v2, vcc -; CHECK-NEXT: v_mul_lo_u32 v10, v9, v5 -; CHECK-NEXT: v_mul_lo_u32 v11, v8, v6 -; CHECK-NEXT: v_mul_hi_u32 v13, v8, v5 -; CHECK-NEXT: v_mul_lo_u32 v12, v8, v5 -; CHECK-NEXT: v_xor_b32_e32 v3, v3, v7 -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v13 -; CHECK-NEXT: v_mul_lo_u32 v11, v6, v12 -; CHECK-NEXT: v_mul_lo_u32 v13, v5, v10 -; CHECK-NEXT: v_mul_hi_u32 v14, v5, v12 -; CHECK-NEXT: v_mul_hi_u32 v12, v6, v12 -; CHECK-NEXT: v_xor_b32_e32 v4, v4, v7 -; CHECK-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; CHECK-NEXT: v_xor_b32_e32 v7, v1, v0 +; CHECK-NEXT: v_xor_b32_e32 v8, v2, v0 +; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v7 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, v8 +; CHECK-NEXT: v_sub_i32_e32 v10, vcc, 0, v7 +; CHECK-NEXT: v_subb_u32_e32 v11, vcc, 0, v8, vcc +; CHECK-NEXT: v_mac_f32_e32 v1, 0x4f800000, v2 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 +; CHECK-NEXT: v_mul_f32_e32 v2, 0x2f800000, v1 +; CHECK-NEXT: v_trunc_f32_e32 v5, v2 +; CHECK-NEXT: v_mac_f32_e32 v1, 0xcf800000, v5 +; CHECK-NEXT: v_cvt_u32_f32_e32 v9, v1 +; CHECK-NEXT: v_cvt_u32_f32_e32 v12, v5 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v10, v9, 0 +; CHECK-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v10, v12, v[2:3] +; CHECK-NEXT: v_mul_lo_u32 v2, v12, v1 +; CHECK-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v11, v9, v[5:6] +; CHECK-NEXT: v_mul_hi_u32 v6, v9, v1 +; CHECK-NEXT: v_mul_hi_u32 v1, v12, v1 +; CHECK-NEXT: v_mul_lo_u32 v13, v9, v5 +; CHECK-NEXT: v_mul_lo_u32 v14, v12, v5 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v13 ; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v11, vcc, v11, v14 -; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v14, v6, v10 -; CHECK-NEXT: v_add_i32_e32 v11, vcc, v13, v11 -; CHECK-NEXT: v_mul_hi_u32 v13, v5, v10 -; CHECK-NEXT: v_add_i32_e32 v12, vcc, v14, v12 -; CHECK-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v12, vcc, v12, v13 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 +; CHECK-NEXT: v_mul_hi_u32 v6, v9, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v13, v2 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v14, v1 ; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; CHECK-NEXT: v_mul_hi_u32 v10, v6, v10 -; CHECK-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v11 -; CHECK-NEXT: v_addc_u32_e32 v6, vcc, v6, v10, vcc -; CHECK-NEXT: v_mul_lo_u32 v9, v9, v5 -; CHECK-NEXT: v_mul_lo_u32 v10, v8, v6 -; CHECK-NEXT: v_mul_lo_u32 v11, v8, v5 -; CHECK-NEXT: v_mul_hi_u32 v8, v8, v5 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CHECK-NEXT: v_mul_lo_u32 v9, v6, v11 -; CHECK-NEXT: v_mul_lo_u32 v10, v5, v8 -; CHECK-NEXT: v_mul_hi_u32 v12, v5, v11 -; CHECK-NEXT: v_mul_hi_u32 v11, v6, v11 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v12, v6, v8 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CHECK-NEXT: v_mul_hi_u32 v10, v5, v8 -; CHECK-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CHECK-NEXT: v_mul_hi_u32 v8, v6, v8 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; CHECK-NEXT: v_addc_u32_e32 v6, vcc, v6, v8, vcc -; CHECK-NEXT: v_mul_lo_u32 v8, v4, v5 -; CHECK-NEXT: v_mul_lo_u32 v9, v3, v6 -; CHECK-NEXT: v_mul_hi_u32 v10, v3, v5 -; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v10, v4, v6 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CHECK-NEXT: v_mul_hi_u32 v9, v3, v6 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v10, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CHECK-NEXT: v_mul_hi_u32 v6, v4, v6 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; CHECK-NEXT: v_mul_lo_u32 v8, v2, v5 -; CHECK-NEXT: v_mul_lo_u32 v9, v1, v6 -; CHECK-NEXT: v_mul_hi_u32 v11, v1, v5 -; CHECK-NEXT: v_mul_lo_u32 v10, v1, v5 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v3, v10 -; CHECK-NEXT: v_subb_u32_e64 v9, s[4:5], v4, v8, vcc -; CHECK-NEXT: v_sub_i32_e64 v4, s[4:5], v4, v8 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v2 -; CHECK-NEXT: v_subb_u32_e32 v4, vcc, v4, v2, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v1 -; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v3, v1 -; CHECK-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v9, v2 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, 1, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v8, v8, v10, s[4:5] -; CHECK-NEXT: v_addc_u32_e32 v10, vcc, 0, v6, vcc -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v4, v2 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v13, v6 +; CHECK-NEXT: v_mul_hi_u32 v5, v12, v5 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v6, v2 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v1 +; CHECK-NEXT: v_addc_u32_e32 v12, vcc, v12, v2, vcc +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v10, v9, 0 +; CHECK-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v10, v12, v[2:3] +; CHECK-NEXT: v_ashrrev_i32_e32 v10, 31, v4 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v10 +; CHECK-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v11, v9, v[5:6] +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v4, v10, vcc +; CHECK-NEXT: v_xor_b32_e32 v4, v2, v10 +; CHECK-NEXT: v_mul_lo_u32 v2, v12, v1 +; CHECK-NEXT: v_mul_lo_u32 v6, v9, v5 +; CHECK-NEXT: v_xor_b32_e32 v11, v3, v10 +; CHECK-NEXT: v_mul_hi_u32 v3, v9, v1 +; CHECK-NEXT: v_mul_hi_u32 v1, v12, v1 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: v_mul_lo_u32 v3, v12, v5 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v6, v2 +; CHECK-NEXT: v_mul_hi_u32 v6, v9, v5 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v3, v1 +; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v6 +; CHECK-NEXT: v_mul_hi_u32 v5, v12, v5 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v9, v1 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v12, v2, vcc +; CHECK-NEXT: v_mul_lo_u32 v3, v11, v1 +; CHECK-NEXT: v_mul_lo_u32 v5, v4, v2 +; CHECK-NEXT: v_mul_hi_u32 v6, v4, v1 +; CHECK-NEXT: v_mul_hi_u32 v1, v11, v1 +; CHECK-NEXT: v_mul_hi_u32 v9, v11, v2 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; CHECK-NEXT: v_mul_lo_u32 v6, v11, v2 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v5, v3 +; CHECK-NEXT: v_mul_hi_u32 v5, v4, v2 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v6, v1 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v1, v3 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v7, v6, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v5, v3 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v9, v3 +; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v5, v[2:3] +; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v4, v1 +; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v8, v6, v[2:3] +; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v11, v2, vcc +; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], v11, v2 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v8 +; CHECK-NEXT: v_subb_u32_e32 v2, vcc, v2, v8, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v7 +; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v1, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v8 +; CHECK-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v2, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v3, v4, v9, s[4:5] +; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v6 +; CHECK-NEXT: v_addc_u32_e32 v9, vcc, 0, v5, vcc +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v2, v8 ; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v3, v1 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v7 ; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v4, v2 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v2, v8 ; CHECK-NEXT: v_cndmask_b32_e32 v1, v11, v1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v9 -; CHECK-NEXT: v_addc_u32_e32 v3, vcc, 0, v10, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v4 +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, 0, v9, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v9, v2, vcc -; CHECK-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc -; CHECK-NEXT: v_xor_b32_e32 v3, v7, v0 -; CHECK-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v2, v9, v7, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc +; CHECK-NEXT: v_xor_b32_e32 v3, v10, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc ; CHECK-NEXT: v_xor_b32_e32 v0, v1, v3 ; CHECK-NEXT: v_xor_b32_e32 v1, v2, v3 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 @@ -2643,283 +2504,267 @@ ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: s_mov_b64 s[4:5], 0x1000 ; GISEL-NEXT: v_lshl_b64 v[7:8], s[4:5], v4 -; GISEL-NEXT: v_ashrrev_i32_e32 v10, 31, v1 ; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v8 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v4 -; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v8, v4, vcc -; GISEL-NEXT: v_xor_b32_e32 v5, v5, v4 -; GISEL-NEXT: v_xor_b32_e32 v7, v7, v4 -; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v5 -; GISEL-NEXT: v_cvt_f32_u32_e32 v9, v7 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v10 -; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v10, vcc +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v4, vcc +; GISEL-NEXT: v_xor_b32_e32 v7, v5, v4 +; GISEL-NEXT: v_xor_b32_e32 v5, v8, v4 +; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v7 +; GISEL-NEXT: v_cvt_f32_u32_e32 v9, v5 +; GISEL-NEXT: v_sub_i32_e32 v12, vcc, 0, v7 +; GISEL-NEXT: v_subb_u32_e32 v13, vcc, 0, v5, vcc ; GISEL-NEXT: v_mac_f32_e32 v8, 0x4f800000, v9 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v8, v8 -; GISEL-NEXT: v_sub_i32_e32 v11, vcc, 0, v5 -; GISEL-NEXT: v_subb_u32_e32 v12, vcc, 0, v7, vcc ; GISEL-NEXT: v_mul_f32_e32 v8, 0x5f7ffffc, v8 ; GISEL-NEXT: v_mul_f32_e32 v9, 0x2f800000, v8 -; GISEL-NEXT: v_trunc_f32_e32 v9, v9 -; GISEL-NEXT: v_mac_f32_e32 v8, 0xcf800000, v9 -; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8 -; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v9 -; GISEL-NEXT: v_xor_b32_e32 v17, v0, v10 -; GISEL-NEXT: v_mul_lo_u32 v13, v12, v8 -; GISEL-NEXT: v_mul_lo_u32 v14, v11, v9 +; GISEL-NEXT: v_trunc_f32_e32 v10, v9 +; GISEL-NEXT: v_mac_f32_e32 v8, 0xcf800000, v10 +; GISEL-NEXT: v_cvt_u32_f32_e32 v11, v8 +; GISEL-NEXT: v_cvt_u32_f32_e32 v14, v10 +; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[6:7], v12, v11, 0 +; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[6:7], v12, v14, v[9:10] ; GISEL-NEXT: v_mul_hi_u32 v16, v11, v8 -; GISEL-NEXT: v_mul_lo_u32 v15, v11, v8 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v16 -; GISEL-NEXT: v_mul_lo_u32 v14, v9, v15 -; GISEL-NEXT: v_mul_lo_u32 v16, v8, v13 -; GISEL-NEXT: v_mul_hi_u32 v0, v8, v15 -; GISEL-NEXT: v_mul_hi_u32 v15, v9, v15 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v14, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v14, v9, v13 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v16, v0 -; GISEL-NEXT: v_mul_hi_u32 v16, v8, v13 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 +; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[6:7], v13, v11, v[9:10] +; GISEL-NEXT: v_mul_lo_u32 v10, v14, v8 +; GISEL-NEXT: v_mul_hi_u32 v8, v14, v8 +; GISEL-NEXT: v_mul_lo_u32 v15, v11, v9 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v15 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v16, v14, v9 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v15, v10 +; GISEL-NEXT: v_mul_hi_u32 v15, v11, v9 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v16, v8 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 -; GISEL-NEXT: v_mul_hi_u32 v13, v9, v13 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v14, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0 -; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v9, v13, vcc -; GISEL-NEXT: v_mul_lo_u32 v9, v12, v0 -; GISEL-NEXT: v_mul_lo_u32 v12, v11, v8 -; GISEL-NEXT: v_mul_lo_u32 v13, v11, v0 -; GISEL-NEXT: v_mul_hi_u32 v11, v11, v0 -; GISEL-NEXT: v_xor_b32_e32 v14, v1, v10 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; GISEL-NEXT: v_mul_lo_u32 v11, v8, v13 -; GISEL-NEXT: v_mul_lo_u32 v12, v0, v9 -; GISEL-NEXT: v_mul_hi_u32 v1, v0, v13 -; GISEL-NEXT: v_mul_hi_u32 v13, v8, v13 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v11, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v8, v9 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v12, v1 -; GISEL-NEXT: v_mul_hi_u32 v12, v0, v9 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; GISEL-NEXT: v_mul_hi_u32 v9, v8, v9 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v11, v1 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15 +; GISEL-NEXT: v_mul_hi_u32 v9, v14, v9 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v15, v10 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v11, v8 +; GISEL-NEXT: v_addc_u32_e32 v14, vcc, v14, v9, vcc +; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[6:7], v12, v15, 0 +; GISEL-NEXT: v_mov_b32_e32 v8, v10 +; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[6:7], v12, v14, v[8:9] +; GISEL-NEXT: v_ashrrev_i32_e32 v8, 31, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v8 +; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[6:7], v13, v15, v[10:11] +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v8, vcc +; GISEL-NEXT: v_xor_b32_e32 v13, v0, v8 +; GISEL-NEXT: v_mul_lo_u32 v0, v14, v9 +; GISEL-NEXT: v_mul_lo_u32 v11, v15, v10 +; GISEL-NEXT: v_xor_b32_e32 v16, v1, v8 +; GISEL-NEXT: v_mul_hi_u32 v1, v15, v9 +; GISEL-NEXT: v_mul_hi_u32 v9, v14, v9 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v1, v14, v10 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0 +; GISEL-NEXT: v_mul_hi_u32 v11, v15, v10 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v11 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v0, v1 -; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc -; GISEL-NEXT: v_mul_lo_u32 v9, v14, v11 -; GISEL-NEXT: v_mul_lo_u32 v12, v17, v8 -; GISEL-NEXT: v_lshl_b64 v[0:1], s[4:5], v6 -; GISEL-NEXT: v_mul_hi_u32 v6, v17, v11 -; GISEL-NEXT: v_mul_hi_u32 v11, v14, v11 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v9, v14, v8 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v12, v6 -; GISEL-NEXT: v_mul_hi_u32 v12, v17, v8 +; GISEL-NEXT: v_mul_hi_u32 v10, v14, v10 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v9, v1 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v15, v0 +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v14, v1, vcc +; GISEL-NEXT: v_mul_lo_u32 v9, v16, v0 +; GISEL-NEXT: v_mul_lo_u32 v10, v13, v1 +; GISEL-NEXT: v_mul_hi_u32 v11, v13, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v16, v0 +; GISEL-NEXT: v_xor_b32_e32 v8, v8, v4 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v11, v16, v1 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; GISEL-NEXT: v_mul_hi_u32 v10, v13, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; GISEL-NEXT: v_mul_hi_u32 v8, v14, v8 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v6 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v0, v9 +; GISEL-NEXT: v_mul_hi_u32 v11, v16, v1 +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[6:7], v7, v14, 0 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; GISEL-NEXT: v_mul_lo_u32 v9, v7, v6 -; GISEL-NEXT: v_mul_lo_u32 v11, v5, v8 -; GISEL-NEXT: v_mul_hi_u32 v13, v5, v6 -; GISEL-NEXT: v_mul_lo_u32 v12, v5, v6 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13 -; GISEL-NEXT: v_sub_i32_e32 v11, vcc, v17, v12 -; GISEL-NEXT: v_subb_u32_e64 v12, s[4:5], v14, v9, vcc -; GISEL-NEXT: v_sub_i32_e64 v9, s[4:5], v14, v9 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v12, v7 -; GISEL-NEXT: v_subb_u32_e32 v9, vcc, v9, v7, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v11, v5 -; GISEL-NEXT: v_sub_i32_e32 v11, vcc, v11, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v12, v7 -; GISEL-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v9, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v12, v13, v14, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v13, vcc, 1, v6 -; GISEL-NEXT: v_addc_u32_e32 v14, vcc, 0, v8, vcc -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v9, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, -1, vcc -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v11, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v7 -; GISEL-NEXT: v_cndmask_b32_e32 v5, v15, v5, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v13 -; GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v14, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 -; GISEL-NEXT: v_cndmask_b32_e32 v5, v13, v7, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v7, v14, v9, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 -; GISEL-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v6, v8, v7, vcc -; GISEL-NEXT: v_xor_b32_e32 v7, v10, v4 -; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v1 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc -; GISEL-NEXT: v_xor_b32_e32 v8, v0, v4 -; GISEL-NEXT: v_xor_b32_e32 v9, v1, v4 -; GISEL-NEXT: v_cvt_f32_u32_e32 v0, v8 -; GISEL-NEXT: v_cvt_f32_u32_e32 v1, v9 -; GISEL-NEXT: v_ashrrev_i32_e32 v10, 31, v3 -; GISEL-NEXT: v_xor_b32_e32 v5, v5, v7 -; GISEL-NEXT: v_xor_b32_e32 v6, v6, v7 -; GISEL-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v2, v10 -; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v3, v10, vcc -; GISEL-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 -; GISEL-NEXT: v_mul_f32_e32 v3, 0x2f800000, v0 -; GISEL-NEXT: v_trunc_f32_e32 v3, v3 -; GISEL-NEXT: v_mac_f32_e32 v0, 0xcf800000, v3 -; GISEL-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GISEL-NEXT: v_cvt_u32_f32_e32 v3, v3 -; GISEL-NEXT: v_sub_i32_e32 v11, vcc, 0, v8 -; GISEL-NEXT: v_subb_u32_e32 v12, vcc, 0, v9, vcc -; GISEL-NEXT: v_mul_lo_u32 v13, v12, v0 -; GISEL-NEXT: v_mul_lo_u32 v14, v11, v3 -; GISEL-NEXT: v_mul_hi_u32 v16, v11, v0 -; GISEL-NEXT: v_mul_lo_u32 v15, v11, v0 -; GISEL-NEXT: v_xor_b32_e32 v17, v1, v10 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v16 -; GISEL-NEXT: v_mul_lo_u32 v14, v3, v15 -; GISEL-NEXT: v_mul_lo_u32 v16, v0, v13 -; GISEL-NEXT: v_mul_hi_u32 v1, v0, v15 -; GISEL-NEXT: v_mul_hi_u32 v15, v3, v15 -; GISEL-NEXT: v_xor_b32_e32 v2, v2, v10 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v14, v1 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v11, v9 +; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[6:7], v7, v15, v[1:2] +; GISEL-NEXT: v_lshl_b64 v[11:12], s[4:5], v6 +; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v12 +; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v5, v14, v[9:10] +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v11, v6 +; GISEL-NEXT: v_addc_u32_e32 v10, vcc, v12, v6, vcc +; GISEL-NEXT: v_xor_b32_e32 v11, v1, v6 +; GISEL-NEXT: v_xor_b32_e32 v12, v10, v6 +; GISEL-NEXT: v_cvt_f32_u32_e32 v1, v11 +; GISEL-NEXT: v_cvt_f32_u32_e32 v10, v12 +; GISEL-NEXT: v_sub_i32_e32 v13, vcc, v13, v0 +; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v16, v9 +; GISEL-NEXT: v_mac_f32_e32 v1, 0x4f800000, v10 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; GISEL-NEXT: v_subb_u32_e64 v17, s[4:5], v16, v9, vcc +; GISEL-NEXT: v_subb_u32_e32 v9, vcc, v0, v5, vcc +; GISEL-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v1 +; GISEL-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 +; GISEL-NEXT: v_trunc_f32_e32 v10, v1 +; GISEL-NEXT: v_mac_f32_e32 v0, 0xcf800000, v10 +; GISEL-NEXT: v_cvt_u32_f32_e32 v16, v0 +; GISEL-NEXT: v_sub_i32_e32 v18, vcc, 0, v11 +; GISEL-NEXT: v_cvt_u32_f32_e32 v21, v10 +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[6:7], v18, v16, 0 +; GISEL-NEXT: v_subb_u32_e32 v19, vcc, 0, v12, vcc +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v13, v7 +; GISEL-NEXT: v_sub_i32_e32 v13, vcc, v13, v7 +; GISEL-NEXT: v_subbrev_u32_e32 v20, vcc, 0, v9, vcc +; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[6:7], v18, v21, v[1:2] +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v13, v7 +; GISEL-NEXT: v_mul_lo_u32 v1, v21, v0 +; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[6:7], v19, v16, v[9:10] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v20, v5 +; GISEL-NEXT: v_mul_lo_u32 v7, v16, v9 +; GISEL-NEXT: v_add_i32_e64 v1, s[6:7], v1, v7 +; GISEL-NEXT: v_mul_hi_u32 v7, v16, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[6:7] +; GISEL-NEXT: v_mul_hi_u32 v0, v21, v0 +; GISEL-NEXT: v_add_i32_e64 v1, s[6:7], v1, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[8:9] +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v20, v5 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v17, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[8:9] +; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v17, v5 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v5, v13, v20, s[4:5] +; GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v15, vcc +; GISEL-NEXT: v_add_i32_e32 v17, vcc, 1, v7 +; GISEL-NEXT: v_addc_u32_e32 v20, vcc, 0, v13, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e32 v7, v7, v17, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v13, v13, v20, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1 +; GISEL-NEXT: v_mul_lo_u32 v10, v21, v9 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0 +; GISEL-NEXT: v_mul_hi_u32 v10, v16, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; GISEL-NEXT: v_mul_hi_u32 v9, v21, v9 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v17, v10 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v14, v3, v13 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v16, v1 -; GISEL-NEXT: v_mul_hi_u32 v16, v0, v13 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 -; GISEL-NEXT: v_mul_hi_u32 v13, v3, v13 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v14, v1 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v9, v1 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v16, v0 +; GISEL-NEXT: v_addc_u32_e32 v10, vcc, v21, v1, vcc +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v18, v9, 0 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 +; GISEL-NEXT: v_cndmask_b32_e32 v7, v14, v7, vcc +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v18, v10, v[1:2] +; GISEL-NEXT: v_xor_b32_e32 v1, v7, v8 +; GISEL-NEXT: v_ashrrev_i32_e32 v7, 31, v3 +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v19, v9, v[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v13, v15, v13, vcc +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v7 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc +; GISEL-NEXT: v_xor_b32_e32 v5, v2, v7 +; GISEL-NEXT: v_mul_lo_u32 v2, v10, v0 +; GISEL-NEXT: v_mul_lo_u32 v14, v9, v4 +; GISEL-NEXT: v_xor_b32_e32 v15, v3, v7 +; GISEL-NEXT: v_mul_hi_u32 v3, v9, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v10, v0 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v14 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v3, v13, vcc -; GISEL-NEXT: v_mul_lo_u32 v3, v12, v0 -; GISEL-NEXT: v_mul_lo_u32 v12, v11, v1 -; GISEL-NEXT: v_mul_lo_u32 v13, v11, v0 -; GISEL-NEXT: v_mul_hi_u32 v11, v11, v0 -; GISEL-NEXT: v_xor_b32_e32 v4, v10, v4 -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v12 -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v11 -; GISEL-NEXT: v_mul_lo_u32 v11, v1, v13 -; GISEL-NEXT: v_mul_lo_u32 v12, v0, v3 -; GISEL-NEXT: v_mul_hi_u32 v14, v0, v13 -; GISEL-NEXT: v_mul_hi_u32 v13, v1, v13 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v14, v1, v3 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; GISEL-NEXT: v_mul_hi_u32 v12, v0, v3 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v10, v4 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v14, v2 +; GISEL-NEXT: v_mul_hi_u32 v14, v9, v4 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v14 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; GISEL-NEXT: v_mul_hi_u32 v3, v1, v3 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v12 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v0, v11 -; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v1, v3, vcc -; GISEL-NEXT: v_mul_lo_u32 v12, v2, v11 -; GISEL-NEXT: v_mul_lo_u32 v13, v17, v3 -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v5, v7 -; GISEL-NEXT: v_mul_hi_u32 v5, v17, v11 -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v6, v7, vcc -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v12, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v6, v2, v3 -; GISEL-NEXT: v_mul_hi_u32 v11, v2, v11 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; GISEL-NEXT: v_mul_hi_u32 v7, v17, v3 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v11, v7 -; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6 -; GISEL-NEXT: v_mul_lo_u32 v6, v9, v5 -; GISEL-NEXT: v_mul_lo_u32 v7, v8, v3 -; GISEL-NEXT: v_mul_hi_u32 v12, v8, v5 -; GISEL-NEXT: v_mul_lo_u32 v11, v8, v5 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v12 -; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v17, v11 -; GISEL-NEXT: v_subb_u32_e64 v11, s[4:5], v2, v6, vcc -; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v6 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v11, v9 -; GISEL-NEXT: v_subb_u32_e32 v2, vcc, v2, v9, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v8 -; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v2, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v11, v9 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, 1, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v6, v6, v12, s[4:5] -; GISEL-NEXT: v_addc_u32_e32 v12, vcc, 0, v3, vcc -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v7, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v9 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v13, v7, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v11 -; GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v12, vcc +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v14 +; GISEL-NEXT: v_mul_hi_u32 v4, v10, v4 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v4, v2 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0 +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v10, v2, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v15, v0 +; GISEL-NEXT: v_mul_lo_u32 v4, v5, v2 +; GISEL-NEXT: v_mul_hi_u32 v10, v5, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v15, v0 +; GISEL-NEXT: v_xor_b32_e32 v9, v13, v8 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v10, v15, v2 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v4, v3 +; GISEL-NEXT: v_mul_hi_u32 v4, v5, v2 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v10, v4 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v0, v3 +; GISEL-NEXT: v_mul_hi_u32 v13, v15, v2 +; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v11, v10, 0 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v0 +; GISEL-NEXT: v_mov_b32_e32 v0, v3 +; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v11, v13, v[0:1] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v8 +; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v12, v10, v[3:4] +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v9, v8, vcc +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v5, v2 +; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v15, v3, vcc +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v15, v3 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v12 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v12, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v11 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v4, v12 +; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v4, v5, v8, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v10 +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v13, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v3, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v12 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc +; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5 +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v8, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v11, v7, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v7, v12, v8, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v2, v5, v3, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v8, v9, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc +; GISEL-NEXT: v_xor_b32_e32 v4, v7, v6 +; GISEL-NEXT: v_cndmask_b32_e32 v3, v13, v3, vcc ; GISEL-NEXT: v_xor_b32_e32 v2, v2, v4 ; GISEL-NEXT: v_xor_b32_e32 v3, v3, v4 ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v4 @@ -2946,139 +2791,130 @@ ; CGP-NEXT: v_ashrrev_i32_e32 v0, 31, v3 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v2, v0 ; CGP-NEXT: v_addc_u32_e32 v2, vcc, v3, v0, vcc -; CGP-NEXT: v_xor_b32_e32 v1, v1, v0 -; CGP-NEXT: v_xor_b32_e32 v2, v2, v0 -; CGP-NEXT: v_cvt_f32_u32_e32 v3, v1 -; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2 -; CGP-NEXT: v_ashrrev_i32_e32 v10, 31, v9 -; CGP-NEXT: v_mac_f32_e32 v3, 0x4f800000, v4 -; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v10 -; CGP-NEXT: v_addc_u32_e32 v8, vcc, v9, v10, vcc -; CGP-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3 -; CGP-NEXT: v_mul_f32_e32 v9, 0x2f800000, v3 -; CGP-NEXT: v_trunc_f32_e32 v9, v9 -; CGP-NEXT: v_mac_f32_e32 v3, 0xcf800000, v9 -; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3 -; CGP-NEXT: v_cvt_u32_f32_e32 v9, v9 -; CGP-NEXT: v_sub_i32_e32 v11, vcc, 0, v1 -; CGP-NEXT: v_subb_u32_e32 v12, vcc, 0, v2, vcc -; CGP-NEXT: v_mul_lo_u32 v13, v12, v3 -; CGP-NEXT: v_mul_lo_u32 v14, v11, v9 -; CGP-NEXT: v_mul_hi_u32 v16, v11, v3 -; CGP-NEXT: v_mul_lo_u32 v15, v11, v3 -; CGP-NEXT: v_xor_b32_e32 v4, v4, v10 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v16 -; CGP-NEXT: v_mul_lo_u32 v14, v9, v15 -; CGP-NEXT: v_mul_lo_u32 v16, v3, v13 -; CGP-NEXT: v_mul_hi_u32 v17, v3, v15 -; CGP-NEXT: v_mul_hi_u32 v15, v9, v15 -; CGP-NEXT: v_xor_b32_e32 v8, v8, v10 -; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v16 +; CGP-NEXT: v_xor_b32_e32 v4, v1, v0 +; CGP-NEXT: v_xor_b32_e32 v10, v2, v0 +; CGP-NEXT: v_cvt_f32_u32_e32 v1, v4 +; CGP-NEXT: v_cvt_f32_u32_e32 v2, v10 +; CGP-NEXT: v_sub_i32_e32 v12, vcc, 0, v4 +; CGP-NEXT: v_subb_u32_e32 v13, vcc, 0, v10, vcc +; CGP-NEXT: v_mac_f32_e32 v1, 0x4f800000, v2 +; CGP-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; CGP-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 +; CGP-NEXT: v_mul_f32_e32 v2, 0x2f800000, v1 +; CGP-NEXT: v_trunc_f32_e32 v3, v2 +; CGP-NEXT: v_mac_f32_e32 v1, 0xcf800000, v3 +; CGP-NEXT: v_cvt_u32_f32_e32 v11, v1 +; CGP-NEXT: v_cvt_u32_f32_e32 v14, v3 +; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v12, v11, 0 +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v12, v14, v[2:3] +; CGP-NEXT: v_mul_hi_u32 v15, v11, v1 +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v13, v11, v[2:3] +; CGP-NEXT: v_mul_lo_u32 v3, v14, v1 +; CGP-NEXT: v_mul_hi_u32 v1, v14, v1 +; CGP-NEXT: v_mul_lo_u32 v16, v11, v2 +; CGP-NEXT: v_mul_lo_u32 v17, v14, v2 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v16 ; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v17 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v17, v9, v13 -; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v14 -; CGP-NEXT: v_mul_hi_u32 v16, v3, v13 -; CGP-NEXT: v_add_i32_e32 v15, vcc, v17, v15 -; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v16 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v15 +; CGP-NEXT: v_mul_hi_u32 v15, v11, v2 +; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, v16, v3 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v17, v1 ; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v16, vcc, v17, v16 -; CGP-NEXT: v_mul_hi_u32 v13, v9, v13 -; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v15 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v14 -; CGP-NEXT: v_addc_u32_e32 v9, vcc, v9, v13, vcc -; CGP-NEXT: v_mul_lo_u32 v12, v12, v3 -; CGP-NEXT: v_mul_lo_u32 v13, v11, v9 -; CGP-NEXT: v_mul_lo_u32 v14, v11, v3 -; CGP-NEXT: v_mul_hi_u32 v11, v11, v3 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_lo_u32 v12, v9, v14 -; CGP-NEXT: v_mul_lo_u32 v13, v3, v11 -; CGP-NEXT: v_mul_hi_u32 v15, v3, v14 -; CGP-NEXT: v_mul_hi_u32 v14, v9, v14 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13 +; CGP-NEXT: v_mul_hi_u32 v2, v14, v2 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v3 +; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, v15, v3 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v1 +; CGP-NEXT: v_addc_u32_e32 v14, vcc, v14, v2, vcc +; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v12, v11, 0 +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v12, v14, v[2:3] +; CGP-NEXT: v_ashrrev_i32_e32 v12, 31, v9 +; CGP-NEXT: v_mul_hi_u32 v15, v11, v1 +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v13, v11, v[2:3] +; CGP-NEXT: v_add_i32_e32 v3, vcc, v8, v12 +; CGP-NEXT: v_addc_u32_e32 v8, vcc, v9, v12, vcc +; CGP-NEXT: v_xor_b32_e32 v9, v3, v12 +; CGP-NEXT: v_mul_lo_u32 v3, v14, v1 +; CGP-NEXT: v_mul_lo_u32 v13, v11, v2 +; CGP-NEXT: v_mul_hi_u32 v1, v14, v1 +; CGP-NEXT: v_xor_b32_e32 v8, v8, v12 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v13 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v15 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v15, v9, v11 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CGP-NEXT: v_mul_hi_u32 v13, v3, v11 -; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v15 +; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v15, v14, v2 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v13, v3 +; CGP-NEXT: v_mul_hi_u32 v13, v11, v2 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v15, v1 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; CGP-NEXT: v_mul_hi_u32 v11, v9, v11 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v13 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v12 -; CGP-NEXT: v_addc_u32_e32 v9, vcc, v9, v11, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v8, v3 -; CGP-NEXT: v_mul_lo_u32 v12, v4, v9 -; CGP-NEXT: v_mul_hi_u32 v13, v4, v3 -; CGP-NEXT: v_mul_hi_u32 v3, v8, v3 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v13, v8, v9 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_hi_u32 v12, v4, v9 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v13 +; CGP-NEXT: v_mul_hi_u32 v2, v14, v2 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v3 +; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v3, vcc, v13, v3 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CGP-NEXT: v_mul_hi_u32 v9, v8, v9 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v11, v1 +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v14, v2, vcc +; CGP-NEXT: v_mul_lo_u32 v3, v8, v1 +; CGP-NEXT: v_mul_lo_u32 v11, v9, v2 +; CGP-NEXT: v_mul_hi_u32 v13, v9, v1 +; CGP-NEXT: v_mul_hi_u32 v1, v8, v1 +; CGP-NEXT: v_mul_hi_u32 v14, v8, v2 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v11 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_mul_lo_u32 v11, v2, v3 -; CGP-NEXT: v_mul_lo_u32 v12, v1, v9 -; CGP-NEXT: v_mul_hi_u32 v14, v1, v3 -; CGP-NEXT: v_mul_lo_u32 v13, v1, v3 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v14 -; CGP-NEXT: v_sub_i32_e32 v4, vcc, v4, v13 -; CGP-NEXT: v_subb_u32_e64 v12, s[4:5], v8, v11, vcc -; CGP-NEXT: v_sub_i32_e64 v8, s[4:5], v8, v11 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v12, v2 -; CGP-NEXT: v_subb_u32_e32 v8, vcc, v8, v2, vcc -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v1 -; CGP-NEXT: v_sub_i32_e32 v4, vcc, v4, v1 -; CGP-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v8, vcc -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v12, v2 -; CGP-NEXT: v_add_i32_e32 v12, vcc, 1, v3 -; CGP-NEXT: v_cndmask_b32_e64 v11, v11, v13, s[4:5] -; CGP-NEXT: v_addc_u32_e32 v13, vcc, 0, v9, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v2 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v13 +; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v13, v8, v2 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v11, v3 +; CGP-NEXT: v_mul_hi_u32 v11, v9, v2 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v13, v1 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v11 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v1, v3 +; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v4, v13, 0 +; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, v11, v3 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v14, v3 +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v11, v[2:3] +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v9, v1 +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v13, v[2:3] +; CGP-NEXT: v_subb_u32_e64 v3, s[4:5], v8, v2, vcc +; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v8, v2 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v10 +; CGP-NEXT: v_subb_u32_e32 v2, vcc, v2, v10, vcc +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v4 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v4 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v10 +; CGP-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v2, vcc +; CGP-NEXT: v_cndmask_b32_e64 v3, v8, v9, s[4:5] +; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v13 +; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v11, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v10 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v1 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4 ; CGP-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v8, v2 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v2, v10 ; CGP-NEXT: v_cndmask_b32_e32 v1, v14, v1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v12 -; CGP-NEXT: v_addc_u32_e32 v4, vcc, 0, v13, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v8 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, 0, v9, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 -; CGP-NEXT: v_cndmask_b32_e32 v1, v12, v2, vcc -; CGP-NEXT: v_cndmask_b32_e32 v2, v13, v4, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 -; CGP-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc -; CGP-NEXT: v_xor_b32_e32 v3, v10, v0 -; CGP-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc +; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v2, vcc +; CGP-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; CGP-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc +; CGP-NEXT: v_xor_b32_e32 v3, v12, v0 +; CGP-NEXT: v_cndmask_b32_e32 v2, v11, v2, vcc ; CGP-NEXT: v_xor_b32_e32 v0, v1, v3 ; CGP-NEXT: v_xor_b32_e32 v1, v2, v3 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 @@ -3124,138 +2960,129 @@ ; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v10 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v9, v2 ; CGP-NEXT: v_addc_u32_e32 v4, vcc, v10, v2, vcc -; CGP-NEXT: v_xor_b32_e32 v3, v3, v2 -; CGP-NEXT: v_xor_b32_e32 v4, v4, v2 -; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3 -; CGP-NEXT: v_cvt_f32_u32_e32 v8, v4 -; CGP-NEXT: v_ashrrev_i32_e32 v9, 31, v7 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v8 -; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v9, vcc -; CGP-NEXT: v_sub_i32_e32 v10, vcc, 0, v3 -; CGP-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 -; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6 -; CGP-NEXT: v_trunc_f32_e32 v8, v8 -; CGP-NEXT: v_mac_f32_e32 v6, 0xcf800000, v8 -; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 -; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 -; CGP-NEXT: v_subb_u32_e32 v11, vcc, 0, v4, vcc -; CGP-NEXT: v_mul_lo_u32 v12, v11, v6 -; CGP-NEXT: v_mul_lo_u32 v13, v10, v8 -; CGP-NEXT: v_mul_hi_u32 v15, v10, v6 -; CGP-NEXT: v_mul_lo_u32 v14, v10, v6 -; CGP-NEXT: v_xor_b32_e32 v5, v5, v9 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v15 -; CGP-NEXT: v_mul_lo_u32 v13, v8, v14 -; CGP-NEXT: v_mul_lo_u32 v15, v6, v12 -; CGP-NEXT: v_mul_hi_u32 v16, v6, v14 -; CGP-NEXT: v_mul_hi_u32 v14, v8, v14 -; CGP-NEXT: v_xor_b32_e32 v7, v7, v9 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15 +; CGP-NEXT: v_xor_b32_e32 v6, v3, v2 +; CGP-NEXT: v_xor_b32_e32 v10, v4, v2 +; CGP-NEXT: v_cvt_f32_u32_e32 v3, v6 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, v10 +; CGP-NEXT: v_sub_i32_e32 v12, vcc, 0, v6 +; CGP-NEXT: v_subb_u32_e32 v13, vcc, 0, v10, vcc +; CGP-NEXT: v_mac_f32_e32 v3, 0x4f800000, v4 +; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; CGP-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3 +; CGP-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3 +; CGP-NEXT: v_trunc_f32_e32 v8, v4 +; CGP-NEXT: v_mac_f32_e32 v3, 0xcf800000, v8 +; CGP-NEXT: v_cvt_u32_f32_e32 v11, v3 +; CGP-NEXT: v_cvt_u32_f32_e32 v14, v8 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v12, v11, 0 +; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v14, v[4:5] +; CGP-NEXT: v_mul_lo_u32 v4, v14, v3 +; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v13, v11, v[8:9] +; CGP-NEXT: v_mul_hi_u32 v9, v11, v3 +; CGP-NEXT: v_mul_hi_u32 v3, v14, v3 +; CGP-NEXT: v_mul_lo_u32 v15, v11, v8 +; CGP-NEXT: v_mul_lo_u32 v16, v14, v8 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v15 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v16 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v16, v8, v12 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v13 -; CGP-NEXT: v_mul_hi_u32 v15, v6, v12 -; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v14 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v11, v8 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v15, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v16, v3 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15 -; CGP-NEXT: v_mul_hi_u32 v12, v8, v12 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v13 -; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v12, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v11, v6 -; CGP-NEXT: v_mul_lo_u32 v12, v10, v8 -; CGP-NEXT: v_mul_lo_u32 v13, v10, v6 -; CGP-NEXT: v_mul_hi_u32 v10, v10, v6 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_mul_lo_u32 v11, v8, v13 -; CGP-NEXT: v_mul_lo_u32 v12, v6, v10 -; CGP-NEXT: v_mul_hi_u32 v14, v6, v13 -; CGP-NEXT: v_mul_hi_u32 v13, v8, v13 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v14 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v14, v8, v10 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_hi_u32 v12, v6, v10 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; CGP-NEXT: v_mul_hi_u32 v10, v8, v10 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v11 -; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v10, vcc -; CGP-NEXT: v_mul_lo_u32 v10, v7, v6 -; CGP-NEXT: v_mul_lo_u32 v11, v5, v8 -; CGP-NEXT: v_mul_hi_u32 v12, v5, v6 -; CGP-NEXT: v_mul_hi_u32 v6, v7, v6 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v12, v7, v8 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_mul_hi_u32 v11, v5, v8 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v12, v6 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_hi_u32 v8, v7, v8 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CGP-NEXT: v_mul_lo_u32 v10, v4, v6 -; CGP-NEXT: v_mul_lo_u32 v11, v3, v8 -; CGP-NEXT: v_mul_hi_u32 v13, v3, v6 -; CGP-NEXT: v_mul_lo_u32 v12, v3, v6 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 -; CGP-NEXT: v_sub_i32_e32 v5, vcc, v5, v12 -; CGP-NEXT: v_subb_u32_e64 v11, s[4:5], v7, v10, vcc -; CGP-NEXT: v_sub_i32_e64 v7, s[4:5], v7, v10 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v11, v4 -; CGP-NEXT: v_subb_u32_e32 v7, vcc, v7, v4, vcc -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v3 -; CGP-NEXT: v_sub_i32_e32 v5, vcc, v5, v3 -; CGP-NEXT: v_subbrev_u32_e32 v7, vcc, 0, v7, vcc -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v11, v4 -; CGP-NEXT: v_add_i32_e32 v11, vcc, 1, v6 -; CGP-NEXT: v_cndmask_b32_e64 v10, v10, v12, s[4:5] -; CGP-NEXT: v_addc_u32_e32 v12, vcc, 0, v8, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v7, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v9 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v15, v9 +; CGP-NEXT: v_mul_hi_u32 v8, v14, v8 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v3 +; CGP-NEXT: v_addc_u32_e32 v14, vcc, v14, v4, vcc +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v12, v11, 0 +; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v14, v[4:5] +; CGP-NEXT: v_ashrrev_i32_e32 v12, 31, v7 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v12 +; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v13, v11, v[8:9] +; CGP-NEXT: v_addc_u32_e32 v5, vcc, v7, v12, vcc +; CGP-NEXT: v_xor_b32_e32 v7, v4, v12 +; CGP-NEXT: v_mul_lo_u32 v4, v14, v3 +; CGP-NEXT: v_mul_lo_u32 v9, v11, v8 +; CGP-NEXT: v_xor_b32_e32 v13, v5, v12 +; CGP-NEXT: v_mul_hi_u32 v5, v11, v3 +; CGP-NEXT: v_mul_hi_u32 v3, v14, v3 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v5, v14, v8 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4 +; CGP-NEXT: v_mul_hi_u32 v9, v11, v8 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v5, v3 +; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v9 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; CGP-NEXT: v_mul_hi_u32 v8, v14, v8 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v11, v3 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v14, v4, vcc +; CGP-NEXT: v_mul_lo_u32 v5, v13, v3 +; CGP-NEXT: v_mul_lo_u32 v8, v7, v4 +; CGP-NEXT: v_mul_hi_u32 v9, v7, v3 +; CGP-NEXT: v_mul_hi_u32 v3, v13, v3 +; CGP-NEXT: v_mul_hi_u32 v11, v13, v4 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v9, v13, v4 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 +; CGP-NEXT: v_mul_hi_u32 v8, v7, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v9, v3 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v8 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v3, v5 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v9, 0 +; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v5 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v8, v[4:5] +; CGP-NEXT: v_sub_i32_e32 v3, vcc, v7, v3 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v10, v9, v[4:5] +; CGP-NEXT: v_subb_u32_e64 v5, s[4:5], v13, v4, vcc +; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v13, v4 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v10 +; CGP-NEXT: v_subb_u32_e32 v4, vcc, v4, v10, vcc +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v6 +; CGP-NEXT: v_sub_i32_e32 v3, vcc, v3, v6 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v5, v10 +; CGP-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc +; CGP-NEXT: v_cndmask_b32_e64 v5, v7, v11, s[4:5] +; CGP-NEXT: v_add_i32_e32 v7, vcc, 1, v9 +; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v8, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v10 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v5, v3 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v6 ; CGP-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v7, v4 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v4, v10 ; CGP-NEXT: v_cndmask_b32_e32 v3, v13, v3, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v11 -; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v12, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v7 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, 0, v11, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; CGP-NEXT: v_cndmask_b32_e32 v3, v11, v4, vcc -; CGP-NEXT: v_cndmask_b32_e32 v4, v12, v5, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 -; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc -; CGP-NEXT: v_xor_b32_e32 v5, v9, v2 +; CGP-NEXT: v_cndmask_b32_e32 v3, v7, v4, vcc +; CGP-NEXT: v_cndmask_b32_e32 v4, v11, v6, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 +; CGP-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc +; CGP-NEXT: v_xor_b32_e32 v5, v12, v2 ; CGP-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc ; CGP-NEXT: v_xor_b32_e32 v2, v3, v5 ; CGP-NEXT: v_xor_b32_e32 v3, v4, v5 @@ -3356,269 +3183,254 @@ ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: s_mov_b32 s6, 0xffffff ; GISEL-NEXT: v_and_b32_e32 v1, s6, v4 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, 0, v1 -; GISEL-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, vcc -; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v1 -; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v3 -; GISEL-NEXT: v_sub_i32_e32 v8, vcc, 0, v1 -; GISEL-NEXT: v_subb_u32_e32 v9, vcc, 0, v3, vcc +; GISEL-NEXT: v_add_i32_e32 v3, vcc, 0, v1 +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v3 +; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v1 +; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v3 +; GISEL-NEXT: v_subb_u32_e32 v11, vcc, 0, v1, vcc ; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 -; GISEL-NEXT: v_and_b32_e32 v5, s6, v0 -; GISEL-NEXT: v_and_b32_e32 v6, s6, v6 -; GISEL-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v4 -; GISEL-NEXT: v_mul_f32_e32 v4, 0x2f800000, v0 -; GISEL-NEXT: v_trunc_f32_e32 v4, v4 -; GISEL-NEXT: v_mac_f32_e32 v0, 0xcf800000, v4 -; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v0 -; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 -; GISEL-NEXT: v_mul_lo_u32 v0, v9, v7 -; GISEL-NEXT: v_mul_lo_u32 v10, v8, v4 -; GISEL-NEXT: v_mul_hi_u32 v12, v8, v7 -; GISEL-NEXT: v_mul_lo_u32 v11, v8, v7 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v10 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v0, v12 -; GISEL-NEXT: v_mul_lo_u32 v12, v4, v11 -; GISEL-NEXT: v_mul_lo_u32 v13, v7, v10 -; GISEL-NEXT: v_and_b32_e32 v0, s6, v2 -; GISEL-NEXT: v_mul_hi_u32 v2, v7, v11 -; GISEL-NEXT: v_mul_hi_u32 v11, v4, v11 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v12, v2 -; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v12, v4, v10 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v13, v2 -; GISEL-NEXT: v_mul_hi_u32 v13, v7, v10 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; GISEL-NEXT: v_and_b32_e32 v0, s6, v0 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 +; GISEL-NEXT: v_trunc_f32_e32 v7, v5 +; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v7 +; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v4 +; GISEL-NEXT: v_cvt_u32_f32_e32 v12, v7 +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v10, v9, 0 +; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v10, v12, v[5:6] +; GISEL-NEXT: v_mul_lo_u32 v5, v12, v4 +; GISEL-NEXT: v_mul_hi_u32 v13, v9, v4 +; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v9, v[7:8] +; GISEL-NEXT: v_mul_hi_u32 v4, v12, v4 +; GISEL-NEXT: v_mul_lo_u32 v8, v9, v7 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v13, v12, v7 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v8, v5 +; GISEL-NEXT: v_mul_hi_u32 v8, v9, v7 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v13, v4 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; GISEL-NEXT: v_mul_hi_u32 v10, v4, v10 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v11, v2 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v7, v2 -; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v4, v10, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, v9, v2 -; GISEL-NEXT: v_mul_lo_u32 v9, v8, v4 -; GISEL-NEXT: v_mul_lo_u32 v10, v8, v2 -; GISEL-NEXT: v_mul_hi_u32 v8, v8, v2 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_mul_lo_u32 v8, v4, v10 -; GISEL-NEXT: v_mul_lo_u32 v9, v2, v7 -; GISEL-NEXT: v_mul_hi_u32 v12, v2, v10 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, 0, v5 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v13, v8 +; GISEL-NEXT: v_mul_hi_u32 v7, v12, v7 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v8, v5 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v4 +; GISEL-NEXT: v_addc_u32_e32 v12, vcc, v12, v5, vcc +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v10, v9, 0 +; GISEL-NEXT: v_and_b32_e32 v13, s6, v2 +; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v10, v12, v[5:6] +; GISEL-NEXT: v_mul_lo_u32 v5, v12, v4 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, 0, v0 +; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v9, v[7:8] +; GISEL-NEXT: v_mul_hi_u32 v0, v9, v4 ; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 +; GISEL-NEXT: v_mul_lo_u32 v8, v9, v7 +; GISEL-NEXT: v_mul_hi_u32 v4, v12, v4 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v12, v4, v7 -; GISEL-NEXT: v_mul_hi_u32 v10, v4, v10 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_mul_hi_u32 v9, v2, v7 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; GISEL-NEXT: v_mul_hi_u32 v7, v4, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v8 -; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v4, v7, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, v11, v2 -; GISEL-NEXT: v_mul_lo_u32 v8, v5, v4 -; GISEL-NEXT: v_mul_hi_u32 v9, v5, v2 -; GISEL-NEXT: v_mul_hi_u32 v2, v11, v2 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v5, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v5, v12, v7 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0 +; GISEL-NEXT: v_mul_hi_u32 v8, v9, v7 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v5, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 ; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; GISEL-NEXT: v_mul_hi_u32 v7, v12, v7 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v5, v4 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v7, v4 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0 +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v12, v4, vcc +; GISEL-NEXT: v_mul_lo_u32 v5, v11, v0 +; GISEL-NEXT: v_mul_lo_u32 v7, v10, v4 +; GISEL-NEXT: v_mul_hi_u32 v8, v10, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v11, v0 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v9, v11, v4 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GISEL-NEXT: v_mul_hi_u32 v8, v5, v4 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v9, v2 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v8 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v8, v11, v4 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; GISEL-NEXT: v_mul_hi_u32 v7, v10, v4 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0 ; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_mul_hi_u32 v4, v11, v4 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v7 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7 -; GISEL-NEXT: v_mul_lo_u32 v7, v3, v2 -; GISEL-NEXT: v_mul_lo_u32 v8, v1, v4 -; GISEL-NEXT: v_mul_hi_u32 v10, v1, v2 -; GISEL-NEXT: v_mul_lo_u32 v9, v1, v2 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v5, v9 -; GISEL-NEXT: v_subb_u32_e64 v8, s[4:5], v11, v7, vcc -; GISEL-NEXT: v_sub_i32_e64 v7, s[4:5], v11, v7 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v3 -; GISEL-NEXT: v_subb_u32_e32 v7, vcc, v7, v3, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v1 -; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v5, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v8, v3 -; GISEL-NEXT: v_subbrev_u32_e32 v7, vcc, 0, v7, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v8, v9, v10, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v2 -; GISEL-NEXT: v_addc_u32_e32 v10, vcc, 0, v4, vcc -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v7, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v5, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v7, v3 -; GISEL-NEXT: v_cndmask_b32_e32 v1, v11, v1, vcc -; GISEL-NEXT: v_add_i32_e32 v3, vcc, 0, v6 -; GISEL-NEXT: v_addc_u32_e64 v5, s[4:5], 0, 0, vcc -; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v3 -; GISEL-NEXT: v_cvt_f32_u32_e32 v7, v5 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, 1, v9 -; GISEL-NEXT: v_addc_u32_e32 v12, vcc, 0, v10, vcc -; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v7 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 -; GISEL-NEXT: v_cndmask_b32_e32 v1, v9, v11, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v7, v10, v12, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 -; GISEL-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc -; GISEL-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v6 -; GISEL-NEXT: v_mul_f32_e32 v6, 0x2f800000, v2 -; GISEL-NEXT: v_trunc_f32_e32 v6, v6 -; GISEL-NEXT: v_mac_f32_e32 v2, 0xcf800000, v6 -; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2 -; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 -; GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], 0, v3 -; GISEL-NEXT: v_subb_u32_e64 v9, s[4:5], 0, v5, s[4:5] -; GISEL-NEXT: v_mul_lo_u32 v10, v9, v2 -; GISEL-NEXT: v_mul_lo_u32 v11, v8, v6 -; GISEL-NEXT: v_mul_hi_u32 v13, v8, v2 -; GISEL-NEXT: v_mul_lo_u32 v12, v8, v2 -; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc -; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v11 -; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v13 -; GISEL-NEXT: v_mul_lo_u32 v11, v6, v12 -; GISEL-NEXT: v_mul_lo_u32 v13, v2, v10 -; GISEL-NEXT: v_mul_hi_u32 v7, v2, v12 -; GISEL-NEXT: v_mul_hi_u32 v12, v6, v12 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v11, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v6, v10 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v13, v7 -; GISEL-NEXT: v_mul_hi_u32 v13, v2, v10 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; GISEL-NEXT: v_mul_hi_u32 v10, v6, v10 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v11, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v7 -; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v6, v10, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, v9, v2 -; GISEL-NEXT: v_mul_lo_u32 v9, v8, v6 -; GISEL-NEXT: v_mul_lo_u32 v10, v8, v2 -; GISEL-NEXT: v_mul_hi_u32 v8, v8, v2 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_mul_lo_u32 v8, v6, v10 -; GISEL-NEXT: v_mul_lo_u32 v9, v2, v7 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, 0, v0 -; GISEL-NEXT: v_mul_hi_u32 v0, v2, v10 -; GISEL-NEXT: v_addc_u32_e64 v12, s[4:5], 0, 0, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v0, v5 +; GISEL-NEXT: v_mul_hi_u32 v8, v11, v4 +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v3, v9, 0 ; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v8, v6, v7 -; GISEL-NEXT: v_mul_hi_u32 v10, v6, v10 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0 -; GISEL-NEXT: v_mul_hi_u32 v9, v2, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v0 -; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, v12, v2 -; GISEL-NEXT: v_mul_lo_u32 v8, v11, v6 -; GISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0, v1 -; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v4, vcc -; GISEL-NEXT: v_mul_hi_u32 v4, v11, v2 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v7, v4 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v7, v0 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v8, v0 +; GISEL-NEXT: v_mov_b32_e32 v0, v5 +; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v3, v12, v[0:1] +; GISEL-NEXT: v_and_b32_e32 v0, s6, v6 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v10, v4 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v1, v9, v[7:8] +; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v11, v5, vcc +; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v11, v5 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], 0, v0 +; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_cvt_f32_u32_e32 v0, v8 +; GISEL-NEXT: v_cvt_f32_u32_e32 v11, v10 +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v4, v1 +; GISEL-NEXT: v_subb_u32_e32 v4, vcc, v5, v1, vcc +; GISEL-NEXT: v_mac_f32_e32 v0, 0x4f800000, v11 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[4:5] +; GISEL-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 +; GISEL-NEXT: v_subbrev_u32_e32 v7, vcc, 0, v4, vcc +; GISEL-NEXT: v_mul_f32_e32 v4, 0x2f800000, v0 +; GISEL-NEXT: v_trunc_f32_e32 v11, v4 +; GISEL-NEXT: v_mac_f32_e32 v0, 0xcf800000, v11 +; GISEL-NEXT: v_cvt_u32_f32_e32 v14, v0 +; GISEL-NEXT: v_sub_i32_e32 v15, vcc, 0, v8 +; GISEL-NEXT: v_cvt_u32_f32_e32 v11, v11 +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v15, v14, 0 +; GISEL-NEXT: v_subb_u32_e32 v16, vcc, 0, v10, vcc +; GISEL-NEXT: v_add_i32_e32 v17, vcc, 1, v9 +; GISEL-NEXT: v_addc_u32_e32 v18, vcc, 0, v12, vcc +; GISEL-NEXT: v_mov_b32_e32 v0, v5 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v3 +; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v15, v11, v[0:1] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5] +; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v16, v14, v[2:3] +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v7, v1 +; GISEL-NEXT: v_mul_lo_u32 v1, v11, v4 +; GISEL-NEXT: v_mul_lo_u32 v5, v14, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v3, v0, v3, vcc +; GISEL-NEXT: v_mul_hi_u32 v0, v14, v4 +; GISEL-NEXT: v_mul_hi_u32 v4, v11, v4 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v1, v11, v2 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v5, v0 +; GISEL-NEXT: v_mul_hi_u32 v5, v14, v2 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v4 ; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, v12, v6 -; GISEL-NEXT: v_mul_hi_u32 v2, v12, v2 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v8, v4 -; GISEL-NEXT: v_mul_hi_u32 v8, v11, v6 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v7, v2 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_mul_hi_u32 v6, v12, v6 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; GISEL-NEXT: v_mul_hi_u32 v2, v11, v2 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v4, v1 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v2, v1 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v14, v0 +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v11, v1, vcc +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v15, v4, 0 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v17 +; GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v15, v5, v[1:2] +; GISEL-NEXT: v_addc_u32_e32 v11, vcc, 0, v18, vcc +; GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v16, v4, v[1:2] +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v3, v17, v7, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v7, v18, v11, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 +; GISEL-NEXT: v_cndmask_b32_e32 v6, v9, v3, vcc +; GISEL-NEXT: v_mul_lo_u32 v2, v5, v0 +; GISEL-NEXT: v_mul_lo_u32 v3, v4, v1 +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], 0, v13 +; GISEL-NEXT: v_mul_hi_u32 v13, v4, v0 +; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v2, s[4:5], v2, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v2, s[4:5], v2, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v13, v5, v1 +; GISEL-NEXT: v_mul_hi_u32 v0, v5, v0 +; GISEL-NEXT: v_add_i32_e64 v2, s[4:5], v3, v2 +; GISEL-NEXT: v_mul_hi_u32 v3, v4, v1 +; GISEL-NEXT: v_add_i32_e64 v0, s[4:5], v13, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v0, s[4:5], v0, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v3, s[4:5], v13, v3 +; GISEL-NEXT: v_mul_hi_u32 v1, v5, v1 +; GISEL-NEXT: v_add_i32_e64 v0, s[4:5], v0, v2 +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v2, s[4:5], v3, v2 +; GISEL-NEXT: v_add_i32_e64 v1, s[4:5], v1, v2 +; GISEL-NEXT: v_add_i32_e64 v0, s[4:5], v4, v0 +; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], v5, v1, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v2, v11, v0 +; GISEL-NEXT: v_mul_lo_u32 v3, v9, v1 +; GISEL-NEXT: v_mul_hi_u32 v4, v9, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v5, v12, v7, vcc +; GISEL-NEXT: v_mul_hi_u32 v0, v11, v0 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v4, v11, v1 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; GISEL-NEXT: v_mul_hi_u32 v3, v9, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0 ; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v7, v4 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v6, v4 -; GISEL-NEXT: v_mul_lo_u32 v6, v5, v2 -; GISEL-NEXT: v_mul_lo_u32 v7, v3, v4 -; GISEL-NEXT: v_mul_hi_u32 v9, v3, v2 -; GISEL-NEXT: v_mul_lo_u32 v8, v3, v2 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9 -; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v11, v8 -; GISEL-NEXT: v_subb_u32_e64 v8, s[4:5], v12, v6, vcc -; GISEL-NEXT: v_sub_i32_e64 v6, s[4:5], v12, v6 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v5 -; GISEL-NEXT: v_subb_u32_e32 v6, vcc, v6, v5, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v3 -; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v7, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v8, v5 -; GISEL-NEXT: v_subbrev_u32_e32 v6, vcc, 0, v6, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v8, v9, v10, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v2 -; GISEL-NEXT: v_addc_u32_e32 v10, vcc, 0, v4, vcc -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v6, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v7, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v6, v5 -; GISEL-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v9 -; GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v10, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; GISEL-NEXT: v_cndmask_b32_e32 v3, v9, v5, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v5, v10, v6, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v3 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v0, v2 +; GISEL-NEXT: v_mul_hi_u32 v1, v11, v1 +; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v8, v7, 0 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v1, v0 +; GISEL-NEXT: v_mov_b32_e32 v0, v3 +; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v8, v12, v[0:1] +; GISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0, v6 +; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v10, v7, v[3:4] +; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v5, vcc +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v9, v2 +; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v11, v3, vcc +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v11, v3 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v10 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v10, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v8 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v4, v10 +; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v4, v5, v6, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v7 +; GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v12, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v3, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v10 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc +; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5 +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v6, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v5, v3, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v6, v8, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc ; GISEL-NEXT: v_subrev_i32_e32 v2, vcc, 0, v2 ; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; GISEL-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll @@ -162,145 +162,136 @@ ; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 ; GFX8-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX8-NEXT: s_sub_u32 s0, 0, s8 -; GFX8-NEXT: s_subb_u32 s1, 0, s9 +; GFX8-NEXT: s_sub_u32 s14, 0, s8 +; GFX8-NEXT: s_subb_u32 s15, 0, s9 ; GFX8-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX8-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 -; GFX8-NEXT: v_trunc_f32_e32 v1, v1 -; GFX8-NEXT: v_mul_f32_e32 v2, 0xcf800000, v1 -; GFX8-NEXT: v_add_f32_e32 v0, v2, v0 -; GFX8-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX8-NEXT: v_mul_lo_u32 v2, s0, v1 -; GFX8-NEXT: v_mul_lo_u32 v3, s1, v0 -; GFX8-NEXT: v_mul_hi_u32 v5, s0, v0 -; GFX8-NEXT: v_mul_lo_u32 v4, s0, v0 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 -; GFX8-NEXT: v_mul_lo_u32 v3, v1, v4 -; GFX8-NEXT: v_mul_lo_u32 v5, v0, v2 -; GFX8-NEXT: v_mul_hi_u32 v6, v0, v4 -; GFX8-NEXT: v_mul_hi_u32 v4, v1, v4 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v5 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v6 -; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v6, v1, v2 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v5, v3 -; GFX8-NEXT: v_mul_hi_u32 v5, v0, v2 -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v6, v4 +; GFX8-NEXT: v_trunc_f32_e32 v2, v1 +; GFX8-NEXT: v_mul_f32_e32 v1, 0xcf800000, v2 +; GFX8-NEXT: v_add_f32_e32 v0, v1, v0 +; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v0 +; GFX8-NEXT: v_cvt_u32_f32_e32 v4, v2 +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s14, v3, 0 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s14, v4, v[1:2] +; GFX8-NEXT: v_mul_hi_u32 v5, v3, v0 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s15, v3, v[1:2] +; GFX8-NEXT: v_mul_lo_u32 v2, v4, v0 +; GFX8-NEXT: v_mul_hi_u32 v0, v4, v0 +; GFX8-NEXT: v_mul_lo_u32 v6, v3, v1 +; GFX8-NEXT: v_mul_lo_u32 v7, v4, v1 +; GFX8-NEXT: v_mul_hi_u32 v8, v3, v1 +; GFX8-NEXT: v_mul_hi_u32 v1, v4, v1 +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6 ; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v5 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v7, v0 +; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v6, v2 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v8 ; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v5, vcc, v6, v5 -; GFX8-NEXT: v_mul_hi_u32 v2, v1, v2 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v4, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v5, v4 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v4 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v3 -; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc -; GFX8-NEXT: v_mul_lo_u32 v2, s1, v0 -; GFX8-NEXT: v_mul_lo_u32 v3, s0, v1 -; GFX8-NEXT: v_mul_hi_u32 v5, s0, v0 -; GFX8-NEXT: v_mul_lo_u32 v4, s0, v0 -; GFX8-NEXT: v_mov_b32_e32 v6, s9 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 +; GFX8-NEXT: v_add_u32_e32 v5, vcc, v7, v5 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 +; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 +; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v0 +; GFX8-NEXT: v_addc_u32_e32 v4, vcc, v4, v1, vcc +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s14, v3, 0 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s14, v4, v[1:2] +; GFX8-NEXT: v_mul_hi_u32 v6, v3, v0 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s15, v3, v[1:2] +; GFX8-NEXT: v_mul_lo_u32 v2, v4, v0 +; GFX8-NEXT: v_mul_hi_u32 v0, v4, v0 +; GFX8-NEXT: v_mul_lo_u32 v5, v3, v1 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 -; GFX8-NEXT: v_mul_lo_u32 v3, v1, v4 -; GFX8-NEXT: v_mul_lo_u32 v5, v0, v2 -; GFX8-NEXT: v_mul_hi_u32 v7, v0, v4 -; GFX8-NEXT: v_mul_hi_u32 v4, v1, v4 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v5 ; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v7 -; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v7, v1, v2 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v5, v3 -; GFX8-NEXT: v_mul_hi_u32 v5, v0, v2 -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v7, v4 -; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v5 +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6 +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX8-NEXT: v_mul_lo_u32 v6, v4, v1 +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 +; GFX8-NEXT: v_mul_hi_u32 v5, v3, v1 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v6, v0 +; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v5 ; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v5, vcc, v7, v5 -; GFX8-NEXT: v_mul_hi_u32 v2, v1, v2 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v4, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v5, v4 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v4 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v3 -; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc +; GFX8-NEXT: v_add_u32_e32 v5, vcc, v6, v5 +; GFX8-NEXT: v_mul_hi_u32 v1, v4, v1 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 +; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v3, v0 +; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc ; GFX8-NEXT: v_mul_lo_u32 v2, s11, v0 ; GFX8-NEXT: v_mul_lo_u32 v3, s10, v1 -; GFX8-NEXT: v_mul_hi_u32 v5, s10, v0 +; GFX8-NEXT: v_mul_hi_u32 v4, s10, v0 ; GFX8-NEXT: v_mul_hi_u32 v0, s11, v0 -; GFX8-NEXT: v_mov_b32_e32 v4, s11 +; GFX8-NEXT: v_mul_hi_u32 v5, s11, v1 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 ; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v4 ; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v5, s11, v1 +; GFX8-NEXT: v_mul_lo_u32 v4, s11, v1 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 ; GFX8-NEXT: v_mul_hi_u32 v3, s10, v1 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v5, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v4, v0 +; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v3 ; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v5, v3 -; GFX8-NEXT: v_mul_hi_u32 v1, s11, v1 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 +; GFX8-NEXT: v_add_u32_e32 v3, vcc, v4, v3 +; GFX8-NEXT: v_add_u32_e32 v4, vcc, v0, v2 +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v4, 0 ; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 -; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 -; GFX8-NEXT: v_mul_lo_u32 v2, s9, v0 -; GFX8-NEXT: v_mul_lo_u32 v3, s8, v1 -; GFX8-NEXT: v_mul_hi_u32 v7, s8, v0 -; GFX8-NEXT: v_mul_lo_u32 v5, s8, v0 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v7 -; GFX8-NEXT: v_sub_u32_e32 v3, vcc, s10, v5 -; GFX8-NEXT: v_subb_u32_e64 v4, s[0:1], v4, v2, vcc -; GFX8-NEXT: v_sub_u32_e64 v2, s[0:1], s11, v2 -; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v4 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[0:1] -; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v3 +; GFX8-NEXT: v_add_u32_e32 v3, vcc, v5, v2 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v3, v[1:2] +; GFX8-NEXT: v_mov_b32_e32 v6, s11 +; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s10, v0 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s9, v4, v[1:2] +; GFX8-NEXT: v_mov_b32_e32 v5, s9 +; GFX8-NEXT: v_subb_u32_e64 v2, s[0:1], v6, v1, vcc +; GFX8-NEXT: v_sub_u32_e64 v1, s[0:1], s11, v1 +; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v2 +; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[0:1] +; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v0 ; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[0:1] -; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v4 -; GFX8-NEXT: v_subb_u32_e32 v2, vcc, v2, v6, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v7, s[0:1] -; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, s8, v3 -; GFX8-NEXT: v_subbrev_u32_e64 v8, s[0:1], 0, v2, vcc -; GFX8-NEXT: v_add_u32_e64 v9, s[0:1], 1, v0 -; GFX8-NEXT: v_addc_u32_e64 v10, s[0:1], 0, v1, s[0:1] +; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v2 +; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[0:1] +; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, s8, v0 +; GFX8-NEXT: v_subbrev_u32_e64 v8, s[0:1], 0, v1, vcc +; GFX8-NEXT: v_add_u32_e64 v9, s[0:1], 1, v4 +; GFX8-NEXT: v_addc_u32_e64 v10, s[0:1], 0, v3, s[0:1] ; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v8 ; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[0:1] ; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v7 -; GFX8-NEXT: v_subb_u32_e32 v2, vcc, v2, v6, vcc +; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc ; GFX8-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[0:1] ; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v8 -; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, s8, v7 +; GFX8-NEXT: v_subrev_u32_e32 v5, vcc, s8, v7 ; GFX8-NEXT: v_cndmask_b32_e64 v11, v11, v12, s[0:1] ; GFX8-NEXT: v_add_u32_e64 v12, s[0:1], 1, v9 -; GFX8-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v2, vcc +; GFX8-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; GFX8-NEXT: v_addc_u32_e64 v13, s[0:1], 0, v10, s[0:1] ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 ; GFX8-NEXT: v_cndmask_b32_e32 v9, v9, v12, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v10, v10, v13, vcc -; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 +; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 ; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v11 -; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v5, v7, v6, s[0:1] -; GFX8-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v9, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e64 v1, v8, v1, s[0:1] ; GFX8-NEXT: s_xor_b64 s[0:1], s[2:3], s[12:13] -; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v10, vcc -; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0 -; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc -; GFX8-NEXT: v_xor_b32_e32 v1, s1, v1 -; GFX8-NEXT: v_mov_b32_e32 v4, s1 +; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v10, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v5, v0, v5, vcc +; GFX8-NEXT: v_xor_b32_e32 v0, s0, v4 +; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v1, vcc +; GFX8-NEXT: v_xor_b32_e32 v1, s1, v3 +; GFX8-NEXT: v_mov_b32_e32 v3, s1 ; GFX8-NEXT: v_subrev_u32_e32 v0, vcc, s0, v0 -; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc -; GFX8-NEXT: v_xor_b32_e32 v3, s2, v3 +; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc +; GFX8-NEXT: v_xor_b32_e32 v3, s2, v5 ; GFX8-NEXT: v_xor_b32_e32 v4, s2, v2 ; GFX8-NEXT: v_mov_b32_e32 v5, s2 ; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s2, v3 @@ -332,146 +323,140 @@ ; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 ; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX9-NEXT: s_sub_u32 s0, 0, s8 -; GFX9-NEXT: s_subb_u32 s1, 0, s9 -; GFX9-NEXT: v_mov_b32_e32 v8, s11 +; GFX9-NEXT: s_sub_u32 s14, 0, s8 +; GFX9-NEXT: s_subb_u32 s15, 0, s9 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX9-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 -; GFX9-NEXT: v_trunc_f32_e32 v1, v1 -; GFX9-NEXT: v_mul_f32_e32 v2, 0xcf800000, v1 -; GFX9-NEXT: v_add_f32_e32 v0, v2, v0 -; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX9-NEXT: v_mul_lo_u32 v2, s0, v1 -; GFX9-NEXT: v_mul_lo_u32 v3, s1, v0 -; GFX9-NEXT: v_mul_hi_u32 v4, s0, v0 -; GFX9-NEXT: v_mul_lo_u32 v5, s0, v0 -; GFX9-NEXT: v_add3_u32 v2, v3, v2, v4 -; GFX9-NEXT: v_mul_lo_u32 v3, v1, v5 -; GFX9-NEXT: v_mul_lo_u32 v6, v0, v2 -; GFX9-NEXT: v_mul_hi_u32 v4, v0, v5 -; GFX9-NEXT: v_mul_hi_u32 v5, v1, v5 -; GFX9-NEXT: v_mul_lo_u32 v7, v1, v2 -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v6 -; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v4 -; GFX9-NEXT: v_mul_hi_u32 v4, v0, v2 -; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v7, v5 -; GFX9-NEXT: v_mul_hi_u32 v2, v1, v2 -; GFX9-NEXT: v_add_u32_e32 v3, v6, v3 +; GFX9-NEXT: v_trunc_f32_e32 v2, v1 +; GFX9-NEXT: v_mul_f32_e32 v1, 0xcf800000, v2 +; GFX9-NEXT: v_add_f32_e32 v0, v1, v0 +; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v0 +; GFX9-NEXT: v_cvt_u32_f32_e32 v4, v2 +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s14, v3, 0 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s14, v4, v[1:2] +; GFX9-NEXT: v_mul_hi_u32 v5, v3, v0 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s15, v3, v[1:2] +; GFX9-NEXT: v_mul_lo_u32 v2, v4, v0 +; GFX9-NEXT: v_mul_hi_u32 v0, v4, v0 +; GFX9-NEXT: v_mul_lo_u32 v6, v3, v1 +; GFX9-NEXT: v_mul_lo_u32 v7, v4, v1 +; GFX9-NEXT: v_mul_hi_u32 v8, v3, v1 +; GFX9-NEXT: v_mul_hi_u32 v1, v4, v1 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6 ; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v5, v4 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v7, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX9-NEXT: v_add_u32_e32 v2, v6, v2 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v8 ; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v4, v3 -; GFX9-NEXT: v_add_u32_e32 v5, v6, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v2, v5, v4, v2 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3 -; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc -; GFX9-NEXT: v_mul_lo_u32 v2, s1, v0 -; GFX9-NEXT: v_mul_lo_u32 v3, s0, v1 -; GFX9-NEXT: v_mul_hi_u32 v4, s0, v0 -; GFX9-NEXT: v_mul_lo_u32 v5, s0, v0 -; GFX9-NEXT: v_add3_u32 v2, v2, v3, v4 -; GFX9-NEXT: v_mul_lo_u32 v3, v1, v5 -; GFX9-NEXT: v_mul_lo_u32 v4, v0, v2 -; GFX9-NEXT: v_mul_hi_u32 v6, v0, v5 -; GFX9-NEXT: v_mul_hi_u32 v5, v1, v5 -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v4 -; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v6 -; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX9-NEXT: v_mul_lo_u32 v6, v1, v2 -; GFX9-NEXT: v_add_u32_e32 v3, v4, v3 -; GFX9-NEXT: v_mul_hi_u32 v4, v0, v2 -; GFX9-NEXT: v_mul_hi_u32 v2, v1, v2 -; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v6, v5 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 +; GFX9-NEXT: v_add_u32_e32 v5, v7, v5 +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX9-NEXT: v_add3_u32 v1, v5, v2, v1 +; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v4, v1, vcc +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s14, v3, 0 +; GFX9-NEXT: v_mov_b32_e32 v7, s9 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s14, v4, v[1:2] +; GFX9-NEXT: v_mul_hi_u32 v6, v3, v0 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s15, v3, v[1:2] +; GFX9-NEXT: v_mul_lo_u32 v2, v4, v0 +; GFX9-NEXT: v_mul_hi_u32 v0, v4, v0 +; GFX9-NEXT: v_mul_lo_u32 v5, v3, v1 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 +; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6 +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX9-NEXT: v_mul_lo_u32 v6, v4, v1 +; GFX9-NEXT: v_add_u32_e32 v2, v5, v2 +; GFX9-NEXT: v_mul_hi_u32 v5, v3, v1 +; GFX9-NEXT: v_mul_hi_u32 v1, v4, v1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v6, v0 ; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v5, v4 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v5 ; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v4, v3 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 ; GFX9-NEXT: v_add_u32_e32 v5, v6, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v2, v5, v4, v2 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3 -; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX9-NEXT: v_add3_u32 v1, v5, v2, v1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v3, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v4, v1, vcc ; GFX9-NEXT: v_mul_lo_u32 v2, s11, v0 ; GFX9-NEXT: v_mul_lo_u32 v3, s10, v1 -; GFX9-NEXT: v_mul_hi_u32 v5, s10, v0 +; GFX9-NEXT: v_mul_hi_u32 v4, s10, v0 ; GFX9-NEXT: v_mul_hi_u32 v0, s11, v0 -; GFX9-NEXT: v_mov_b32_e32 v4, s9 +; GFX9-NEXT: v_mul_hi_u32 v6, s11, v1 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3 ; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 ; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_mul_lo_u32 v5, s11, v1 +; GFX9-NEXT: v_mul_lo_u32 v4, s11, v1 ; GFX9-NEXT: v_add_u32_e32 v2, v3, v2 ; GFX9-NEXT: v_mul_hi_u32 v3, s10, v1 -; GFX9-NEXT: v_mul_hi_u32 v1, s11, v1 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v5, v0 -; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v4, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3 ; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-NEXT: v_add_u32_e32 v3, v5, v3 +; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v0, v2 +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v5, 0 ; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v1, v3, v2, v1 -; GFX9-NEXT: v_mul_lo_u32 v2, s9, v0 -; GFX9-NEXT: v_mul_lo_u32 v3, s8, v1 -; GFX9-NEXT: v_mul_hi_u32 v5, s8, v0 -; GFX9-NEXT: v_mul_lo_u32 v7, s8, v0 -; GFX9-NEXT: v_mov_b32_e32 v6, 0 -; GFX9-NEXT: v_add3_u32 v2, v2, v3, v5 -; GFX9-NEXT: v_sub_co_u32_e32 v3, vcc, s10, v7 -; GFX9-NEXT: v_subb_co_u32_e64 v5, s[0:1], v8, v2, vcc -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v5 -; GFX9-NEXT: v_sub_u32_e32 v2, s11, v2 -; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v3 +; GFX9-NEXT: v_add_u32_e32 v3, v4, v3 +; GFX9-NEXT: v_add3_u32 v3, v3, v2, v6 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v3, v[1:2] +; GFX9-NEXT: v_mov_b32_e32 v6, s11 +; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, s10, v0 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s9, v5, v[1:2] +; GFX9-NEXT: v_mov_b32_e32 v4, 0 +; GFX9-NEXT: v_subb_co_u32_e64 v2, s[0:1], v6, v1, vcc +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v2 +; GFX9-NEXT: v_sub_u32_e32 v1, s11, v1 +; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[0:1] +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v0 ; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v5 -; GFX9-NEXT: v_subb_co_u32_e32 v2, vcc, v2, v4, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v8, s[0:1] -; GFX9-NEXT: v_subrev_co_u32_e32 v8, vcc, s8, v3 -; GFX9-NEXT: v_subbrev_co_u32_e64 v9, s[0:1], 0, v2, vcc -; GFX9-NEXT: v_add_co_u32_e64 v10, s[0:1], 1, v0 -; GFX9-NEXT: v_addc_co_u32_e64 v11, s[0:1], 0, v1, s[0:1] +; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v2 +; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v7, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v8, s[0:1] +; GFX9-NEXT: v_subrev_co_u32_e32 v8, vcc, s8, v0 +; GFX9-NEXT: v_subbrev_co_u32_e64 v9, s[0:1], 0, v1, vcc +; GFX9-NEXT: v_add_co_u32_e64 v10, s[0:1], 1, v5 +; GFX9-NEXT: v_addc_co_u32_e64 v11, s[0:1], 0, v3, s[0:1] ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v9 ; GFX9-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[0:1] ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v8 -; GFX9-NEXT: v_subb_co_u32_e32 v2, vcc, v2, v4, vcc +; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v7, vcc ; GFX9-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[0:1] ; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v9 -; GFX9-NEXT: v_subrev_co_u32_e32 v4, vcc, s8, v8 +; GFX9-NEXT: v_subrev_co_u32_e32 v7, vcc, s8, v8 ; GFX9-NEXT: v_cndmask_b32_e64 v12, v12, v13, s[0:1] ; GFX9-NEXT: v_add_co_u32_e64 v13, s[0:1], 1, v10 -; GFX9-NEXT: v_subbrev_co_u32_e32 v2, vcc, 0, v2, vcc +; GFX9-NEXT: v_subbrev_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-NEXT: v_addc_co_u32_e64 v14, s[0:1], 0, v11, s[0:1] ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; GFX9-NEXT: v_cndmask_b32_e32 v10, v10, v13, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v11, v11, v14, vcc -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 ; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v12 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[0:1] -; GFX9-NEXT: v_cndmask_b32_e64 v2, v9, v2, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v10, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v6, v8, v7, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[0:1] ; GFX9-NEXT: s_xor_b64 s[0:1], s[2:3], s[12:13] -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc -; GFX9-NEXT: v_xor_b32_e32 v0, s0, v0 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc -; GFX9-NEXT: v_xor_b32_e32 v1, s1, v1 -; GFX9-NEXT: v_mov_b32_e32 v4, s1 +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v6, v0, v6, vcc +; GFX9-NEXT: v_xor_b32_e32 v0, s0, v5 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v1, vcc +; GFX9-NEXT: v_xor_b32_e32 v1, s1, v3 +; GFX9-NEXT: v_mov_b32_e32 v3, s1 ; GFX9-NEXT: v_subrev_co_u32_e32 v0, vcc, s0, v0 -; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v4, vcc -; GFX9-NEXT: v_xor_b32_e32 v3, s2, v3 -; GFX9-NEXT: v_xor_b32_e32 v4, s2, v2 -; GFX9-NEXT: v_mov_b32_e32 v5, s2 +; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc +; GFX9-NEXT: v_xor_b32_e32 v3, s2, v6 +; GFX9-NEXT: v_xor_b32_e32 v5, s2, v2 +; GFX9-NEXT: v_mov_b32_e32 v6, s2 ; GFX9-NEXT: v_subrev_co_u32_e32 v2, vcc, s2, v3 -; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v4, v5, vcc -; GFX9-NEXT: global_store_dwordx2 v6, v[0:1], s[4:5] -; GFX9-NEXT: global_store_dwordx2 v6, v[2:3], s[6:7] +; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v5, v6, vcc +; GFX9-NEXT: global_store_dwordx2 v4, v[0:1], s[4:5] +; GFX9-NEXT: global_store_dwordx2 v4, v[2:3], s[6:7] ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: sdivrem_i64: @@ -499,64 +484,62 @@ ; GFX10-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX10-NEXT: v_trunc_f32_e32 v1, v1 ; GFX10-NEXT: v_mul_f32_e32 v2, 0xcf800000, v1 -; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX10-NEXT: v_add_f32_e32 v0, v2, v0 -; GFX10-NEXT: v_mul_lo_u32 v2, s10, v1 -; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX10-NEXT: v_mul_lo_u32 v3, s11, v0 -; GFX10-NEXT: v_mul_hi_u32 v4, s10, v0 -; GFX10-NEXT: v_mul_lo_u32 v5, s10, v0 -; GFX10-NEXT: v_add3_u32 v2, v3, v2, v4 -; GFX10-NEXT: v_mul_lo_u32 v3, v1, v5 -; GFX10-NEXT: v_mul_hi_u32 v6, v1, v5 -; GFX10-NEXT: v_mul_hi_u32 v5, v0, v5 -; GFX10-NEXT: v_mul_lo_u32 v4, v0, v2 -; GFX10-NEXT: v_mul_lo_u32 v7, v1, v2 -; GFX10-NEXT: v_mul_hi_u32 v8, v0, v2 -; GFX10-NEXT: v_mul_hi_u32 v2, v1, v2 -; GFX10-NEXT: v_add_co_u32 v3, s14, v3, v4 -; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s14 +; GFX10-NEXT: v_cvt_u32_f32_e32 v2, v1 +; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v0 +; GFX10-NEXT: v_mul_lo_u32 v4, s10, v2 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s14, s10, v3, 0 +; GFX10-NEXT: v_mul_lo_u32 v5, s11, v3 +; GFX10-NEXT: v_mul_hi_u32 v6, v2, v0 +; GFX10-NEXT: v_add3_u32 v1, v1, v4, v5 +; GFX10-NEXT: v_mul_lo_u32 v4, v2, v0 +; GFX10-NEXT: v_mul_hi_u32 v0, v3, v0 +; GFX10-NEXT: v_mul_lo_u32 v5, v3, v1 +; GFX10-NEXT: v_mul_lo_u32 v7, v2, v1 +; GFX10-NEXT: v_mul_hi_u32 v8, v3, v1 +; GFX10-NEXT: v_mul_hi_u32 v1, v2, v1 +; GFX10-NEXT: v_add_co_u32 v4, s14, v4, v5 +; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s14 ; GFX10-NEXT: v_add_co_u32 v6, s14, v7, v6 ; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s14 -; GFX10-NEXT: v_add_co_u32 v3, s14, v3, v5 -; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s14 -; GFX10-NEXT: v_add_co_u32 v5, s14, v6, v8 +; GFX10-NEXT: v_add_co_u32 v0, s14, v4, v0 +; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s14 +; GFX10-NEXT: v_add_co_u32 v4, s14, v6, v8 ; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s14 -; GFX10-NEXT: v_add_nc_u32_e32 v3, v4, v3 -; GFX10-NEXT: v_add_nc_u32_e32 v4, v7, v6 -; GFX10-NEXT: v_add_co_u32 v3, s14, v5, v3 -; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s14 -; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v3 -; GFX10-NEXT: v_add3_u32 v2, v4, v5, v2 -; GFX10-NEXT: v_mul_hi_u32 v3, s10, v0 -; GFX10-NEXT: v_mul_lo_u32 v5, s10, v0 -; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v2, vcc_lo -; GFX10-NEXT: v_mul_lo_u32 v2, s11, v0 -; GFX10-NEXT: v_mul_lo_u32 v4, s10, v1 -; GFX10-NEXT: v_mul_hi_u32 v6, v1, v5 -; GFX10-NEXT: v_add3_u32 v2, v2, v4, v3 -; GFX10-NEXT: v_mul_lo_u32 v3, v1, v5 -; GFX10-NEXT: v_mul_hi_u32 v5, v0, v5 -; GFX10-NEXT: v_mul_lo_u32 v4, v0, v2 -; GFX10-NEXT: v_mul_lo_u32 v7, v1, v2 -; GFX10-NEXT: v_mul_hi_u32 v8, v0, v2 -; GFX10-NEXT: v_mul_hi_u32 v2, v1, v2 -; GFX10-NEXT: v_add_co_u32 v3, s10, v3, v4 -; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s10 +; GFX10-NEXT: v_add_nc_u32_e32 v0, v5, v0 +; GFX10-NEXT: v_add_nc_u32_e32 v5, v7, v6 +; GFX10-NEXT: v_add_co_u32 v0, s14, v4, v0 +; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s14 +; GFX10-NEXT: v_add_co_u32 v3, vcc_lo, v3, v0 +; GFX10-NEXT: v_add3_u32 v1, v5, v4, v1 +; GFX10-NEXT: v_mul_lo_u32 v4, s11, v3 +; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v2, v1, vcc_lo +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s14, s10, v3, 0 +; GFX10-NEXT: v_mul_lo_u32 v5, s10, v2 +; GFX10-NEXT: v_mul_hi_u32 v6, v2, v0 +; GFX10-NEXT: v_add3_u32 v1, v1, v5, v4 +; GFX10-NEXT: v_mul_lo_u32 v4, v2, v0 +; GFX10-NEXT: v_mul_hi_u32 v0, v3, v0 +; GFX10-NEXT: v_mul_lo_u32 v5, v3, v1 +; GFX10-NEXT: v_mul_lo_u32 v7, v2, v1 +; GFX10-NEXT: v_mul_hi_u32 v8, v3, v1 +; GFX10-NEXT: v_mul_hi_u32 v1, v2, v1 +; GFX10-NEXT: v_add_co_u32 v4, s10, v4, v5 +; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s10 ; GFX10-NEXT: v_add_co_u32 v6, s10, v7, v6 ; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s10 -; GFX10-NEXT: v_add_co_u32 v3, s10, v3, v5 -; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s10 -; GFX10-NEXT: v_add_co_u32 v5, s10, v6, v8 +; GFX10-NEXT: v_add_co_u32 v0, s10, v4, v0 +; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s10 +; GFX10-NEXT: v_add_co_u32 v4, s10, v6, v8 ; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s10 -; GFX10-NEXT: v_add_nc_u32_e32 v3, v4, v3 -; GFX10-NEXT: v_add_nc_u32_e32 v4, v7, v6 -; GFX10-NEXT: v_add_co_u32 v3, s10, v5, v3 -; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s10 -; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v3 -; GFX10-NEXT: v_add3_u32 v2, v4, v5, v2 +; GFX10-NEXT: v_add_nc_u32_e32 v0, v5, v0 +; GFX10-NEXT: v_add_nc_u32_e32 v5, v7, v6 +; GFX10-NEXT: v_add_co_u32 v0, s10, v4, v0 +; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s10 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v3, v0 +; GFX10-NEXT: v_add3_u32 v1, v5, v4, v1 ; GFX10-NEXT: v_mul_hi_u32 v4, s1, v0 -; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v2, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v2, v1, vcc_lo ; GFX10-NEXT: v_mul_lo_u32 v2, s1, v0 ; GFX10-NEXT: v_mul_hi_u32 v0, s0, v0 ; GFX10-NEXT: v_mul_lo_u32 v3, s0, v1 @@ -573,27 +556,26 @@ ; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s10 ; GFX10-NEXT: v_add_nc_u32_e32 v0, v3, v0 ; GFX10-NEXT: v_add_nc_u32_e32 v3, v5, v4 -; GFX10-NEXT: v_add_co_u32 v0, s10, v2, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s10 -; GFX10-NEXT: v_mul_lo_u32 v5, s8, v0 -; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v0, 1 -; GFX10-NEXT: v_add3_u32 v1, v3, v2, v1 -; GFX10-NEXT: v_mul_lo_u32 v2, s9, v0 -; GFX10-NEXT: v_mul_hi_u32 v3, s8, v0 -; GFX10-NEXT: v_mul_lo_u32 v4, s8, v1 -; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, 0, v1, vcc_lo -; GFX10-NEXT: v_add3_u32 v2, v2, v4, v3 -; GFX10-NEXT: v_add_co_u32 v3, vcc_lo, v6, 1 -; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, 0, v7, vcc_lo -; GFX10-NEXT: v_sub_nc_u32_e32 v8, s1, v2 -; GFX10-NEXT: v_sub_co_u32 v5, vcc_lo, s0, v5 -; GFX10-NEXT: v_sub_co_ci_u32_e64 v2, s0, s1, v2, vcc_lo +; GFX10-NEXT: v_add_co_u32 v2, s10, v2, v0 +; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s10 +; GFX10-NEXT: v_mul_lo_u32 v4, s9, v2 +; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v2, 1 +; GFX10-NEXT: v_add3_u32 v3, v3, v0, v1 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s10, s8, v2, 0 +; GFX10-NEXT: v_mul_lo_u32 v5, s8, v3 +; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, 0, v3, vcc_lo +; GFX10-NEXT: v_add3_u32 v1, v1, v5, v4 +; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v6, 1 +; GFX10-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, 0, v7, vcc_lo +; GFX10-NEXT: v_sub_nc_u32_e32 v8, s1, v1 +; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, s0, v0 +; GFX10-NEXT: v_sub_co_ci_u32_e64 v1, s0, s1, v1, vcc_lo ; GFX10-NEXT: v_subrev_co_ci_u32_e32 v8, vcc_lo, s9, v8, vcc_lo -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s8, v5 +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s8, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc_lo -; GFX10-NEXT: v_sub_co_u32 v10, vcc_lo, v5, s8 +; GFX10-NEXT: v_sub_co_u32 v10, vcc_lo, v0, s8 ; GFX10-NEXT: v_subrev_co_ci_u32_e64 v11, s0, 0, v8, vcc_lo -; GFX10-NEXT: v_cmp_le_u32_e64 s0, s9, v2 +; GFX10-NEXT: v_cmp_le_u32_e64 s0, s9, v1 ; GFX10-NEXT: v_subrev_co_ci_u32_e32 v8, vcc_lo, s9, v8, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, s9, v11 ; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, -1, s0 @@ -601,7 +583,7 @@ ; GFX10-NEXT: v_cndmask_b32_e64 v13, 0, -1, s0 ; GFX10-NEXT: v_cmp_le_u32_e64 s0, s9, v11 ; GFX10-NEXT: v_cndmask_b32_e64 v14, 0, -1, s0 -; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s9, v2 +; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s9, v1 ; GFX10-NEXT: v_cndmask_b32_e64 v9, v12, v9, s0 ; GFX10-NEXT: v_cndmask_b32_e32 v12, v14, v13, vcc_lo ; GFX10-NEXT: v_sub_co_u32 v13, vcc_lo, v10, s8 @@ -610,23 +592,23 @@ ; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v12 ; GFX10-NEXT: v_cmp_ne_u32_e64 s1, 0, v9 ; GFX10-NEXT: s_xor_b64 s[8:9], s[2:3], s[12:13] -; GFX10-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e64 v6, v10, v13, s0 ; GFX10-NEXT: v_cndmask_b32_e64 v7, v11, v8, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v3, s1 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v4, s1 -; GFX10-NEXT: v_cndmask_b32_e64 v3, v5, v6, s1 -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v7, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v4, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v5, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v6, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v7, s1 ; GFX10-NEXT: v_mov_b32_e32 v4, 0 -; GFX10-NEXT: v_xor_b32_e32 v0, s8, v0 -; GFX10-NEXT: v_xor_b32_e32 v1, s9, v1 -; GFX10-NEXT: v_xor_b32_e32 v3, s2, v3 -; GFX10-NEXT: v_xor_b32_e32 v5, s2, v2 -; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, v0, s8 -; GFX10-NEXT: v_subrev_co_ci_u32_e32 v1, vcc_lo, s9, v1, vcc_lo -; GFX10-NEXT: v_sub_co_u32 v2, vcc_lo, v3, s2 -; GFX10-NEXT: v_subrev_co_ci_u32_e32 v3, vcc_lo, s2, v5, vcc_lo +; GFX10-NEXT: v_xor_b32_e32 v2, s8, v2 +; GFX10-NEXT: v_xor_b32_e32 v3, s9, v3 +; GFX10-NEXT: v_xor_b32_e32 v5, s2, v0 +; GFX10-NEXT: v_xor_b32_e32 v6, s2, v1 +; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, v2, s8 +; GFX10-NEXT: v_subrev_co_ci_u32_e32 v1, vcc_lo, s9, v3, vcc_lo +; GFX10-NEXT: v_sub_co_u32 v2, vcc_lo, v5, s2 +; GFX10-NEXT: v_subrev_co_ci_u32_e32 v3, vcc_lo, s2, v6, vcc_lo ; GFX10-NEXT: global_store_dwordx2 v4, v[0:1], s[4:5] ; GFX10-NEXT: global_store_dwordx2 v4, v[2:3], s[6:7] ; GFX10-NEXT: s_endpgm @@ -1306,285 +1288,268 @@ ; GFX8-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x10 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_ashr_i32 s2, s9, 31 -; GFX8-NEXT: s_ashr_i32 s6, s13, 31 +; GFX8-NEXT: s_ashr_i32 s16, s13, 31 ; GFX8-NEXT: s_add_u32 s0, s8, s2 ; GFX8-NEXT: s_addc_u32 s1, s9, s2 -; GFX8-NEXT: s_add_u32 s8, s12, s6 -; GFX8-NEXT: s_mov_b32 s7, s6 -; GFX8-NEXT: s_addc_u32 s9, s13, s6 -; GFX8-NEXT: s_xor_b64 s[8:9], s[8:9], s[6:7] -; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s9 -; GFX8-NEXT: v_cvt_f32_u32_e32 v1, s8 +; GFX8-NEXT: s_add_u32 s6, s12, s16 +; GFX8-NEXT: s_mov_b32 s17, s16 +; GFX8-NEXT: s_addc_u32 s7, s13, s16 +; GFX8-NEXT: s_xor_b64 s[6:7], s[6:7], s[16:17] +; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s7 +; GFX8-NEXT: v_cvt_f32_u32_e32 v1, s6 ; GFX8-NEXT: s_mov_b32 s3, s2 -; GFX8-NEXT: s_xor_b64 s[12:13], s[0:1], s[2:3] +; GFX8-NEXT: s_xor_b64 s[8:9], s[0:1], s[2:3] ; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 ; GFX8-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX8-NEXT: s_sub_u32 s0, 0, s8 -; GFX8-NEXT: s_subb_u32 s1, 0, s9 +; GFX8-NEXT: s_sub_u32 s12, 0, s6 +; GFX8-NEXT: s_subb_u32 s13, 0, s7 +; GFX8-NEXT: s_xor_b64 s[16:17], s[2:3], s[16:17] ; GFX8-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX8-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 -; GFX8-NEXT: v_trunc_f32_e32 v1, v1 -; GFX8-NEXT: v_mul_f32_e32 v2, 0xcf800000, v1 -; GFX8-NEXT: v_add_f32_e32 v0, v2, v0 -; GFX8-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX8-NEXT: v_mul_lo_u32 v2, s0, v1 -; GFX8-NEXT: v_mul_lo_u32 v3, s1, v0 -; GFX8-NEXT: v_mul_hi_u32 v5, s0, v0 -; GFX8-NEXT: v_mul_lo_u32 v4, s0, v0 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 +; GFX8-NEXT: v_trunc_f32_e32 v2, v1 +; GFX8-NEXT: v_mul_f32_e32 v1, 0xcf800000, v2 +; GFX8-NEXT: v_add_f32_e32 v0, v1, v0 +; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v0 +; GFX8-NEXT: v_cvt_u32_f32_e32 v4, v2 +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s12, v3, 0 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s12, v4, v[1:2] +; GFX8-NEXT: v_mul_hi_u32 v5, v3, v0 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s13, v3, v[1:2] +; GFX8-NEXT: v_mul_lo_u32 v2, v4, v0 +; GFX8-NEXT: v_mul_hi_u32 v0, v4, v0 +; GFX8-NEXT: v_mul_lo_u32 v6, v3, v1 +; GFX8-NEXT: v_mul_lo_u32 v7, v4, v1 +; GFX8-NEXT: v_mul_hi_u32 v8, v3, v1 +; GFX8-NEXT: v_mul_hi_u32 v1, v4, v1 +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6 +; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v7, v0 +; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 -; GFX8-NEXT: v_mul_lo_u32 v3, v1, v4 -; GFX8-NEXT: v_mul_lo_u32 v5, v0, v2 -; GFX8-NEXT: v_mul_hi_u32 v6, v0, v4 -; GFX8-NEXT: v_mul_hi_u32 v4, v1, v4 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v5 +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v6, v2 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v8 ; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v6 -; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v6, v1, v2 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v5, v3 -; GFX8-NEXT: v_mul_hi_u32 v5, v0, v2 -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v6, v4 +; GFX8-NEXT: v_add_u32_e32 v5, vcc, v7, v5 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 +; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 +; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v0 +; GFX8-NEXT: v_addc_u32_e32 v4, vcc, v4, v1, vcc +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s12, v3, 0 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s12, v4, v[1:2] +; GFX8-NEXT: v_mul_hi_u32 v6, v3, v0 +; GFX8-NEXT: s_ashr_i32 s12, s15, 31 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s13, v3, v[1:2] +; GFX8-NEXT: v_mul_lo_u32 v2, v4, v0 +; GFX8-NEXT: v_mul_hi_u32 v0, v4, v0 +; GFX8-NEXT: v_mul_lo_u32 v5, v3, v1 +; GFX8-NEXT: s_mov_b32 s13, s12 +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 +; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6 +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX8-NEXT: v_mul_lo_u32 v6, v4, v1 +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 +; GFX8-NEXT: v_mul_hi_u32 v5, v3, v1 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v6, v0 ; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v5 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v5 ; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX8-NEXT: v_add_u32_e32 v5, vcc, v6, v5 -; GFX8-NEXT: v_mul_hi_u32 v2, v1, v2 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v4, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v5, v4 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v4 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v3 -; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc -; GFX8-NEXT: v_mul_lo_u32 v2, s1, v0 -; GFX8-NEXT: v_mul_lo_u32 v3, s0, v1 -; GFX8-NEXT: v_mul_hi_u32 v5, s0, v0 -; GFX8-NEXT: v_mul_lo_u32 v4, s0, v0 -; GFX8-NEXT: v_mov_b32_e32 v6, s9 +; GFX8-NEXT: v_mul_hi_u32 v1, v4, v1 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 +; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v3, v0 +; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc +; GFX8-NEXT: v_mul_lo_u32 v2, s9, v0 +; GFX8-NEXT: v_mul_lo_u32 v3, s8, v1 +; GFX8-NEXT: v_mul_hi_u32 v4, s8, v0 +; GFX8-NEXT: v_mul_hi_u32 v0, s9, v0 +; GFX8-NEXT: v_mul_hi_u32 v5, s9, v1 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 -; GFX8-NEXT: v_mul_lo_u32 v3, v1, v4 -; GFX8-NEXT: v_mul_lo_u32 v5, v0, v2 -; GFX8-NEXT: v_mul_hi_u32 v7, v0, v4 -; GFX8-NEXT: v_mul_hi_u32 v4, v1, v4 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v5 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v7 ; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v7, v1, v2 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v5, v3 -; GFX8-NEXT: v_mul_hi_u32 v5, v0, v2 -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v7, v4 -; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v5 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v5, vcc, v7, v5 -; GFX8-NEXT: v_mul_hi_u32 v2, v1, v2 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v4, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v5, v4 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v4 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v3 -; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc -; GFX8-NEXT: v_mul_lo_u32 v2, s13, v0 -; GFX8-NEXT: v_mul_lo_u32 v3, s12, v1 -; GFX8-NEXT: v_mul_hi_u32 v5, s12, v0 -; GFX8-NEXT: v_mul_hi_u32 v0, s13, v0 -; GFX8-NEXT: v_mov_b32_e32 v4, s13 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 ; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v5, s13, v1 +; GFX8-NEXT: v_mul_lo_u32 v4, s9, v1 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 -; GFX8-NEXT: v_mul_hi_u32 v3, s12, v1 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v5, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GFX8-NEXT: v_mul_hi_u32 v3, s8, v1 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v4, v0 +; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v3 ; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v5, v3 -; GFX8-NEXT: v_mul_hi_u32 v1, s13, v1 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 +; GFX8-NEXT: v_add_u32_e32 v3, vcc, v4, v3 +; GFX8-NEXT: v_add_u32_e32 v4, vcc, v0, v2 +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s6, v4, 0 ; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 -; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 -; GFX8-NEXT: v_mul_lo_u32 v2, s9, v0 -; GFX8-NEXT: v_mul_lo_u32 v3, s8, v1 -; GFX8-NEXT: v_mul_hi_u32 v7, s8, v0 -; GFX8-NEXT: v_mul_lo_u32 v5, s8, v0 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v7 -; GFX8-NEXT: v_sub_u32_e32 v3, vcc, s12, v5 -; GFX8-NEXT: v_subb_u32_e64 v4, s[0:1], v4, v2, vcc -; GFX8-NEXT: v_sub_u32_e64 v2, s[0:1], s13, v2 -; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v4 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[0:1] -; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[0:1] -; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v4 -; GFX8-NEXT: v_subb_u32_e32 v2, vcc, v2, v6, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v7, s[0:1] -; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, s8, v3 -; GFX8-NEXT: v_subbrev_u32_e64 v8, s[0:1], 0, v2, vcc -; GFX8-NEXT: v_add_u32_e64 v9, s[0:1], 1, v0 -; GFX8-NEXT: v_addc_u32_e64 v10, s[0:1], 0, v1, s[0:1] -; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v8 +; GFX8-NEXT: v_add_u32_e32 v3, vcc, v5, v2 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s6, v3, v[1:2] +; GFX8-NEXT: v_mov_b32_e32 v6, s9 +; GFX8-NEXT: v_sub_u32_e32 v7, vcc, s8, v0 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s7, v4, v[1:2] +; GFX8-NEXT: v_mov_b32_e32 v5, s7 +; GFX8-NEXT: s_ashr_i32 s8, s11, 31 +; GFX8-NEXT: v_subb_u32_e64 v6, s[0:1], v6, v1, vcc +; GFX8-NEXT: v_sub_u32_e64 v0, s[0:1], s9, v1 +; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s7, v6 +; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[0:1] +; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s6, v7 +; GFX8-NEXT: v_subb_u32_e32 v0, vcc, v0, v5, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[0:1] +; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s7, v6 +; GFX8-NEXT: v_subrev_u32_e32 v8, vcc, s6, v7 +; GFX8-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[0:1] +; GFX8-NEXT: v_subbrev_u32_e64 v9, s[0:1], 0, v0, vcc +; GFX8-NEXT: v_add_u32_e64 v1, s[0:1], 1, v4 +; GFX8-NEXT: v_addc_u32_e64 v10, s[0:1], 0, v3, s[0:1] +; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s7, v9 ; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[0:1] -; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v7 -; GFX8-NEXT: v_subb_u32_e32 v2, vcc, v2, v6, vcc +; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s6, v8 ; GFX8-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[0:1] -; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v8 -; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, s8, v7 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s7, v9 ; GFX8-NEXT: v_cndmask_b32_e64 v11, v11, v12, s[0:1] -; GFX8-NEXT: v_add_u32_e64 v12, s[0:1], 1, v9 -; GFX8-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v2, vcc +; GFX8-NEXT: v_add_u32_e64 v12, s[0:1], 1, v1 ; GFX8-NEXT: v_addc_u32_e64 v13, s[0:1], 0, v10, s[0:1] +; GFX8-NEXT: s_add_u32 s0, s10, s8 +; GFX8-NEXT: s_addc_u32 s1, s11, s8 +; GFX8-NEXT: s_add_u32 s10, s14, s12 +; GFX8-NEXT: s_addc_u32 s11, s15, s12 +; GFX8-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] +; GFX8-NEXT: v_cvt_f32_u32_e32 v14, s11 +; GFX8-NEXT: v_subb_u32_e32 v0, vcc, v0, v5, vcc +; GFX8-NEXT: v_cvt_f32_u32_e32 v5, s10 +; GFX8-NEXT: v_subrev_u32_e32 v15, vcc, s6, v8 +; GFX8-NEXT: v_subbrev_u32_e32 v16, vcc, 0, v0, vcc +; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f800000, v14 +; GFX8-NEXT: v_add_f32_e32 v0, v0, v5 +; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 -; GFX8-NEXT: v_cndmask_b32_e32 v9, v9, v12, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v5, v1, v12, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v10, v10, v13, vcc -; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 -; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v11 -; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v5, v7, v6, s[0:1] -; GFX8-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[0:1] -; GFX8-NEXT: s_xor_b64 s[0:1], s[2:3], s[6:7] -; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0 -; GFX8-NEXT: s_ashr_i32 s6, s11, 31 -; GFX8-NEXT: s_ashr_i32 s8, s15, 31 -; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v10, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc -; GFX8-NEXT: v_subrev_u32_e32 v0, vcc, s0, v0 -; GFX8-NEXT: s_add_u32 s0, s10, s6 -; GFX8-NEXT: v_xor_b32_e32 v1, s1, v1 -; GFX8-NEXT: v_mov_b32_e32 v4, s1 -; GFX8-NEXT: s_addc_u32 s1, s11, s6 -; GFX8-NEXT: s_add_u32 s10, s14, s8 +; GFX8-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 +; GFX8-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 +; GFX8-NEXT: v_trunc_f32_e32 v12, v1 +; GFX8-NEXT: v_mul_f32_e32 v1, 0xcf800000, v12 +; GFX8-NEXT: v_add_f32_e32 v0, v1, v0 +; GFX8-NEXT: v_cvt_u32_f32_e32 v13, v0 ; GFX8-NEXT: s_mov_b32 s9, s8 -; GFX8-NEXT: s_addc_u32 s11, s15, s8 -; GFX8-NEXT: s_xor_b64 s[10:11], s[10:11], s[8:9] -; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc -; GFX8-NEXT: v_cvt_f32_u32_e32 v4, s11 -; GFX8-NEXT: v_cvt_f32_u32_e32 v5, s10 -; GFX8-NEXT: s_mov_b32 s7, s6 -; GFX8-NEXT: s_xor_b64 s[12:13], s[0:1], s[6:7] -; GFX8-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4 -; GFX8-NEXT: v_add_f32_e32 v4, v4, v5 -; GFX8-NEXT: v_rcp_iflag_f32_e32 v4, v4 -; GFX8-NEXT: s_sub_u32 s0, 0, s10 -; GFX8-NEXT: s_subb_u32 s1, 0, s11 -; GFX8-NEXT: v_xor_b32_e32 v3, s2, v3 -; GFX8-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 -; GFX8-NEXT: v_mul_f32_e32 v6, 0x2f800000, v4 -; GFX8-NEXT: v_trunc_f32_e32 v6, v6 -; GFX8-NEXT: v_mul_f32_e32 v7, 0xcf800000, v6 -; GFX8-NEXT: v_add_f32_e32 v4, v7, v4 -; GFX8-NEXT: v_cvt_u32_f32_e32 v7, v4 -; GFX8-NEXT: v_cvt_u32_f32_e32 v6, v6 -; GFX8-NEXT: v_xor_b32_e32 v2, s2, v2 -; GFX8-NEXT: v_mov_b32_e32 v5, s2 -; GFX8-NEXT: v_mul_lo_u32 v4, s1, v7 -; GFX8-NEXT: v_mul_lo_u32 v8, s0, v6 -; GFX8-NEXT: v_mul_hi_u32 v10, s0, v7 -; GFX8-NEXT: v_mul_lo_u32 v9, s0, v7 -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v8 -; GFX8-NEXT: v_add_u32_e32 v8, vcc, v4, v10 -; GFX8-NEXT: v_mul_lo_u32 v10, v6, v9 -; GFX8-NEXT: v_mul_lo_u32 v11, v7, v8 -; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, s2, v3 -; GFX8-NEXT: v_subb_u32_e32 v5, vcc, v2, v5, vcc -; GFX8-NEXT: v_mul_hi_u32 v2, v7, v9 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v10, v11 -; GFX8-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GFX8-NEXT: s_xor_b64 s[6:7], s[0:1], s[8:9] +; GFX8-NEXT: s_sub_u32 s3, 0, s10 +; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s3, v13, 0 +; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc +; GFX8-NEXT: v_cvt_u32_f32_e32 v5, v12 +; GFX8-NEXT: s_subb_u32 s18, 0, s11 +; GFX8-NEXT: v_cndmask_b32_e32 v10, v3, v10, vcc +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v5, v[1:2] +; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v11 +; GFX8-NEXT: v_cndmask_b32_e64 v3, v8, v15, s[0:1] +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[14:15], s18, v13, v[1:2] +; GFX8-NEXT: v_cndmask_b32_e64 v2, v9, v16, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e32 v7, v7, v3, vcc +; GFX8-NEXT: v_mul_lo_u32 v3, v5, v0 +; GFX8-NEXT: v_mul_lo_u32 v8, v13, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v6, v6, v2, vcc +; GFX8-NEXT: v_mul_hi_u32 v2, v13, v0 +; GFX8-NEXT: v_mul_hi_u32 v0, v5, v0 +; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v8 +; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 ; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v3, v6, v8 -; GFX8-NEXT: v_mul_hi_u32 v9, v6, v9 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v10, v2 -; GFX8-NEXT: v_mul_hi_u32 v10, v7, v8 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v9 -; GFX8-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v10 -; GFX8-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v9, vcc, v9, v10 -; GFX8-NEXT: v_mul_hi_u32 v8, v6, v8 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 +; GFX8-NEXT: v_mul_lo_u32 v3, v5, v1 +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v8, v2 +; GFX8-NEXT: v_mul_hi_u32 v8, v13, v1 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v3, v0 ; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v9, v3 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v8, v3 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v7, v2 -; GFX8-NEXT: v_addc_u32_e32 v3, vcc, v6, v3, vcc -; GFX8-NEXT: v_mul_lo_u32 v6, s1, v2 -; GFX8-NEXT: v_mul_lo_u32 v7, s0, v3 -; GFX8-NEXT: v_mul_hi_u32 v9, s0, v2 -; GFX8-NEXT: v_mul_lo_u32 v8, s0, v2 -; GFX8-NEXT: v_mov_b32_e32 v10, s11 -; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v7 -; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v9 -; GFX8-NEXT: v_mul_lo_u32 v7, v3, v8 -; GFX8-NEXT: v_mul_lo_u32 v9, v2, v6 -; GFX8-NEXT: v_mul_hi_u32 v11, v2, v8 -; GFX8-NEXT: v_mul_hi_u32 v8, v3, v8 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v8 +; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v8 +; GFX8-NEXT: v_mul_hi_u32 v1, v5, v1 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 +; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 +; GFX8-NEXT: v_add_u32_e32 v8, vcc, v13, v0 +; GFX8-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s3, v8, 0 +; GFX8-NEXT: v_addc_u32_e32 v5, vcc, v5, v1, vcc +; GFX8-NEXT: v_xor_b32_e32 v1, s16, v4 +; GFX8-NEXT: v_mov_b32_e32 v0, v3 +; GFX8-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s3, v5, v[0:1] +; GFX8-NEXT: v_xor_b32_e32 v9, s17, v10 +; GFX8-NEXT: v_mov_b32_e32 v10, s17 +; GFX8-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s18, v8, v[3:4] +; GFX8-NEXT: v_subrev_u32_e32 v0, vcc, s16, v1 +; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v9, v10, vcc +; GFX8-NEXT: v_xor_b32_e32 v4, s2, v7 +; GFX8-NEXT: v_mul_lo_u32 v7, v5, v2 +; GFX8-NEXT: v_mul_lo_u32 v9, v8, v3 +; GFX8-NEXT: v_mul_hi_u32 v11, v8, v2 +; GFX8-NEXT: v_mul_hi_u32 v2, v5, v2 +; GFX8-NEXT: v_xor_b32_e32 v6, s2, v6 ; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v9 ; GFX8-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc ; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v11 ; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v11, v3, v6 +; GFX8-NEXT: v_mul_lo_u32 v11, v5, v3 ; GFX8-NEXT: v_add_u32_e32 v7, vcc, v9, v7 -; GFX8-NEXT: v_mul_hi_u32 v9, v2, v6 -; GFX8-NEXT: v_add_u32_e32 v8, vcc, v11, v8 +; GFX8-NEXT: v_mul_hi_u32 v9, v8, v3 +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v11, v2 ; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v8, vcc, v8, v9 +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v9 ; GFX8-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc ; GFX8-NEXT: v_add_u32_e32 v9, vcc, v11, v9 -; GFX8-NEXT: v_mul_hi_u32 v6, v3, v6 -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v8, v7 -; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v8, vcc, v9, v8 -; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v8 +; GFX8-NEXT: v_mul_hi_u32 v3, v5, v3 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v7 -; GFX8-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc -; GFX8-NEXT: v_mul_lo_u32 v6, s13, v2 -; GFX8-NEXT: v_mul_lo_u32 v7, s12, v3 -; GFX8-NEXT: v_mul_hi_u32 v9, s12, v2 -; GFX8-NEXT: v_mul_hi_u32 v2, s13, v2 -; GFX8-NEXT: v_mov_b32_e32 v8, s13 -; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v7 ; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v9 -; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v9, s13, v3 +; GFX8-NEXT: v_add_u32_e32 v7, vcc, v9, v7 +; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v7 +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v8, v2 +; GFX8-NEXT: v_addc_u32_e32 v3, vcc, v5, v3, vcc +; GFX8-NEXT: v_mov_b32_e32 v10, s2 +; GFX8-NEXT: v_mul_lo_u32 v7, s7, v2 +; GFX8-NEXT: v_mul_lo_u32 v8, s6, v3 +; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, s2, v4 +; GFX8-NEXT: v_subb_u32_e32 v5, vcc, v6, v10, vcc +; GFX8-NEXT: v_mul_hi_u32 v6, s6, v2 +; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v8 +; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc ; GFX8-NEXT: v_add_u32_e32 v6, vcc, v7, v6 -; GFX8-NEXT: v_mul_hi_u32 v7, s12, v3 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v9, v2 -; GFX8-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v7 +; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GFX8-NEXT: v_mul_lo_u32 v7, s7, v3 +; GFX8-NEXT: v_mul_hi_u32 v2, s7, v2 +; GFX8-NEXT: v_add_u32_e32 v6, vcc, v8, v6 +; GFX8-NEXT: v_mul_hi_u32 v8, s6, v3 +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v7, v2 ; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v9, v7 -; GFX8-NEXT: v_mul_hi_u32 v3, s13, v3 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6 +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v8 +; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v8 +; GFX8-NEXT: v_add_u32_e32 v8, vcc, v2, v6 +; GFX8-NEXT: v_mul_hi_u32 v9, s7, v3 +; GFX8-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s10, v8, 0 ; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GFX8-NEXT: v_add_u32_e32 v6, vcc, v7, v6 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v6 -; GFX8-NEXT: v_mul_lo_u32 v6, s11, v2 -; GFX8-NEXT: v_mul_lo_u32 v7, s10, v3 -; GFX8-NEXT: v_mul_hi_u32 v11, s10, v2 -; GFX8-NEXT: v_mul_lo_u32 v9, s10, v2 -; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v7 -; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v11 -; GFX8-NEXT: v_sub_u32_e32 v7, vcc, s12, v9 -; GFX8-NEXT: v_subb_u32_e64 v8, s[0:1], v8, v6, vcc -; GFX8-NEXT: v_sub_u32_e64 v6, s[0:1], s13, v6 -; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s11, v8 -; GFX8-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[0:1] -; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s10, v7 +; GFX8-NEXT: v_add_u32_e32 v9, vcc, v9, v6 +; GFX8-NEXT: v_mad_u64_u32 v[6:7], s[0:1], s10, v9, v[3:4] +; GFX8-NEXT: v_mov_b32_e32 v10, s7 +; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s6, v2 +; GFX8-NEXT: v_mad_u64_u32 v[6:7], s[0:1], s11, v8, v[6:7] +; GFX8-NEXT: v_mov_b32_e32 v3, s11 +; GFX8-NEXT: v_subb_u32_e64 v7, s[0:1], v10, v6, vcc +; GFX8-NEXT: v_sub_u32_e64 v6, s[0:1], s7, v6 +; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s11, v7 +; GFX8-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[0:1] +; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s10, v2 ; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[0:1] -; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s11, v8 -; GFX8-NEXT: v_subb_u32_e32 v6, vcc, v6, v10, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v9, v9, v11, s[0:1] -; GFX8-NEXT: v_subrev_u32_e32 v11, vcc, s10, v7 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s11, v7 +; GFX8-NEXT: v_subb_u32_e32 v6, vcc, v6, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v10, v10, v11, s[0:1] +; GFX8-NEXT: v_subrev_u32_e32 v11, vcc, s10, v2 ; GFX8-NEXT: v_subbrev_u32_e64 v12, s[0:1], 0, v6, vcc ; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s11, v12 ; GFX8-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[0:1] @@ -1592,42 +1557,42 @@ ; GFX8-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[0:1] ; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s11, v12 ; GFX8-NEXT: v_cndmask_b32_e64 v13, v13, v14, s[0:1] -; GFX8-NEXT: v_add_u32_e64 v14, s[0:1], 1, v2 -; GFX8-NEXT: v_subb_u32_e32 v6, vcc, v6, v10, vcc -; GFX8-NEXT: v_addc_u32_e64 v15, s[0:1], 0, v3, s[0:1] -; GFX8-NEXT: v_add_u32_e32 v10, vcc, 1, v14 +; GFX8-NEXT: v_add_u32_e64 v14, s[0:1], 1, v8 +; GFX8-NEXT: v_subb_u32_e32 v3, vcc, v6, v3, vcc +; GFX8-NEXT: v_addc_u32_e64 v15, s[0:1], 0, v9, s[0:1] +; GFX8-NEXT: v_add_u32_e32 v6, vcc, 1, v14 ; GFX8-NEXT: v_addc_u32_e32 v16, vcc, 0, v15, vcc ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 ; GFX8-NEXT: v_subrev_u32_e64 v13, s[0:1], s10, v11 -; GFX8-NEXT: v_subbrev_u32_e64 v6, s[0:1], 0, v6, s[0:1] -; GFX8-NEXT: v_cndmask_b32_e32 v10, v14, v10, vcc +; GFX8-NEXT: v_subbrev_u32_e64 v3, s[0:1], 0, v3, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e32 v6, v14, v6, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v14, v15, v16, vcc -; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v9 -; GFX8-NEXT: v_cndmask_b32_e32 v9, v11, v13, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v6, v12, v6, vcc -; GFX8-NEXT: s_load_dwordx4 s[12:15], s[4:5], 0x0 -; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[0:1] -; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v14, s[0:1] -; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v9, s[0:1] +; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v10 ; GFX8-NEXT: v_cndmask_b32_e64 v6, v8, v6, s[0:1] -; GFX8-NEXT: s_xor_b64 s[0:1], s[6:7], s[8:9] -; GFX8-NEXT: v_xor_b32_e32 v2, s0, v2 -; GFX8-NEXT: v_xor_b32_e32 v3, s1, v3 -; GFX8-NEXT: v_mov_b32_e32 v8, s1 +; GFX8-NEXT: v_cndmask_b32_e64 v8, v9, v14, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e32 v9, v11, v13, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc +; GFX8-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX8-NEXT: v_cndmask_b32_e64 v9, v2, v9, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v3, s[0:1] +; GFX8-NEXT: s_xor_b64 s[0:1], s[8:9], s[12:13] +; GFX8-NEXT: v_xor_b32_e32 v2, s0, v6 +; GFX8-NEXT: v_xor_b32_e32 v3, s1, v8 +; GFX8-NEXT: v_mov_b32_e32 v6, s1 ; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s0, v2 -; GFX8-NEXT: v_subb_u32_e32 v3, vcc, v3, v8, vcc -; GFX8-NEXT: v_xor_b32_e32 v7, s6, v7 -; GFX8-NEXT: v_xor_b32_e32 v8, s6, v6 -; GFX8-NEXT: v_mov_b32_e32 v9, s6 -; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, s6, v7 -; GFX8-NEXT: v_subb_u32_e32 v7, vcc, v8, v9, vcc +; GFX8-NEXT: v_subb_u32_e32 v3, vcc, v3, v6, vcc +; GFX8-NEXT: v_xor_b32_e32 v6, s8, v9 +; GFX8-NEXT: v_xor_b32_e32 v7, s8, v7 +; GFX8-NEXT: v_mov_b32_e32 v8, s8 +; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, s8, v6 +; GFX8-NEXT: v_subb_u32_e32 v7, vcc, v7, v8, vcc ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: v_mov_b32_e32 v8, s12 -; GFX8-NEXT: v_mov_b32_e32 v9, s13 +; GFX8-NEXT: v_mov_b32_e32 v9, s5 +; GFX8-NEXT: v_mov_b32_e32 v8, s4 ; GFX8-NEXT: flat_store_dwordx4 v[8:9], v[0:3] ; GFX8-NEXT: s_nop 0 -; GFX8-NEXT: v_mov_b32_e32 v0, s14 -; GFX8-NEXT: v_mov_b32_e32 v1, s15 +; GFX8-NEXT: v_mov_b32_e32 v0, s6 +; GFX8-NEXT: v_mov_b32_e32 v1, s7 ; GFX8-NEXT: flat_store_dwordx4 v[0:1], v[4:7] ; GFX8-NEXT: s_endpgm ; @@ -1636,273 +1601,262 @@ ; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x10 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_ashr_i32 s2, s9, 31 -; GFX9-NEXT: s_ashr_i32 s6, s13, 31 +; GFX9-NEXT: s_ashr_i32 s16, s13, 31 ; GFX9-NEXT: s_add_u32 s0, s8, s2 ; GFX9-NEXT: s_addc_u32 s1, s9, s2 -; GFX9-NEXT: s_add_u32 s8, s12, s6 -; GFX9-NEXT: s_mov_b32 s7, s6 -; GFX9-NEXT: s_addc_u32 s9, s13, s6 -; GFX9-NEXT: s_xor_b64 s[8:9], s[8:9], s[6:7] -; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s9 -; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s8 +; GFX9-NEXT: s_add_u32 s6, s12, s16 +; GFX9-NEXT: s_mov_b32 s17, s16 +; GFX9-NEXT: s_addc_u32 s7, s13, s16 +; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[16:17] +; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s7 +; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s6 ; GFX9-NEXT: s_mov_b32 s3, s2 -; GFX9-NEXT: s_xor_b64 s[12:13], s[0:1], s[2:3] +; GFX9-NEXT: s_xor_b64 s[8:9], s[0:1], s[2:3] ; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 ; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX9-NEXT: s_sub_u32 s0, 0, s8 -; GFX9-NEXT: s_subb_u32 s1, 0, s9 +; GFX9-NEXT: s_sub_u32 s12, 0, s6 +; GFX9-NEXT: s_subb_u32 s13, 0, s7 +; GFX9-NEXT: s_xor_b64 s[16:17], s[2:3], s[16:17] ; GFX9-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX9-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 -; GFX9-NEXT: v_trunc_f32_e32 v1, v1 -; GFX9-NEXT: v_mul_f32_e32 v2, 0xcf800000, v1 -; GFX9-NEXT: v_add_f32_e32 v0, v2, v0 -; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX9-NEXT: v_mul_lo_u32 v2, s0, v1 -; GFX9-NEXT: v_mul_lo_u32 v3, s1, v0 -; GFX9-NEXT: v_mul_hi_u32 v4, s0, v0 -; GFX9-NEXT: v_mul_lo_u32 v5, s0, v0 -; GFX9-NEXT: v_add3_u32 v2, v3, v2, v4 -; GFX9-NEXT: v_mul_lo_u32 v3, v1, v5 -; GFX9-NEXT: v_mul_lo_u32 v6, v0, v2 -; GFX9-NEXT: v_mul_hi_u32 v4, v0, v5 -; GFX9-NEXT: v_mul_hi_u32 v5, v1, v5 -; GFX9-NEXT: v_mul_lo_u32 v7, v1, v2 -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v6 -; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v4 -; GFX9-NEXT: v_mul_hi_u32 v4, v0, v2 -; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v7, v5 -; GFX9-NEXT: v_mul_hi_u32 v2, v1, v2 -; GFX9-NEXT: v_add_u32_e32 v3, v6, v3 +; GFX9-NEXT: v_trunc_f32_e32 v2, v1 +; GFX9-NEXT: v_mul_f32_e32 v1, 0xcf800000, v2 +; GFX9-NEXT: v_add_f32_e32 v0, v1, v0 +; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v0 +; GFX9-NEXT: v_cvt_u32_f32_e32 v4, v2 +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s12, v3, 0 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s12, v4, v[1:2] +; GFX9-NEXT: v_mul_hi_u32 v5, v3, v0 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s13, v3, v[1:2] +; GFX9-NEXT: v_mul_lo_u32 v2, v4, v0 +; GFX9-NEXT: v_mul_hi_u32 v0, v4, v0 +; GFX9-NEXT: v_mul_lo_u32 v6, v3, v1 +; GFX9-NEXT: v_mul_lo_u32 v7, v4, v1 +; GFX9-NEXT: v_mul_hi_u32 v8, v3, v1 +; GFX9-NEXT: v_mul_hi_u32 v1, v4, v1 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6 ; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v5, v4 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v7, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX9-NEXT: v_add_u32_e32 v2, v6, v2 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v8 ; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v4, v3 -; GFX9-NEXT: v_add_u32_e32 v5, v6, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v2, v5, v4, v2 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3 -; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc -; GFX9-NEXT: v_mul_lo_u32 v2, s1, v0 -; GFX9-NEXT: v_mul_lo_u32 v3, s0, v1 -; GFX9-NEXT: v_mul_hi_u32 v4, s0, v0 -; GFX9-NEXT: v_mul_lo_u32 v5, s0, v0 -; GFX9-NEXT: v_mov_b32_e32 v7, s13 -; GFX9-NEXT: v_add3_u32 v2, v2, v3, v4 -; GFX9-NEXT: v_mul_lo_u32 v3, v1, v5 -; GFX9-NEXT: v_mul_lo_u32 v4, v0, v2 -; GFX9-NEXT: v_mul_hi_u32 v6, v0, v5 -; GFX9-NEXT: v_mul_hi_u32 v5, v1, v5 -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v4 -; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v6 -; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX9-NEXT: v_mul_lo_u32 v6, v1, v2 -; GFX9-NEXT: v_add_u32_e32 v3, v4, v3 -; GFX9-NEXT: v_mul_hi_u32 v4, v0, v2 -; GFX9-NEXT: v_mul_hi_u32 v2, v1, v2 -; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v6, v5 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 +; GFX9-NEXT: v_add_u32_e32 v5, v7, v5 +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX9-NEXT: v_add3_u32 v1, v5, v2, v1 +; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v4, v1, vcc +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s12, v3, 0 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s12, v4, v[1:2] +; GFX9-NEXT: v_mul_hi_u32 v6, v3, v0 +; GFX9-NEXT: s_ashr_i32 s12, s15, 31 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s13, v3, v[1:2] +; GFX9-NEXT: v_mul_lo_u32 v2, v4, v0 +; GFX9-NEXT: v_mul_hi_u32 v0, v4, v0 +; GFX9-NEXT: v_mul_lo_u32 v5, v3, v1 +; GFX9-NEXT: s_mov_b32 s13, s12 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 +; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6 +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX9-NEXT: v_mul_lo_u32 v6, v4, v1 +; GFX9-NEXT: v_add_u32_e32 v2, v5, v2 +; GFX9-NEXT: v_mul_hi_u32 v5, v3, v1 +; GFX9-NEXT: v_mul_hi_u32 v1, v4, v1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v6, v0 ; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v5, v4 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v5 ; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v4, v3 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 ; GFX9-NEXT: v_add_u32_e32 v5, v6, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v2, v5, v4, v2 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3 -; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc -; GFX9-NEXT: v_mul_lo_u32 v2, s13, v0 -; GFX9-NEXT: v_mul_lo_u32 v3, s12, v1 -; GFX9-NEXT: v_mul_hi_u32 v4, s12, v0 -; GFX9-NEXT: v_mul_hi_u32 v0, s13, v0 -; GFX9-NEXT: v_mov_b32_e32 v5, s9 +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX9-NEXT: v_add3_u32 v1, v5, v2, v1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v3, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v4, v1, vcc +; GFX9-NEXT: v_mul_lo_u32 v2, s9, v0 +; GFX9-NEXT: v_mul_lo_u32 v3, s8, v1 +; GFX9-NEXT: v_mul_hi_u32 v4, s8, v0 +; GFX9-NEXT: v_mul_hi_u32 v0, s9, v0 +; GFX9-NEXT: v_mul_hi_u32 v6, s9, v1 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3 ; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 ; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_mul_lo_u32 v4, s13, v1 +; GFX9-NEXT: v_mul_lo_u32 v4, s9, v1 ; GFX9-NEXT: v_add_u32_e32 v2, v3, v2 -; GFX9-NEXT: v_mul_hi_u32 v3, s12, v1 -; GFX9-NEXT: v_mul_hi_u32 v1, s13, v1 +; GFX9-NEXT: v_mul_hi_u32 v3, s8, v1 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v4, v0 ; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3 ; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-NEXT: v_add_u32_e32 v3, v4, v3 +; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v0, v2 +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s6, v5, 0 ; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v1, v3, v2, v1 -; GFX9-NEXT: v_mul_lo_u32 v2, s9, v0 -; GFX9-NEXT: v_mul_lo_u32 v3, s8, v1 -; GFX9-NEXT: v_mul_hi_u32 v4, s8, v0 -; GFX9-NEXT: v_mul_lo_u32 v6, s8, v0 -; GFX9-NEXT: v_add3_u32 v2, v2, v3, v4 -; GFX9-NEXT: v_sub_co_u32_e32 v3, vcc, s12, v6 -; GFX9-NEXT: v_subb_co_u32_e64 v4, s[0:1], v7, v2, vcc -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v4 -; GFX9-NEXT: v_sub_u32_e32 v2, s13, v2 -; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v3 -; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v4 -; GFX9-NEXT: v_subb_co_u32_e32 v2, vcc, v2, v5, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[0:1] -; GFX9-NEXT: v_subrev_co_u32_e32 v7, vcc, s8, v3 -; GFX9-NEXT: v_subbrev_co_u32_e64 v8, s[0:1], 0, v2, vcc -; GFX9-NEXT: v_add_co_u32_e64 v9, s[0:1], 1, v0 -; GFX9-NEXT: v_addc_co_u32_e64 v10, s[0:1], 0, v1, s[0:1] -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v8 +; GFX9-NEXT: v_add_u32_e32 v3, v4, v3 +; GFX9-NEXT: v_add3_u32 v3, v3, v2, v6 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s6, v3, v[1:2] +; GFX9-NEXT: v_mov_b32_e32 v6, s9 +; GFX9-NEXT: v_sub_co_u32_e32 v7, vcc, s8, v0 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s7, v5, v[1:2] +; GFX9-NEXT: v_mov_b32_e32 v4, s7 +; GFX9-NEXT: s_ashr_i32 s8, s11, 31 +; GFX9-NEXT: v_subb_co_u32_e64 v6, s[0:1], v6, v1, vcc +; GFX9-NEXT: v_sub_u32_e32 v0, s9, v1 +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s7, v6 +; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[0:1] +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s6, v7 +; GFX9-NEXT: v_subb_co_u32_e32 v0, vcc, v0, v4, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[0:1] +; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s7, v6 +; GFX9-NEXT: v_subrev_co_u32_e32 v8, vcc, s6, v7 +; GFX9-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[0:1] +; GFX9-NEXT: v_subbrev_co_u32_e64 v9, s[0:1], 0, v0, vcc +; GFX9-NEXT: v_add_co_u32_e64 v1, s[0:1], 1, v5 +; GFX9-NEXT: v_addc_co_u32_e64 v10, s[0:1], 0, v3, s[0:1] +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s7, v9 ; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v7 +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s6, v8 ; GFX9-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v8 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s7, v9 ; GFX9-NEXT: v_cndmask_b32_e64 v11, v11, v12, s[0:1] -; GFX9-NEXT: v_add_co_u32_e64 v12, s[0:1], 1, v9 -; GFX9-NEXT: v_subb_co_u32_e32 v2, vcc, v2, v5, vcc +; GFX9-NEXT: v_add_co_u32_e64 v12, s[0:1], 1, v1 ; GFX9-NEXT: v_addc_co_u32_e64 v13, s[0:1], 0, v10, s[0:1] -; GFX9-NEXT: v_subrev_co_u32_e32 v5, vcc, s8, v7 -; GFX9-NEXT: v_subbrev_co_u32_e32 v2, vcc, 0, v2, vcc -; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v11 -; GFX9-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[0:1] -; GFX9-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[0:1] -; GFX9-NEXT: s_xor_b64 s[0:1], s[2:3], s[6:7] -; GFX9-NEXT: s_ashr_i32 s6, s11, 31 -; GFX9-NEXT: s_ashr_i32 s8, s15, 31 -; GFX9-NEXT: s_add_u32 s12, s10, s6 -; GFX9-NEXT: s_addc_u32 s13, s11, s6 -; GFX9-NEXT: s_add_u32 s10, s14, s8 -; GFX9-NEXT: s_mov_b32 s9, s8 -; GFX9-NEXT: s_addc_u32 s11, s15, s8 +; GFX9-NEXT: s_add_u32 s0, s10, s8 +; GFX9-NEXT: s_addc_u32 s1, s11, s8 +; GFX9-NEXT: s_add_u32 s10, s14, s12 +; GFX9-NEXT: s_addc_u32 s11, s15, s12 +; GFX9-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] +; GFX9-NEXT: v_cvt_f32_u32_e32 v14, s11 +; GFX9-NEXT: v_subb_co_u32_e32 v0, vcc, v0, v4, vcc +; GFX9-NEXT: v_cvt_f32_u32_e32 v4, s10 +; GFX9-NEXT: v_subrev_co_u32_e32 v15, vcc, s6, v8 +; GFX9-NEXT: v_subbrev_co_u32_e32 v16, vcc, 0, v0, vcc +; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f800000, v14 +; GFX9-NEXT: v_add_f32_e32 v0, v0, v4 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 -; GFX9-NEXT: s_xor_b64 s[10:11], s[10:11], s[8:9] -; GFX9-NEXT: v_cndmask_b32_e32 v9, v9, v12, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v4, v1, v12, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v10, v10, v13, vcc -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 -; GFX9-NEXT: v_cvt_f32_u32_e32 v6, s11 -; GFX9-NEXT: v_cvt_f32_u32_e32 v7, s10 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; GFX9-NEXT: v_mul_f32_e32 v4, 0x4f800000, v6 -; GFX9-NEXT: v_add_f32_e32 v4, v4, v7 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v4, v4 -; GFX9-NEXT: s_mov_b32 s7, s6 -; GFX9-NEXT: s_xor_b64 s[12:13], s[12:13], s[6:7] +; GFX9-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 +; GFX9-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 +; GFX9-NEXT: v_trunc_f32_e32 v12, v1 +; GFX9-NEXT: v_mul_f32_e32 v1, 0xcf800000, v12 +; GFX9-NEXT: v_add_f32_e32 v0, v1, v0 +; GFX9-NEXT: v_cvt_u32_f32_e32 v13, v0 +; GFX9-NEXT: s_mov_b32 s9, s8 +; GFX9-NEXT: s_xor_b64 s[6:7], s[0:1], s[8:9] ; GFX9-NEXT: s_sub_u32 s3, 0, s10 -; GFX9-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 -; GFX9-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 -; GFX9-NEXT: v_trunc_f32_e32 v5, v5 -; GFX9-NEXT: v_mul_f32_e32 v6, 0xcf800000, v5 -; GFX9-NEXT: v_add_f32_e32 v4, v6, v4 -; GFX9-NEXT: v_cvt_u32_f32_e32 v4, v4 -; GFX9-NEXT: v_cvt_u32_f32_e32 v5, v5 +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s3, v13, 0 +; GFX9-NEXT: v_cvt_u32_f32_e32 v12, v12 +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; GFX9-NEXT: s_subb_u32 s14, 0, s11 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc -; GFX9-NEXT: v_mul_lo_u32 v6, s14, v4 -; GFX9-NEXT: v_mul_lo_u32 v7, s3, v5 -; GFX9-NEXT: v_mul_hi_u32 v8, s3, v4 -; GFX9-NEXT: v_mul_lo_u32 v9, s3, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v10, vcc -; GFX9-NEXT: v_xor_b32_e32 v0, s0, v0 -; GFX9-NEXT: v_add3_u32 v6, v6, v7, v8 -; GFX9-NEXT: v_xor_b32_e32 v1, s1, v1 -; GFX9-NEXT: v_mov_b32_e32 v10, s1 -; GFX9-NEXT: v_mul_lo_u32 v7, v5, v9 -; GFX9-NEXT: v_mul_lo_u32 v8, v4, v6 -; GFX9-NEXT: v_subrev_co_u32_e32 v0, vcc, s0, v0 -; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v10, vcc -; GFX9-NEXT: v_mul_hi_u32 v10, v4, v9 -; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v8 -; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v10 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v12, v[1:2] +; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v10, vcc +; GFX9-NEXT: v_mul_hi_u32 v10, v13, v0 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s14, v13, v[1:2] +; GFX9-NEXT: v_mul_lo_u32 v2, v12, v0 +; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v11 +; GFX9-NEXT: v_mul_lo_u32 v3, v13, v1 +; GFX9-NEXT: v_cndmask_b32_e64 v8, v8, v15, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v9, v9, v16, s[0:1] +; GFX9-NEXT: v_mul_hi_u32 v0, v12, v0 +; GFX9-NEXT: v_add_co_u32_e64 v2, s[0:1], v2, v3 +; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[0:1] +; GFX9-NEXT: v_add_co_u32_e64 v2, s[0:1], v2, v10 +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] +; GFX9-NEXT: v_mul_lo_u32 v10, v12, v1 +; GFX9-NEXT: v_add_u32_e32 v2, v3, v2 +; GFX9-NEXT: v_mul_hi_u32 v3, v13, v1 +; GFX9-NEXT: v_mul_hi_u32 v1, v12, v1 +; GFX9-NEXT: v_add_co_u32_e64 v0, s[0:1], v10, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[0:1] +; GFX9-NEXT: v_add_co_u32_e64 v0, s[0:1], v0, v3 +; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[0:1] +; GFX9-NEXT: v_add_co_u32_e64 v0, s[0:1], v0, v2 +; GFX9-NEXT: v_add_u32_e32 v3, v10, v3 +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] +; GFX9-NEXT: v_add3_u32 v1, v3, v2, v1 +; GFX9-NEXT: v_add_co_u32_e64 v10, s[0:1], v13, v0 +; GFX9-NEXT: v_addc_co_u32_e64 v11, s[0:1], v12, v1, s[0:1] +; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s3, v10, 0 +; GFX9-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc +; GFX9-NEXT: v_xor_b32_e32 v8, s16, v4 +; GFX9-NEXT: v_mov_b32_e32 v0, v3 +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s3, v11, v[0:1] +; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v9, vcc +; GFX9-NEXT: v_xor_b32_e32 v5, s17, v5 +; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s14, v10, v[0:1] +; GFX9-NEXT: v_mov_b32_e32 v9, s17 +; GFX9-NEXT: v_subrev_co_u32_e32 v0, vcc, s16, v8 +; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v5, v9, vcc +; GFX9-NEXT: v_xor_b32_e32 v4, s2, v7 +; GFX9-NEXT: v_mul_lo_u32 v5, v11, v2 +; GFX9-NEXT: v_mul_lo_u32 v7, v10, v3 +; GFX9-NEXT: v_mul_hi_u32 v8, v10, v2 +; GFX9-NEXT: v_mul_hi_u32 v2, v11, v2 +; GFX9-NEXT: v_xor_b32_e32 v6, s2, v6 +; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v7 ; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX9-NEXT: v_mul_lo_u32 v10, v5, v6 -; GFX9-NEXT: v_mul_hi_u32 v9, v5, v9 -; GFX9-NEXT: v_add_u32_e32 v7, v8, v7 -; GFX9-NEXT: v_mul_hi_u32 v8, v4, v6 -; GFX9-NEXT: v_mul_hi_u32 v6, v5, v6 -; GFX9-NEXT: v_add_co_u32_e32 v9, vcc, v10, v9 -; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v9, v8 -; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v8, v7 -; GFX9-NEXT: v_add_u32_e32 v9, v10, v9 -; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v6, v9, v8, v6 -; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v7 -; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, v5, v6, vcc -; GFX9-NEXT: v_mul_lo_u32 v6, s14, v4 -; GFX9-NEXT: v_mul_lo_u32 v7, s3, v5 -; GFX9-NEXT: v_mul_hi_u32 v8, s3, v4 -; GFX9-NEXT: v_mul_lo_u32 v9, s3, v4 -; GFX9-NEXT: v_xor_b32_e32 v3, s2, v3 -; GFX9-NEXT: v_xor_b32_e32 v2, s2, v2 -; GFX9-NEXT: v_add3_u32 v6, v6, v7, v8 -; GFX9-NEXT: v_mul_lo_u32 v7, v5, v9 -; GFX9-NEXT: v_mul_lo_u32 v8, v4, v6 -; GFX9-NEXT: v_mul_hi_u32 v11, v4, v9 -; GFX9-NEXT: v_mul_hi_u32 v9, v5, v9 -; GFX9-NEXT: v_mov_b32_e32 v10, s2 -; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v8 +; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v8 +; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GFX9-NEXT: v_mul_lo_u32 v8, v11, v3 +; GFX9-NEXT: v_add_u32_e32 v5, v7, v5 +; GFX9-NEXT: v_mul_hi_u32 v7, v10, v3 +; GFX9-NEXT: v_mul_hi_u32 v3, v11, v3 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v8, v2 ; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v11 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v7 ; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX9-NEXT: v_mul_lo_u32 v11, v5, v6 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 ; GFX9-NEXT: v_add_u32_e32 v7, v8, v7 -; GFX9-NEXT: v_mul_hi_u32 v8, v4, v6 -; GFX9-NEXT: v_mul_hi_u32 v6, v5, v6 -; GFX9-NEXT: v_add_co_u32_e32 v9, vcc, v11, v9 -; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v9, v8 +; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GFX9-NEXT: v_add3_u32 v3, v7, v5, v3 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v10, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v11, v3, vcc +; GFX9-NEXT: v_mul_lo_u32 v5, s7, v2 +; GFX9-NEXT: v_mul_lo_u32 v7, s6, v3 +; GFX9-NEXT: v_mul_hi_u32 v9, s6, v2 +; GFX9-NEXT: v_mul_hi_u32 v2, s7, v2 +; GFX9-NEXT: v_mul_hi_u32 v12, s7, v3 +; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v7 +; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v9 +; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GFX9-NEXT: v_mul_lo_u32 v9, s7, v3 +; GFX9-NEXT: v_add_u32_e32 v5, v7, v5 +; GFX9-NEXT: v_mul_hi_u32 v7, s6, v3 +; GFX9-NEXT: v_mov_b32_e32 v8, s2 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v9, v2 ; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v8, v7 -; GFX9-NEXT: v_add_u32_e32 v9, v11, v9 -; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v6, v9, v8, v6 -; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v4, v7 -; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, v5, v6, vcc -; GFX9-NEXT: v_mul_lo_u32 v8, s13, v7 -; GFX9-NEXT: v_mul_lo_u32 v9, s12, v6 -; GFX9-NEXT: v_subrev_co_u32_e32 v4, vcc, s2, v3 -; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v2, v10, vcc -; GFX9-NEXT: v_mul_hi_u32 v2, s12, v7 -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v8, v9 -; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v3, v2 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_mul_lo_u32 v3, s13, v6 -; GFX9-NEXT: v_mul_hi_u32 v7, s13, v7 -; GFX9-NEXT: v_add_u32_e32 v2, v8, v2 -; GFX9-NEXT: v_mul_hi_u32 v8, s12, v6 -; GFX9-NEXT: v_mul_hi_u32 v6, s13, v6 -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v7 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v7 ; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v8 -; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v3, v2 -; GFX9-NEXT: v_add_u32_e32 v7, v7, v8 -; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v3, v7, v3, v6 -; GFX9-NEXT: v_mul_lo_u32 v6, s11, v2 -; GFX9-NEXT: v_mul_lo_u32 v7, s10, v3 -; GFX9-NEXT: v_mul_hi_u32 v8, s10, v2 -; GFX9-NEXT: v_mul_lo_u32 v10, s10, v2 -; GFX9-NEXT: v_mov_b32_e32 v11, s13 -; GFX9-NEXT: v_mov_b32_e32 v9, s11 -; GFX9-NEXT: v_add3_u32 v6, v6, v7, v8 -; GFX9-NEXT: v_sub_co_u32_e32 v7, vcc, s12, v10 -; GFX9-NEXT: v_subb_co_u32_e64 v8, s[0:1], v11, v6, vcc -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s11, v8 -; GFX9-NEXT: v_sub_u32_e32 v6, s13, v6 -; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s10, v7 +; GFX9-NEXT: v_add_co_u32_e32 v10, vcc, v2, v5 +; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s10, v10, 0 +; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GFX9-NEXT: v_subrev_co_u32_e32 v4, vcc, s2, v4 +; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v6, v8, vcc +; GFX9-NEXT: v_add_u32_e32 v6, v9, v7 +; GFX9-NEXT: v_add3_u32 v8, v6, v11, v12 +; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[0:1], s10, v8, v[3:4] +; GFX9-NEXT: v_mov_b32_e32 v9, s7 +; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, s6, v2 +; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[0:1], s11, v10, v[6:7] +; GFX9-NEXT: v_mov_b32_e32 v3, s11 +; GFX9-NEXT: v_subb_co_u32_e64 v7, s[0:1], v9, v6, vcc +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s11, v7 +; GFX9-NEXT: v_sub_u32_e32 v6, s7, v6 +; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[0:1] +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s10, v2 ; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s11, v8 -; GFX9-NEXT: v_subb_co_u32_e32 v6, vcc, v6, v9, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v10, v10, v11, s[0:1] -; GFX9-NEXT: v_subrev_co_u32_e32 v11, vcc, s10, v7 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s11, v7 +; GFX9-NEXT: v_subb_co_u32_e32 v6, vcc, v6, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v9, v9, v11, s[0:1] +; GFX9-NEXT: v_subrev_co_u32_e32 v11, vcc, s10, v2 ; GFX9-NEXT: v_subbrev_co_u32_e64 v12, s[0:1], 0, v6, vcc ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s11, v12 ; GFX9-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[0:1] @@ -1910,39 +1864,39 @@ ; GFX9-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[0:1] ; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s11, v12 ; GFX9-NEXT: v_cndmask_b32_e64 v13, v13, v14, s[0:1] -; GFX9-NEXT: v_add_co_u32_e64 v14, s[0:1], 1, v2 -; GFX9-NEXT: v_subb_co_u32_e32 v6, vcc, v6, v9, vcc -; GFX9-NEXT: v_addc_co_u32_e64 v15, s[0:1], 0, v3, s[0:1] -; GFX9-NEXT: v_add_co_u32_e32 v9, vcc, 1, v14 +; GFX9-NEXT: v_add_co_u32_e64 v14, s[0:1], 1, v10 +; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v6, v3, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v15, s[0:1], 0, v8, s[0:1] +; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, 1, v14 ; GFX9-NEXT: v_addc_co_u32_e32 v16, vcc, 0, v15, vcc ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 -; GFX9-NEXT: v_cndmask_b32_e32 v9, v14, v9, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v6, v14, v6, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v14, v15, v16, vcc ; GFX9-NEXT: v_subrev_co_u32_e64 v15, s[0:1], s10, v11 -; GFX9-NEXT: v_subbrev_co_u32_e64 v6, s[0:1], 0, v6, s[0:1] -; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v10 -; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v9, s[0:1] +; GFX9-NEXT: v_subbrev_co_u32_e64 v3, s[0:1], 0, v3, s[0:1] +; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v9 ; GFX9-NEXT: v_cndmask_b32_e32 v9, v11, v15, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v6, v12, v6, vcc -; GFX9-NEXT: s_load_dwordx4 s[12:15], s[4:5], 0x0 -; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v14, s[0:1] -; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v9, s[0:1] -; GFX9-NEXT: v_cndmask_b32_e64 v6, v8, v6, s[0:1] -; GFX9-NEXT: s_xor_b64 s[0:1], s[6:7], s[8:9] -; GFX9-NEXT: v_xor_b32_e32 v2, s0, v2 -; GFX9-NEXT: v_xor_b32_e32 v3, s1, v3 -; GFX9-NEXT: v_mov_b32_e32 v8, s1 +; GFX9-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX9-NEXT: v_cndmask_b32_e64 v6, v10, v6, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v8, v8, v14, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v9, v2, v9, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v3, s[0:1] +; GFX9-NEXT: s_xor_b64 s[0:1], s[8:9], s[12:13] +; GFX9-NEXT: v_xor_b32_e32 v2, s0, v6 +; GFX9-NEXT: v_xor_b32_e32 v3, s1, v8 +; GFX9-NEXT: v_mov_b32_e32 v6, s1 ; GFX9-NEXT: v_subrev_co_u32_e32 v2, vcc, s0, v2 -; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v8, vcc -; GFX9-NEXT: v_xor_b32_e32 v7, s6, v7 +; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v6, vcc +; GFX9-NEXT: v_xor_b32_e32 v6, s8, v9 ; GFX9-NEXT: v_mov_b32_e32 v13, 0 -; GFX9-NEXT: v_xor_b32_e32 v8, s6, v6 -; GFX9-NEXT: v_mov_b32_e32 v9, s6 -; GFX9-NEXT: v_subrev_co_u32_e32 v6, vcc, s6, v7 -; GFX9-NEXT: v_subb_co_u32_e32 v7, vcc, v8, v9, vcc +; GFX9-NEXT: v_xor_b32_e32 v7, s8, v7 +; GFX9-NEXT: v_mov_b32_e32 v8, s8 +; GFX9-NEXT: v_subrev_co_u32_e32 v6, vcc, s8, v6 +; GFX9-NEXT: v_subb_co_u32_e32 v7, vcc, v7, v8, vcc ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_store_dwordx4 v13, v[0:3], s[12:13] -; GFX9-NEXT: global_store_dwordx4 v13, v[4:7], s[14:15] +; GFX9-NEXT: global_store_dwordx4 v13, v[0:3], s[4:5] +; GFX9-NEXT: global_store_dwordx4 v13, v[4:7], s[6:7] ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: sdivrem_v2i64: @@ -1979,276 +1933,270 @@ ; GFX10-NEXT: v_cvt_f32_u32_e32 v2, s10 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX10-NEXT: s_xor_b64 s[14:15], s[6:7], s[12:13] -; GFX10-NEXT: s_sub_u32 s3, 0, s10 ; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 -; GFX10-NEXT: s_subb_u32 s6, 0, s11 ; GFX10-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX10-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v1, v1 ; GFX10-NEXT: v_mul_f32_e32 v2, 0x2f800000, v0 ; GFX10-NEXT: v_trunc_f32_e32 v2, v2 -; GFX10-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 -; GFX10-NEXT: v_mul_f32_e32 v3, 0xcf800000, v2 -; GFX10-NEXT: v_cvt_u32_f32_e32 v2, v2 -; GFX10-NEXT: v_mul_f32_e32 v4, 0x2f800000, v1 -; GFX10-NEXT: v_add_f32_e32 v0, v3, v0 -; GFX10-NEXT: v_mul_lo_u32 v5, s20, v2 -; GFX10-NEXT: v_trunc_f32_e32 v3, v4 -; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX10-NEXT: v_mul_f32_e32 v4, 0xcf800000, v3 -; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v3 -; GFX10-NEXT: v_mul_lo_u32 v6, s21, v0 -; GFX10-NEXT: v_mul_hi_u32 v7, s20, v0 -; GFX10-NEXT: v_add_f32_e32 v1, v4, v1 -; GFX10-NEXT: v_mul_lo_u32 v4, s20, v0 -; GFX10-NEXT: v_mul_lo_u32 v8, s3, v3 -; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX10-NEXT: v_add3_u32 v5, v6, v5, v7 -; GFX10-NEXT: v_mul_lo_u32 v6, v2, v4 -; GFX10-NEXT: v_mul_lo_u32 v7, s6, v1 -; GFX10-NEXT: v_mul_hi_u32 v9, s3, v1 -; GFX10-NEXT: v_mul_lo_u32 v12, v0, v5 -; GFX10-NEXT: v_mul_hi_u32 v11, v0, v4 -; GFX10-NEXT: v_mul_hi_u32 v4, v2, v4 -; GFX10-NEXT: v_mul_lo_u32 v13, v2, v5 -; GFX10-NEXT: v_mul_lo_u32 v10, s3, v1 -; GFX10-NEXT: v_mul_hi_u32 v14, v0, v5 -; GFX10-NEXT: v_mul_hi_u32 v5, v2, v5 -; GFX10-NEXT: v_add3_u32 v7, v7, v8, v9 -; GFX10-NEXT: v_add_co_u32 v6, s7, v6, v12 +; GFX10-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v1 +; GFX10-NEXT: v_mul_f32_e32 v1, 0xcf800000, v2 +; GFX10-NEXT: v_cvt_u32_f32_e32 v5, v2 +; GFX10-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3 +; GFX10-NEXT: v_add_f32_e32 v0, v1, v0 +; GFX10-NEXT: v_mul_lo_u32 v7, s20, v5 +; GFX10-NEXT: v_trunc_f32_e32 v4, v4 +; GFX10-NEXT: v_cvt_u32_f32_e32 v6, v0 +; GFX10-NEXT: v_mul_f32_e32 v2, 0xcf800000, v4 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s3, s20, v6, 0 +; GFX10-NEXT: v_mul_lo_u32 v8, s21, v6 +; GFX10-NEXT: v_add_f32_e32 v2, v2, v3 +; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v4 +; GFX10-NEXT: s_sub_u32 s3, 0, s10 +; GFX10-NEXT: s_subb_u32 s6, 0, s11 +; GFX10-NEXT: v_cvt_u32_f32_e32 v4, v2 +; GFX10-NEXT: v_mul_lo_u32 v9, s3, v3 +; GFX10-NEXT: v_add3_u32 v7, v1, v7, v8 +; GFX10-NEXT: v_mul_lo_u32 v10, v5, v0 +; GFX10-NEXT: v_mul_hi_u32 v11, v6, v0 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s7, s3, v4, 0 +; GFX10-NEXT: v_mul_lo_u32 v8, s6, v4 +; GFX10-NEXT: v_mul_lo_u32 v12, v6, v7 +; GFX10-NEXT: v_mul_hi_u32 v0, v5, v0 +; GFX10-NEXT: v_mul_lo_u32 v13, v5, v7 +; GFX10-NEXT: v_mul_hi_u32 v14, v6, v7 +; GFX10-NEXT: v_mul_hi_u32 v7, v5, v7 +; GFX10-NEXT: v_add3_u32 v2, v2, v9, v8 +; GFX10-NEXT: v_add_co_u32 v10, s7, v10, v12 ; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, 1, s7 -; GFX10-NEXT: v_add_co_u32 v4, s7, v13, v4 -; GFX10-NEXT: v_mul_lo_u32 v8, v3, v10 +; GFX10-NEXT: v_add_co_u32 v0, s7, v13, v0 +; GFX10-NEXT: v_mul_lo_u32 v8, v3, v1 ; GFX10-NEXT: v_cndmask_b32_e64 v13, 0, 1, s7 -; GFX10-NEXT: v_mul_lo_u32 v15, v1, v7 -; GFX10-NEXT: v_add_co_u32 v6, s7, v6, v11 -; GFX10-NEXT: v_mul_hi_u32 v9, v1, v10 -; GFX10-NEXT: v_mul_hi_u32 v10, v3, v10 -; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s7 -; GFX10-NEXT: v_add_co_u32 v4, s7, v4, v14 -; GFX10-NEXT: v_mul_lo_u32 v14, v3, v7 +; GFX10-NEXT: v_mul_lo_u32 v15, v4, v2 +; GFX10-NEXT: v_add_co_u32 v10, s7, v10, v11 +; GFX10-NEXT: v_mul_hi_u32 v9, v4, v1 +; GFX10-NEXT: v_mul_hi_u32 v1, v3, v1 +; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, 1, s7 +; GFX10-NEXT: v_add_co_u32 v0, s7, v0, v14 +; GFX10-NEXT: v_mul_lo_u32 v14, v3, v2 ; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, 1, s7 -; GFX10-NEXT: v_add_nc_u32_e32 v6, v12, v6 +; GFX10-NEXT: v_add_nc_u32_e32 v10, v12, v10 ; GFX10-NEXT: v_add_co_u32 v8, s7, v8, v15 ; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, 1, s7 +; GFX10-NEXT: v_mul_hi_u32 v16, v4, v2 ; GFX10-NEXT: v_add_nc_u32_e32 v11, v13, v11 -; GFX10-NEXT: v_mul_hi_u32 v16, v1, v7 -; GFX10-NEXT: v_add_co_u32 v10, s7, v14, v10 +; GFX10-NEXT: v_add_co_u32 v1, s7, v14, v1 ; GFX10-NEXT: v_cndmask_b32_e64 v13, 0, 1, s7 -; GFX10-NEXT: v_add_co_u32 v4, s7, v4, v6 -; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s7 +; GFX10-NEXT: v_add_co_u32 v0, s7, v0, v10 +; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, 1, s7 ; GFX10-NEXT: v_add_co_u32 v8, s7, v8, v9 ; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, 1, s7 -; GFX10-NEXT: v_add_co_u32 v9, s7, v10, v16 -; GFX10-NEXT: v_add3_u32 v5, v11, v6, v5 -; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4 -; GFX10-NEXT: v_add_nc_u32_e32 v4, v12, v8 -; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, 1, s7 -; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v2, v5, vcc_lo -; GFX10-NEXT: v_mul_hi_u32 v7, v3, v7 -; GFX10-NEXT: v_add_co_u32 v4, s7, v9, v4 -; GFX10-NEXT: v_add_nc_u32_e32 v6, v13, v10 -; GFX10-NEXT: v_mul_lo_u32 v5, s20, v0 -; GFX10-NEXT: v_mul_lo_u32 v9, s21, v0 -; GFX10-NEXT: v_mul_hi_u32 v10, s20, v0 -; GFX10-NEXT: v_mul_lo_u32 v11, s20, v2 +; GFX10-NEXT: v_add_co_u32 v9, s7, v1, v16 +; GFX10-NEXT: v_add3_u32 v7, v11, v10, v7 +; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s7 +; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v6, v0 +; GFX10-NEXT: v_add_nc_u32_e32 v8, v12, v8 +; GFX10-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v5, v7, vcc_lo +; GFX10-NEXT: v_mul_hi_u32 v2, v3, v2 +; GFX10-NEXT: v_add_nc_u32_e32 v10, v13, v1 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s7, s20, v6, 0 +; GFX10-NEXT: v_add_co_u32 v7, s7, v9, v8 +; GFX10-NEXT: v_mul_lo_u32 v9, s21, v6 +; GFX10-NEXT: v_mul_lo_u32 v11, s20, v5 ; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, 1, s7 -; GFX10-NEXT: v_add_co_u32 v1, vcc_lo, v1, v4 -; GFX10-NEXT: v_mul_hi_u32 v4, v2, v5 -; GFX10-NEXT: v_add3_u32 v6, v6, v8, v7 -; GFX10-NEXT: v_mul_lo_u32 v7, v2, v5 -; GFX10-NEXT: v_mul_hi_u32 v8, v0, v5 -; GFX10-NEXT: v_add3_u32 v5, v9, v11, v10 -; GFX10-NEXT: v_mul_lo_u32 v9, s6, v1 -; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v6, vcc_lo -; GFX10-NEXT: v_mul_hi_u32 v10, s3, v1 -; GFX10-NEXT: v_mul_lo_u32 v12, v0, v5 -; GFX10-NEXT: v_mul_lo_u32 v13, v2, v5 +; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v4, v7 +; GFX10-NEXT: v_add3_u32 v2, v10, v8, v2 +; GFX10-NEXT: v_mul_lo_u32 v8, v5, v0 +; GFX10-NEXT: v_add3_u32 v7, v1, v11, v9 +; GFX10-NEXT: v_mul_hi_u32 v10, v6, v0 +; GFX10-NEXT: v_mul_hi_u32 v0, v5, v0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v2, vcc_lo +; GFX10-NEXT: v_mul_lo_u32 v12, v6, v7 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s7, s3, v4, 0 +; GFX10-NEXT: v_mul_lo_u32 v9, s6, v4 ; GFX10-NEXT: v_mul_lo_u32 v11, s3, v3 -; GFX10-NEXT: v_mul_lo_u32 v6, s3, v1 -; GFX10-NEXT: v_mul_hi_u32 v14, v0, v5 -; GFX10-NEXT: v_mul_hi_u32 v5, v2, v5 -; GFX10-NEXT: v_add_co_u32 v7, s3, v7, v12 -; GFX10-NEXT: v_add3_u32 v9, v9, v11, v10 -; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, 1, s3 -; GFX10-NEXT: v_add_co_u32 v4, s3, v13, v4 +; GFX10-NEXT: v_mul_lo_u32 v13, v5, v7 +; GFX10-NEXT: v_mul_hi_u32 v14, v6, v7 +; GFX10-NEXT: v_mul_hi_u32 v7, v5, v7 +; GFX10-NEXT: v_add_co_u32 v8, s3, v8, v12 +; GFX10-NEXT: v_mul_lo_u32 v15, v3, v1 +; GFX10-NEXT: v_mul_hi_u32 v16, v4, v1 +; GFX10-NEXT: v_add3_u32 v2, v2, v11, v9 +; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, 1, s3 +; GFX10-NEXT: v_add_co_u32 v0, s3, v13, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, 1, s3 -; GFX10-NEXT: v_add_co_u32 v7, s3, v7, v8 -; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s3 -; GFX10-NEXT: v_add_co_u32 v4, s3, v4, v14 +; GFX10-NEXT: v_add_co_u32 v8, s3, v8, v10 +; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, 1, s3 +; GFX10-NEXT: v_add_co_u32 v0, s3, v0, v14 +; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, 1, s3 +; GFX10-NEXT: v_mul_lo_u32 v12, v4, v2 +; GFX10-NEXT: v_add_nc_u32_e32 v8, v9, v8 +; GFX10-NEXT: v_mul_hi_u32 v1, v3, v1 +; GFX10-NEXT: v_mul_lo_u32 v13, v3, v2 +; GFX10-NEXT: v_add_nc_u32_e32 v10, v11, v10 +; GFX10-NEXT: v_mul_hi_u32 v9, v4, v2 +; GFX10-NEXT: v_add_co_u32 v0, s3, v0, v8 ; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, 1, s3 -; GFX10-NEXT: v_mul_lo_u32 v15, v3, v6 -; GFX10-NEXT: v_add_nc_u32_e32 v7, v10, v7 -; GFX10-NEXT: v_mul_lo_u32 v12, v1, v9 -; GFX10-NEXT: v_mul_hi_u32 v16, v1, v6 -; GFX10-NEXT: v_add_nc_u32_e32 v8, v11, v8 -; GFX10-NEXT: v_mul_hi_u32 v6, v3, v6 -; GFX10-NEXT: v_add_co_u32 v4, s3, v4, v7 -; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s3 -; GFX10-NEXT: v_mul_lo_u32 v13, v3, v9 -; GFX10-NEXT: v_mul_hi_u32 v10, v1, v9 ; GFX10-NEXT: v_add_co_u32 v11, s3, v15, v12 -; GFX10-NEXT: v_add3_u32 v5, v8, v7, v5 -; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v6, v0 +; GFX10-NEXT: v_add3_u32 v7, v10, v8, v7 ; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, 1, s3 -; GFX10-NEXT: v_add_co_u32 v6, s3, v13, v6 -; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v2, v5, vcc_lo +; GFX10-NEXT: v_add_co_u32 v1, s3, v13, v1 ; GFX10-NEXT: v_cndmask_b32_e64 v13, 0, 1, s3 -; GFX10-NEXT: v_add_co_u32 v7, s3, v11, v16 -; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s3 -; GFX10-NEXT: v_add_co_u32 v4, s3, v6, v10 -; GFX10-NEXT: v_mul_lo_u32 v6, s1, v0 -; GFX10-NEXT: v_mul_lo_u32 v8, s0, v2 +; GFX10-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v5, v7, vcc_lo +; GFX10-NEXT: v_add_co_u32 v8, s3, v11, v16 +; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, 1, s3 +; GFX10-NEXT: v_add_co_u32 v1, s3, v1, v9 +; GFX10-NEXT: v_mul_lo_u32 v7, s1, v0 +; GFX10-NEXT: v_mul_lo_u32 v9, s0, v5 ; GFX10-NEXT: v_mul_hi_u32 v10, s1, v0 ; GFX10-NEXT: v_mul_hi_u32 v0, s0, v0 -; GFX10-NEXT: v_mul_lo_u32 v11, s1, v2 -; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s3 -; GFX10-NEXT: v_add_nc_u32_e32 v7, v12, v7 -; GFX10-NEXT: v_mul_hi_u32 v12, s0, v2 -; GFX10-NEXT: v_mul_hi_u32 v9, v3, v9 -; GFX10-NEXT: v_add_co_u32 v6, s3, v6, v8 -; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, 1, s3 +; GFX10-NEXT: v_mul_lo_u32 v11, s1, v5 +; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s3 +; GFX10-NEXT: v_add_nc_u32_e32 v8, v12, v8 +; GFX10-NEXT: v_mul_hi_u32 v12, s0, v5 +; GFX10-NEXT: v_mul_hi_u32 v5, s1, v5 +; GFX10-NEXT: v_add_co_u32 v7, s3, v7, v9 +; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, 1, s3 ; GFX10-NEXT: v_add_co_u32 v10, s3, v11, v10 -; GFX10-NEXT: v_add_co_u32 v0, s6, v6, v0 +; GFX10-NEXT: v_add_co_u32 v0, s6, v7, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s6 -; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s3 +; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s3 ; GFX10-NEXT: v_add_co_u32 v10, s3, v10, v12 ; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, 1, s3 -; GFX10-NEXT: v_add_nc_u32_e32 v0, v8, v0 -; GFX10-NEXT: v_add_co_u32 v4, s3, v4, v7 -; GFX10-NEXT: v_mul_hi_u32 v2, s1, v2 -; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s3 -; GFX10-NEXT: v_add_co_u32 v0, s3, v10, v0 -; GFX10-NEXT: v_add_nc_u32_e32 v5, v13, v5 -; GFX10-NEXT: v_add_nc_u32_e32 v6, v6, v11 -; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, 1, s3 -; GFX10-NEXT: v_add_co_u32 v1, vcc_lo, v1, v4 -; GFX10-NEXT: v_add3_u32 v5, v5, v7, v9 -; GFX10-NEXT: v_mul_lo_u32 v4, s9, v0 -; GFX10-NEXT: v_add3_u32 v2, v6, v8, v2 -; GFX10-NEXT: v_mul_lo_u32 v7, s15, v1 -; GFX10-NEXT: v_mul_lo_u32 v8, s8, v0 -; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v5, vcc_lo -; GFX10-NEXT: v_mul_hi_u32 v5, s8, v0 -; GFX10-NEXT: v_mul_lo_u32 v6, s8, v2 -; GFX10-NEXT: v_mul_hi_u32 v9, s14, v1 -; GFX10-NEXT: v_mul_lo_u32 v11, s14, v3 -; GFX10-NEXT: v_mul_hi_u32 v1, s15, v1 -; GFX10-NEXT: v_mul_lo_u32 v12, s15, v3 -; GFX10-NEXT: v_mul_hi_u32 v13, s14, v3 -; GFX10-NEXT: v_mul_hi_u32 v3, s15, v3 +; GFX10-NEXT: v_add_nc_u32_e32 v0, v9, v0 +; GFX10-NEXT: v_add_co_u32 v8, s3, v1, v8 +; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s3 +; GFX10-NEXT: v_add_nc_u32_e32 v7, v7, v11 +; GFX10-NEXT: v_add_co_u32 v9, s3, v10, v0 +; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s3 +; GFX10-NEXT: v_mul_hi_u32 v2, v3, v2 +; GFX10-NEXT: v_add_nc_u32_e32 v6, v13, v6 +; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v4, v8 +; GFX10-NEXT: v_add3_u32 v5, v7, v0, v5 ; GFX10-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 -; GFX10-NEXT: v_add3_u32 v4, v4, v6, v5 -; GFX10-NEXT: v_sub_co_u32 v5, vcc_lo, s0, v8 -; GFX10-NEXT: v_add_co_u32 v6, s0, v7, v11 -; GFX10-NEXT: v_sub_nc_u32_e32 v8, s1, v4 -; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s0 -; GFX10-NEXT: v_sub_co_ci_u32_e64 v4, s0, s1, v4, vcc_lo -; GFX10-NEXT: v_cmp_le_u32_e64 s0, s8, v5 -; GFX10-NEXT: v_subrev_co_ci_u32_e32 v8, vcc_lo, s9, v8, vcc_lo -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s9, v4 -; GFX10-NEXT: v_add_co_u32 v6, s1, v6, v9 -; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, -1, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s1 -; GFX10-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc_lo -; GFX10-NEXT: v_sub_co_u32 v15, vcc_lo, v5, s8 -; GFX10-NEXT: v_subrev_co_ci_u32_e64 v16, s0, 0, v8, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s9, v4 -; GFX10-NEXT: v_add_nc_u32_e32 v6, v7, v6 -; GFX10-NEXT: v_subrev_co_ci_u32_e32 v8, vcc_lo, s9, v8, vcc_lo -; GFX10-NEXT: v_mov_b32_e32 v10, 0 -; GFX10-NEXT: v_cndmask_b32_e64 v11, v14, v11, s0 -; GFX10-NEXT: v_cmp_le_u32_e64 s0, s8, v15 -; GFX10-NEXT: v_cndmask_b32_e64 v14, 0, -1, s0 -; GFX10-NEXT: v_cmp_le_u32_e64 s0, s9, v16 +; GFX10-NEXT: v_mul_hi_u32 v8, s14, v4 +; GFX10-NEXT: v_add3_u32 v2, v6, v1, v2 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s3, s8, v9, 0 +; GFX10-NEXT: v_mul_lo_u32 v6, s9, v9 +; GFX10-NEXT: v_mul_lo_u32 v7, s8, v5 +; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v3, v2, vcc_lo +; GFX10-NEXT: v_mul_lo_u32 v3, s15, v4 +; GFX10-NEXT: v_mul_hi_u32 v4, s15, v4 +; GFX10-NEXT: v_mul_lo_u32 v10, s14, v2 +; GFX10-NEXT: v_mul_lo_u32 v11, s15, v2 +; GFX10-NEXT: v_add3_u32 v1, v1, v7, v6 +; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v9, 1 +; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, 0, v5, vcc_lo +; GFX10-NEXT: v_sub_nc_u32_e32 v12, s1, v1 +; GFX10-NEXT: v_sub_co_u32 v13, vcc_lo, s0, v0 +; GFX10-NEXT: v_sub_co_ci_u32_e64 v14, s0, s1, v1, vcc_lo +; GFX10-NEXT: v_subrev_co_ci_u32_e32 v0, vcc_lo, s9, v12, vcc_lo +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s8, v13 +; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX10-NEXT: v_sub_co_u32 v12, vcc_lo, v13, s8 +; GFX10-NEXT: v_subrev_co_ci_u32_e64 v15, s0, 0, v0, vcc_lo +; GFX10-NEXT: v_cmp_le_u32_e64 s0, s9, v14 +; GFX10-NEXT: v_subrev_co_ci_u32_e32 v0, vcc_lo, s9, v0, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v16, 0, -1, s0 +; GFX10-NEXT: v_cmp_le_u32_e64 s0, s8, v12 ; GFX10-NEXT: v_cndmask_b32_e64 v17, 0, -1, s0 -; GFX10-NEXT: v_add_co_u32 v1, s0, v12, v1 -; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v1, s0, v1, v13 -; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v13, s0, v0, 1 -; GFX10-NEXT: v_add_co_ci_u32_e64 v18, s0, 0, v2, s0 -; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s9, v16 -; GFX10-NEXT: v_add_nc_u32_e32 v9, v9, v12 -; GFX10-NEXT: v_cndmask_b32_e64 v7, v17, v14, s0 -; GFX10-NEXT: v_add_co_u32 v6, s0, v1, v6 +; GFX10-NEXT: v_cmp_le_u32_e64 s0, s9, v15 +; GFX10-NEXT: v_cndmask_b32_e64 v18, 0, -1, s0 +; GFX10-NEXT: v_add_co_u32 v19, s0, v6, 1 +; GFX10-NEXT: v_add_co_ci_u32_e64 v20, s0, 0, v7, s0 +; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s9, v14 +; GFX10-NEXT: v_cndmask_b32_e64 v16, v16, v1, s0 +; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s9, v15 +; GFX10-NEXT: v_cndmask_b32_e64 v17, v18, v17, s0 +; GFX10-NEXT: v_add_co_u32 v1, s0, v3, v10 +; GFX10-NEXT: v_mul_hi_u32 v10, s14, v2 +; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0 +; GFX10-NEXT: v_add_co_u32 v4, s0, v11, v4 +; GFX10-NEXT: v_add_co_u32 v1, s1, v1, v8 +; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, 1, s0 +; GFX10-NEXT: v_add_co_u32 v4, s0, v4, v10 +; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, 1, s0 +; GFX10-NEXT: v_add_nc_u32_e32 v1, v3, v1 +; GFX10-NEXT: v_mul_hi_u32 v2, s15, v2 +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v17 +; GFX10-NEXT: v_add_nc_u32_e32 v3, v8, v10 +; GFX10-NEXT: v_add_co_u32 v4, s0, v4, v1 ; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v12, s0, v13, 1 -; GFX10-NEXT: v_add_co_ci_u32_e64 v14, s0, 0, v18, s0 -; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v7 -; GFX10-NEXT: v_add3_u32 v3, v9, v1, v3 -; GFX10-NEXT: v_sub_co_u32 v1, s0, v15, s8 -; GFX10-NEXT: v_mul_hi_u32 v17, s10, v6 -; GFX10-NEXT: v_cndmask_b32_e32 v9, v13, v12, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v12, v18, v14, vcc_lo -; GFX10-NEXT: v_mul_lo_u32 v13, s11, v6 -; GFX10-NEXT: v_mul_lo_u32 v14, s10, v3 -; GFX10-NEXT: v_subrev_co_ci_u32_e64 v8, s0, 0, v8, s0 -; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v11 -; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v7 -; GFX10-NEXT: v_mul_lo_u32 v7, s10, v6 -; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc_lo -; GFX10-NEXT: v_add3_u32 v9, v13, v14, v17 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v15, v1, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v8, v16, v8, s0 -; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc_lo -; GFX10-NEXT: v_sub_co_u32 v7, s0, s14, v7 -; GFX10-NEXT: v_sub_co_ci_u32_e64 v11, s1, s15, v9, s0 -; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v1, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc_lo -; GFX10-NEXT: v_sub_nc_u32_e32 v1, s15, v9 -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s11, v11 -; GFX10-NEXT: v_xor_b32_e32 v0, s18, v0 -; GFX10-NEXT: v_xor_b32_e32 v2, s19, v2 -; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc_lo -; GFX10-NEXT: v_subrev_co_ci_u32_e64 v9, vcc_lo, s11, v1, s0 -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s10, v7 -; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc_lo -; GFX10-NEXT: v_sub_co_u32 v13, vcc_lo, v7, s10 -; GFX10-NEXT: v_subrev_co_ci_u32_e64 v14, s0, 0, v9, vcc_lo +; GFX10-NEXT: v_sub_co_u32 v8, s0, v12, s8 +; GFX10-NEXT: v_subrev_co_ci_u32_e64 v10, s0, 0, v0, s0 +; GFX10-NEXT: v_add3_u32 v2, v3, v1, v2 +; GFX10-NEXT: v_cndmask_b32_e32 v3, v6, v19, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v6, v7, v20, vcc_lo +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s10, v4, 0 +; GFX10-NEXT: v_mul_lo_u32 v7, s10, v2 +; GFX10-NEXT: v_mul_lo_u32 v11, s11, v4 +; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v17 +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v16 +; GFX10-NEXT: v_mov_b32_e32 v16, 0 +; GFX10-NEXT: v_cndmask_b32_e64 v8, v12, v8, s0 +; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v6, vcc_lo +; GFX10-NEXT: v_add3_u32 v1, v1, v7, v11 +; GFX10-NEXT: v_cndmask_b32_e64 v6, v15, v10, s0 +; GFX10-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v7, v13, v8, vcc_lo +; GFX10-NEXT: v_sub_co_u32 v8, s0, s14, v0 +; GFX10-NEXT: v_sub_co_ci_u32_e64 v9, s1, s15, v1, s0 +; GFX10-NEXT: v_cndmask_b32_e32 v6, v14, v6, vcc_lo +; GFX10-NEXT: v_sub_nc_u32_e32 v1, s15, v1 +; GFX10-NEXT: v_xor_b32_e32 v0, s18, v3 +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s11, v9 +; GFX10-NEXT: v_xor_b32_e32 v3, s19, v5 +; GFX10-NEXT: v_xor_b32_e32 v6, s2, v6 +; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc_lo +; GFX10-NEXT: v_subrev_co_ci_u32_e64 v10, vcc_lo, s11, v1, s0 +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s10, v8 +; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc_lo +; GFX10-NEXT: v_sub_co_u32 v12, vcc_lo, v8, s10 +; GFX10-NEXT: v_subrev_co_ci_u32_e64 v13, s0, 0, v10, vcc_lo ; GFX10-NEXT: v_sub_co_u32 v0, s0, v0, s18 -; GFX10-NEXT: v_subrev_co_ci_u32_e64 v1, s0, s19, v2, s0 -; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s11, v11 -; GFX10-NEXT: v_xor_b32_e32 v2, s2, v5 -; GFX10-NEXT: v_subrev_co_ci_u32_e32 v9, vcc_lo, s11, v9, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v5, v8, v12, s0 -; GFX10-NEXT: v_cmp_le_u32_e64 s0, s11, v14 -; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, -1, s0 -; GFX10-NEXT: v_cmp_le_u32_e64 s0, s10, v13 -; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, -1, s0 -; GFX10-NEXT: v_add_co_u32 v15, s0, v6, 1 -; GFX10-NEXT: v_add_co_ci_u32_e64 v16, s0, 0, v3, s0 -; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s11, v14 -; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v12, s0 -; GFX10-NEXT: v_add_co_u32 v12, s0, v15, 1 -; GFX10-NEXT: v_add_co_ci_u32_e64 v17, s0, 0, v16, s0 -; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 -; GFX10-NEXT: v_sub_co_u32 v8, s0, v13, s10 -; GFX10-NEXT: v_subrev_co_ci_u32_e64 v9, s0, 0, v9, s0 -; GFX10-NEXT: v_cndmask_b32_e32 v12, v15, v12, vcc_lo +; GFX10-NEXT: v_subrev_co_ci_u32_e64 v1, s0, s19, v3, s0 +; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s11, v9 +; GFX10-NEXT: v_xor_b32_e32 v3, s2, v7 +; GFX10-NEXT: v_subrev_co_ci_u32_e32 v10, vcc_lo, s11, v10, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v11, s0 +; GFX10-NEXT: v_cmp_le_u32_e64 s0, s11, v13 +; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, -1, s0 +; GFX10-NEXT: v_cmp_le_u32_e64 s0, s10, v12 +; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, -1, s0 +; GFX10-NEXT: v_add_co_u32 v14, s0, v4, 1 +; GFX10-NEXT: v_add_co_ci_u32_e64 v15, s0, 0, v2, s0 +; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s11, v13 +; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, v11, s0 +; GFX10-NEXT: v_add_co_u32 v11, s0, v14, 1 +; GFX10-NEXT: v_add_co_ci_u32_e64 v17, s0, 0, v15, s0 +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v7 +; GFX10-NEXT: v_sub_co_u32 v7, s0, v12, s10 +; GFX10-NEXT: v_subrev_co_ci_u32_e64 v10, s0, 0, v10, s0 +; GFX10-NEXT: v_cndmask_b32_e32 v11, v14, v11, vcc_lo ; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v5 -; GFX10-NEXT: v_cndmask_b32_e32 v15, v16, v17, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v5, v13, v8, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v8, v14, v9, vcc_lo -; GFX10-NEXT: v_xor_b32_e32 v9, s2, v4 -; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v12, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v15, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, v5, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v8, v11, v8, s0 +; GFX10-NEXT: v_cndmask_b32_e32 v14, v15, v17, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v5, v12, v7, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v7, v13, v10, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v10, v4, v11, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v14, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v5, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v7, v9, v7, s0 ; GFX10-NEXT: s_xor_b64 s[0:1], s[12:13], s[16:17] -; GFX10-NEXT: v_sub_co_u32 v4, vcc_lo, v2, s2 -; GFX10-NEXT: v_xor_b32_e32 v2, s0, v6 -; GFX10-NEXT: v_xor_b32_e32 v3, s1, v3 -; GFX10-NEXT: v_xor_b32_e32 v6, s12, v7 -; GFX10-NEXT: v_subrev_co_ci_u32_e32 v5, vcc_lo, s2, v9, vcc_lo -; GFX10-NEXT: v_xor_b32_e32 v7, s12, v8 -; GFX10-NEXT: v_sub_co_u32 v2, vcc_lo, v2, s0 -; GFX10-NEXT: v_subrev_co_ci_u32_e32 v3, vcc_lo, s1, v3, vcc_lo -; GFX10-NEXT: v_sub_co_u32 v6, vcc_lo, v6, s12 +; GFX10-NEXT: v_sub_co_u32 v4, vcc_lo, v3, s2 +; GFX10-NEXT: v_xor_b32_e32 v3, s0, v10 +; GFX10-NEXT: v_subrev_co_ci_u32_e32 v5, vcc_lo, s2, v6, vcc_lo +; GFX10-NEXT: v_xor_b32_e32 v6, s1, v2 +; GFX10-NEXT: v_xor_b32_e32 v8, s12, v8 +; GFX10-NEXT: v_xor_b32_e32 v7, s12, v7 +; GFX10-NEXT: v_sub_co_u32 v2, vcc_lo, v3, s0 +; GFX10-NEXT: v_subrev_co_ci_u32_e32 v3, vcc_lo, s1, v6, vcc_lo +; GFX10-NEXT: v_sub_co_u32 v6, vcc_lo, v8, s12 ; GFX10-NEXT: v_subrev_co_ci_u32_e32 v7, vcc_lo, s12, v7, vcc_lo ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: global_store_dwordx4 v10, v[0:3], s[4:5] -; GFX10-NEXT: global_store_dwordx4 v10, v[4:7], s[6:7] +; GFX10-NEXT: global_store_dwordx4 v16, v[0:3], s[4:5] +; GFX10-NEXT: global_store_dwordx4 v16, v[4:7], s[6:7] ; GFX10-NEXT: s_endpgm %div = sdiv <2 x i64> %x, %y store <2 x i64> %div, <2 x i64> addrspace(1)* %out0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll @@ -21,141 +21,132 @@ ; CHECK-NEXT: v_ashrrev_i32_e32 v0, 31, v3 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v0 ; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v3, v0, vcc -; CHECK-NEXT: v_xor_b32_e32 v1, v1, v0 -; CHECK-NEXT: v_xor_b32_e32 v0, v2, v0 -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, v1 -; CHECK-NEXT: v_cvt_f32_u32_e32 v3, v0 -; CHECK-NEXT: v_ashrrev_i32_e32 v6, 31, v5 -; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v6 -; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v5, v6, vcc -; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 -; CHECK-NEXT: v_mul_f32_e32 v5, 0x2f800000, v2 -; CHECK-NEXT: v_trunc_f32_e32 v5, v5 -; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v5 -; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 -; CHECK-NEXT: v_cvt_u32_f32_e32 v5, v5 -; CHECK-NEXT: v_sub_i32_e32 v7, vcc, 0, v1 -; CHECK-NEXT: v_subb_u32_e32 v8, vcc, 0, v0, vcc -; CHECK-NEXT: v_mul_lo_u32 v9, v8, v2 -; CHECK-NEXT: v_mul_lo_u32 v10, v7, v5 -; CHECK-NEXT: v_mul_hi_u32 v12, v7, v2 -; CHECK-NEXT: v_mul_lo_u32 v11, v7, v2 -; CHECK-NEXT: v_xor_b32_e32 v3, v3, v6 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CHECK-NEXT: v_mul_lo_u32 v10, v5, v11 -; CHECK-NEXT: v_mul_lo_u32 v12, v2, v9 -; CHECK-NEXT: v_mul_hi_u32 v13, v2, v11 -; CHECK-NEXT: v_mul_hi_u32 v11, v5, v11 -; CHECK-NEXT: v_xor_b32_e32 v4, v4, v6 -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v12 +; CHECK-NEXT: v_xor_b32_e32 v3, v1, v0 +; CHECK-NEXT: v_xor_b32_e32 v6, v2, v0 +; CHECK-NEXT: v_cvt_f32_u32_e32 v0, v3 +; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v6 +; CHECK-NEXT: v_sub_i32_e32 v8, vcc, 0, v3 +; CHECK-NEXT: v_subb_u32_e32 v9, vcc, 0, v6, vcc +; CHECK-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; CHECK-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 +; CHECK-NEXT: v_trunc_f32_e32 v2, v1 +; CHECK-NEXT: v_mac_f32_e32 v0, 0xcf800000, v2 +; CHECK-NEXT: v_cvt_u32_f32_e32 v7, v0 +; CHECK-NEXT: v_cvt_u32_f32_e32 v10, v2 +; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v8, v7, 0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v8, v10, v[1:2] +; CHECK-NEXT: v_mul_hi_u32 v11, v7, v0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v9, v7, v[1:2] +; CHECK-NEXT: v_mul_lo_u32 v2, v10, v0 +; CHECK-NEXT: v_mul_hi_u32 v0, v10, v0 +; CHECK-NEXT: v_mul_lo_u32 v12, v7, v1 +; CHECK-NEXT: v_mul_lo_u32 v13, v10, v1 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v12 ; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v13 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v13, v5, v9 -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CHECK-NEXT: v_mul_hi_u32 v12, v2, v9 -; CHECK-NEXT: v_add_i32_e32 v11, vcc, v13, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v11 +; CHECK-NEXT: v_mul_hi_u32 v11, v7, v1 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v12, v2 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v13, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CHECK-NEXT: v_mul_hi_u32 v9, v5, v9 -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v11 ; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v10 -; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v5, v9, vcc -; CHECK-NEXT: v_mul_lo_u32 v8, v8, v2 -; CHECK-NEXT: v_mul_lo_u32 v9, v7, v5 -; CHECK-NEXT: v_mul_lo_u32 v10, v7, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, v7, v2 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CHECK-NEXT: v_mul_lo_u32 v8, v5, v10 -; CHECK-NEXT: v_mul_lo_u32 v9, v2, v7 -; CHECK-NEXT: v_mul_hi_u32 v11, v2, v10 -; CHECK-NEXT: v_mul_hi_u32 v10, v5, v10 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; CHECK-NEXT: v_mul_hi_u32 v1, v10, v1 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v11, v2 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v0 +; CHECK-NEXT: v_addc_u32_e32 v10, vcc, v10, v1, vcc +; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v8, v7, 0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v8, v10, v[1:2] +; CHECK-NEXT: v_ashrrev_i32_e32 v8, 31, v5 +; CHECK-NEXT: v_mul_hi_u32 v11, v7, v0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v9, v7, v[1:2] +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v4, v8 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v5, v8, vcc +; CHECK-NEXT: v_xor_b32_e32 v5, v2, v8 +; CHECK-NEXT: v_mul_lo_u32 v2, v10, v0 +; CHECK-NEXT: v_mul_lo_u32 v9, v7, v1 +; CHECK-NEXT: v_mul_hi_u32 v0, v10, v0 +; CHECK-NEXT: v_xor_b32_e32 v4, v4, v8 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v9 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v11, v5, v7 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CHECK-NEXT: v_mul_hi_u32 v9, v2, v7 -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: v_mul_lo_u32 v11, v10, v1 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v9, v2 +; CHECK-NEXT: v_mul_hi_u32 v9, v7, v1 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v11, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CHECK-NEXT: v_mul_hi_u32 v7, v5, v7 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v9 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v8 -; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v5, v7, vcc -; CHECK-NEXT: v_mul_lo_u32 v7, v4, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, v3, v5 -; CHECK-NEXT: v_mul_hi_u32 v9, v3, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, v4, v2 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v9, v4, v5 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CHECK-NEXT: v_mul_hi_u32 v8, v3, v5 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CHECK-NEXT: v_mul_hi_u32 v1, v10, v1 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v9, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0 +; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v10, v1, vcc +; CHECK-NEXT: v_mul_lo_u32 v2, v4, v0 +; CHECK-NEXT: v_mul_lo_u32 v7, v5, v1 +; CHECK-NEXT: v_mul_hi_u32 v9, v5, v0 +; CHECK-NEXT: v_mul_hi_u32 v0, v4, v0 +; CHECK-NEXT: v_mul_hi_u32 v10, v4, v1 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CHECK-NEXT: v_mul_lo_u32 v7, v0, v2 -; CHECK-NEXT: v_mul_lo_u32 v5, v1, v5 -; CHECK-NEXT: v_mul_lo_u32 v8, v1, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2 -; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v3, v8 -; CHECK-NEXT: v_subb_u32_e64 v5, s[4:5], v4, v2, vcc -; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], v4, v2 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v0 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v9 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: v_mul_lo_u32 v9, v4, v1 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2 +; CHECK-NEXT: v_mul_hi_u32 v7, v5, v1 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v9, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v0, v2 +; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v9, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v10, v2 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v3, v2, v[1:2] +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v5, v0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v6, v9, v[1:2] +; CHECK-NEXT: v_subb_u32_e64 v2, s[4:5], v4, v1, vcc +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v4, v1 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v1 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v5, v0 -; CHECK-NEXT: v_subb_u32_e32 v2, vcc, v2, v0, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v4, v4, v7, s[4:5] -; CHECK-NEXT: v_sub_i32_e32 v7, vcc, v3, v1 -; CHECK-NEXT: v_subbrev_u32_e64 v8, s[4:5], 0, v2, vcc -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v0 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v2, v6 +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v6, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[4:5] +; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v3 +; CHECK-NEXT: v_subbrev_u32_e64 v7, s[4:5], 0, v1, vcc +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v1 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v3 +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v6, vcc ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v8, v0 -; CHECK-NEXT: v_subb_u32_e32 v0, vcc, v2, v0, vcc -; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v7, v1 +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v7, v6 +; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v5, v3 ; CHECK-NEXT: v_cndmask_b32_e64 v9, v9, v10, s[4:5] -; CHECK-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc +; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; CHECK-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc ; CHECK-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc -; CHECK-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc -; CHECK-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc -; CHECK-NEXT: v_xor_b32_e32 v1, v1, v6 -; CHECK-NEXT: v_xor_b32_e32 v2, v0, v6 -; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v1, v6 -; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v2, v6, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc +; CHECK-NEXT: v_xor_b32_e32 v0, v0, v8 +; CHECK-NEXT: v_xor_b32_e32 v1, v1, v8 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v8 +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v8, vcc ; CHECK-NEXT: ; implicit-def: $vgpr2 ; CHECK-NEXT: ; implicit-def: $vgpr4 ; CHECK-NEXT: .LBB0_2: ; %Flow @@ -213,126 +204,117 @@ ; CHECK-NEXT: v_cvt_f32_u32_e32 v1, s9 ; CHECK-NEXT: s_mov_b32 s7, s6 ; CHECK-NEXT: s_xor_b64 s[10:11], s[10:11], s[6:7] -; CHECK-NEXT: s_sub_u32 s0, 0, s8 +; CHECK-NEXT: s_sub_u32 s3, 0, s8 ; CHECK-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; CHECK-NEXT: s_subb_u32 s1, 0, s9 +; CHECK-NEXT: s_subb_u32 s5, 0, s9 ; CHECK-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; CHECK-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 -; CHECK-NEXT: v_trunc_f32_e32 v1, v1 -; CHECK-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 -; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 -; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0 -; CHECK-NEXT: v_mul_lo_u32 v2, s0, v1 -; CHECK-NEXT: v_mul_lo_u32 v3, s1, v0 -; CHECK-NEXT: v_mul_hi_u32 v5, s0, v0 -; CHECK-NEXT: v_mul_lo_u32 v4, s0, v0 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; CHECK-NEXT: v_trunc_f32_e32 v2, v1 +; CHECK-NEXT: v_mac_f32_e32 v0, 0xcf800000, v2 +; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v0 +; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v2 +; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s3, v3, 0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v4, v[1:2] +; CHECK-NEXT: v_mul_hi_u32 v5, v3, v0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s5, v3, v[1:2] +; CHECK-NEXT: v_mul_lo_u32 v2, v4, v0 +; CHECK-NEXT: v_mul_hi_u32 v0, v4, v0 +; CHECK-NEXT: v_mul_lo_u32 v6, v3, v1 +; CHECK-NEXT: v_mul_lo_u32 v7, v4, v1 +; CHECK-NEXT: v_mul_hi_u32 v8, v3, v1 +; CHECK-NEXT: v_mul_hi_u32 v1, v4, v1 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 -; CHECK-NEXT: v_mul_lo_u32 v3, v1, v4 -; CHECK-NEXT: v_mul_lo_u32 v5, v0, v2 -; CHECK-NEXT: v_mul_hi_u32 v6, v0, v4 -; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v6, v2 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v6, v1, v2 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v5, v3 -; CHECK-NEXT: v_mul_hi_u32 v5, v0, v2 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v8 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v3 -; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc -; CHECK-NEXT: v_mul_lo_u32 v2, s1, v0 -; CHECK-NEXT: v_mul_lo_u32 v3, s0, v1 -; CHECK-NEXT: v_mul_hi_u32 v5, s0, v0 -; CHECK-NEXT: v_mul_lo_u32 v4, s0, v0 -; CHECK-NEXT: v_mov_b32_e32 v6, s9 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v0 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v1, vcc +; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s3, v3, 0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v4, v[1:2] +; CHECK-NEXT: v_mul_hi_u32 v6, v3, v0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s5, v3, v[1:2] +; CHECK-NEXT: v_mul_lo_u32 v2, v4, v0 +; CHECK-NEXT: v_mul_hi_u32 v0, v4, v0 +; CHECK-NEXT: v_mul_lo_u32 v5, v3, v1 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 -; CHECK-NEXT: v_mul_lo_u32 v3, v1, v4 -; CHECK-NEXT: v_mul_lo_u32 v5, v0, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, v0, v4 -; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v5 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v7, v1, v2 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v5, v3 -; CHECK-NEXT: v_mul_hi_u32 v5, v0, v2 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v7, v4 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: v_mul_lo_u32 v6, v4, v1 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; CHECK-NEXT: v_mul_hi_u32 v5, v3, v1 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v6, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v5 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v3 -; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; CHECK-NEXT: v_mul_hi_u32 v1, v4, v1 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v3, v0 +; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc ; CHECK-NEXT: v_mul_lo_u32 v2, s11, v0 ; CHECK-NEXT: v_mul_lo_u32 v3, s10, v1 -; CHECK-NEXT: v_mul_hi_u32 v5, s10, v0 +; CHECK-NEXT: v_mul_hi_u32 v4, s10, v0 ; CHECK-NEXT: v_mul_hi_u32 v0, s11, v0 -; CHECK-NEXT: v_mov_b32_e32 v4, s11 +; CHECK-NEXT: v_mul_hi_u32 v5, s11, v1 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3 ; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v5, s11, v1 +; CHECK-NEXT: v_mul_lo_u32 v4, s11, v1 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; CHECK-NEXT: v_mul_hi_u32 v3, s10, v1 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v5, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v4, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v3 ; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v5, v3 -; CHECK-NEXT: v_mul_hi_u32 v1, s11, v1 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v0, v2 +; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v4, 0 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 -; CHECK-NEXT: v_mul_lo_u32 v2, s9, v0 -; CHECK-NEXT: v_mul_lo_u32 v1, s8, v1 -; CHECK-NEXT: v_mul_lo_u32 v3, s8, v0 -; CHECK-NEXT: v_mul_hi_u32 v0, s8, v0 -; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v1 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; CHECK-NEXT: v_sub_i32_e32 v1, vcc, s10, v3 -; CHECK-NEXT: v_subb_u32_e64 v2, s[0:1], v4, v0, vcc -; CHECK-NEXT: v_sub_i32_e64 v0, s[0:1], s11, v0 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v2, v[1:2] +; CHECK-NEXT: v_mov_b32_e32 v5, s11 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, s10, v0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s9, v4, v[1:2] +; CHECK-NEXT: v_mov_b32_e32 v3, s9 +; CHECK-NEXT: v_subb_u32_e64 v2, s[0:1], v5, v1, vcc +; CHECK-NEXT: v_sub_i32_e64 v1, s[0:1], s11, v1 +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc ; CHECK-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[0:1] -; CHECK-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v1 +; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, s8, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[0:1] +; CHECK-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v0 +; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[0:1] ; CHECK-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v2 -; CHECK-NEXT: v_subb_u32_e32 v0, vcc, v0, v6, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v2, v3, v4, s[0:1] -; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, s8, v1 -; CHECK-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc -; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s9, v0 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s9, v1 +; CHECK-NEXT: v_cndmask_b32_e64 v2, v4, v5, s[0:1] ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s8, v3 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s9, v0 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s9, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v5, vcc ; CHECK-NEXT: v_subrev_i32_e32 v4, vcc, s8, v3 -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; CHECK-NEXT: v_xor_b32_e32 v0, s6, v0 ; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0 ; CHECK-NEXT: s_mov_b32 s1, 0 @@ -384,280 +366,266 @@ ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: v_ashrrev_i32_e32 v8, 31, v5 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 -; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v8, vcc -; GISEL-NEXT: v_xor_b32_e32 v9, v4, v8 -; GISEL-NEXT: v_xor_b32_e32 v5, v5, v8 -; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v9 -; GISEL-NEXT: v_cvt_f32_u32_e32 v10, v5 +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v5, v8, vcc +; GISEL-NEXT: v_xor_b32_e32 v5, v4, v8 +; GISEL-NEXT: v_xor_b32_e32 v8, v9, v8 +; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v5 +; GISEL-NEXT: v_cvt_f32_u32_e32 v9, v8 +; GISEL-NEXT: v_sub_i32_e32 v13, vcc, 0, v5 +; GISEL-NEXT: v_subb_u32_e32 v14, vcc, 0, v8, vcc +; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v9 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; GISEL-NEXT: v_mul_f32_e32 v9, 0x2f800000, v4 +; GISEL-NEXT: v_trunc_f32_e32 v11, v9 +; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v11 +; GISEL-NEXT: v_cvt_u32_f32_e32 v12, v4 +; GISEL-NEXT: v_cvt_u32_f32_e32 v15, v11 +; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v13, v12, 0 +; GISEL-NEXT: v_mov_b32_e32 v4, v10 +; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v13, v15, v[4:5] +; GISEL-NEXT: v_mul_lo_u32 v4, v15, v9 +; GISEL-NEXT: v_mul_hi_u32 v16, v12, v9 +; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v12, v[10:11] +; GISEL-NEXT: v_mul_hi_u32 v9, v15, v9 +; GISEL-NEXT: v_mul_lo_u32 v11, v12, v10 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v16, v15, v10 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v11, v4 +; GISEL-NEXT: v_mul_hi_u32 v11, v12, v10 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v16, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v16, v11 +; GISEL-NEXT: v_mul_hi_u32 v10, v15, v10 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v9, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v4 +; GISEL-NEXT: v_addc_u32_e32 v15, vcc, v15, v9, vcc +; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v13, v12, 0 +; GISEL-NEXT: v_mov_b32_e32 v4, v10 +; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v13, v15, v[4:5] ; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v1 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; GISEL-NEXT: v_mac_f32_e32 v8, 0x4f800000, v10 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v8, v8 +; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v12, v[10:11] ; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc -; GISEL-NEXT: v_sub_i32_e32 v11, vcc, 0, v9 -; GISEL-NEXT: v_mul_f32_e32 v8, 0x5f7ffffc, v8 -; GISEL-NEXT: v_mul_f32_e32 v10, 0x2f800000, v8 -; GISEL-NEXT: v_trunc_f32_e32 v10, v10 -; GISEL-NEXT: v_mac_f32_e32 v8, 0xcf800000, v10 -; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8 -; GISEL-NEXT: v_cvt_u32_f32_e32 v10, v10 -; GISEL-NEXT: v_subb_u32_e32 v12, vcc, 0, v5, vcc -; GISEL-NEXT: v_mul_lo_u32 v13, v12, v8 -; GISEL-NEXT: v_mul_lo_u32 v14, v11, v10 -; GISEL-NEXT: v_mul_hi_u32 v16, v11, v8 -; GISEL-NEXT: v_mul_lo_u32 v15, v11, v8 -; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v16 -; GISEL-NEXT: v_mul_lo_u32 v14, v10, v15 -; GISEL-NEXT: v_mul_lo_u32 v16, v8, v13 -; GISEL-NEXT: v_mul_hi_u32 v17, v8, v15 -; GISEL-NEXT: v_mul_hi_u32 v15, v10, v15 -; GISEL-NEXT: v_xor_b32_e32 v1, v1, v4 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v17, v10, v13 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v16, v14 -; GISEL-NEXT: v_mul_hi_u32 v16, v8, v13 -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v17, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v17, v16 -; GISEL-NEXT: v_mul_hi_u32 v13, v10, v13 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v14 -; GISEL-NEXT: v_addc_u32_e32 v10, vcc, v10, v13, vcc -; GISEL-NEXT: v_mul_lo_u32 v12, v12, v8 -; GISEL-NEXT: v_mul_lo_u32 v13, v11, v10 -; GISEL-NEXT: v_mul_lo_u32 v14, v11, v8 -; GISEL-NEXT: v_mul_hi_u32 v11, v11, v8 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; GISEL-NEXT: v_mul_lo_u32 v12, v10, v14 -; GISEL-NEXT: v_mul_lo_u32 v13, v8, v11 -; GISEL-NEXT: v_mul_hi_u32 v15, v8, v14 -; GISEL-NEXT: v_mul_hi_u32 v14, v10, v14 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v15, v10, v11 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; GISEL-NEXT: v_mul_hi_u32 v13, v8, v11 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; GISEL-NEXT: v_mul_hi_u32 v11, v10, v11 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; GISEL-NEXT: v_xor_b32_e32 v11, v0, v4 +; GISEL-NEXT: v_mul_lo_u32 v0, v15, v9 +; GISEL-NEXT: v_mul_lo_u32 v13, v12, v10 +; GISEL-NEXT: v_xor_b32_e32 v14, v1, v4 +; GISEL-NEXT: v_mul_hi_u32 v1, v12, v9 +; GISEL-NEXT: v_mul_hi_u32 v9, v15, v9 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v13 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 -; GISEL-NEXT: v_addc_u32_e32 v10, vcc, v10, v11, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v1, v8 -; GISEL-NEXT: v_mul_lo_u32 v12, v0, v10 -; GISEL-NEXT: v_mul_hi_u32 v13, v0, v8 -; GISEL-NEXT: v_mul_hi_u32 v8, v1, v8 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v13, v1, v10 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; GISEL-NEXT: v_mul_hi_u32 v12, v0, v10 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v13, v8 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v1, v15, v10 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v13, v0 +; GISEL-NEXT: v_mul_hi_u32 v13, v12, v10 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v13 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13 +; GISEL-NEXT: v_mul_hi_u32 v10, v15, v10 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v9, v1 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v12, v0 +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v15, v1, vcc +; GISEL-NEXT: v_mul_lo_u32 v9, v14, v0 +; GISEL-NEXT: v_mul_lo_u32 v10, v11, v1 +; GISEL-NEXT: v_mul_hi_u32 v12, v11, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v14, v0 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v12, v14, v1 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; GISEL-NEXT: v_mul_hi_u32 v10, v11, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v12, v0 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; GISEL-NEXT: v_mul_hi_u32 v10, v1, v10 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; GISEL-NEXT: v_mul_lo_u32 v11, v5, v8 -; GISEL-NEXT: v_mul_lo_u32 v10, v9, v10 -; GISEL-NEXT: v_mul_lo_u32 v12, v9, v8 -; GISEL-NEXT: v_mul_hi_u32 v8, v9, v8 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v12 -; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], v1, v8, vcc -; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v8 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v10, v5 -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v8, v8, v11, s[4:5] -; GISEL-NEXT: v_sub_i32_e32 v11, vcc, v0, v9 -; GISEL-NEXT: v_subbrev_u32_e64 v12, s[4:5], 0, v1, vcc -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v12, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v11, v9 -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v12, v5 -; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v11, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v13, v13, v14, s[4:5] -; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 -; GISEL-NEXT: v_cndmask_b32_e32 v5, v11, v5, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v1, v12, v1, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 -; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc -; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v7 -; GISEL-NEXT: v_cndmask_b32_e32 v1, v10, v1, vcc -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v5 -; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v5, vcc -; GISEL-NEXT: v_xor_b32_e32 v6, v6, v5 -; GISEL-NEXT: v_xor_b32_e32 v5, v7, v5 -; GISEL-NEXT: v_cvt_f32_u32_e32 v7, v6 -; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v5 -; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v3 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v9 -; GISEL-NEXT: v_mac_f32_e32 v7, 0x4f800000, v8 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v7, v7 -; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v9, vcc -; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v6 -; GISEL-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7 -; GISEL-NEXT: v_mul_f32_e32 v8, 0x2f800000, v7 -; GISEL-NEXT: v_trunc_f32_e32 v8, v8 -; GISEL-NEXT: v_mac_f32_e32 v7, 0xcf800000, v8 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v12, v10 +; GISEL-NEXT: v_mul_hi_u32 v1, v14, v1 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v0, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v1, v0 +; GISEL-NEXT: v_ashrrev_i32_e32 v12, 31, v7 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v12 +; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v12, vcc +; GISEL-NEXT: v_xor_b32_e32 v13, v6, v12 +; GISEL-NEXT: v_xor_b32_e32 v12, v7, v12 +; GISEL-NEXT: v_cvt_f32_u32_e32 v15, v13 +; GISEL-NEXT: v_cvt_f32_u32_e32 v16, v12 +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v9, 0 +; GISEL-NEXT: v_mac_f32_e32 v15, 0x4f800000, v16 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v5, v10, v[1:2] +; GISEL-NEXT: v_rcp_iflag_f32_e32 v1, v15 +; GISEL-NEXT: v_sub_i32_e32 v16, vcc, 0, v13 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v8, v9, v[6:7] +; GISEL-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 +; GISEL-NEXT: v_mul_f32_e32 v7, 0x2f800000, v1 +; GISEL-NEXT: v_trunc_f32_e32 v7, v7 +; GISEL-NEXT: v_mac_f32_e32 v1, 0xcf800000, v7 +; GISEL-NEXT: v_cvt_u32_f32_e32 v15, v1 ; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7 -; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8 -; GISEL-NEXT: v_subb_u32_e32 v11, vcc, 0, v5, vcc -; GISEL-NEXT: v_mul_lo_u32 v12, v11, v7 -; GISEL-NEXT: v_mul_lo_u32 v13, v10, v8 -; GISEL-NEXT: v_mul_hi_u32 v15, v10, v7 -; GISEL-NEXT: v_mul_lo_u32 v14, v10, v7 -; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15 -; GISEL-NEXT: v_mul_lo_u32 v13, v8, v14 -; GISEL-NEXT: v_mul_lo_u32 v15, v7, v12 -; GISEL-NEXT: v_mul_hi_u32 v16, v7, v14 -; GISEL-NEXT: v_mul_hi_u32 v14, v8, v14 -; GISEL-NEXT: v_xor_b32_e32 v2, v2, v9 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v16 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v16, v8, v12 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v15, v13 -; GISEL-NEXT: v_mul_hi_u32 v15, v7, v12 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v16, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15 -; GISEL-NEXT: v_mul_hi_u32 v12, v8, v12 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v13 -; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v12, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v11, v7 -; GISEL-NEXT: v_mul_lo_u32 v12, v10, v8 -; GISEL-NEXT: v_mul_lo_u32 v13, v10, v7 -; GISEL-NEXT: v_mul_hi_u32 v10, v10, v7 -; GISEL-NEXT: v_xor_b32_e32 v3, v3, v9 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GISEL-NEXT: v_mul_lo_u32 v11, v8, v13 -; GISEL-NEXT: v_mul_lo_u32 v12, v7, v10 -; GISEL-NEXT: v_mul_hi_u32 v14, v7, v13 -; GISEL-NEXT: v_mul_hi_u32 v13, v8, v13 -; GISEL-NEXT: v_xor_b32_e32 v1, v1, v4 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v14, v8, v10 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; GISEL-NEXT: v_mul_hi_u32 v12, v7, v10 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 +; GISEL-NEXT: v_subb_u32_e32 v17, vcc, 0, v12, vcc +; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v16, v15, 0 +; GISEL-NEXT: v_sub_i32_e32 v11, vcc, v11, v0 +; GISEL-NEXT: v_mov_b32_e32 v0, v10 +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v16, v7, v[0:1] +; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], v14, v6, vcc +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v17, v15, v[0:1] +; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], v14, v6 +; GISEL-NEXT: v_mul_lo_u32 v6, v7, v9 +; GISEL-NEXT: v_mul_lo_u32 v14, v15, v0 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v10, v8 +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v8, vcc +; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v6, v14 +; GISEL-NEXT: v_mul_hi_u32 v14, v15, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5] +; GISEL-NEXT: v_mul_hi_u32 v9, v7, v9 +; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v6, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[6:7] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v11, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[6:7] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], v10, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v6, v6, v14, s[6:7] +; GISEL-NEXT: v_sub_i32_e32 v14, vcc, v11, v5 +; GISEL-NEXT: v_subbrev_u32_e64 v19, s[6:7], 0, v1, vcc +; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v14, v5 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v19, v8 +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v8, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, -1, s[8:9] +; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, -1, s[6:7] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], v19, v8 +; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v14, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v20, v20, v21, s[6:7] +; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v20 +; GISEL-NEXT: v_cndmask_b32_e32 v5, v14, v5, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v8, v19, v1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v14, v7, v0 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v18, v1 +; GISEL-NEXT: v_mul_hi_u32 v18, v15, v0 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v14, v9 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; GISEL-NEXT: v_mul_hi_u32 v10, v8, v10 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v11 -; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v10, vcc -; GISEL-NEXT: v_mul_lo_u32 v10, v3, v7 -; GISEL-NEXT: v_mul_lo_u32 v11, v2, v8 -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc -; GISEL-NEXT: v_mul_hi_u32 v4, v2, v7 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v10, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v10, v3, v8 -; GISEL-NEXT: v_mul_hi_u32 v7, v3, v7 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v11, v4 -; GISEL-NEXT: v_mul_hi_u32 v11, v2, v8 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v10, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; GISEL-NEXT: v_mul_hi_u32 v8, v3, v8 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v7, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v10, v7 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GISEL-NEXT: v_mul_lo_u32 v8, v5, v4 -; GISEL-NEXT: v_mul_lo_u32 v7, v6, v7 -; GISEL-NEXT: v_mul_lo_u32 v10, v6, v4 -; GISEL-NEXT: v_mul_hi_u32 v4, v6, v4 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v7, v4 -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 -; GISEL-NEXT: v_subb_u32_e64 v7, s[4:5], v3, v4, vcc -; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v6 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v18 +; GISEL-NEXT: v_mul_hi_u32 v0, v7, v0 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v9, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v14, v9 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v15, v1 +; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v0, vcc +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v16, v9, 0 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 +; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v5, vcc +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v16, v7, v[1:2] +; GISEL-NEXT: v_cndmask_b32_e32 v8, v10, v8, vcc +; GISEL-NEXT: v_ashrrev_i32_e32 v10, 31, v3 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v17, v9, v[5:6] +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v10 +; GISEL-NEXT: v_xor_b32_e32 v1, v11, v4 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v10, vcc +; GISEL-NEXT: v_xor_b32_e32 v11, v2, v10 +; GISEL-NEXT: v_mul_lo_u32 v2, v7, v0 +; GISEL-NEXT: v_mul_lo_u32 v6, v9, v5 +; GISEL-NEXT: v_xor_b32_e32 v14, v3, v10 +; GISEL-NEXT: v_mul_hi_u32 v3, v9, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v7, v0 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v7, v5 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2 +; GISEL-NEXT: v_mul_hi_u32 v6, v9, v5 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6 +; GISEL-NEXT: v_mul_hi_u32 v5, v7, v5 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0 +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v7, v2, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v14, v0 +; GISEL-NEXT: v_mul_lo_u32 v5, v11, v2 +; GISEL-NEXT: v_mul_hi_u32 v6, v11, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v14, v0 +; GISEL-NEXT: v_xor_b32_e32 v7, v8, v4 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v6, v14, v2 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v5, v3 +; GISEL-NEXT: v_mul_hi_u32 v5, v11, v2 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v0, v3 +; GISEL-NEXT: v_mul_hi_u32 v6, v14, v2 +; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v13, v8, 0 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v5, v0 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v0 +; GISEL-NEXT: v_mov_b32_e32 v0, v3 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v13, v5, v[0:1] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v4 +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v7, v4, vcc +; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v12, v8, v[5:6] +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v11, v2 +; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v14, v3, vcc +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v14, v3 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v4, v12 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v12, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5] +; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v2, v13 +; GISEL-NEXT: v_subbrev_u32_e64 v7, s[4:5], 0, v3, vcc +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v12 ; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v7, v5 -; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v5, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v4, v4, v8, s[4:5] -; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v2, v6 -; GISEL-NEXT: v_subbrev_u32_e64 v10, s[4:5], 0, v3, vcc -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v6 -; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v5, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v10, v5 -; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v8, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v11, v11, v12, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v7, v12 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v12, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v8, v8, v9, s[4:5] +; GISEL-NEXT: v_sub_i32_e32 v9, vcc, v6, v13 ; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 -; GISEL-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 +; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v9, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc -; GISEL-NEXT: v_xor_b32_e32 v2, v2, v9 -; GISEL-NEXT: v_xor_b32_e32 v3, v3, v9 -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v9 -; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v9, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v10 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v10 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v10, vcc ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_srem_v2i64: @@ -678,141 +646,132 @@ ; CGP-NEXT: v_ashrrev_i32_e32 v0, 31, v5 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v4, v0 ; CGP-NEXT: v_addc_u32_e32 v2, vcc, v5, v0, vcc -; CGP-NEXT: v_xor_b32_e32 v1, v1, v0 -; CGP-NEXT: v_xor_b32_e32 v0, v2, v0 -; CGP-NEXT: v_cvt_f32_u32_e32 v2, v1 -; CGP-NEXT: v_cvt_f32_u32_e32 v3, v0 -; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v11 -; CGP-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 -; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v10, v4 -; CGP-NEXT: v_addc_u32_e32 v5, vcc, v11, v4, vcc -; CGP-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 -; CGP-NEXT: v_mul_f32_e32 v10, 0x2f800000, v2 -; CGP-NEXT: v_trunc_f32_e32 v10, v10 -; CGP-NEXT: v_mac_f32_e32 v2, 0xcf800000, v10 -; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2 -; CGP-NEXT: v_cvt_u32_f32_e32 v10, v10 -; CGP-NEXT: v_sub_i32_e32 v11, vcc, 0, v1 -; CGP-NEXT: v_subb_u32_e32 v12, vcc, 0, v0, vcc -; CGP-NEXT: v_mul_lo_u32 v13, v12, v2 -; CGP-NEXT: v_mul_lo_u32 v14, v11, v10 -; CGP-NEXT: v_mul_hi_u32 v16, v11, v2 -; CGP-NEXT: v_mul_lo_u32 v15, v11, v2 -; CGP-NEXT: v_xor_b32_e32 v3, v3, v4 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v16 -; CGP-NEXT: v_mul_lo_u32 v14, v10, v15 -; CGP-NEXT: v_mul_lo_u32 v16, v2, v13 -; CGP-NEXT: v_mul_hi_u32 v17, v2, v15 -; CGP-NEXT: v_mul_hi_u32 v15, v10, v15 -; CGP-NEXT: v_xor_b32_e32 v5, v5, v4 -; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v16 +; CGP-NEXT: v_xor_b32_e32 v3, v1, v0 +; CGP-NEXT: v_xor_b32_e32 v4, v2, v0 +; CGP-NEXT: v_cvt_f32_u32_e32 v0, v3 +; CGP-NEXT: v_cvt_f32_u32_e32 v1, v4 +; CGP-NEXT: v_sub_i32_e32 v12, vcc, 0, v3 +; CGP-NEXT: v_subb_u32_e32 v13, vcc, 0, v4, vcc +; CGP-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 +; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 +; CGP-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 +; CGP-NEXT: v_trunc_f32_e32 v2, v1 +; CGP-NEXT: v_mac_f32_e32 v0, 0xcf800000, v2 +; CGP-NEXT: v_cvt_u32_f32_e32 v5, v0 +; CGP-NEXT: v_cvt_u32_f32_e32 v14, v2 +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v12, v5, 0 +; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v12, v14, v[1:2] +; CGP-NEXT: v_mul_hi_u32 v15, v5, v0 +; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v13, v5, v[1:2] +; CGP-NEXT: v_mul_lo_u32 v2, v14, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v14, v0 +; CGP-NEXT: v_mul_lo_u32 v16, v5, v1 +; CGP-NEXT: v_mul_lo_u32 v17, v14, v1 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v16 ; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v17 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v17, v10, v13 -; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v14 -; CGP-NEXT: v_mul_hi_u32 v16, v2, v13 -; CGP-NEXT: v_add_i32_e32 v15, vcc, v17, v15 -; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v16 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v15 +; CGP-NEXT: v_mul_hi_u32 v15, v5, v1 +; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, v16, v2 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v17, v0 ; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v16, vcc, v17, v16 -; CGP-NEXT: v_mul_hi_u32 v13, v10, v13 -; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v15 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v14 -; CGP-NEXT: v_addc_u32_e32 v10, vcc, v10, v13, vcc -; CGP-NEXT: v_mul_lo_u32 v12, v12, v2 -; CGP-NEXT: v_mul_lo_u32 v13, v11, v10 -; CGP-NEXT: v_mul_lo_u32 v14, v11, v2 -; CGP-NEXT: v_mul_hi_u32 v11, v11, v2 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_lo_u32 v12, v10, v14 -; CGP-NEXT: v_mul_lo_u32 v13, v2, v11 -; CGP-NEXT: v_mul_hi_u32 v15, v2, v14 -; CGP-NEXT: v_mul_hi_u32 v14, v10, v14 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13 +; CGP-NEXT: v_mul_hi_u32 v1, v14, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, v15, v2 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v0 +; CGP-NEXT: v_addc_u32_e32 v14, vcc, v14, v1, vcc +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v12, v5, 0 +; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v12, v14, v[1:2] +; CGP-NEXT: v_ashrrev_i32_e32 v12, 31, v11 +; CGP-NEXT: v_mul_hi_u32 v15, v5, v0 +; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v13, v5, v[1:2] +; CGP-NEXT: v_add_i32_e32 v2, vcc, v10, v12 +; CGP-NEXT: v_addc_u32_e32 v10, vcc, v11, v12, vcc +; CGP-NEXT: v_xor_b32_e32 v11, v2, v12 +; CGP-NEXT: v_mul_lo_u32 v2, v14, v0 +; CGP-NEXT: v_mul_lo_u32 v13, v5, v1 +; CGP-NEXT: v_mul_hi_u32 v0, v14, v0 +; CGP-NEXT: v_xor_b32_e32 v10, v10, v12 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v13 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v15 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v15, v10, v11 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CGP-NEXT: v_mul_hi_u32 v13, v2, v11 -; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v15 +; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v15, v14, v1 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v13, v2 +; CGP-NEXT: v_mul_hi_u32 v13, v5, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v15, v0 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; CGP-NEXT: v_mul_hi_u32 v11, v10, v11 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v13 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12 -; CGP-NEXT: v_addc_u32_e32 v10, vcc, v10, v11, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v5, v2 -; CGP-NEXT: v_mul_lo_u32 v12, v3, v10 -; CGP-NEXT: v_mul_hi_u32 v13, v3, v2 -; CGP-NEXT: v_mul_hi_u32 v2, v5, v2 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v13, v5, v10 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_hi_u32 v12, v3, v10 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v13 +; CGP-NEXT: v_mul_hi_u32 v1, v14, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v2, vcc, v13, v2 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v5, v0 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v14, v1, vcc +; CGP-NEXT: v_mul_lo_u32 v2, v10, v0 +; CGP-NEXT: v_mul_lo_u32 v5, v11, v1 +; CGP-NEXT: v_mul_hi_u32 v13, v11, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v10, v0 +; CGP-NEXT: v_mul_hi_u32 v14, v10, v1 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v5 +; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v13 +; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v13, v10, v1 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; CGP-NEXT: v_mul_hi_u32 v5, v11, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v13, v0 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CGP-NEXT: v_mul_hi_u32 v10, v5, v10 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CGP-NEXT: v_mul_lo_u32 v11, v0, v2 -; CGP-NEXT: v_mul_lo_u32 v10, v1, v10 -; CGP-NEXT: v_mul_lo_u32 v12, v1, v2 -; CGP-NEXT: v_mul_hi_u32 v2, v1, v2 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v10, v2 -; CGP-NEXT: v_sub_i32_e32 v3, vcc, v3, v12 -; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v5, v2, vcc -; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v5, v2 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v0 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v5 +; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v13, v5 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v0, v2 +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v13, 0 +; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v14, v2 +; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v3, v2, v[1:2] +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v11, v0 +; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v4, v13, v[1:2] +; CGP-NEXT: v_subb_u32_e64 v2, s[4:5], v10, v1, vcc +; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v10, v1 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4 ; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v1 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v10, v0 -; CGP-NEXT: v_subb_u32_e32 v2, vcc, v2, v0, vcc -; CGP-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[4:5] -; CGP-NEXT: v_sub_i32_e32 v11, vcc, v3, v1 -; CGP-NEXT: v_subbrev_u32_e64 v12, s[4:5], 0, v2, vcc -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v12, v0 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v2, v4 +; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc +; CGP-NEXT: v_cndmask_b32_e64 v5, v5, v10, s[4:5] +; CGP-NEXT: v_sub_i32_e32 v10, vcc, v0, v3 +; CGP-NEXT: v_subbrev_u32_e64 v11, s[4:5], 0, v1, vcc +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v11, v4 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v11, v1 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v3 +; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v12, v0 -; CGP-NEXT: v_subb_u32_e32 v0, vcc, v2, v0, vcc -; CGP-NEXT: v_sub_i32_e32 v1, vcc, v11, v1 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v11, v4 +; CGP-NEXT: v_sub_i32_e32 v3, vcc, v10, v3 ; CGP-NEXT: v_cndmask_b32_e64 v13, v13, v14, s[4:5] -; CGP-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc +; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 +; CGP-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc ; CGP-NEXT: v_cndmask_b32_e32 v1, v11, v1, vcc -; CGP-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 -; CGP-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc -; CGP-NEXT: v_cndmask_b32_e32 v0, v10, v0, vcc -; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 -; CGP-NEXT: v_xor_b32_e32 v2, v0, v4 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v1, v4 -; CGP-NEXT: v_subb_u32_e32 v1, vcc, v2, v4, vcc +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; CGP-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc +; CGP-NEXT: v_xor_b32_e32 v0, v0, v12 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v12 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v12 +; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v12, vcc ; CGP-NEXT: ; implicit-def: $vgpr4 ; CGP-NEXT: ; implicit-def: $vgpr10 ; CGP-NEXT: .LBB2_2: ; %Flow1 @@ -851,141 +810,132 @@ ; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v7 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v6, v2 ; CGP-NEXT: v_addc_u32_e32 v4, vcc, v7, v2, vcc -; CGP-NEXT: v_xor_b32_e32 v3, v3, v2 -; CGP-NEXT: v_xor_b32_e32 v2, v4, v2 -; CGP-NEXT: v_cvt_f32_u32_e32 v4, v3 -; CGP-NEXT: v_cvt_f32_u32_e32 v5, v2 -; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v9 -; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 -; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v6 -; CGP-NEXT: v_addc_u32_e32 v7, vcc, v9, v6, vcc -; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 -; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v4 -; CGP-NEXT: v_trunc_f32_e32 v8, v8 -; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v8 -; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 -; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 -; CGP-NEXT: v_sub_i32_e32 v9, vcc, 0, v3 -; CGP-NEXT: v_subb_u32_e32 v10, vcc, 0, v2, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v10, v4 -; CGP-NEXT: v_mul_lo_u32 v12, v9, v8 -; CGP-NEXT: v_mul_hi_u32 v14, v9, v4 -; CGP-NEXT: v_mul_lo_u32 v13, v9, v4 -; CGP-NEXT: v_xor_b32_e32 v5, v5, v6 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v14 -; CGP-NEXT: v_mul_lo_u32 v12, v8, v13 -; CGP-NEXT: v_mul_lo_u32 v14, v4, v11 -; CGP-NEXT: v_mul_hi_u32 v15, v4, v13 -; CGP-NEXT: v_mul_hi_u32 v13, v8, v13 -; CGP-NEXT: v_xor_b32_e32 v7, v7, v6 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; CGP-NEXT: v_xor_b32_e32 v5, v3, v2 +; CGP-NEXT: v_xor_b32_e32 v6, v4, v2 +; CGP-NEXT: v_cvt_f32_u32_e32 v2, v5 +; CGP-NEXT: v_cvt_f32_u32_e32 v3, v6 +; CGP-NEXT: v_sub_i32_e32 v10, vcc, 0, v5 +; CGP-NEXT: v_subb_u32_e32 v11, vcc, 0, v6, vcc +; CGP-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 +; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v2 +; CGP-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 +; CGP-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 +; CGP-NEXT: v_trunc_f32_e32 v4, v3 +; CGP-NEXT: v_mac_f32_e32 v2, 0xcf800000, v4 +; CGP-NEXT: v_cvt_u32_f32_e32 v7, v2 +; CGP-NEXT: v_cvt_u32_f32_e32 v12, v4 +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v7, 0 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v10, v12, v[3:4] +; CGP-NEXT: v_mul_hi_u32 v13, v7, v2 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v11, v7, v[3:4] +; CGP-NEXT: v_mul_lo_u32 v4, v12, v2 +; CGP-NEXT: v_mul_hi_u32 v2, v12, v2 +; CGP-NEXT: v_mul_lo_u32 v14, v7, v3 +; CGP-NEXT: v_mul_lo_u32 v15, v12, v3 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v14 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v15 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v15, v8, v11 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12 -; CGP-NEXT: v_mul_hi_u32 v14, v4, v11 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v13 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v13 +; CGP-NEXT: v_mul_hi_u32 v13, v7, v3 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v14, v4 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v15, v2 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v13 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v12 -; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v11, vcc -; CGP-NEXT: v_mul_lo_u32 v10, v10, v4 -; CGP-NEXT: v_mul_lo_u32 v11, v9, v8 -; CGP-NEXT: v_mul_lo_u32 v12, v9, v4 -; CGP-NEXT: v_mul_hi_u32 v9, v9, v4 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_mul_lo_u32 v10, v8, v12 -; CGP-NEXT: v_mul_lo_u32 v11, v4, v9 -; CGP-NEXT: v_mul_hi_u32 v13, v4, v12 -; CGP-NEXT: v_mul_hi_u32 v12, v8, v12 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; CGP-NEXT: v_mul_hi_u32 v3, v12, v3 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v2 +; CGP-NEXT: v_addc_u32_e32 v12, vcc, v12, v3, vcc +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v7, 0 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v10, v12, v[3:4] +; CGP-NEXT: v_ashrrev_i32_e32 v10, 31, v9 +; CGP-NEXT: v_mul_hi_u32 v13, v7, v2 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v11, v7, v[3:4] +; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v10 +; CGP-NEXT: v_addc_u32_e32 v8, vcc, v9, v10, vcc +; CGP-NEXT: v_xor_b32_e32 v9, v4, v10 +; CGP-NEXT: v_mul_lo_u32 v4, v12, v2 +; CGP-NEXT: v_mul_lo_u32 v11, v7, v3 +; CGP-NEXT: v_mul_hi_u32 v2, v12, v2 +; CGP-NEXT: v_xor_b32_e32 v8, v8, v10 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v13, v8, v9 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_mul_hi_u32 v11, v4, v9 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v13 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v13, v12, v3 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v11, v4 +; CGP-NEXT: v_mul_hi_u32 v11, v7, v3 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v13, v2 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CGP-NEXT: v_mul_hi_u32 v9, v8, v9 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v11 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10 -; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc -; CGP-NEXT: v_mul_lo_u32 v9, v7, v4 -; CGP-NEXT: v_mul_lo_u32 v10, v5, v8 -; CGP-NEXT: v_mul_hi_u32 v11, v5, v4 -; CGP-NEXT: v_mul_hi_u32 v4, v7, v4 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_mul_hi_u32 v10, v5, v8 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 +; CGP-NEXT: v_mul_hi_u32 v3, v12, v3 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v4, vcc, v11, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v7, v2 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v12, v3, vcc +; CGP-NEXT: v_mul_lo_u32 v4, v8, v2 +; CGP-NEXT: v_mul_lo_u32 v7, v9, v3 +; CGP-NEXT: v_mul_hi_u32 v11, v9, v2 +; CGP-NEXT: v_mul_hi_u32 v2, v8, v2 +; CGP-NEXT: v_mul_hi_u32 v12, v8, v3 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v8, v3 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v7, v4 +; CGP-NEXT: v_mul_hi_u32 v7, v9, v3 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v11, v2 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_mul_hi_u32 v8, v7, v8 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_mul_lo_u32 v9, v2, v4 -; CGP-NEXT: v_mul_lo_u32 v8, v3, v8 -; CGP-NEXT: v_mul_lo_u32 v10, v3, v4 -; CGP-NEXT: v_mul_hi_u32 v4, v3, v4 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4 -; CGP-NEXT: v_sub_i32_e32 v5, vcc, v5, v10 -; CGP-NEXT: v_subb_u32_e64 v8, s[4:5], v7, v4, vcc -; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v7, v4 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v2 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v7 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v7, vcc, v11, v7 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v2, v4 +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v11, 0 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v7, v4 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v12, v4 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v5, v4, v[3:4] +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v9, v2 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v11, v[3:4] +; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v8, v3, vcc +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v8, v3 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v6 ; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v3 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v8, v2 -; CGP-NEXT: v_subb_u32_e32 v4, vcc, v4, v2, vcc -; CGP-NEXT: v_cndmask_b32_e64 v7, v7, v9, s[4:5] -; CGP-NEXT: v_sub_i32_e32 v9, vcc, v5, v3 -; CGP-NEXT: v_subbrev_u32_e64 v10, s[4:5], 0, v4, vcc -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v2 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v3 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v4, v6 +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v6, vcc +; CGP-NEXT: v_cndmask_b32_e64 v7, v7, v8, s[4:5] +; CGP-NEXT: v_sub_i32_e32 v8, vcc, v2, v5 +; CGP-NEXT: v_subbrev_u32_e64 v9, s[4:5], 0, v3, vcc +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v6 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v5 +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v6, vcc ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v10, v2 -; CGP-NEXT: v_subb_u32_e32 v2, vcc, v4, v2, vcc -; CGP-NEXT: v_sub_i32_e32 v3, vcc, v9, v3 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v9, v6 +; CGP-NEXT: v_sub_i32_e32 v5, vcc, v8, v5 ; CGP-NEXT: v_cndmask_b32_e64 v11, v11, v12, s[4:5] -; CGP-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v2, vcc +; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 +; CGP-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc ; CGP-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc -; CGP-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 -; CGP-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc -; CGP-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc -; CGP-NEXT: v_xor_b32_e32 v3, v3, v6 -; CGP-NEXT: v_xor_b32_e32 v4, v2, v6 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v3, v6 -; CGP-NEXT: v_subb_u32_e32 v3, vcc, v4, v6, vcc +; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc +; CGP-NEXT: v_xor_b32_e32 v2, v2, v10 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v10 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v10, vcc ; CGP-NEXT: ; implicit-def: $vgpr6 ; CGP-NEXT: ; implicit-def: $vgpr8 ; CGP-NEXT: .LBB2_6: ; %Flow @@ -1022,139 +972,131 @@ ; CHECK-LABEL: v_srem_i64_pow2k_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_movk_i32 s4, 0x1000 -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s4 +; CHECK-NEXT: s_movk_i32 s6, 0x1000 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s6 ; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0 -; CHECK-NEXT: s_movk_i32 s5, 0xf000 -; CHECK-NEXT: s_bfe_i32 s6, -1, 0x10000 +; CHECK-NEXT: s_movk_i32 s7, 0xf000 ; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v3 ; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 -; CHECK-NEXT: v_mul_f32_e32 v4, 0x2f800000, v2 -; CHECK-NEXT: v_trunc_f32_e32 v4, v4 +; CHECK-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 +; CHECK-NEXT: v_trunc_f32_e32 v4, v3 ; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v4 -; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 -; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 -; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc -; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4 -; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s5, v2 -; CHECK-NEXT: v_xor_b32_e32 v0, v0, v3 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; CHECK-NEXT: v_mul_lo_u32 v6, v4, v7 -; CHECK-NEXT: v_mul_lo_u32 v8, v2, v5 -; CHECK-NEXT: v_mul_hi_u32 v9, v2, v7 -; CHECK-NEXT: v_mul_hi_u32 v7, v4, v7 -; CHECK-NEXT: v_xor_b32_e32 v1, v1, v3 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CHECK-NEXT: v_cvt_u32_f32_e32 v5, v2 +; CHECK-NEXT: v_mov_b32_e32 v2, 0xfffff000 +; CHECK-NEXT: v_cvt_u32_f32_e32 v6, v4 +; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, v5, 0 +; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], s7, v6, v[3:4] +; CHECK-NEXT: v_mul_hi_u32 v7, v5, v2 +; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], -1, v5, v[3:4] +; CHECK-NEXT: v_mul_lo_u32 v4, v6, v2 +; CHECK-NEXT: v_mul_hi_u32 v2, v6, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, v5, v3 +; CHECK-NEXT: v_mul_lo_u32 v9, v6, v3 +; CHECK-NEXT: v_mul_hi_u32 v10, v5, v3 +; CHECK-NEXT: v_mul_hi_u32 v3, v6, v3 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v8 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v9, v4, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_mul_hi_u32 v8, v2, v5 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v8, v4 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v9, v2 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc -; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s5, v2 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; CHECK-NEXT: v_mul_lo_u32 v6, v4, v7 -; CHECK-NEXT: v_mul_lo_u32 v8, v2, v5 -; CHECK-NEXT: v_mul_hi_u32 v9, v2, v7 -; CHECK-NEXT: v_mul_hi_u32 v7, v4, v7 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v10 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v9, v4, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_mul_hi_u32 v8, v2, v5 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v7, v4 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v2 +; CHECK-NEXT: v_addc_u32_e32 v6, vcc, v6, v3, vcc +; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], s7, v5, 0 +; CHECK-NEXT: v_ashrrev_i32_e32 v7, 31, v1 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v7 +; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], s7, v6, v[3:4] +; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v7, vcc +; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], -1, v5, v[3:4] +; CHECK-NEXT: v_xor_b32_e32 v4, v0, v7 +; CHECK-NEXT: v_mul_lo_u32 v0, v6, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, v5, v3 +; CHECK-NEXT: v_xor_b32_e32 v9, v1, v7 +; CHECK-NEXT: v_mul_hi_u32 v1, v5, v2 +; CHECK-NEXT: v_mul_hi_u32 v2, v6, v2 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v8 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc -; CHECK-NEXT: v_mul_lo_u32 v5, v1, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, v0, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, v0, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 -; CHECK-NEXT: v_mov_b32_e32 v7, 0x1000 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v8, v1, v4 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CHECK-NEXT: v_mul_hi_u32 v6, v0, v4 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v8, v2 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; CHECK-NEXT: v_mul_lo_u32 v1, v6, v3 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v8, v0 +; CHECK-NEXT: v_mul_hi_u32 v8, v5, v3 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v8 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v8 +; CHECK-NEXT: v_mul_hi_u32 v3, v6, v3 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v1, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v1 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v3, v1 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v5, v0 +; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v6, v1, vcc +; CHECK-NEXT: v_mul_lo_u32 v2, v9, v0 +; CHECK-NEXT: v_mul_lo_u32 v3, v4, v1 +; CHECK-NEXT: v_mul_hi_u32 v5, v4, v0 +; CHECK-NEXT: v_mul_hi_u32 v0, v9, v0 +; CHECK-NEXT: v_mul_hi_u32 v6, v9, v1 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: v_mul_lo_u32 v5, v9, v1 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; CHECK-NEXT: v_mul_hi_u32 v3, v4, v1 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v5, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CHECK-NEXT: v_mul_lo_u32 v5, 0, v2 -; CHECK-NEXT: v_mul_lo_u32 v4, s4, v4 -; CHECK-NEXT: v_mul_lo_u32 v6, s4, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, s4, v2 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v4, v2 -; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 -; CHECK-NEXT: v_subb_u32_e64 v4, s[4:5], v1, v2, vcc -; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[4:5] +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v5, v3 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v0, v2 +; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v5, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v6, v2 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], s6, v2, v[1:2] +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v4, v0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], 0, v5, v[1:2] +; CHECK-NEXT: v_mov_b32_e32 v3, 0x1000 +; CHECK-NEXT: s_bfe_i32 s6, -1, 0x10000 +; CHECK-NEXT: v_subb_u32_e64 v2, s[4:5], v9, v1, vcc +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v9, v1 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] ; CHECK-NEXT: v_mov_b32_e32 v5, s6 -; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v2 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v2, v5, v2, s[4:5] -; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v4, v5, v4, s[4:5] +; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v3 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v5, v7 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v5, v3 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc ; CHECK-NEXT: v_mov_b32_e32 v8, s4 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 ; CHECK-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc -; CHECK-NEXT: v_sub_i32_e32 v7, vcc, v5, v7 +; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v5, v3 ; CHECK-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v1, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 -; CHECK-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc ; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc -; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc -; CHECK-NEXT: v_xor_b32_e32 v0, v0, v3 -; CHECK-NEXT: v_xor_b32_e32 v1, v1, v3 -; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 -; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc +; CHECK-NEXT: v_xor_b32_e32 v0, v0, v7 +; CHECK-NEXT: v_xor_b32_e32 v1, v1, v7 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v7 +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v7, vcc ; CHECK-NEXT: s_setpc_b64 s[30:31] %result = srem i64 %num, 4096 ret i64 %result @@ -1165,282 +1107,266 @@ ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: s_movk_i32 s10, 0x1000 -; GISEL-NEXT: s_mov_b32 s6, 0 +; GISEL-NEXT: s_mov_b32 s8, 0 ; GISEL-NEXT: s_add_u32 s4, s10, 0 -; GISEL-NEXT: s_mov_b32 s7, s6 +; GISEL-NEXT: s_mov_b32 s9, s8 ; GISEL-NEXT: s_addc_u32 s5, 0, 0 -; GISEL-NEXT: s_xor_b64 s[8:9], s[4:5], s[6:7] -; GISEL-NEXT: v_cvt_f32_u32_e32 v5, s8 -; GISEL-NEXT: v_cvt_f32_u32_e32 v6, s9 -; GISEL-NEXT: s_sub_u32 s4, 0, s8 -; GISEL-NEXT: s_subb_u32 s5, 0, s9 -; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v1 -; GISEL-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc -; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 -; GISEL-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5 -; GISEL-NEXT: v_trunc_f32_e32 v6, v6 -; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v6 -; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 -; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 -; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 -; GISEL-NEXT: v_xor_b32_e32 v1, v1, v4 -; GISEL-NEXT: v_mul_lo_u32 v7, s5, v5 -; GISEL-NEXT: v_mul_lo_u32 v8, s4, v6 -; GISEL-NEXT: v_mul_hi_u32 v10, s4, v5 -; GISEL-NEXT: v_mul_lo_u32 v9, s4, v5 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; GISEL-NEXT: v_mul_lo_u32 v8, v6, v9 -; GISEL-NEXT: v_mul_lo_u32 v10, v5, v7 -; GISEL-NEXT: v_mul_hi_u32 v11, v5, v9 -; GISEL-NEXT: v_mul_hi_u32 v9, v6, v9 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; GISEL-NEXT: s_xor_b64 s[6:7], s[4:5], s[8:9] +; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s6 +; GISEL-NEXT: v_cvt_f32_u32_e32 v5, s7 +; GISEL-NEXT: s_sub_u32 s11, 0, s6 +; GISEL-NEXT: s_subb_u32 s12, 0, s7 +; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 +; GISEL-NEXT: v_trunc_f32_e32 v6, v5 +; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6 +; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v4 +; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v6 +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], s11, v7, 0 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s11, v8, v[5:6] +; GISEL-NEXT: v_mul_hi_u32 v9, v7, v4 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s12, v7, v[5:6] +; GISEL-NEXT: v_mul_lo_u32 v6, v8, v4 +; GISEL-NEXT: v_mul_hi_u32 v4, v8, v4 +; GISEL-NEXT: v_mul_lo_u32 v10, v7, v5 +; GISEL-NEXT: v_mul_lo_u32 v11, v8, v5 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; GISEL-NEXT: v_mul_hi_u32 v10, v5, v7 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9 +; GISEL-NEXT: v_mul_hi_u32 v9, v7, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v10, v6 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v11, v4 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, s5, v5 -; GISEL-NEXT: v_mul_lo_u32 v8, s4, v6 -; GISEL-NEXT: v_mul_hi_u32 v10, s4, v5 -; GISEL-NEXT: v_mul_lo_u32 v9, s4, v5 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; GISEL-NEXT: v_mul_lo_u32 v8, v6, v9 -; GISEL-NEXT: v_mul_lo_u32 v10, v5, v7 -; GISEL-NEXT: v_mul_hi_u32 v11, v5, v9 -; GISEL-NEXT: v_mul_hi_u32 v9, v6, v9 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; GISEL-NEXT: v_mul_hi_u32 v10, v5, v7 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; GISEL-NEXT: v_mul_hi_u32 v5, v8, v5 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v6 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v7, v4 +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v5, vcc +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s11, v9, 0 +; GISEL-NEXT: v_mov_b32_e32 v4, v6 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s11, v8, v[4:5] +; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s12, v9, v[6:7] +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc +; GISEL-NEXT: v_xor_b32_e32 v7, v0, v4 +; GISEL-NEXT: v_mul_lo_u32 v0, v8, v5 +; GISEL-NEXT: v_mul_lo_u32 v10, v9, v6 +; GISEL-NEXT: v_xor_b32_e32 v11, v1, v4 +; GISEL-NEXT: v_mul_hi_u32 v1, v9, v5 +; GISEL-NEXT: v_mul_hi_u32 v5, v8, v5 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v10 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, v1, v5 -; GISEL-NEXT: v_mul_lo_u32 v8, v0, v6 -; GISEL-NEXT: v_mul_hi_u32 v10, v0, v5 -; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5 -; GISEL-NEXT: v_mov_b32_e32 v9, s9 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v10, v1, v6 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GISEL-NEXT: v_mul_hi_u32 v8, v0, v6 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v10, v5 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v1, v8, v6 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0 +; GISEL-NEXT: v_mul_hi_u32 v10, v9, v6 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v10 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10 +; GISEL-NEXT: v_mul_hi_u32 v6, v8, v6 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v5, v1 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v6, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0 +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v8, v1, vcc +; GISEL-NEXT: v_mul_lo_u32 v5, v11, v0 +; GISEL-NEXT: v_mul_lo_u32 v6, v7, v1 +; GISEL-NEXT: v_mul_hi_u32 v8, v7, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v11, v0 +; GISEL-NEXT: v_mul_hi_u32 v9, v11, v1 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v8, v11, v1 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; GISEL-NEXT: v_mul_hi_u32 v6, v7, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0 ; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; GISEL-NEXT: v_mul_hi_u32 v6, v1, v6 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; GISEL-NEXT: v_mul_lo_u32 v7, s9, v5 -; GISEL-NEXT: v_mul_lo_u32 v6, s8, v6 -; GISEL-NEXT: v_mul_lo_u32 v8, s8, v5 -; GISEL-NEXT: v_mul_hi_u32 v5, s8, v5 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v0, v5 +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v8, 0 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v8 -; GISEL-NEXT: v_subb_u32_e64 v6, s[4:5], v1, v5, vcc -; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v5 -; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s9, v6 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v9, v5 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v5, v[1:2] +; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v7, v0 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s7, v8, v[5:6] +; GISEL-NEXT: v_mov_b32_e32 v9, s7 +; GISEL-NEXT: v_subb_u32_e64 v8, s[4:5], v11, v5, vcc +; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v11, v5 +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], s9, v6 -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v9, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v7, s[4:5] -; GISEL-NEXT: v_subrev_i32_e32 v7, vcc, s8, v0 -; GISEL-NEXT: v_subbrev_u32_e64 v8, s[4:5], 0, v1, vcc -; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s9, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v7 -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v9, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], s9, v8 -; GISEL-NEXT: v_subrev_i32_e32 v9, vcc, s8, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v10, v10, v11, s[4:5] -; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], s7, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v10, v1, v5, s[4:5] ; GISEL-NEXT: s_add_u32 s4, s10, 0 -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 ; GISEL-NEXT: s_addc_u32 s5, 0, 0 -; GISEL-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 -; GISEL-NEXT: s_xor_b64 s[6:7], s[4:5], s[6:7] -; GISEL-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc -; GISEL-NEXT: v_cvt_f32_u32_e32 v5, s6 -; GISEL-NEXT: v_cvt_f32_u32_e32 v6, s7 -; GISEL-NEXT: s_sub_u32 s4, 0, s6 -; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc -; GISEL-NEXT: s_subb_u32 s5, 0, s7 -; GISEL-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5 -; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 -; GISEL-NEXT: v_xor_b32_e32 v1, v1, v4 -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 -; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 -; GISEL-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5 -; GISEL-NEXT: v_trunc_f32_e32 v6, v6 -; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v6 -; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 -; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, s5, v5 -; GISEL-NEXT: v_mul_lo_u32 v8, s4, v6 -; GISEL-NEXT: v_mul_hi_u32 v10, s4, v5 -; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v3 -; GISEL-NEXT: v_mul_lo_u32 v9, s4, v5 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v4, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; GISEL-NEXT: v_mul_lo_u32 v8, v6, v9 -; GISEL-NEXT: v_mul_lo_u32 v10, v5, v7 -; GISEL-NEXT: v_mul_hi_u32 v11, v5, v9 -; GISEL-NEXT: v_mul_hi_u32 v9, v6, v9 -; GISEL-NEXT: v_xor_b32_e32 v2, v2, v4 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; GISEL-NEXT: v_mul_hi_u32 v10, v5, v7 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; GISEL-NEXT: s_xor_b64 s[8:9], s[4:5], s[8:9] +; GISEL-NEXT: v_subb_u32_e32 v5, vcc, v0, v9, vcc +; GISEL-NEXT: v_cvt_f32_u32_e32 v0, s8 +; GISEL-NEXT: v_cvt_f32_u32_e32 v1, s9 +; GISEL-NEXT: v_subrev_i32_e32 v11, vcc, s6, v7 +; GISEL-NEXT: v_subbrev_u32_e64 v12, s[4:5], 0, v5, vcc +; GISEL-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v11 +; GISEL-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 +; GISEL-NEXT: v_mul_f32_e32 v13, 0x2f800000, v0 +; GISEL-NEXT: v_trunc_f32_e32 v13, v13 +; GISEL-NEXT: v_mac_f32_e32 v0, 0xcf800000, v13 +; GISEL-NEXT: v_cvt_u32_f32_e32 v14, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], s7, v12 +; GISEL-NEXT: s_sub_u32 s7, 0, s8 +; GISEL-NEXT: v_cndmask_b32_e64 v15, v1, v6, s[4:5] +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s7, v14, 0 +; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v13 +; GISEL-NEXT: v_subb_u32_e32 v9, vcc, v5, v9, vcc +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s7, v13, v[1:2] +; GISEL-NEXT: v_subrev_i32_e32 v1, vcc, s6, v11 +; GISEL-NEXT: s_subb_u32 s6, 0, s9 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v14, v[5:6] +; GISEL-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v9, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15 +; GISEL-NEXT: v_cndmask_b32_e32 v6, v11, v1, vcc +; GISEL-NEXT: v_mul_lo_u32 v1, v13, v0 +; GISEL-NEXT: v_mul_lo_u32 v11, v14, v5 +; GISEL-NEXT: v_cndmask_b32_e32 v9, v12, v9, vcc +; GISEL-NEXT: v_mul_hi_u32 v12, v14, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v11 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, s5, v5 -; GISEL-NEXT: v_mul_lo_u32 v8, s4, v6 -; GISEL-NEXT: v_mul_hi_u32 v10, s4, v5 -; GISEL-NEXT: v_mul_lo_u32 v9, s4, v5 -; GISEL-NEXT: v_xor_b32_e32 v3, v3, v4 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; GISEL-NEXT: v_mul_lo_u32 v8, v6, v9 -; GISEL-NEXT: v_mul_lo_u32 v10, v5, v7 -; GISEL-NEXT: v_mul_hi_u32 v11, v5, v9 -; GISEL-NEXT: v_mul_hi_u32 v9, v6, v9 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; GISEL-NEXT: v_mul_hi_u32 v10, v5, v7 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v12, v13, v5 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v11, v1 +; GISEL-NEXT: v_mul_hi_u32 v11, v14, v5 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v12, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v11 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, v3, v5 -; GISEL-NEXT: v_mul_lo_u32 v8, v2, v6 -; GISEL-NEXT: v_mul_hi_u32 v10, v2, v5 -; GISEL-NEXT: v_mul_hi_u32 v5, v3, v5 -; GISEL-NEXT: v_mov_b32_e32 v9, s7 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v10, v3, v6 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GISEL-NEXT: v_mul_hi_u32 v8, v2, v6 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v10, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; GISEL-NEXT: v_mul_hi_u32 v6, v3, v6 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; GISEL-NEXT: v_mul_lo_u32 v7, s7, v5 -; GISEL-NEXT: v_mul_lo_u32 v6, s6, v6 -; GISEL-NEXT: v_mul_lo_u32 v8, s6, v5 -; GISEL-NEXT: v_mul_hi_u32 v5, s6, v5 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; GISEL-NEXT: v_mul_hi_u32 v5, v13, v5 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v11, v1 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v5, v1 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v14, v0 +; GISEL-NEXT: v_addc_u32_e32 v12, vcc, v13, v1, vcc +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s7, v11, 0 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 +; GISEL-NEXT: v_cndmask_b32_e32 v7, v7, v6, vcc +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s7, v12, v[1:2] +; GISEL-NEXT: v_xor_b32_e32 v1, v7, v4 +; GISEL-NEXT: v_ashrrev_i32_e32 v7, 31, v3 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v11, v[5:6] +; GISEL-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v7 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc +; GISEL-NEXT: v_xor_b32_e32 v9, v2, v7 +; GISEL-NEXT: v_mul_lo_u32 v2, v12, v0 +; GISEL-NEXT: v_mul_lo_u32 v6, v11, v5 +; GISEL-NEXT: v_xor_b32_e32 v10, v3, v7 +; GISEL-NEXT: v_mul_hi_u32 v3, v11, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v12, v5 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2 +; GISEL-NEXT: v_mul_hi_u32 v6, v11, v5 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6 +; GISEL-NEXT: v_mul_hi_u32 v5, v12, v5 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0 +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v12, v2, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v10, v0 +; GISEL-NEXT: v_mul_lo_u32 v5, v9, v2 +; GISEL-NEXT: v_mul_hi_u32 v6, v9, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v10, v0 +; GISEL-NEXT: v_xor_b32_e32 v8, v8, v4 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v6, v10, v2 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v5, v3 +; GISEL-NEXT: v_mul_hi_u32 v5, v9, v2 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v8 -; GISEL-NEXT: v_subb_u32_e64 v6, s[4:5], v3, v5, vcc -; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v5 -; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v6 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v0, v3 +; GISEL-NEXT: v_mul_hi_u32 v6, v10, v2 +; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], s8, v11, 0 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v5, v0 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v0 +; GISEL-NEXT: v_mov_b32_e32 v0, v3 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s8, v5, v[0:1] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v4 +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v8, v4, vcc +; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], s9, v11, v[5:6] +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v9, v2 +; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v10, v3, vcc +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v10, v3 +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s9, v4 +; GISEL-NEXT: v_mov_b32_e32 v8, s9 ; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v2 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], s7, v6 -; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v9, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v7, s[4:5] -; GISEL-NEXT: v_subrev_i32_e32 v7, vcc, s6, v2 -; GISEL-NEXT: v_subbrev_u32_e64 v8, s[4:5], 0, v3, vcc -; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v8 +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v2 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], s9, v4 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v8, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5] +; GISEL-NEXT: v_subrev_i32_e32 v6, vcc, s8, v2 +; GISEL-NEXT: v_subbrev_u32_e64 v9, s[4:5], 0, v3, vcc +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s9, v9 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v7 -; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v9, vcc +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v6 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v8, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], s7, v8 -; GISEL-NEXT: v_subrev_i32_e32 v9, vcc, s6, v7 +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], s9, v9 +; GISEL-NEXT: v_subrev_i32_e32 v8, vcc, s8, v6 ; GISEL-NEXT: v_cndmask_b32_e64 v10, v10, v11, s[4:5] ; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 -; GISEL-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc -; GISEL-NEXT: v_xor_b32_e32 v2, v2, v4 -; GISEL-NEXT: v_xor_b32_e32 v3, v3, v4 -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v4 -; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v4, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v7 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v7 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v7 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_srem_v2i64_pow2k_denom: @@ -1452,263 +1378,248 @@ ; CGP-NEXT: s_movk_i32 s7, 0xf000 ; CGP-NEXT: s_bfe_i32 s8, -1, 0x10000 ; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 -; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v4 -; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 -; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5 -; CGP-NEXT: v_trunc_f32_e32 v6, v6 -; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v6 -; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 -; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 -; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc -; CGP-NEXT: v_mul_lo_u32 v7, -1, v5 -; CGP-NEXT: v_mul_lo_u32 v8, s7, v6 -; CGP-NEXT: v_mul_hi_u32 v10, s7, v5 -; CGP-NEXT: v_mul_lo_u32 v9, s7, v5 -; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; CGP-NEXT: v_mul_lo_u32 v8, v6, v9 -; CGP-NEXT: v_mul_lo_u32 v10, v5, v7 -; CGP-NEXT: v_mul_hi_u32 v11, v5, v9 -; CGP-NEXT: v_mul_hi_u32 v9, v6, v9 -; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v6, v7 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; CGP-NEXT: v_mul_hi_u32 v10, v5, v7 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; CGP-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 +; CGP-NEXT: v_trunc_f32_e32 v6, v5 +; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v7, v4 +; CGP-NEXT: v_mov_b32_e32 v4, 0xfffff000 +; CGP-NEXT: v_cvt_u32_f32_e32 v8, v6 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v4, v7, 0 +; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s7, v8, v[5:6] +; CGP-NEXT: v_mul_hi_u32 v9, v7, v4 +; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], -1, v7, v[5:6] +; CGP-NEXT: v_mul_lo_u32 v6, v8, v4 +; CGP-NEXT: v_mul_hi_u32 v4, v8, v4 +; CGP-NEXT: v_mul_lo_u32 v10, v7, v5 +; CGP-NEXT: v_mul_lo_u32 v11, v8, v5 +; CGP-NEXT: v_mul_hi_u32 v12, v7, v5 +; CGP-NEXT: v_mul_hi_u32 v5, v8, v5 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v9 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v10, v6 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v11, v4 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc -; CGP-NEXT: v_mul_lo_u32 v7, -1, v5 -; CGP-NEXT: v_mul_lo_u32 v8, s7, v6 -; CGP-NEXT: v_mul_hi_u32 v10, s7, v5 -; CGP-NEXT: v_mul_lo_u32 v9, s7, v5 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; CGP-NEXT: v_mul_lo_u32 v8, v6, v9 -; CGP-NEXT: v_mul_lo_u32 v10, v5, v7 -; CGP-NEXT: v_mul_hi_u32 v11, v5, v9 -; CGP-NEXT: v_mul_hi_u32 v9, v6, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v12 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v6, v7 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; CGP-NEXT: v_mul_hi_u32 v10, v5, v7 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v5, v8 -; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc -; CGP-NEXT: v_mul_lo_u32 v7, v1, v8 -; CGP-NEXT: v_mul_lo_u32 v9, v0, v6 -; CGP-NEXT: v_mul_hi_u32 v10, v0, v8 -; CGP-NEXT: v_mul_hi_u32 v8, v1, v8 -; CGP-NEXT: v_mov_b32_e32 v5, 0x1000 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v7, v4 +; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v5, vcc +; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s7, v9, 0 +; CGP-NEXT: v_mov_b32_e32 v4, v6 +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v8, v[4:5] +; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], -1, v9, v[6:7] +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc +; CGP-NEXT: v_xor_b32_e32 v10, v0, v4 +; CGP-NEXT: v_mul_lo_u32 v0, v8, v5 +; CGP-NEXT: v_mul_lo_u32 v7, v9, v6 +; CGP-NEXT: v_xor_b32_e32 v11, v1, v4 +; CGP-NEXT: v_mul_hi_u32 v1, v9, v5 +; CGP-NEXT: v_mul_hi_u32 v5, v8, v5 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v7 ; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v10, v1, v6 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7 -; CGP-NEXT: v_mul_hi_u32 v9, v0, v6 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_mul_hi_u32 v6, v1, v6 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; CGP-NEXT: v_mul_lo_u32 v8, 0, v7 -; CGP-NEXT: v_mul_lo_u32 v6, s6, v6 -; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 -; CGP-NEXT: v_mul_hi_u32 v7, s6, v7 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v9 -; CGP-NEXT: v_subb_u32_e64 v7, s[4:5], v1, v6, vcc -; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v6 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] -; CGP-NEXT: v_mov_b32_e32 v8, s8 -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v7 -; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CGP-NEXT: v_cndmask_b32_e64 v6, v8, v6, s[4:5] -; CGP-NEXT: v_sub_i32_e32 v8, vcc, v0, v5 -; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v5 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc -; CGP-NEXT: v_mov_b32_e32 v10, s4 -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CGP-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc -; CGP-NEXT: v_sub_i32_e32 v10, vcc, v8, v5 -; CGP-NEXT: v_subbrev_u32_e32 v11, vcc, 0, v1, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 -; CGP-NEXT: v_cvt_f32_u32_e32 v9, v5 -; CGP-NEXT: v_cndmask_b32_e32 v8, v8, v10, vcc -; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; CGP-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v1, v8, v6 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v7, v0 +; CGP-NEXT: v_mul_hi_u32 v7, v9, v6 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v7 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CGP-NEXT: v_mul_hi_u32 v6, v8, v6 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v1, v0 +; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v5, v1 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v6, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v9, v0 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v8, v1, vcc +; CGP-NEXT: v_mul_lo_u32 v5, v11, v0 +; CGP-NEXT: v_mul_lo_u32 v6, v10, v1 +; CGP-NEXT: v_mul_hi_u32 v7, v10, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v11, v0 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v7, v11, v1 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; CGP-NEXT: v_mul_hi_u32 v6, v10, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v7, v0 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v6 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v0, v5 +; CGP-NEXT: v_mul_hi_u32 v7, v11, v1 +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v8, 0 +; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v5, v[1:2] +; CGP-NEXT: v_mov_b32_e32 v5, 0x1000 +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], 0, v8, v[6:7] +; CGP-NEXT: v_sub_i32_e32 v8, vcc, v10, v0 +; CGP-NEXT: v_subb_u32_e64 v9, s[4:5], v11, v6, vcc +; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v11, v6 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v5 +; CGP-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5] +; CGP-NEXT: v_mov_b32_e32 v6, s8 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 +; CGP-NEXT: v_cndmask_b32_e64 v10, v6, v1, s[4:5] +; CGP-NEXT: v_cvt_f32_u32_e32 v1, v5 ; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 -; CGP-NEXT: v_mac_f32_e32 v9, 0x4f800000, v6 -; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v9 -; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc -; CGP-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc -; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v3 -; CGP-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 -; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6 -; CGP-NEXT: v_trunc_f32_e32 v8, v8 -; CGP-NEXT: v_mac_f32_e32 v6, 0xcf800000, v8 -; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 -; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v7 -; CGP-NEXT: v_mul_lo_u32 v9, -1, v6 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s7, v6 -; CGP-NEXT: v_mul_lo_u32 v11, s7, v6 -; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v12, v6, v9 -; CGP-NEXT: v_mul_hi_u32 v13, v6, v11 -; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v13, v8, v9 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_mul_hi_u32 v12, v6, v9 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CGP-NEXT: v_mul_hi_u32 v9, v8, v9 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc +; CGP-NEXT: v_mac_f32_e32 v1, 0x4f800000, v6 +; CGP-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; CGP-NEXT: v_sub_i32_e32 v11, vcc, v8, v5 +; CGP-NEXT: v_subbrev_u32_e32 v12, vcc, 0, v0, vcc +; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v1 +; CGP-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 +; CGP-NEXT: v_trunc_f32_e32 v6, v1 +; CGP-NEXT: v_mac_f32_e32 v0, 0xcf800000, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v13, v0 +; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 +; CGP-NEXT: v_mov_b32_e32 v14, s4 +; CGP-NEXT: v_cvt_u32_f32_e32 v15, v6 +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s7, v13, 0 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v11, v5 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v12 +; CGP-NEXT: v_cndmask_b32_e32 v14, v14, v7, vcc +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v15, v[1:2] +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v11, v5 +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], -1, v13, v[6:7] +; CGP-NEXT: v_subbrev_u32_e32 v16, vcc, 0, v12, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 +; CGP-NEXT: v_cndmask_b32_e32 v7, v11, v1, vcc +; CGP-NEXT: v_mul_lo_u32 v1, v15, v0 +; CGP-NEXT: v_mul_lo_u32 v11, v13, v6 +; CGP-NEXT: v_mul_hi_u32 v14, v13, v0 +; CGP-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc +; CGP-NEXT: v_mul_hi_u32 v0, v15, v0 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v11 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 -; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc -; CGP-NEXT: v_mul_lo_u32 v9, -1, v6 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s7, v6 -; CGP-NEXT: v_mul_lo_u32 v11, s7, v6 -; CGP-NEXT: v_xor_b32_e32 v2, v2, v7 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v12, v6, v9 -; CGP-NEXT: v_mul_hi_u32 v13, v6, v11 -; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_xor_b32_e32 v3, v3, v7 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v13, v8, v9 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_mul_hi_u32 v12, v6, v9 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CGP-NEXT: v_mul_hi_u32 v9, v8, v9 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v14 +; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v14, v15, v6 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v11, v1 +; CGP-NEXT: v_mul_hi_u32 v11, v13, v6 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v14, v0 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v11 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 -; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc -; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 -; CGP-NEXT: v_mul_lo_u32 v9, v3, v6 -; CGP-NEXT: v_mul_lo_u32 v10, v2, v8 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc -; CGP-NEXT: v_mul_hi_u32 v4, v2, v6 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v9, v3, v8 -; CGP-NEXT: v_mul_hi_u32 v6, v3, v6 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 -; CGP-NEXT: v_mul_hi_u32 v10, v2, v8 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_mul_hi_u32 v8, v3, v8 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v14, v11 +; CGP-NEXT: v_mul_hi_u32 v6, v15, v6 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v11, v1 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v6, v1 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v0 +; CGP-NEXT: v_addc_u32_e32 v13, vcc, v15, v1, vcc +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s7, v11, 0 +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 +; CGP-NEXT: v_cndmask_b32_e32 v8, v8, v7, vcc +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v13, v[1:2] +; CGP-NEXT: v_xor_b32_e32 v1, v8, v4 +; CGP-NEXT: v_ashrrev_i32_e32 v8, 31, v3 +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], -1, v11, v[6:7] +; CGP-NEXT: v_cndmask_b32_e32 v9, v9, v12, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v8 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v8, vcc +; CGP-NEXT: v_xor_b32_e32 v10, v2, v8 +; CGP-NEXT: v_mul_lo_u32 v2, v13, v0 +; CGP-NEXT: v_mul_lo_u32 v7, v11, v6 +; CGP-NEXT: v_xor_b32_e32 v12, v3, v8 +; CGP-NEXT: v_mul_hi_u32 v3, v11, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v13, v0 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v7 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v3, v13, v6 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v7, v2 +; CGP-NEXT: v_mul_hi_u32 v7, v11, v6 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v3, v0 +; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v7 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v7 +; CGP-NEXT: v_mul_hi_u32 v6, v13, v6 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v6, v2 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v11, v0 +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v13, v2, vcc +; CGP-NEXT: v_mul_lo_u32 v3, v12, v0 +; CGP-NEXT: v_mul_lo_u32 v6, v10, v2 +; CGP-NEXT: v_mul_hi_u32 v7, v10, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v12, v0 +; CGP-NEXT: v_xor_b32_e32 v9, v9, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v6 ; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CGP-NEXT: v_mul_lo_u32 v8, 0, v4 -; CGP-NEXT: v_mul_lo_u32 v6, s6, v6 -; CGP-NEXT: v_mul_lo_u32 v9, s6, v4 -; CGP-NEXT: v_mul_hi_u32 v4, s6, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v7 +; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v7, v12, v2 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v6, v3 +; CGP-NEXT: v_mul_hi_u32 v6, v10, v2 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v7, v0 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v6 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v0, v3 +; CGP-NEXT: v_mul_hi_u32 v7, v12, v2 +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], s6, v11, 0 +; CGP-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v6, v0 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v0 +; CGP-NEXT: v_mov_b32_e32 v0, v3 +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v6, v[0:1] +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v1, v4 +; CGP-NEXT: v_subb_u32_e32 v1, vcc, v9, v4, vcc +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], 0, v11, v[6:7] +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v10, v2 +; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v12, v3, vcc +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v12, v3 ; CGP-NEXT: s_bfe_i32 s6, -1, 0x10000 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v9 -; CGP-NEXT: v_subb_u32_e64 v6, s[4:5], v3, v4, vcc -; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] -; CGP-NEXT: v_mov_b32_e32 v8, s6 -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v6 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] +; CGP-NEXT: v_mov_b32_e32 v7, s6 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; CGP-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[4:5] -; CGP-NEXT: v_sub_i32_e32 v8, vcc, v2, v5 +; CGP-NEXT: v_cndmask_b32_e64 v6, v7, v6, s[4:5] +; CGP-NEXT: v_sub_i32_e32 v7, vcc, v2, v5 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v5 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v7, v5 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc ; CGP-NEXT: v_mov_b32_e32 v10, s4 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; CGP-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc -; CGP-NEXT: v_sub_i32_e32 v5, vcc, v8, v5 +; CGP-NEXT: v_sub_i32_e32 v5, vcc, v7, v5 ; CGP-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v3, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 -; CGP-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc +; CGP-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc ; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v10, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 ; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc -; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc -; CGP-NEXT: v_xor_b32_e32 v2, v2, v7 -; CGP-NEXT: v_xor_b32_e32 v3, v3, v7 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v7 -; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc +; CGP-NEXT: v_xor_b32_e32 v2, v2, v8 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v8 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v8 +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v8, vcc ; CGP-NEXT: s_setpc_b64 s[30:31] %result = srem <2 x i64> %num, ret <2 x i64> %result @@ -1718,139 +1629,131 @@ ; CHECK-LABEL: v_srem_i64_oddk_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_mov_b32 s4, 0x12d8fb -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s4 +; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s6 ; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0 -; CHECK-NEXT: s_mov_b32 s5, 0xffed2705 -; CHECK-NEXT: s_bfe_i32 s6, -1, 0x10000 +; CHECK-NEXT: s_mov_b32 s7, 0xffed2705 ; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v3 ; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 -; CHECK-NEXT: v_mul_f32_e32 v4, 0x2f800000, v2 -; CHECK-NEXT: v_trunc_f32_e32 v4, v4 +; CHECK-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 +; CHECK-NEXT: v_trunc_f32_e32 v4, v3 ; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v4 -; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 -; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 -; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc -; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4 -; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s5, v2 -; CHECK-NEXT: v_xor_b32_e32 v0, v0, v3 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; CHECK-NEXT: v_mul_lo_u32 v6, v4, v7 -; CHECK-NEXT: v_mul_lo_u32 v8, v2, v5 -; CHECK-NEXT: v_mul_hi_u32 v9, v2, v7 -; CHECK-NEXT: v_mul_hi_u32 v7, v4, v7 -; CHECK-NEXT: v_xor_b32_e32 v1, v1, v3 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CHECK-NEXT: v_cvt_u32_f32_e32 v5, v2 +; CHECK-NEXT: v_mov_b32_e32 v2, 0xffed2705 +; CHECK-NEXT: v_cvt_u32_f32_e32 v6, v4 +; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, v5, 0 +; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], s7, v6, v[3:4] +; CHECK-NEXT: v_mul_hi_u32 v7, v5, v2 +; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], -1, v5, v[3:4] +; CHECK-NEXT: v_mul_lo_u32 v4, v6, v2 +; CHECK-NEXT: v_mul_hi_u32 v2, v6, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, v5, v3 +; CHECK-NEXT: v_mul_lo_u32 v9, v6, v3 +; CHECK-NEXT: v_mul_hi_u32 v10, v5, v3 +; CHECK-NEXT: v_mul_hi_u32 v3, v6, v3 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v8 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v9, v4, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_mul_hi_u32 v8, v2, v5 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v8, v4 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v9, v2 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc -; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s5, v2 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; CHECK-NEXT: v_mul_lo_u32 v6, v4, v7 -; CHECK-NEXT: v_mul_lo_u32 v8, v2, v5 -; CHECK-NEXT: v_mul_hi_u32 v9, v2, v7 -; CHECK-NEXT: v_mul_hi_u32 v7, v4, v7 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v10 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v9, v4, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_mul_hi_u32 v8, v2, v5 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v7, v4 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v2 +; CHECK-NEXT: v_addc_u32_e32 v6, vcc, v6, v3, vcc +; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], s7, v5, 0 +; CHECK-NEXT: v_ashrrev_i32_e32 v7, 31, v1 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v7 +; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], s7, v6, v[3:4] +; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v7, vcc +; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], -1, v5, v[3:4] +; CHECK-NEXT: v_xor_b32_e32 v4, v0, v7 +; CHECK-NEXT: v_mul_lo_u32 v0, v6, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, v5, v3 +; CHECK-NEXT: v_xor_b32_e32 v9, v1, v7 +; CHECK-NEXT: v_mul_hi_u32 v1, v5, v2 +; CHECK-NEXT: v_mul_hi_u32 v2, v6, v2 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v8 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc -; CHECK-NEXT: v_mul_lo_u32 v5, v1, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, v0, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, v0, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 -; CHECK-NEXT: v_mov_b32_e32 v7, 0x12d8fb -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v8, v1, v4 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CHECK-NEXT: v_mul_hi_u32 v6, v0, v4 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v8, v2 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; CHECK-NEXT: v_mul_lo_u32 v1, v6, v3 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v8, v0 +; CHECK-NEXT: v_mul_hi_u32 v8, v5, v3 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v8 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v8 +; CHECK-NEXT: v_mul_hi_u32 v3, v6, v3 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v1, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v1 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v3, v1 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v5, v0 +; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v6, v1, vcc +; CHECK-NEXT: v_mul_lo_u32 v2, v9, v0 +; CHECK-NEXT: v_mul_lo_u32 v3, v4, v1 +; CHECK-NEXT: v_mul_hi_u32 v5, v4, v0 +; CHECK-NEXT: v_mul_hi_u32 v0, v9, v0 +; CHECK-NEXT: v_mul_hi_u32 v6, v9, v1 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: v_mul_lo_u32 v5, v9, v1 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; CHECK-NEXT: v_mul_hi_u32 v3, v4, v1 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v5, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CHECK-NEXT: v_mul_lo_u32 v5, 0, v2 -; CHECK-NEXT: v_mul_lo_u32 v4, s4, v4 -; CHECK-NEXT: v_mul_lo_u32 v6, s4, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, s4, v2 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v4, v2 -; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 -; CHECK-NEXT: v_subb_u32_e64 v4, s[4:5], v1, v2, vcc -; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[4:5] +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v5, v3 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v0, v2 +; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v5, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v6, v2 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], s6, v2, v[1:2] +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v4, v0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], 0, v5, v[1:2] +; CHECK-NEXT: v_mov_b32_e32 v3, 0x12d8fb +; CHECK-NEXT: s_bfe_i32 s6, -1, 0x10000 +; CHECK-NEXT: v_subb_u32_e64 v2, s[4:5], v9, v1, vcc +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v9, v1 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] ; CHECK-NEXT: v_mov_b32_e32 v5, s6 -; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v2 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v2, v5, v2, s[4:5] -; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v4, v5, v4, s[4:5] +; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v3 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v5, v7 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v5, v3 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc ; CHECK-NEXT: v_mov_b32_e32 v8, s4 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 ; CHECK-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc -; CHECK-NEXT: v_sub_i32_e32 v7, vcc, v5, v7 +; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v5, v3 ; CHECK-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v1, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 -; CHECK-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc ; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc -; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc -; CHECK-NEXT: v_xor_b32_e32 v0, v0, v3 -; CHECK-NEXT: v_xor_b32_e32 v1, v1, v3 -; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 -; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc +; CHECK-NEXT: v_xor_b32_e32 v0, v0, v7 +; CHECK-NEXT: v_xor_b32_e32 v1, v1, v7 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v7 +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v7, vcc ; CHECK-NEXT: s_setpc_b64 s[30:31] %result = srem i64 %num, 1235195 ret i64 %result @@ -1861,282 +1764,266 @@ ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: s_mov_b32 s10, 0x12d8fb -; GISEL-NEXT: s_mov_b32 s6, 0 +; GISEL-NEXT: s_mov_b32 s8, 0 ; GISEL-NEXT: s_add_u32 s4, s10, 0 -; GISEL-NEXT: s_mov_b32 s7, s6 +; GISEL-NEXT: s_mov_b32 s9, s8 ; GISEL-NEXT: s_addc_u32 s5, 0, 0 -; GISEL-NEXT: s_xor_b64 s[8:9], s[4:5], s[6:7] -; GISEL-NEXT: v_cvt_f32_u32_e32 v5, s8 -; GISEL-NEXT: v_cvt_f32_u32_e32 v6, s9 -; GISEL-NEXT: s_sub_u32 s4, 0, s8 -; GISEL-NEXT: s_subb_u32 s5, 0, s9 -; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v1 -; GISEL-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc -; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 -; GISEL-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5 -; GISEL-NEXT: v_trunc_f32_e32 v6, v6 -; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v6 -; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 -; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 -; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 -; GISEL-NEXT: v_xor_b32_e32 v1, v1, v4 -; GISEL-NEXT: v_mul_lo_u32 v7, s5, v5 -; GISEL-NEXT: v_mul_lo_u32 v8, s4, v6 -; GISEL-NEXT: v_mul_hi_u32 v10, s4, v5 -; GISEL-NEXT: v_mul_lo_u32 v9, s4, v5 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; GISEL-NEXT: v_mul_lo_u32 v8, v6, v9 -; GISEL-NEXT: v_mul_lo_u32 v10, v5, v7 -; GISEL-NEXT: v_mul_hi_u32 v11, v5, v9 -; GISEL-NEXT: v_mul_hi_u32 v9, v6, v9 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; GISEL-NEXT: v_mul_hi_u32 v10, v5, v7 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, s5, v5 -; GISEL-NEXT: v_mul_lo_u32 v8, s4, v6 -; GISEL-NEXT: v_mul_hi_u32 v10, s4, v5 -; GISEL-NEXT: v_mul_lo_u32 v9, s4, v5 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; GISEL-NEXT: v_mul_lo_u32 v8, v6, v9 -; GISEL-NEXT: v_mul_lo_u32 v10, v5, v7 -; GISEL-NEXT: v_mul_hi_u32 v11, v5, v9 -; GISEL-NEXT: v_mul_hi_u32 v9, v6, v9 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; GISEL-NEXT: v_mul_hi_u32 v10, v5, v7 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, v1, v5 -; GISEL-NEXT: v_mul_lo_u32 v8, v0, v6 -; GISEL-NEXT: v_mul_hi_u32 v10, v0, v5 -; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5 -; GISEL-NEXT: v_mov_b32_e32 v9, s9 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v10, v1, v6 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GISEL-NEXT: v_mul_hi_u32 v8, v0, v6 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v10, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; GISEL-NEXT: v_mul_hi_u32 v6, v1, v6 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; GISEL-NEXT: v_mul_lo_u32 v7, s9, v5 -; GISEL-NEXT: v_mul_lo_u32 v6, s8, v6 -; GISEL-NEXT: v_mul_lo_u32 v8, s8, v5 -; GISEL-NEXT: v_mul_hi_u32 v5, s8, v5 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v8 -; GISEL-NEXT: v_subb_u32_e64 v6, s[4:5], v1, v5, vcc -; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v5 -; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s9, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], s9, v6 -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v9, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v7, s[4:5] -; GISEL-NEXT: v_subrev_i32_e32 v7, vcc, s8, v0 -; GISEL-NEXT: v_subbrev_u32_e64 v8, s[4:5], 0, v1, vcc -; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s9, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v7 -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v9, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], s9, v8 -; GISEL-NEXT: v_subrev_i32_e32 v9, vcc, s8, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v10, v10, v11, s[4:5] -; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; GISEL-NEXT: s_add_u32 s4, s10, 0 -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 -; GISEL-NEXT: s_addc_u32 s5, 0, 0 -; GISEL-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 -; GISEL-NEXT: s_xor_b64 s[6:7], s[4:5], s[6:7] -; GISEL-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc -; GISEL-NEXT: v_cvt_f32_u32_e32 v5, s6 -; GISEL-NEXT: v_cvt_f32_u32_e32 v6, s7 -; GISEL-NEXT: s_sub_u32 s4, 0, s6 -; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc -; GISEL-NEXT: s_subb_u32 s5, 0, s7 -; GISEL-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5 -; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 -; GISEL-NEXT: v_xor_b32_e32 v1, v1, v4 -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 -; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 -; GISEL-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5 -; GISEL-NEXT: v_trunc_f32_e32 v6, v6 -; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v6 -; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 -; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, s5, v5 -; GISEL-NEXT: v_mul_lo_u32 v8, s4, v6 -; GISEL-NEXT: v_mul_hi_u32 v10, s4, v5 -; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v3 -; GISEL-NEXT: v_mul_lo_u32 v9, s4, v5 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v4, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; GISEL-NEXT: v_mul_lo_u32 v8, v6, v9 -; GISEL-NEXT: v_mul_lo_u32 v10, v5, v7 -; GISEL-NEXT: v_mul_hi_u32 v11, v5, v9 -; GISEL-NEXT: v_mul_hi_u32 v9, v6, v9 -; GISEL-NEXT: v_xor_b32_e32 v2, v2, v4 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; GISEL-NEXT: v_mul_hi_u32 v10, v5, v7 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, s5, v5 -; GISEL-NEXT: v_mul_lo_u32 v8, s4, v6 -; GISEL-NEXT: v_mul_hi_u32 v10, s4, v5 -; GISEL-NEXT: v_mul_lo_u32 v9, s4, v5 -; GISEL-NEXT: v_xor_b32_e32 v3, v3, v4 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; GISEL-NEXT: v_mul_lo_u32 v8, v6, v9 -; GISEL-NEXT: v_mul_lo_u32 v10, v5, v7 -; GISEL-NEXT: v_mul_hi_u32 v11, v5, v9 -; GISEL-NEXT: v_mul_hi_u32 v9, v6, v9 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; GISEL-NEXT: s_xor_b64 s[6:7], s[4:5], s[8:9] +; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s6 +; GISEL-NEXT: v_cvt_f32_u32_e32 v5, s7 +; GISEL-NEXT: s_sub_u32 s11, 0, s6 +; GISEL-NEXT: s_subb_u32 s12, 0, s7 +; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 +; GISEL-NEXT: v_trunc_f32_e32 v6, v5 +; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6 +; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v4 +; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v6 +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], s11, v7, 0 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s11, v8, v[5:6] +; GISEL-NEXT: v_mul_hi_u32 v9, v7, v4 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s12, v7, v[5:6] +; GISEL-NEXT: v_mul_lo_u32 v6, v8, v4 +; GISEL-NEXT: v_mul_hi_u32 v4, v8, v4 +; GISEL-NEXT: v_mul_lo_u32 v10, v7, v5 +; GISEL-NEXT: v_mul_lo_u32 v11, v8, v5 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; GISEL-NEXT: v_mul_hi_u32 v10, v5, v7 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9 +; GISEL-NEXT: v_mul_hi_u32 v9, v7, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v10, v6 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v11, v4 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, v3, v5 -; GISEL-NEXT: v_mul_lo_u32 v8, v2, v6 -; GISEL-NEXT: v_mul_hi_u32 v10, v2, v5 -; GISEL-NEXT: v_mul_hi_u32 v5, v3, v5 -; GISEL-NEXT: v_mov_b32_e32 v9, s7 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v10, v3, v6 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GISEL-NEXT: v_mul_hi_u32 v8, v2, v6 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v10, v5 +; GISEL-NEXT: v_mul_hi_u32 v5, v8, v5 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v6 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v7, v4 +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v5, vcc +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s11, v9, 0 +; GISEL-NEXT: v_mov_b32_e32 v4, v6 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s11, v8, v[4:5] +; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s12, v9, v[6:7] +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc +; GISEL-NEXT: v_xor_b32_e32 v7, v0, v4 +; GISEL-NEXT: v_mul_lo_u32 v0, v8, v5 +; GISEL-NEXT: v_mul_lo_u32 v10, v9, v6 +; GISEL-NEXT: v_xor_b32_e32 v11, v1, v4 +; GISEL-NEXT: v_mul_hi_u32 v1, v9, v5 +; GISEL-NEXT: v_mul_hi_u32 v5, v8, v5 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v10 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v1, v8, v6 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0 +; GISEL-NEXT: v_mul_hi_u32 v10, v9, v6 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10 +; GISEL-NEXT: v_mul_hi_u32 v6, v8, v6 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v5, v1 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v6, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0 +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v8, v1, vcc +; GISEL-NEXT: v_mul_lo_u32 v5, v11, v0 +; GISEL-NEXT: v_mul_lo_u32 v6, v7, v1 +; GISEL-NEXT: v_mul_hi_u32 v8, v7, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v11, v0 +; GISEL-NEXT: v_mul_hi_u32 v9, v11, v1 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v8, v11, v1 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; GISEL-NEXT: v_mul_hi_u32 v6, v7, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0 ; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; GISEL-NEXT: v_mul_hi_u32 v6, v3, v6 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; GISEL-NEXT: v_mul_lo_u32 v7, s7, v5 -; GISEL-NEXT: v_mul_lo_u32 v6, s6, v6 -; GISEL-NEXT: v_mul_lo_u32 v8, s6, v5 -; GISEL-NEXT: v_mul_hi_u32 v5, s6, v5 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v0, v5 +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v8, 0 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v8 -; GISEL-NEXT: v_subb_u32_e64 v6, s[4:5], v3, v5, vcc -; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v5 -; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v2 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], s7, v6 -; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v9, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v7, s[4:5] -; GISEL-NEXT: v_subrev_i32_e32 v7, vcc, s6, v2 -; GISEL-NEXT: v_subbrev_u32_e64 v8, s[4:5], 0, v3, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v9, v5 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v5, v[1:2] +; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v7, v0 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s7, v8, v[5:6] +; GISEL-NEXT: v_mov_b32_e32 v9, s7 +; GISEL-NEXT: v_subb_u32_e64 v8, s[4:5], v11, v5, vcc +; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v11, v5 ; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v7 -; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v9, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], s7, v8 -; GISEL-NEXT: v_subrev_i32_e32 v9, vcc, s6, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v10, v1, v5, s[4:5] +; GISEL-NEXT: s_add_u32 s4, s10, 0 +; GISEL-NEXT: s_addc_u32 s5, 0, 0 +; GISEL-NEXT: s_xor_b64 s[8:9], s[4:5], s[8:9] +; GISEL-NEXT: v_subb_u32_e32 v5, vcc, v0, v9, vcc +; GISEL-NEXT: v_cvt_f32_u32_e32 v0, s8 +; GISEL-NEXT: v_cvt_f32_u32_e32 v1, s9 +; GISEL-NEXT: v_subrev_i32_e32 v11, vcc, s6, v7 +; GISEL-NEXT: v_subbrev_u32_e64 v12, s[4:5], 0, v5, vcc +; GISEL-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v11 +; GISEL-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 +; GISEL-NEXT: v_mul_f32_e32 v13, 0x2f800000, v0 +; GISEL-NEXT: v_trunc_f32_e32 v13, v13 +; GISEL-NEXT: v_mac_f32_e32 v0, 0xcf800000, v13 +; GISEL-NEXT: v_cvt_u32_f32_e32 v14, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], s7, v12 +; GISEL-NEXT: s_sub_u32 s7, 0, s8 +; GISEL-NEXT: v_cndmask_b32_e64 v15, v1, v6, s[4:5] +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s7, v14, 0 +; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v13 +; GISEL-NEXT: v_subb_u32_e32 v9, vcc, v5, v9, vcc +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s7, v13, v[1:2] +; GISEL-NEXT: v_subrev_i32_e32 v1, vcc, s6, v11 +; GISEL-NEXT: s_subb_u32 s6, 0, s9 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v14, v[5:6] +; GISEL-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v9, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15 +; GISEL-NEXT: v_cndmask_b32_e32 v6, v11, v1, vcc +; GISEL-NEXT: v_mul_lo_u32 v1, v13, v0 +; GISEL-NEXT: v_mul_lo_u32 v11, v14, v5 +; GISEL-NEXT: v_cndmask_b32_e32 v9, v12, v9, vcc +; GISEL-NEXT: v_mul_hi_u32 v12, v14, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v12, v13, v5 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v11, v1 +; GISEL-NEXT: v_mul_hi_u32 v11, v14, v5 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v12, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; GISEL-NEXT: v_mul_hi_u32 v5, v13, v5 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v11, v1 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v5, v1 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v14, v0 +; GISEL-NEXT: v_addc_u32_e32 v12, vcc, v13, v1, vcc +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s7, v11, 0 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 +; GISEL-NEXT: v_cndmask_b32_e32 v7, v7, v6, vcc +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s7, v12, v[1:2] +; GISEL-NEXT: v_xor_b32_e32 v1, v7, v4 +; GISEL-NEXT: v_ashrrev_i32_e32 v7, 31, v3 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v11, v[5:6] +; GISEL-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v7 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc +; GISEL-NEXT: v_xor_b32_e32 v9, v2, v7 +; GISEL-NEXT: v_mul_lo_u32 v2, v12, v0 +; GISEL-NEXT: v_mul_lo_u32 v6, v11, v5 +; GISEL-NEXT: v_xor_b32_e32 v10, v3, v7 +; GISEL-NEXT: v_mul_hi_u32 v3, v11, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v12, v5 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2 +; GISEL-NEXT: v_mul_hi_u32 v6, v11, v5 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6 +; GISEL-NEXT: v_mul_hi_u32 v5, v12, v5 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0 +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v12, v2, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v10, v0 +; GISEL-NEXT: v_mul_lo_u32 v5, v9, v2 +; GISEL-NEXT: v_mul_hi_u32 v6, v9, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v10, v0 +; GISEL-NEXT: v_xor_b32_e32 v8, v8, v4 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v6, v10, v2 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v5, v3 +; GISEL-NEXT: v_mul_hi_u32 v5, v9, v2 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v0, v3 +; GISEL-NEXT: v_mul_hi_u32 v6, v10, v2 +; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], s8, v11, 0 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v5, v0 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v0 +; GISEL-NEXT: v_mov_b32_e32 v0, v3 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s8, v5, v[0:1] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v4 +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v8, v4, vcc +; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], s9, v11, v[5:6] +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v9, v2 +; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v10, v3, vcc +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v10, v3 +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s9, v4 +; GISEL-NEXT: v_mov_b32_e32 v8, s9 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v2 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], s9, v4 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v8, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5] +; GISEL-NEXT: v_subrev_i32_e32 v6, vcc, s8, v2 +; GISEL-NEXT: v_subbrev_u32_e64 v9, s[4:5], 0, v3, vcc +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s9, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v6 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v8, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], s9, v9 +; GISEL-NEXT: v_subrev_i32_e32 v8, vcc, s8, v6 ; GISEL-NEXT: v_cndmask_b32_e64 v10, v10, v11, s[4:5] ; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 -; GISEL-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc -; GISEL-NEXT: v_xor_b32_e32 v2, v2, v4 -; GISEL-NEXT: v_xor_b32_e32 v3, v3, v4 -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v4 -; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v4, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v7 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v7 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v7 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_srem_v2i64_oddk_denom: @@ -2148,263 +2035,248 @@ ; CGP-NEXT: s_mov_b32 s7, 0xffed2705 ; CGP-NEXT: s_bfe_i32 s8, -1, 0x10000 ; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 -; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v4 -; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 -; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5 -; CGP-NEXT: v_trunc_f32_e32 v6, v6 -; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v6 -; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 -; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 -; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc -; CGP-NEXT: v_mul_lo_u32 v7, -1, v5 -; CGP-NEXT: v_mul_lo_u32 v8, s7, v6 -; CGP-NEXT: v_mul_hi_u32 v10, s7, v5 -; CGP-NEXT: v_mul_lo_u32 v9, s7, v5 -; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; CGP-NEXT: v_mul_lo_u32 v8, v6, v9 -; CGP-NEXT: v_mul_lo_u32 v10, v5, v7 -; CGP-NEXT: v_mul_hi_u32 v11, v5, v9 -; CGP-NEXT: v_mul_hi_u32 v9, v6, v9 -; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v6, v7 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; CGP-NEXT: v_mul_hi_u32 v10, v5, v7 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; CGP-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 +; CGP-NEXT: v_trunc_f32_e32 v6, v5 +; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v7, v4 +; CGP-NEXT: v_mov_b32_e32 v4, 0xffed2705 +; CGP-NEXT: v_cvt_u32_f32_e32 v8, v6 +; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v4, v7, 0 +; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s7, v8, v[5:6] +; CGP-NEXT: v_mul_hi_u32 v9, v7, v4 +; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], -1, v7, v[5:6] +; CGP-NEXT: v_mul_lo_u32 v6, v8, v4 +; CGP-NEXT: v_mul_hi_u32 v4, v8, v4 +; CGP-NEXT: v_mul_lo_u32 v10, v7, v5 +; CGP-NEXT: v_mul_lo_u32 v11, v8, v5 +; CGP-NEXT: v_mul_hi_u32 v12, v7, v5 +; CGP-NEXT: v_mul_hi_u32 v5, v8, v5 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v9 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v10, v6 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v11, v4 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc -; CGP-NEXT: v_mul_lo_u32 v7, -1, v5 -; CGP-NEXT: v_mul_lo_u32 v8, s7, v6 -; CGP-NEXT: v_mul_hi_u32 v10, s7, v5 -; CGP-NEXT: v_mul_lo_u32 v9, s7, v5 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; CGP-NEXT: v_mul_lo_u32 v8, v6, v9 -; CGP-NEXT: v_mul_lo_u32 v10, v5, v7 -; CGP-NEXT: v_mul_hi_u32 v11, v5, v9 -; CGP-NEXT: v_mul_hi_u32 v9, v6, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v12 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v6, v7 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; CGP-NEXT: v_mul_hi_u32 v10, v5, v7 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v5, v8 -; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc -; CGP-NEXT: v_mul_lo_u32 v7, v1, v8 -; CGP-NEXT: v_mul_lo_u32 v9, v0, v6 -; CGP-NEXT: v_mul_hi_u32 v10, v0, v8 -; CGP-NEXT: v_mul_hi_u32 v8, v1, v8 -; CGP-NEXT: v_mov_b32_e32 v5, 0x12d8fb -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v7, v4 +; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v5, vcc +; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s7, v9, 0 +; CGP-NEXT: v_mov_b32_e32 v4, v6 +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v8, v[4:5] +; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], -1, v9, v[6:7] +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc +; CGP-NEXT: v_xor_b32_e32 v10, v0, v4 +; CGP-NEXT: v_mul_lo_u32 v0, v8, v5 +; CGP-NEXT: v_mul_lo_u32 v7, v9, v6 +; CGP-NEXT: v_xor_b32_e32 v11, v1, v4 +; CGP-NEXT: v_mul_hi_u32 v1, v9, v5 +; CGP-NEXT: v_mul_hi_u32 v5, v8, v5 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v7 ; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v10, v1, v6 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7 -; CGP-NEXT: v_mul_hi_u32 v9, v0, v6 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_mul_hi_u32 v6, v1, v6 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; CGP-NEXT: v_mul_lo_u32 v8, 0, v7 -; CGP-NEXT: v_mul_lo_u32 v6, s6, v6 -; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 -; CGP-NEXT: v_mul_hi_u32 v7, s6, v7 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v9 -; CGP-NEXT: v_subb_u32_e64 v7, s[4:5], v1, v6, vcc -; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v6 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] -; CGP-NEXT: v_mov_b32_e32 v8, s8 -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v7 -; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CGP-NEXT: v_cndmask_b32_e64 v6, v8, v6, s[4:5] -; CGP-NEXT: v_sub_i32_e32 v8, vcc, v0, v5 -; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v5 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc -; CGP-NEXT: v_mov_b32_e32 v10, s4 -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CGP-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc -; CGP-NEXT: v_sub_i32_e32 v10, vcc, v8, v5 -; CGP-NEXT: v_subbrev_u32_e32 v11, vcc, 0, v1, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 -; CGP-NEXT: v_cvt_f32_u32_e32 v9, v5 -; CGP-NEXT: v_cndmask_b32_e32 v8, v8, v10, vcc -; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; CGP-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v1, v8, v6 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v7, v0 +; CGP-NEXT: v_mul_hi_u32 v7, v9, v6 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v7 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CGP-NEXT: v_mul_hi_u32 v6, v8, v6 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v1, v0 +; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v5, v1 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v6, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v9, v0 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v8, v1, vcc +; CGP-NEXT: v_mul_lo_u32 v5, v11, v0 +; CGP-NEXT: v_mul_lo_u32 v6, v10, v1 +; CGP-NEXT: v_mul_hi_u32 v7, v10, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v11, v0 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v7, v11, v1 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; CGP-NEXT: v_mul_hi_u32 v6, v10, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v7, v0 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v6 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v0, v5 +; CGP-NEXT: v_mul_hi_u32 v7, v11, v1 +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v8, 0 +; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v5, v[1:2] +; CGP-NEXT: v_mov_b32_e32 v5, 0x12d8fb +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], 0, v8, v[6:7] +; CGP-NEXT: v_sub_i32_e32 v8, vcc, v10, v0 +; CGP-NEXT: v_subb_u32_e64 v9, s[4:5], v11, v6, vcc +; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v11, v6 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v5 +; CGP-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5] +; CGP-NEXT: v_mov_b32_e32 v6, s8 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 +; CGP-NEXT: v_cndmask_b32_e64 v10, v6, v1, s[4:5] +; CGP-NEXT: v_cvt_f32_u32_e32 v1, v5 ; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 -; CGP-NEXT: v_mac_f32_e32 v9, 0x4f800000, v6 -; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v9 -; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc -; CGP-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc -; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v3 -; CGP-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 -; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6 -; CGP-NEXT: v_trunc_f32_e32 v8, v8 -; CGP-NEXT: v_mac_f32_e32 v6, 0xcf800000, v8 -; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 -; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v7 -; CGP-NEXT: v_mul_lo_u32 v9, -1, v6 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s7, v6 -; CGP-NEXT: v_mul_lo_u32 v11, s7, v6 -; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v12, v6, v9 -; CGP-NEXT: v_mul_hi_u32 v13, v6, v11 -; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v13, v8, v9 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_mul_hi_u32 v12, v6, v9 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CGP-NEXT: v_mul_hi_u32 v9, v8, v9 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc +; CGP-NEXT: v_mac_f32_e32 v1, 0x4f800000, v6 +; CGP-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; CGP-NEXT: v_sub_i32_e32 v11, vcc, v8, v5 +; CGP-NEXT: v_subbrev_u32_e32 v12, vcc, 0, v0, vcc +; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v1 +; CGP-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 +; CGP-NEXT: v_trunc_f32_e32 v6, v1 +; CGP-NEXT: v_mac_f32_e32 v0, 0xcf800000, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v13, v0 +; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 +; CGP-NEXT: v_mov_b32_e32 v14, s4 +; CGP-NEXT: v_cvt_u32_f32_e32 v15, v6 +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s7, v13, 0 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v11, v5 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v12 +; CGP-NEXT: v_cndmask_b32_e32 v14, v14, v7, vcc +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v15, v[1:2] +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v11, v5 +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], -1, v13, v[6:7] +; CGP-NEXT: v_subbrev_u32_e32 v16, vcc, 0, v12, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 +; CGP-NEXT: v_cndmask_b32_e32 v7, v11, v1, vcc +; CGP-NEXT: v_mul_lo_u32 v1, v15, v0 +; CGP-NEXT: v_mul_lo_u32 v11, v13, v6 +; CGP-NEXT: v_mul_hi_u32 v14, v13, v0 +; CGP-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc +; CGP-NEXT: v_mul_hi_u32 v0, v15, v0 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v11 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 -; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc -; CGP-NEXT: v_mul_lo_u32 v9, -1, v6 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s7, v6 -; CGP-NEXT: v_mul_lo_u32 v11, s7, v6 -; CGP-NEXT: v_xor_b32_e32 v2, v2, v7 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v12, v6, v9 -; CGP-NEXT: v_mul_hi_u32 v13, v6, v11 -; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_xor_b32_e32 v3, v3, v7 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v13, v8, v9 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_mul_hi_u32 v12, v6, v9 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CGP-NEXT: v_mul_hi_u32 v9, v8, v9 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v14 +; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v14, v15, v6 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v11, v1 +; CGP-NEXT: v_mul_hi_u32 v11, v13, v6 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v14, v0 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v11 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 -; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc -; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 -; CGP-NEXT: v_mul_lo_u32 v9, v3, v6 -; CGP-NEXT: v_mul_lo_u32 v10, v2, v8 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc -; CGP-NEXT: v_mul_hi_u32 v4, v2, v6 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v9, v3, v8 -; CGP-NEXT: v_mul_hi_u32 v6, v3, v6 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 -; CGP-NEXT: v_mul_hi_u32 v10, v2, v8 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_mul_hi_u32 v8, v3, v8 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v14, v11 +; CGP-NEXT: v_mul_hi_u32 v6, v15, v6 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v11, v1 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v6, v1 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v0 +; CGP-NEXT: v_addc_u32_e32 v13, vcc, v15, v1, vcc +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s7, v11, 0 +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 +; CGP-NEXT: v_cndmask_b32_e32 v8, v8, v7, vcc +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v13, v[1:2] +; CGP-NEXT: v_xor_b32_e32 v1, v8, v4 +; CGP-NEXT: v_ashrrev_i32_e32 v8, 31, v3 +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], -1, v11, v[6:7] +; CGP-NEXT: v_cndmask_b32_e32 v9, v9, v12, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v8 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v8, vcc +; CGP-NEXT: v_xor_b32_e32 v10, v2, v8 +; CGP-NEXT: v_mul_lo_u32 v2, v13, v0 +; CGP-NEXT: v_mul_lo_u32 v7, v11, v6 +; CGP-NEXT: v_xor_b32_e32 v12, v3, v8 +; CGP-NEXT: v_mul_hi_u32 v3, v11, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v13, v0 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v7 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v3, v13, v6 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v7, v2 +; CGP-NEXT: v_mul_hi_u32 v7, v11, v6 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v3, v0 +; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v7 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v7 +; CGP-NEXT: v_mul_hi_u32 v6, v13, v6 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v6, v2 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v11, v0 +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v13, v2, vcc +; CGP-NEXT: v_mul_lo_u32 v3, v12, v0 +; CGP-NEXT: v_mul_lo_u32 v6, v10, v2 +; CGP-NEXT: v_mul_hi_u32 v7, v10, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v12, v0 +; CGP-NEXT: v_xor_b32_e32 v9, v9, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v6 ; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CGP-NEXT: v_mul_lo_u32 v8, 0, v4 -; CGP-NEXT: v_mul_lo_u32 v6, s6, v6 -; CGP-NEXT: v_mul_lo_u32 v9, s6, v4 -; CGP-NEXT: v_mul_hi_u32 v4, s6, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v7 +; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v7, v12, v2 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v6, v3 +; CGP-NEXT: v_mul_hi_u32 v6, v10, v2 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v7, v0 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v6 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v0, v3 +; CGP-NEXT: v_mul_hi_u32 v7, v12, v2 +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], s6, v11, 0 +; CGP-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v6, v0 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v0 +; CGP-NEXT: v_mov_b32_e32 v0, v3 +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v6, v[0:1] +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v1, v4 +; CGP-NEXT: v_subb_u32_e32 v1, vcc, v9, v4, vcc +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], 0, v11, v[6:7] +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v10, v2 +; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v12, v3, vcc +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v12, v3 ; CGP-NEXT: s_bfe_i32 s6, -1, 0x10000 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v9 -; CGP-NEXT: v_subb_u32_e64 v6, s[4:5], v3, v4, vcc -; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] -; CGP-NEXT: v_mov_b32_e32 v8, s6 -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v6 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] +; CGP-NEXT: v_mov_b32_e32 v7, s6 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; CGP-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[4:5] -; CGP-NEXT: v_sub_i32_e32 v8, vcc, v2, v5 +; CGP-NEXT: v_cndmask_b32_e64 v6, v7, v6, s[4:5] +; CGP-NEXT: v_sub_i32_e32 v7, vcc, v2, v5 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v5 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v7, v5 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc ; CGP-NEXT: v_mov_b32_e32 v10, s4 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; CGP-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc -; CGP-NEXT: v_sub_i32_e32 v5, vcc, v8, v5 +; CGP-NEXT: v_sub_i32_e32 v5, vcc, v7, v5 ; CGP-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v3, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 -; CGP-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc +; CGP-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc ; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v10, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 ; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc -; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc -; CGP-NEXT: v_xor_b32_e32 v2, v2, v7 -; CGP-NEXT: v_xor_b32_e32 v3, v3, v7 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v7 -; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc +; CGP-NEXT: v_xor_b32_e32 v2, v2, v8 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v8 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v8 +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v8, vcc ; CGP-NEXT: s_setpc_b64 s[30:31] %result = srem <2 x i64> %num, ret <2 x i64> %result @@ -2429,141 +2301,132 @@ ; CHECK-NEXT: v_ashrrev_i32_e32 v0, 31, v6 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v5, v0 ; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v6, v0, vcc -; CHECK-NEXT: v_xor_b32_e32 v1, v1, v0 -; CHECK-NEXT: v_xor_b32_e32 v0, v2, v0 -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, v1 -; CHECK-NEXT: v_cvt_f32_u32_e32 v5, v0 -; CHECK-NEXT: v_ashrrev_i32_e32 v6, 31, v4 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v6 -; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v5 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v6, vcc -; CHECK-NEXT: v_sub_i32_e32 v7, vcc, 0, v1 -; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 -; CHECK-NEXT: v_mul_f32_e32 v5, 0x2f800000, v2 -; CHECK-NEXT: v_trunc_f32_e32 v5, v5 -; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v5 -; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 -; CHECK-NEXT: v_cvt_u32_f32_e32 v5, v5 -; CHECK-NEXT: v_subb_u32_e32 v8, vcc, 0, v0, vcc -; CHECK-NEXT: v_mul_lo_u32 v9, v8, v2 -; CHECK-NEXT: v_mul_lo_u32 v10, v7, v5 -; CHECK-NEXT: v_mul_hi_u32 v12, v7, v2 -; CHECK-NEXT: v_mul_lo_u32 v11, v7, v2 -; CHECK-NEXT: v_xor_b32_e32 v3, v3, v6 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CHECK-NEXT: v_mul_lo_u32 v10, v5, v11 -; CHECK-NEXT: v_mul_lo_u32 v12, v2, v9 -; CHECK-NEXT: v_mul_hi_u32 v13, v2, v11 -; CHECK-NEXT: v_mul_hi_u32 v11, v5, v11 -; CHECK-NEXT: v_xor_b32_e32 v4, v4, v6 -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v12 +; CHECK-NEXT: v_xor_b32_e32 v5, v1, v0 +; CHECK-NEXT: v_xor_b32_e32 v6, v2, v0 +; CHECK-NEXT: v_cvt_f32_u32_e32 v0, v5 +; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v6 +; CHECK-NEXT: v_sub_i32_e32 v8, vcc, 0, v5 +; CHECK-NEXT: v_subb_u32_e32 v9, vcc, 0, v6, vcc +; CHECK-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; CHECK-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 +; CHECK-NEXT: v_trunc_f32_e32 v2, v1 +; CHECK-NEXT: v_mac_f32_e32 v0, 0xcf800000, v2 +; CHECK-NEXT: v_cvt_u32_f32_e32 v7, v0 +; CHECK-NEXT: v_cvt_u32_f32_e32 v10, v2 +; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v8, v7, 0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v8, v10, v[1:2] +; CHECK-NEXT: v_mul_hi_u32 v11, v7, v0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v9, v7, v[1:2] +; CHECK-NEXT: v_mul_lo_u32 v2, v10, v0 +; CHECK-NEXT: v_mul_hi_u32 v0, v10, v0 +; CHECK-NEXT: v_mul_lo_u32 v12, v7, v1 +; CHECK-NEXT: v_mul_lo_u32 v13, v10, v1 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v12 ; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v13 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v13, v5, v9 -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CHECK-NEXT: v_mul_hi_u32 v12, v2, v9 -; CHECK-NEXT: v_add_i32_e32 v11, vcc, v13, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v11 +; CHECK-NEXT: v_mul_hi_u32 v11, v7, v1 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v12, v2 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v13, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CHECK-NEXT: v_mul_hi_u32 v9, v5, v9 -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v11 ; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v10 -; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v5, v9, vcc -; CHECK-NEXT: v_mul_lo_u32 v8, v8, v2 -; CHECK-NEXT: v_mul_lo_u32 v9, v7, v5 -; CHECK-NEXT: v_mul_lo_u32 v10, v7, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, v7, v2 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CHECK-NEXT: v_mul_lo_u32 v8, v5, v10 -; CHECK-NEXT: v_mul_lo_u32 v9, v2, v7 -; CHECK-NEXT: v_mul_hi_u32 v11, v2, v10 -; CHECK-NEXT: v_mul_hi_u32 v10, v5, v10 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; CHECK-NEXT: v_mul_hi_u32 v1, v10, v1 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v11, v2 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v0 +; CHECK-NEXT: v_addc_u32_e32 v10, vcc, v10, v1, vcc +; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v8, v7, 0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v8, v10, v[1:2] +; CHECK-NEXT: v_ashrrev_i32_e32 v8, 31, v4 +; CHECK-NEXT: v_mul_hi_u32 v11, v7, v0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v9, v7, v[1:2] +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v8 +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v4, v8, vcc +; CHECK-NEXT: v_xor_b32_e32 v4, v2, v8 +; CHECK-NEXT: v_mul_lo_u32 v2, v10, v0 +; CHECK-NEXT: v_mul_lo_u32 v9, v7, v1 +; CHECK-NEXT: v_mul_hi_u32 v0, v10, v0 +; CHECK-NEXT: v_xor_b32_e32 v3, v3, v8 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v9 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v11, v5, v7 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CHECK-NEXT: v_mul_hi_u32 v9, v2, v7 -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: v_mul_lo_u32 v11, v10, v1 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v9, v2 +; CHECK-NEXT: v_mul_hi_u32 v9, v7, v1 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v11, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CHECK-NEXT: v_mul_hi_u32 v7, v5, v7 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v9 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v8 -; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v5, v7, vcc -; CHECK-NEXT: v_mul_lo_u32 v7, v4, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, v3, v5 -; CHECK-NEXT: v_mul_hi_u32 v9, v3, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, v4, v2 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v9, v4, v5 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CHECK-NEXT: v_mul_hi_u32 v8, v3, v5 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CHECK-NEXT: v_mul_hi_u32 v1, v10, v1 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v9, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0 +; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v10, v1, vcc +; CHECK-NEXT: v_mul_lo_u32 v2, v3, v0 +; CHECK-NEXT: v_mul_lo_u32 v7, v4, v1 +; CHECK-NEXT: v_mul_hi_u32 v9, v4, v0 +; CHECK-NEXT: v_mul_hi_u32 v0, v3, v0 +; CHECK-NEXT: v_mul_hi_u32 v10, v3, v1 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CHECK-NEXT: v_mul_lo_u32 v7, v0, v2 -; CHECK-NEXT: v_mul_lo_u32 v5, v1, v5 -; CHECK-NEXT: v_mul_lo_u32 v8, v1, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2 -; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v3, v8 -; CHECK-NEXT: v_subb_u32_e64 v5, s[4:5], v4, v2, vcc -; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], v4, v2 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v0 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v9 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: v_mul_lo_u32 v9, v3, v1 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2 +; CHECK-NEXT: v_mul_hi_u32 v7, v4, v1 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v9, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v0, v2 +; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v9, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v10, v2 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v5, v2, v[1:2] +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v4, v0 +; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v6, v9, v[1:2] +; CHECK-NEXT: v_subb_u32_e64 v2, s[4:5], v3, v1, vcc +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v3, v1 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v1 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v5, v0 -; CHECK-NEXT: v_subb_u32_e32 v2, vcc, v2, v0, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v4, v4, v7, s[4:5] -; CHECK-NEXT: v_sub_i32_e32 v7, vcc, v3, v1 -; CHECK-NEXT: v_subbrev_u32_e64 v8, s[4:5], 0, v2, vcc -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v0 +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v2, v6 +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v6, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[4:5] +; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v0, v5 +; CHECK-NEXT: v_subbrev_u32_e64 v7, s[4:5], 0, v1, vcc +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v1 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v5 +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v6, vcc ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v8, v0 -; CHECK-NEXT: v_subb_u32_e32 v0, vcc, v2, v0, vcc -; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v7, v1 +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v7, v6 +; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v4, v5 ; CHECK-NEXT: v_cndmask_b32_e64 v9, v9, v10, s[4:5] -; CHECK-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc +; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc ; CHECK-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc -; CHECK-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc -; CHECK-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc -; CHECK-NEXT: v_xor_b32_e32 v1, v1, v6 -; CHECK-NEXT: v_xor_b32_e32 v2, v0, v6 -; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v1, v6 -; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v2, v6, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc +; CHECK-NEXT: v_xor_b32_e32 v0, v0, v8 +; CHECK-NEXT: v_xor_b32_e32 v1, v1, v8 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v8 +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v8, vcc ; CHECK-NEXT: ; implicit-def: $vgpr5_vgpr6 ; CHECK-NEXT: ; implicit-def: $vgpr3 ; CHECK-NEXT: .LBB7_2: ; %Flow @@ -2605,281 +2468,268 @@ ; GISEL-NEXT: v_lshl_b64 v[4:5], s[4:5], v4 ; GISEL-NEXT: v_ashrrev_i32_e32 v7, 31, v5 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7 -; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v7, vcc -; GISEL-NEXT: v_xor_b32_e32 v8, v4, v7 -; GISEL-NEXT: v_xor_b32_e32 v5, v5, v7 -; GISEL-NEXT: v_cvt_f32_u32_e32 v7, v8 -; GISEL-NEXT: v_cvt_f32_u32_e32 v9, v5 +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v5, v7, vcc +; GISEL-NEXT: v_xor_b32_e32 v5, v4, v7 +; GISEL-NEXT: v_xor_b32_e32 v7, v8, v7 +; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v5 +; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v7 +; GISEL-NEXT: v_sub_i32_e32 v12, vcc, 0, v5 +; GISEL-NEXT: v_subb_u32_e32 v13, vcc, 0, v7, vcc +; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v8 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; GISEL-NEXT: v_mul_f32_e32 v8, 0x2f800000, v4 +; GISEL-NEXT: v_trunc_f32_e32 v10, v8 +; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v10 +; GISEL-NEXT: v_cvt_u32_f32_e32 v11, v4 +; GISEL-NEXT: v_cvt_u32_f32_e32 v14, v10 +; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[6:7], v12, v11, 0 +; GISEL-NEXT: v_mov_b32_e32 v4, v9 +; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[6:7], v12, v14, v[4:5] +; GISEL-NEXT: v_mul_lo_u32 v4, v14, v8 +; GISEL-NEXT: v_mul_hi_u32 v15, v11, v8 +; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[6:7], v13, v11, v[9:10] +; GISEL-NEXT: v_mul_hi_u32 v8, v14, v8 +; GISEL-NEXT: v_mul_lo_u32 v10, v11, v9 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v15, v14, v9 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v10, v4 +; GISEL-NEXT: v_mul_hi_u32 v10, v11, v9 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v15, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v15, v10 +; GISEL-NEXT: v_mul_hi_u32 v9, v14, v9 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v8, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v4 +; GISEL-NEXT: v_addc_u32_e32 v14, vcc, v14, v8, vcc +; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[6:7], v12, v11, 0 +; GISEL-NEXT: v_mov_b32_e32 v4, v9 +; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[6:7], v12, v14, v[4:5] ; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v1 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; GISEL-NEXT: v_mac_f32_e32 v7, 0x4f800000, v9 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v7, v7 +; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[6:7], v13, v11, v[9:10] ; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc -; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v8 -; GISEL-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7 -; GISEL-NEXT: v_mul_f32_e32 v9, 0x2f800000, v7 -; GISEL-NEXT: v_trunc_f32_e32 v9, v9 -; GISEL-NEXT: v_mac_f32_e32 v7, 0xcf800000, v9 -; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7 -; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v9 -; GISEL-NEXT: v_subb_u32_e32 v11, vcc, 0, v5, vcc -; GISEL-NEXT: v_mul_lo_u32 v12, v11, v7 -; GISEL-NEXT: v_mul_lo_u32 v13, v10, v9 -; GISEL-NEXT: v_mul_hi_u32 v15, v10, v7 -; GISEL-NEXT: v_mul_lo_u32 v14, v10, v7 -; GISEL-NEXT: v_xor_b32_e32 v16, v0, v4 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15 -; GISEL-NEXT: v_mul_lo_u32 v13, v9, v14 -; GISEL-NEXT: v_mul_lo_u32 v15, v7, v12 -; GISEL-NEXT: v_mul_hi_u32 v0, v7, v14 -; GISEL-NEXT: v_mul_hi_u32 v14, v9, v14 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v13, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v13, v9, v12 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v15, v0 -; GISEL-NEXT: v_mul_hi_u32 v15, v7, v12 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 -; GISEL-NEXT: v_mul_hi_u32 v12, v9, v12 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v13, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v7, v0 -; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v9, v12, vcc -; GISEL-NEXT: v_mul_lo_u32 v9, v11, v0 -; GISEL-NEXT: v_mul_lo_u32 v11, v10, v7 -; GISEL-NEXT: v_mul_lo_u32 v12, v10, v0 -; GISEL-NEXT: v_mul_hi_u32 v10, v10, v0 +; GISEL-NEXT: v_xor_b32_e32 v12, v0, v4 +; GISEL-NEXT: v_mul_lo_u32 v0, v14, v8 +; GISEL-NEXT: v_mul_lo_u32 v10, v11, v9 ; GISEL-NEXT: v_xor_b32_e32 v13, v1, v4 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; GISEL-NEXT: v_mul_lo_u32 v10, v7, v12 -; GISEL-NEXT: v_mul_lo_u32 v11, v0, v9 -; GISEL-NEXT: v_mul_hi_u32 v1, v0, v12 -; GISEL-NEXT: v_mul_hi_u32 v12, v7, v12 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v10, v7, v9 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v11, v1 -; GISEL-NEXT: v_mul_hi_u32 v11, v0, v9 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; GISEL-NEXT: v_mul_hi_u32 v9, v7, v9 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1 +; GISEL-NEXT: v_mul_hi_u32 v1, v11, v8 +; GISEL-NEXT: v_mul_hi_u32 v8, v14, v8 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v10 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v0, v1 -; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v9, vcc -; GISEL-NEXT: v_mul_lo_u32 v9, v13, v10 -; GISEL-NEXT: v_mul_lo_u32 v11, v16, v7 -; GISEL-NEXT: v_lshl_b64 v[0:1], s[4:5], v6 -; GISEL-NEXT: v_mul_hi_u32 v6, v16, v10 -; GISEL-NEXT: v_mul_hi_u32 v10, v13, v10 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v9, v13, v7 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v11, v6 -; GISEL-NEXT: v_mul_hi_u32 v11, v16, v7 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v1, v14, v9 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0 +; GISEL-NEXT: v_mul_hi_u32 v10, v11, v9 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v10 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; GISEL-NEXT: v_mul_hi_u32 v7, v13, v7 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; GISEL-NEXT: v_mul_lo_u32 v9, v5, v6 -; GISEL-NEXT: v_mul_lo_u32 v7, v8, v7 -; GISEL-NEXT: v_mul_lo_u32 v10, v8, v6 -; GISEL-NEXT: v_mul_hi_u32 v6, v8, v6 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v9, v7 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v16, v10 -; GISEL-NEXT: v_subb_u32_e64 v9, s[4:5], v13, v6, vcc -; GISEL-NEXT: v_sub_i32_e64 v6, s[4:5], v13, v6 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v9, v5 -; GISEL-NEXT: v_subb_u32_e32 v6, vcc, v6, v5, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v10, v10, v11, s[4:5] -; GISEL-NEXT: v_sub_i32_e32 v11, vcc, v7, v8 -; GISEL-NEXT: v_subbrev_u32_e64 v12, s[4:5], 0, v6, vcc -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v12, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v11, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v12, v5 -; GISEL-NEXT: v_subb_u32_e32 v5, vcc, v6, v5, vcc -; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v11, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v13, v13, v14, s[4:5] -; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 -; GISEL-NEXT: v_cndmask_b32_e32 v6, v11, v6, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v5, v12, v5, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 -; GISEL-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc -; GISEL-NEXT: v_ashrrev_i32_e32 v7, 31, v1 -; GISEL-NEXT: v_cndmask_b32_e32 v5, v9, v5, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7 -; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v7, vcc -; GISEL-NEXT: v_xor_b32_e32 v8, v0, v7 -; GISEL-NEXT: v_xor_b32_e32 v7, v1, v7 -; GISEL-NEXT: v_cvt_f32_u32_e32 v0, v8 -; GISEL-NEXT: v_cvt_f32_u32_e32 v1, v7 -; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v3 -; GISEL-NEXT: v_xor_b32_e32 v6, v6, v4 -; GISEL-NEXT: v_xor_b32_e32 v5, v5, v4 -; GISEL-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v2, v9 -; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v3, v9, vcc -; GISEL-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 -; GISEL-NEXT: v_mul_f32_e32 v3, 0x2f800000, v0 -; GISEL-NEXT: v_trunc_f32_e32 v3, v3 -; GISEL-NEXT: v_mac_f32_e32 v0, 0xcf800000, v3 -; GISEL-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GISEL-NEXT: v_cvt_u32_f32_e32 v3, v3 -; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v8 -; GISEL-NEXT: v_subb_u32_e32 v11, vcc, 0, v7, vcc -; GISEL-NEXT: v_mul_lo_u32 v12, v11, v0 -; GISEL-NEXT: v_mul_lo_u32 v13, v10, v3 -; GISEL-NEXT: v_mul_hi_u32 v15, v10, v0 -; GISEL-NEXT: v_mul_lo_u32 v14, v10, v0 -; GISEL-NEXT: v_xor_b32_e32 v16, v1, v9 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15 -; GISEL-NEXT: v_mul_lo_u32 v13, v3, v14 -; GISEL-NEXT: v_mul_lo_u32 v15, v0, v12 -; GISEL-NEXT: v_mul_hi_u32 v1, v0, v14 -; GISEL-NEXT: v_mul_hi_u32 v14, v3, v14 -; GISEL-NEXT: v_xor_b32_e32 v2, v2, v9 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; GISEL-NEXT: v_mul_hi_u32 v9, v14, v9 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0 ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v13, v3, v12 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v15, v1 -; GISEL-NEXT: v_mul_hi_u32 v15, v0, v12 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 -; GISEL-NEXT: v_mul_hi_u32 v12, v3, v12 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v3, v12, vcc -; GISEL-NEXT: v_mul_lo_u32 v3, v11, v0 -; GISEL-NEXT: v_mul_lo_u32 v11, v10, v1 -; GISEL-NEXT: v_mul_lo_u32 v12, v10, v0 -; GISEL-NEXT: v_mul_hi_u32 v10, v10, v0 -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v11 -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v10 -; GISEL-NEXT: v_mul_lo_u32 v10, v1, v12 -; GISEL-NEXT: v_mul_lo_u32 v11, v0, v3 -; GISEL-NEXT: v_mul_hi_u32 v13, v0, v12 -; GISEL-NEXT: v_mul_hi_u32 v12, v1, v12 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v9, v1 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v0 +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v14, v1, vcc +; GISEL-NEXT: v_mul_lo_u32 v10, v13, v8 +; GISEL-NEXT: v_mul_lo_u32 v11, v12, v9 +; GISEL-NEXT: v_lshl_b64 v[0:1], s[4:5], v6 +; GISEL-NEXT: v_mul_hi_u32 v6, v12, v8 +; GISEL-NEXT: v_mul_hi_u32 v8, v13, v8 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v13 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v10, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v10, v13, v9 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v11, v6 +; GISEL-NEXT: v_mul_hi_u32 v11, v12, v9 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v13, v1, v3 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GISEL-NEXT: v_mul_hi_u32 v11, v0, v3 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; GISEL-NEXT: v_mul_hi_u32 v3, v1, v3 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v11 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v0, v10 -; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v1, v3, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v2, v10 -; GISEL-NEXT: v_mul_lo_u32 v12, v16, v3 -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v6, v4 -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v5, v4, vcc -; GISEL-NEXT: v_mul_hi_u32 v4, v16, v10 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v11, v12 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; GISEL-NEXT: v_mul_hi_u32 v9, v13, v9 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v9, v8 +; GISEL-NEXT: v_ashrrev_i32_e32 v11, 31, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v11 +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v11, vcc +; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v5, v6, 0 +; GISEL-NEXT: v_xor_b32_e32 v14, v0, v11 +; GISEL-NEXT: v_xor_b32_e32 v15, v1, v11 +; GISEL-NEXT: v_cvt_f32_u32_e32 v11, v14 +; GISEL-NEXT: v_cvt_f32_u32_e32 v16, v15 +; GISEL-NEXT: v_mov_b32_e32 v0, v9 +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v10, v[0:1] +; GISEL-NEXT: v_mac_f32_e32 v11, 0x4f800000, v16 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v9, v11 +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v7, v6, v[0:1] +; GISEL-NEXT: v_sub_i32_e32 v17, vcc, 0, v14 +; GISEL-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v9 +; GISEL-NEXT: v_mul_f32_e32 v6, 0x2f800000, v1 +; GISEL-NEXT: v_trunc_f32_e32 v6, v6 +; GISEL-NEXT: v_mac_f32_e32 v1, 0xcf800000, v6 +; GISEL-NEXT: v_cvt_u32_f32_e32 v16, v1 +; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GISEL-NEXT: v_subb_u32_e32 v18, vcc, 0, v15, vcc +; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v17, v16, 0 +; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v12, v8 +; GISEL-NEXT: v_mov_b32_e32 v1, v10 +; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v17, v6, v[1:2] +; GISEL-NEXT: v_mul_lo_u32 v1, v6, v9 +; GISEL-NEXT: v_subb_u32_e64 v12, s[4:5], v13, v0, vcc +; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v18, v16, v[10:11] +; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v13, v0 +; GISEL-NEXT: v_mul_lo_u32 v11, v16, v10 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v12, v7 +; GISEL-NEXT: v_subb_u32_e32 v0, vcc, v0, v7, vcc +; GISEL-NEXT: v_add_i32_e64 v1, s[4:5], v1, v11 +; GISEL-NEXT: v_mul_hi_u32 v11, v16, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v1, s[4:5], v1, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[6:7] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v8, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[6:7] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], v12, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v11, v1, v11, s[6:7] +; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v8, v5 +; GISEL-NEXT: v_subbrev_u32_e64 v19, s[6:7], 0, v0, vcc +; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v1, v5 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v19, v7 +; GISEL-NEXT: v_subb_u32_e32 v0, vcc, v0, v7, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, -1, s[8:9] +; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, -1, s[6:7] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], v19, v7 +; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v1, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v20, v20, v21, s[6:7] +; GISEL-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v20 +; GISEL-NEXT: v_cndmask_b32_e32 v5, v1, v5, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v7, v19, v0, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GISEL-NEXT: v_mul_hi_u32 v1, v6, v9 +; GISEL-NEXT: v_mul_lo_u32 v9, v6, v10 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v13, v0 +; GISEL-NEXT: v_mul_hi_u32 v13, v16, v10 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v9, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13 +; GISEL-NEXT: v_mul_hi_u32 v10, v6, v10 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v9, v1 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v16, v0 +; GISEL-NEXT: v_addc_u32_e32 v10, vcc, v6, v1, vcc +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v17, v9, 0 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 +; GISEL-NEXT: v_cndmask_b32_e32 v8, v8, v5, vcc +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v17, v10, v[1:2] +; GISEL-NEXT: v_xor_b32_e32 v1, v8, v4 +; GISEL-NEXT: v_ashrrev_i32_e32 v8, 31, v3 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v18, v9, v[5:6] +; GISEL-NEXT: v_cndmask_b32_e32 v7, v12, v7, vcc +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v8 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v8, vcc +; GISEL-NEXT: v_xor_b32_e32 v11, v2, v8 +; GISEL-NEXT: v_mul_lo_u32 v2, v10, v0 +; GISEL-NEXT: v_mul_lo_u32 v6, v9, v5 +; GISEL-NEXT: v_xor_b32_e32 v12, v3, v8 +; GISEL-NEXT: v_mul_hi_u32 v3, v9, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v10, v0 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v5, v2, v3 -; GISEL-NEXT: v_mul_hi_u32 v10, v2, v10 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v6, v4 -; GISEL-NEXT: v_mul_hi_u32 v6, v16, v3 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v10, v5 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2 +; GISEL-NEXT: v_mul_hi_u32 v6, v9, v5 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v10, v6 -; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6 +; GISEL-NEXT: v_mul_hi_u32 v5, v10, v5 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0 +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v10, v2, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v12, v0 +; GISEL-NEXT: v_mul_lo_u32 v5, v11, v2 +; GISEL-NEXT: v_mul_hi_u32 v6, v11, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0 +; GISEL-NEXT: v_xor_b32_e32 v7, v7, v4 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v5 -; GISEL-NEXT: v_mul_lo_u32 v5, v7, v4 -; GISEL-NEXT: v_mul_lo_u32 v3, v8, v3 -; GISEL-NEXT: v_mul_lo_u32 v6, v8, v4 -; GISEL-NEXT: v_mul_hi_u32 v4, v8, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v6, v12, v2 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v5, v3 -; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v16, v6 -; GISEL-NEXT: v_subb_u32_e64 v5, s[4:5], v2, v3, vcc -; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v8 +; GISEL-NEXT: v_mul_hi_u32 v5, v11, v2 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v0, v3 +; GISEL-NEXT: v_mul_hi_u32 v6, v12, v2 +; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v14, v9, 0 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v5, v0 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v0 +; GISEL-NEXT: v_mov_b32_e32 v0, v3 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v14, v5, v[0:1] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v4 +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v7, v4, vcc +; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v15, v9, v[5:6] +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v11, v2 +; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v12, v3, vcc +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v12, v3 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v14 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v5, v7 -; GISEL-NEXT: v_subb_u32_e32 v2, vcc, v2, v7, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, v6, s[4:5] -; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v4, v8 -; GISEL-NEXT: v_subbrev_u32_e64 v10, s[4:5], 0, v2, vcc -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v8 -; GISEL-NEXT: v_subb_u32_e32 v2, vcc, v2, v7, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v10, v7 -; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v6, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v11, v11, v12, s[4:5] -; GISEL-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v2, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 -; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; GISEL-NEXT: v_cndmask_b32_e32 v3, v4, v6, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc -; GISEL-NEXT: v_xor_b32_e32 v3, v3, v9 -; GISEL-NEXT: v_xor_b32_e32 v4, v2, v9 -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v3, v9 -; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v4, v9, vcc +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v4, v15 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v15, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5] +; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v2, v14 +; GISEL-NEXT: v_subbrev_u32_e64 v7, s[4:5], 0, v3, vcc +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v7, v15 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v15, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v9, v9, v10, s[4:5] +; GISEL-NEXT: v_sub_i32_e32 v10, vcc, v6, v14 +; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v8 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v8 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v8 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v8, vcc ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_srem_v2i64_pow2_shl_denom: @@ -2902,141 +2752,132 @@ ; CGP-NEXT: v_ashrrev_i32_e32 v0, 31, v3 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v2, v0 ; CGP-NEXT: v_addc_u32_e32 v2, vcc, v3, v0, vcc -; CGP-NEXT: v_xor_b32_e32 v1, v1, v0 -; CGP-NEXT: v_xor_b32_e32 v0, v2, v0 -; CGP-NEXT: v_cvt_f32_u32_e32 v2, v1 -; CGP-NEXT: v_cvt_f32_u32_e32 v3, v0 -; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v9 -; CGP-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 -; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v8, v4 -; CGP-NEXT: v_addc_u32_e32 v8, vcc, v9, v4, vcc -; CGP-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 -; CGP-NEXT: v_mul_f32_e32 v9, 0x2f800000, v2 -; CGP-NEXT: v_trunc_f32_e32 v9, v9 -; CGP-NEXT: v_mac_f32_e32 v2, 0xcf800000, v9 -; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2 -; CGP-NEXT: v_cvt_u32_f32_e32 v9, v9 -; CGP-NEXT: v_sub_i32_e32 v10, vcc, 0, v1 -; CGP-NEXT: v_subb_u32_e32 v11, vcc, 0, v0, vcc -; CGP-NEXT: v_mul_lo_u32 v12, v11, v2 -; CGP-NEXT: v_mul_lo_u32 v13, v10, v9 -; CGP-NEXT: v_mul_hi_u32 v15, v10, v2 -; CGP-NEXT: v_mul_lo_u32 v14, v10, v2 -; CGP-NEXT: v_xor_b32_e32 v3, v3, v4 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v15 -; CGP-NEXT: v_mul_lo_u32 v13, v9, v14 -; CGP-NEXT: v_mul_lo_u32 v15, v2, v12 -; CGP-NEXT: v_mul_hi_u32 v16, v2, v14 -; CGP-NEXT: v_mul_hi_u32 v14, v9, v14 -; CGP-NEXT: v_xor_b32_e32 v8, v8, v4 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15 +; CGP-NEXT: v_xor_b32_e32 v3, v1, v0 +; CGP-NEXT: v_xor_b32_e32 v4, v2, v0 +; CGP-NEXT: v_cvt_f32_u32_e32 v0, v3 +; CGP-NEXT: v_cvt_f32_u32_e32 v1, v4 +; CGP-NEXT: v_sub_i32_e32 v11, vcc, 0, v3 +; CGP-NEXT: v_subb_u32_e32 v12, vcc, 0, v4, vcc +; CGP-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 +; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 +; CGP-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 +; CGP-NEXT: v_trunc_f32_e32 v2, v1 +; CGP-NEXT: v_mac_f32_e32 v0, 0xcf800000, v2 +; CGP-NEXT: v_cvt_u32_f32_e32 v10, v0 +; CGP-NEXT: v_cvt_u32_f32_e32 v13, v2 +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v11, v10, 0 +; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v11, v13, v[1:2] +; CGP-NEXT: v_mul_hi_u32 v14, v10, v0 +; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v12, v10, v[1:2] +; CGP-NEXT: v_mul_lo_u32 v2, v13, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v13, v0 +; CGP-NEXT: v_mul_lo_u32 v15, v10, v1 +; CGP-NEXT: v_mul_lo_u32 v16, v13, v1 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v15 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v16 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v16, v9, v12 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v13 -; CGP-NEXT: v_mul_hi_u32 v15, v2, v12 -; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v14 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v14 +; CGP-NEXT: v_mul_hi_u32 v14, v10, v1 +; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, v15, v2 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v16, v0 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15 -; CGP-NEXT: v_mul_hi_u32 v12, v9, v12 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v14 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v13 -; CGP-NEXT: v_addc_u32_e32 v9, vcc, v9, v12, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v11, v2 -; CGP-NEXT: v_mul_lo_u32 v12, v10, v9 -; CGP-NEXT: v_mul_lo_u32 v13, v10, v2 -; CGP-NEXT: v_mul_hi_u32 v10, v10, v2 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_mul_lo_u32 v11, v9, v13 -; CGP-NEXT: v_mul_lo_u32 v12, v2, v10 -; CGP-NEXT: v_mul_hi_u32 v14, v2, v13 -; CGP-NEXT: v_mul_hi_u32 v13, v9, v13 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; CGP-NEXT: v_mul_hi_u32 v1, v13, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, v14, v2 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v0 +; CGP-NEXT: v_addc_u32_e32 v13, vcc, v13, v1, vcc +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v11, v10, 0 +; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v11, v13, v[1:2] +; CGP-NEXT: v_ashrrev_i32_e32 v11, 31, v9 +; CGP-NEXT: v_mul_hi_u32 v14, v10, v0 +; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v12, v10, v[1:2] +; CGP-NEXT: v_add_i32_e32 v2, vcc, v8, v11 +; CGP-NEXT: v_addc_u32_e32 v8, vcc, v9, v11, vcc +; CGP-NEXT: v_xor_b32_e32 v9, v2, v11 +; CGP-NEXT: v_mul_lo_u32 v2, v13, v0 +; CGP-NEXT: v_mul_lo_u32 v12, v10, v1 +; CGP-NEXT: v_mul_hi_u32 v0, v13, v0 +; CGP-NEXT: v_xor_b32_e32 v8, v8, v11 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v14 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v14, v9, v10 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_hi_u32 v12, v2, v10 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v14 +; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v14, v13, v1 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v12, v2 +; CGP-NEXT: v_mul_hi_u32 v12, v10, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v14, v0 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; CGP-NEXT: v_mul_hi_u32 v10, v9, v10 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v11 -; CGP-NEXT: v_addc_u32_e32 v9, vcc, v9, v10, vcc -; CGP-NEXT: v_mul_lo_u32 v10, v8, v2 -; CGP-NEXT: v_mul_lo_u32 v11, v3, v9 -; CGP-NEXT: v_mul_hi_u32 v12, v3, v2 -; CGP-NEXT: v_mul_hi_u32 v2, v8, v2 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v12, v8, v9 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_mul_hi_u32 v11, v3, v9 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12 +; CGP-NEXT: v_mul_hi_u32 v1, v13, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v2, vcc, v12, v2 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_hi_u32 v9, v8, v9 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v10, v0 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v13, v1, vcc +; CGP-NEXT: v_mul_lo_u32 v2, v8, v0 +; CGP-NEXT: v_mul_lo_u32 v10, v9, v1 +; CGP-NEXT: v_mul_hi_u32 v12, v9, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v8, v0 +; CGP-NEXT: v_mul_hi_u32 v13, v8, v1 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v10 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_mul_lo_u32 v10, v0, v2 -; CGP-NEXT: v_mul_lo_u32 v9, v1, v9 -; CGP-NEXT: v_mul_lo_u32 v11, v1, v2 -; CGP-NEXT: v_mul_hi_u32 v2, v1, v2 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v9, v2 -; CGP-NEXT: v_sub_i32_e32 v3, vcc, v3, v11 -; CGP-NEXT: v_subb_u32_e64 v9, s[4:5], v8, v2, vcc -; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v8, v2 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v0 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12 +; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v12, v8, v1 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v10, v2 +; CGP-NEXT: v_mul_hi_u32 v10, v9, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v12, v0 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v0, v2 +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v12, 0 +; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, v10, v2 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v13, v2 +; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v3, v2, v[1:2] +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v9, v0 +; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v4, v12, v[1:2] +; CGP-NEXT: v_subb_u32_e64 v2, s[4:5], v8, v1, vcc +; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v8, v1 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4 ; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v1 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v9, v0 -; CGP-NEXT: v_subb_u32_e32 v2, vcc, v2, v0, vcc -; CGP-NEXT: v_cndmask_b32_e64 v8, v8, v10, s[4:5] -; CGP-NEXT: v_sub_i32_e32 v10, vcc, v3, v1 -; CGP-NEXT: v_subbrev_u32_e64 v11, s[4:5], 0, v2, vcc -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v11, v0 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v2, v4 +; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc +; CGP-NEXT: v_cndmask_b32_e64 v8, v8, v9, s[4:5] +; CGP-NEXT: v_sub_i32_e32 v9, vcc, v0, v3 +; CGP-NEXT: v_subbrev_u32_e64 v10, s[4:5], 0, v1, vcc +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v4 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v1 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v3 +; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v11, v0 -; CGP-NEXT: v_subb_u32_e32 v0, vcc, v2, v0, vcc -; CGP-NEXT: v_sub_i32_e32 v1, vcc, v10, v1 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v10, v4 +; CGP-NEXT: v_sub_i32_e32 v3, vcc, v9, v3 ; CGP-NEXT: v_cndmask_b32_e64 v12, v12, v13, s[4:5] -; CGP-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc +; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 +; CGP-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc ; CGP-NEXT: v_cndmask_b32_e32 v1, v10, v1, vcc -; CGP-NEXT: v_cndmask_b32_e32 v0, v11, v0, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 -; CGP-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc -; CGP-NEXT: v_cndmask_b32_e32 v0, v9, v0, vcc -; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 -; CGP-NEXT: v_xor_b32_e32 v2, v0, v4 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v1, v4 -; CGP-NEXT: v_subb_u32_e32 v1, vcc, v2, v4, vcc +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; CGP-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc +; CGP-NEXT: v_xor_b32_e32 v0, v0, v11 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v11 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v11 +; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v11, vcc ; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3 ; CGP-NEXT: ; implicit-def: $vgpr8 ; CGP-NEXT: .LBB8_2: ; %Flow1 @@ -3076,141 +2917,132 @@ ; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v10 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v9, v2 ; CGP-NEXT: v_addc_u32_e32 v4, vcc, v10, v2, vcc -; CGP-NEXT: v_xor_b32_e32 v3, v3, v2 -; CGP-NEXT: v_xor_b32_e32 v2, v4, v2 -; CGP-NEXT: v_cvt_f32_u32_e32 v4, v3 -; CGP-NEXT: v_cvt_f32_u32_e32 v6, v2 -; CGP-NEXT: v_ashrrev_i32_e32 v8, 31, v7 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 -; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 -; CGP-NEXT: v_addc_u32_e32 v6, vcc, v7, v8, vcc -; CGP-NEXT: v_sub_i32_e32 v9, vcc, 0, v3 -; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 -; CGP-NEXT: v_mul_f32_e32 v7, 0x2f800000, v4 -; CGP-NEXT: v_trunc_f32_e32 v7, v7 -; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v7 -; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 -; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 -; CGP-NEXT: v_subb_u32_e32 v10, vcc, 0, v2, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v10, v4 -; CGP-NEXT: v_mul_lo_u32 v12, v9, v7 -; CGP-NEXT: v_mul_hi_u32 v14, v9, v4 -; CGP-NEXT: v_mul_lo_u32 v13, v9, v4 -; CGP-NEXT: v_xor_b32_e32 v5, v5, v8 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v14 -; CGP-NEXT: v_mul_lo_u32 v12, v7, v13 -; CGP-NEXT: v_mul_lo_u32 v14, v4, v11 -; CGP-NEXT: v_mul_hi_u32 v15, v4, v13 -; CGP-NEXT: v_mul_hi_u32 v13, v7, v13 -; CGP-NEXT: v_xor_b32_e32 v6, v6, v8 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; CGP-NEXT: v_xor_b32_e32 v6, v3, v2 +; CGP-NEXT: v_xor_b32_e32 v8, v4, v2 +; CGP-NEXT: v_cvt_f32_u32_e32 v2, v6 +; CGP-NEXT: v_cvt_f32_u32_e32 v3, v8 +; CGP-NEXT: v_sub_i32_e32 v10, vcc, 0, v6 +; CGP-NEXT: v_subb_u32_e32 v11, vcc, 0, v8, vcc +; CGP-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 +; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v2 +; CGP-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 +; CGP-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 +; CGP-NEXT: v_trunc_f32_e32 v4, v3 +; CGP-NEXT: v_mac_f32_e32 v2, 0xcf800000, v4 +; CGP-NEXT: v_cvt_u32_f32_e32 v9, v2 +; CGP-NEXT: v_cvt_u32_f32_e32 v12, v4 +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v9, 0 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v10, v12, v[3:4] +; CGP-NEXT: v_mul_hi_u32 v13, v9, v2 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v11, v9, v[3:4] +; CGP-NEXT: v_mul_lo_u32 v4, v12, v2 +; CGP-NEXT: v_mul_hi_u32 v2, v12, v2 +; CGP-NEXT: v_mul_lo_u32 v14, v9, v3 +; CGP-NEXT: v_mul_lo_u32 v15, v12, v3 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v14 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v15 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v15, v7, v11 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12 -; CGP-NEXT: v_mul_hi_u32 v14, v4, v11 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v13 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v13 +; CGP-NEXT: v_mul_hi_u32 v13, v9, v3 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v14, v4 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v15, v2 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; CGP-NEXT: v_mul_hi_u32 v11, v7, v11 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v13 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v12 -; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v11, vcc -; CGP-NEXT: v_mul_lo_u32 v10, v10, v4 -; CGP-NEXT: v_mul_lo_u32 v11, v9, v7 -; CGP-NEXT: v_mul_lo_u32 v12, v9, v4 -; CGP-NEXT: v_mul_hi_u32 v9, v9, v4 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_mul_lo_u32 v10, v7, v12 -; CGP-NEXT: v_mul_lo_u32 v11, v4, v9 -; CGP-NEXT: v_mul_hi_u32 v13, v4, v12 -; CGP-NEXT: v_mul_hi_u32 v12, v7, v12 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; CGP-NEXT: v_mul_hi_u32 v3, v12, v3 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v2 +; CGP-NEXT: v_addc_u32_e32 v12, vcc, v12, v3, vcc +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v9, 0 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v10, v12, v[3:4] +; CGP-NEXT: v_ashrrev_i32_e32 v10, 31, v7 +; CGP-NEXT: v_mul_hi_u32 v13, v9, v2 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v11, v9, v[3:4] +; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v10 +; CGP-NEXT: v_addc_u32_e32 v5, vcc, v7, v10, vcc +; CGP-NEXT: v_xor_b32_e32 v7, v4, v10 +; CGP-NEXT: v_mul_lo_u32 v4, v12, v2 +; CGP-NEXT: v_mul_lo_u32 v11, v9, v3 +; CGP-NEXT: v_mul_hi_u32 v2, v12, v2 +; CGP-NEXT: v_xor_b32_e32 v5, v5, v10 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v13, v7, v9 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_mul_hi_u32 v11, v4, v9 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v13 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v13, v12, v3 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v11, v4 +; CGP-NEXT: v_mul_hi_u32 v11, v9, v3 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v13, v2 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 -; CGP-NEXT: v_mul_hi_u32 v9, v7, v9 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v11 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10 -; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v9, vcc -; CGP-NEXT: v_mul_lo_u32 v9, v6, v4 -; CGP-NEXT: v_mul_lo_u32 v10, v5, v7 -; CGP-NEXT: v_mul_hi_u32 v11, v5, v4 -; CGP-NEXT: v_mul_hi_u32 v4, v6, v4 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v6, v7 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_mul_hi_u32 v10, v5, v7 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 +; CGP-NEXT: v_mul_hi_u32 v3, v12, v3 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v4, vcc, v11, v4 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v9, v2 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v12, v3, vcc +; CGP-NEXT: v_mul_lo_u32 v4, v5, v2 +; CGP-NEXT: v_mul_lo_u32 v9, v7, v3 +; CGP-NEXT: v_mul_hi_u32 v11, v7, v2 +; CGP-NEXT: v_mul_hi_u32 v2, v5, v2 +; CGP-NEXT: v_mul_hi_u32 v12, v5, v3 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CGP-NEXT: v_mul_lo_u32 v9, v2, v4 -; CGP-NEXT: v_mul_lo_u32 v7, v3, v7 -; CGP-NEXT: v_mul_lo_u32 v10, v3, v4 -; CGP-NEXT: v_mul_hi_u32 v4, v3, v4 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v7, v4 -; CGP-NEXT: v_sub_i32_e32 v5, vcc, v5, v10 -; CGP-NEXT: v_subb_u32_e64 v7, s[4:5], v6, v4, vcc -; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v6, v4 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v2 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v3 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v7, v2 -; CGP-NEXT: v_subb_u32_e32 v4, vcc, v4, v2, vcc -; CGP-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[4:5] -; CGP-NEXT: v_sub_i32_e32 v9, vcc, v5, v3 -; CGP-NEXT: v_subbrev_u32_e64 v10, s[4:5], 0, v4, vcc -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v2 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v5, v3 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4 +; CGP-NEXT: v_mul_hi_u32 v9, v7, v3 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v11, v2 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v9 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v2, v4 +; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v11, 0 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v12, v4 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v4, v[3:4] +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v7, v2 +; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v8, v11, v[3:4] +; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v5, v3, vcc +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v5, v3 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v8 +; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v6 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v4, v8 +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v8, vcc +; CGP-NEXT: v_cndmask_b32_e64 v5, v5, v7, s[4:5] +; CGP-NEXT: v_sub_i32_e32 v7, vcc, v2, v6 +; CGP-NEXT: v_subbrev_u32_e64 v9, s[4:5], 0, v3, vcc +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v8 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v3 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v6 +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v8, vcc ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v10, v2 -; CGP-NEXT: v_subb_u32_e32 v2, vcc, v4, v2, vcc -; CGP-NEXT: v_sub_i32_e32 v3, vcc, v9, v3 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v9, v8 +; CGP-NEXT: v_sub_i32_e32 v6, vcc, v7, v6 ; CGP-NEXT: v_cndmask_b32_e64 v11, v11, v12, s[4:5] -; CGP-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v2, vcc +; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 +; CGP-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc ; CGP-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc -; CGP-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 -; CGP-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc -; CGP-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc -; CGP-NEXT: v_xor_b32_e32 v3, v3, v8 -; CGP-NEXT: v_xor_b32_e32 v4, v2, v8 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v3, v8 -; CGP-NEXT: v_subb_u32_e32 v3, vcc, v4, v8, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 +; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc +; CGP-NEXT: v_xor_b32_e32 v2, v2, v10 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v10 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v10, vcc ; CGP-NEXT: ; implicit-def: $vgpr9_vgpr10 ; CGP-NEXT: ; implicit-def: $vgpr5 ; CGP-NEXT: .LBB8_6: ; %Flow @@ -3302,271 +3134,257 @@ ; GISEL-LABEL: v_srem_v2i64_24bit: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_mov_b32 s6, 0xffffff -; GISEL-NEXT: v_and_b32_e32 v1, s6, v4 +; GISEL-NEXT: s_mov_b32 s10, 0xffffff +; GISEL-NEXT: v_and_b32_e32 v1, s10, v4 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, 0, v1 ; GISEL-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, vcc ; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v1 ; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v3 -; GISEL-NEXT: v_sub_i32_e32 v8, vcc, 0, v1 -; GISEL-NEXT: v_subb_u32_e32 v9, vcc, 0, v3, vcc +; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v1 +; GISEL-NEXT: v_subb_u32_e32 v11, vcc, 0, v3, vcc ; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 -; GISEL-NEXT: v_and_b32_e32 v5, s6, v0 -; GISEL-NEXT: v_and_b32_e32 v6, s6, v6 -; GISEL-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v4 -; GISEL-NEXT: v_mul_f32_e32 v4, 0x2f800000, v0 -; GISEL-NEXT: v_trunc_f32_e32 v4, v4 -; GISEL-NEXT: v_mac_f32_e32 v0, 0xcf800000, v4 -; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v0 -; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 -; GISEL-NEXT: v_mul_lo_u32 v0, v9, v7 -; GISEL-NEXT: v_mul_lo_u32 v10, v8, v4 -; GISEL-NEXT: v_mul_hi_u32 v12, v8, v7 -; GISEL-NEXT: v_mul_lo_u32 v11, v8, v7 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v10 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v0, v12 -; GISEL-NEXT: v_mul_lo_u32 v12, v4, v11 -; GISEL-NEXT: v_mul_lo_u32 v13, v7, v10 -; GISEL-NEXT: v_and_b32_e32 v0, s6, v2 -; GISEL-NEXT: v_mul_hi_u32 v2, v7, v11 -; GISEL-NEXT: v_mul_hi_u32 v11, v4, v11 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v12, v2 -; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v12, v4, v10 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v13, v2 -; GISEL-NEXT: v_mul_hi_u32 v13, v7, v10 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; GISEL-NEXT: v_mul_hi_u32 v10, v4, v10 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v11, v2 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v7, v2 -; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v4, v10, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, v9, v2 -; GISEL-NEXT: v_mul_lo_u32 v9, v8, v4 -; GISEL-NEXT: v_mul_lo_u32 v10, v8, v2 -; GISEL-NEXT: v_mul_hi_u32 v8, v8, v2 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_mul_lo_u32 v8, v4, v10 -; GISEL-NEXT: v_mul_lo_u32 v9, v2, v7 -; GISEL-NEXT: v_mul_hi_u32 v12, v2, v10 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, 0, v5 -; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v12, v4, v7 -; GISEL-NEXT: v_mul_hi_u32 v10, v4, v10 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_mul_hi_u32 v9, v2, v7 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; GISEL-NEXT: v_mul_hi_u32 v7, v4, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v8 -; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v4, v7, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, v11, v2 -; GISEL-NEXT: v_mul_lo_u32 v8, v5, v4 -; GISEL-NEXT: v_mul_hi_u32 v9, v5, v2 -; GISEL-NEXT: v_mul_hi_u32 v2, v11, v2 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v9, v11, v4 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GISEL-NEXT: v_mul_hi_u32 v8, v5, v4 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v9, v2 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_mul_hi_u32 v4, v11, v4 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7 -; GISEL-NEXT: v_mul_lo_u32 v7, v3, v2 -; GISEL-NEXT: v_mul_lo_u32 v4, v1, v4 -; GISEL-NEXT: v_mul_lo_u32 v8, v1, v2 -; GISEL-NEXT: v_mul_hi_u32 v2, v1, v2 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v7, v4 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v4, v2 -; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v5, v8 -; GISEL-NEXT: v_subb_u32_e64 v5, s[4:5], v11, v2, vcc -; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v11, v2 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v5, v3 -; GISEL-NEXT: v_subb_u32_e32 v2, vcc, v2, v3, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v7, v7, v8, s[4:5] -; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v4, v1 -; GISEL-NEXT: v_subbrev_u32_e64 v9, s[4:5], 0, v2, vcc -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v1 -; GISEL-NEXT: v_subb_u32_e32 v2, vcc, v2, v3, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v9, v3 -; GISEL-NEXT: v_add_i32_e32 v3, vcc, 0, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v10, v10, v11, s[4:5] -; GISEL-NEXT: v_addc_u32_e64 v6, s[4:5], 0, 0, vcc -; GISEL-NEXT: v_cvt_f32_u32_e32 v11, v3 -; GISEL-NEXT: v_cvt_f32_u32_e32 v12, v6 -; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v8, v1 -; GISEL-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v2, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 -; GISEL-NEXT: v_mac_f32_e32 v11, 0x4f800000, v12 -; GISEL-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc -; GISEL-NEXT: v_rcp_iflag_f32_e32 v8, v11 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 -; GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc -; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v8 -; GISEL-NEXT: v_mul_f32_e32 v7, 0x2f800000, v4 -; GISEL-NEXT: v_trunc_f32_e32 v7, v7 +; GISEL-NEXT: v_and_b32_e32 v0, s10, v0 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 +; GISEL-NEXT: v_trunc_f32_e32 v7, v5 ; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v7 -; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 -; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7 -; GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], 0, v3 -; GISEL-NEXT: v_subb_u32_e64 v9, s[4:5], 0, v6, s[4:5] -; GISEL-NEXT: v_mul_lo_u32 v10, v9, v4 -; GISEL-NEXT: v_mul_lo_u32 v11, v8, v7 -; GISEL-NEXT: v_mul_hi_u32 v13, v8, v4 -; GISEL-NEXT: v_mul_lo_u32 v12, v8, v4 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc -; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v11 -; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v13 -; GISEL-NEXT: v_mul_lo_u32 v11, v7, v12 -; GISEL-NEXT: v_mul_lo_u32 v13, v4, v10 -; GISEL-NEXT: v_mul_hi_u32 v5, v4, v12 -; GISEL-NEXT: v_mul_hi_u32 v12, v7, v12 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v11, v5 +; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v4 +; GISEL-NEXT: v_cvt_u32_f32_e32 v12, v7 +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v10, v9, 0 +; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v10, v12, v[5:6] +; GISEL-NEXT: v_mul_lo_u32 v5, v12, v4 +; GISEL-NEXT: v_mul_hi_u32 v13, v9, v4 +; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v9, v[7:8] +; GISEL-NEXT: v_mul_hi_u32 v4, v12, v4 +; GISEL-NEXT: v_mul_lo_u32 v8, v9, v7 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v13 ; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v7, v10 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v13, v5 -; GISEL-NEXT: v_mul_hi_u32 v13, v4, v10 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; GISEL-NEXT: v_mul_lo_u32 v13, v12, v7 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v8, v5 +; GISEL-NEXT: v_mul_hi_u32 v8, v9, v7 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v13, v4 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; GISEL-NEXT: v_mul_hi_u32 v10, v7, v10 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v11, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v13, v8 +; GISEL-NEXT: v_mul_hi_u32 v7, v12, v7 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v7, v10, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, v9, v4 -; GISEL-NEXT: v_mul_lo_u32 v9, v8, v5 -; GISEL-NEXT: v_mul_lo_u32 v10, v8, v4 -; GISEL-NEXT: v_mul_hi_u32 v8, v8, v4 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_mul_lo_u32 v8, v5, v10 -; GISEL-NEXT: v_mul_lo_u32 v9, v4, v7 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, 0, v0 -; GISEL-NEXT: v_mul_hi_u32 v0, v4, v10 -; GISEL-NEXT: v_addc_u32_e64 v12, s[4:5], 0, 0, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v8, v5 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v4 +; GISEL-NEXT: v_addc_u32_e32 v12, vcc, v12, v5, vcc +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v10, v9, 0 +; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v10, v12, v[5:6] +; GISEL-NEXT: v_mul_lo_u32 v5, v12, v4 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, 0, v0 +; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v9, v[7:8] +; GISEL-NEXT: v_mul_hi_u32 v0, v9, v4 +; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_mul_lo_u32 v8, v9, v7 +; GISEL-NEXT: v_mul_hi_u32 v4, v12, v4 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v5, v0 ; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v8, v5, v7 -; GISEL-NEXT: v_mul_hi_u32 v10, v5, v10 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0 -; GISEL-NEXT: v_mul_hi_u32 v9, v4, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; GISEL-NEXT: v_mul_hi_u32 v7, v5, v7 +; GISEL-NEXT: v_mul_lo_u32 v5, v12, v7 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v0 -; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v7, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, v12, v4 -; GISEL-NEXT: v_mul_lo_u32 v8, v11, v5 -; GISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0, v1 -; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v2, vcc -; GISEL-NEXT: v_mul_hi_u32 v2, v11, v4 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v7, v2 -; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, v12, v5 -; GISEL-NEXT: v_mul_hi_u32 v4, v12, v4 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v8, v2 -; GISEL-NEXT: v_mul_hi_u32 v8, v11, v5 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v7, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; GISEL-NEXT: v_mul_hi_u32 v8, v9, v7 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v5, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 ; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_mul_hi_u32 v5, v12, v5 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v4, v2 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; GISEL-NEXT: v_mul_hi_u32 v7, v12, v7 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0 ; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v7, v4 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; GISEL-NEXT: v_mul_lo_u32 v5, v6, v2 -; GISEL-NEXT: v_mul_lo_u32 v4, v3, v4 -; GISEL-NEXT: v_mul_lo_u32 v7, v3, v2 -; GISEL-NEXT: v_mul_hi_u32 v2, v3, v2 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v4, v2 -; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v11, v7 -; GISEL-NEXT: v_subb_u32_e64 v5, s[4:5], v12, v2, vcc -; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v12, v2 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v5, v6 -; GISEL-NEXT: v_subb_u32_e32 v2, vcc, v2, v6, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v7, v7, v8, s[4:5] -; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v4, v3 -; GISEL-NEXT: v_subbrev_u32_e64 v9, s[4:5], 0, v2, vcc -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v6 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v7, v4 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0 +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v12, v4, vcc +; GISEL-NEXT: v_mul_lo_u32 v5, v11, v0 +; GISEL-NEXT: v_mul_lo_u32 v7, v10, v4 +; GISEL-NEXT: v_and_b32_e32 v8, s10, v6 +; GISEL-NEXT: v_mul_hi_u32 v6, v10, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v11, v0 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v6, v11, v4 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; GISEL-NEXT: v_mul_hi_u32 v7, v10, v4 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; GISEL-NEXT: v_mul_hi_u32 v9, v11, v4 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v0, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, v7, 0 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v0 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, 0, v8 +; GISEL-NEXT: v_mov_b32_e32 v0, v5 +; GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v1, v6, v[0:1] +; GISEL-NEXT: v_cvt_f32_u32_e32 v0, v8 +; GISEL-NEXT: v_cvt_f32_u32_e32 v12, v9 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v3, v7, v[5:6] +; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v10, v4 +; GISEL-NEXT: v_mac_f32_e32 v0, 0x4f800000, v12 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], v11, v5, vcc +; GISEL-NEXT: v_sub_i32_e64 v11, s[4:5], v11, v5 +; GISEL-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x2f800000, v0 +; GISEL-NEXT: v_trunc_f32_e32 v6, v4 +; GISEL-NEXT: v_mac_f32_e32 v0, 0xcf800000, v6 +; GISEL-NEXT: v_cvt_u32_f32_e32 v12, v0 +; GISEL-NEXT: v_sub_i32_e64 v13, s[4:5], 0, v8 +; GISEL-NEXT: v_subb_u32_e64 v14, s[4:5], 0, v9, s[4:5] +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v13, v12, 0 +; GISEL-NEXT: v_cvt_u32_f32_e32 v15, v6 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v3 +; GISEL-NEXT: v_mov_b32_e32 v0, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[4:5] +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v13, v15, v[0:1] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5] +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v14, v12, v[5:6] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v10, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v6, v16, v0, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v0, v15, v4 +; GISEL-NEXT: v_mul_lo_u32 v16, v12, v5 +; GISEL-NEXT: v_mul_hi_u32 v17, v12, v4 +; GISEL-NEXT: v_subb_u32_e32 v11, vcc, v11, v3, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v17 +; GISEL-NEXT: v_sub_i32_e64 v17, s[4:5], v7, v1 +; GISEL-NEXT: v_subbrev_u32_e64 v18, s[6:7], 0, v11, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v17, v1 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v18, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[8:9] +; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, -1, s[6:7] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], v18, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v19, v0, v19, s[6:7] +; GISEL-NEXT: v_subb_u32_e64 v0, s[4:5], v11, v3, s[4:5] +; GISEL-NEXT: v_mul_hi_u32 v4, v15, v4 +; GISEL-NEXT: v_mul_lo_u32 v11, v15, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v16, v3 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v11, v4 +; GISEL-NEXT: v_mul_hi_u32 v11, v12, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v16, v11 +; GISEL-NEXT: v_and_b32_e32 v16, s10, v2 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v4, v3 +; GISEL-NEXT: v_mul_hi_u32 v4, v15, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v11, v3 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v4, v3 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v12, v2 +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v15, v3, vcc +; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v13, v4, 0 +; GISEL-NEXT: v_sub_i32_e32 v11, vcc, v17, v1 +; GISEL-NEXT: v_subbrev_u32_e32 v12, vcc, 0, v0, vcc +; GISEL-NEXT: v_mov_b32_e32 v0, v3 +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v13, v5, v[0:1] +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 +; GISEL-NEXT: v_cndmask_b32_e32 v3, v17, v11, vcc +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v14, v4, v[0:1] +; GISEL-NEXT: v_cndmask_b32_e32 v11, v18, v12, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v5, v2 +; GISEL-NEXT: v_mul_lo_u32 v6, v4, v0 +; GISEL-NEXT: v_mul_hi_u32 v13, v4, v2 +; GISEL-NEXT: v_add_i32_e64 v7, s[4:5], 0, v16 +; GISEL-NEXT: v_addc_u32_e64 v12, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v3, s[4:5], v3, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v3, s[4:5], v3, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v13, v5, v0 +; GISEL-NEXT: v_mul_hi_u32 v2, v5, v2 +; GISEL-NEXT: v_add_i32_e64 v3, s[4:5], v6, v3 +; GISEL-NEXT: v_mul_hi_u32 v6, v4, v0 +; GISEL-NEXT: v_add_i32_e64 v2, s[4:5], v13, v2 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v2, s[4:5], v2, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v13, v6 +; GISEL-NEXT: v_mul_hi_u32 v0, v5, v0 +; GISEL-NEXT: v_add_i32_e64 v2, s[4:5], v2, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v3, s[4:5], v6, v3 +; GISEL-NEXT: v_add_i32_e64 v0, s[4:5], v0, v3 +; GISEL-NEXT: v_add_i32_e64 v2, s[4:5], v4, v2 +; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v5, v0, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v3, v12, v2 +; GISEL-NEXT: v_mul_lo_u32 v4, v7, v0 +; GISEL-NEXT: v_mul_hi_u32 v6, v7, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v5, v10, v11, vcc +; GISEL-NEXT: v_mul_hi_u32 v2, v12, v2 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v6, v12, v0 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v4, v3 +; GISEL-NEXT: v_mul_hi_u32 v4, v7, v0 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v2, v3 +; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0 +; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v8, v6, 0 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v0, v4 +; GISEL-NEXT: v_mov_b32_e32 v0, v3 +; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v8, v4, v[0:1] +; GISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0, v1 +; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v9, v6, v[3:4] +; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v5, vcc +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v7, v2 +; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v12, v3, vcc +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v12, v3 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v4, v9 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v9, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5] +; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v2, v8 +; GISEL-NEXT: v_subbrev_u32_e64 v7, s[4:5], 0, v3, vcc +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v9 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v3 -; GISEL-NEXT: v_subb_u32_e32 v2, vcc, v2, v6, vcc +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v8 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v9, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v9, v6 -; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v8, v3 +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v7, v9 +; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v6, v8 ; GISEL-NEXT: v_cndmask_b32_e64 v10, v10, v11, s[4:5] -; GISEL-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v2, vcc +; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 -; GISEL-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 +; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v4, v5, v2, vcc -; GISEL-NEXT: v_subrev_i32_e32 v2, vcc, 0, v3 -; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v4, vcc +; GISEL-NEXT: v_subrev_i32_e32 v2, vcc, 0, v2 +; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_srem_v2i64_24bit: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll @@ -116,142 +116,133 @@ ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s11 ; GFX8-NEXT: v_cvt_f32_u32_e32 v1, s10 -; GFX8-NEXT: s_sub_u32 s0, 0, s10 -; GFX8-NEXT: s_subb_u32 s1, 0, s11 +; GFX8-NEXT: s_sub_u32 s2, 0, s10 +; GFX8-NEXT: s_subb_u32 s3, 0, s11 ; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 ; GFX8-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX8-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX8-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 -; GFX8-NEXT: v_trunc_f32_e32 v1, v1 -; GFX8-NEXT: v_mul_f32_e32 v2, 0xcf800000, v1 -; GFX8-NEXT: v_add_f32_e32 v0, v2, v0 -; GFX8-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX8-NEXT: v_mul_lo_u32 v2, s0, v1 -; GFX8-NEXT: v_mul_lo_u32 v3, s1, v0 -; GFX8-NEXT: v_mul_hi_u32 v5, s0, v0 -; GFX8-NEXT: v_mul_lo_u32 v4, s0, v0 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 +; GFX8-NEXT: v_trunc_f32_e32 v2, v1 +; GFX8-NEXT: v_mul_f32_e32 v1, 0xcf800000, v2 +; GFX8-NEXT: v_add_f32_e32 v0, v1, v0 +; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v0 +; GFX8-NEXT: v_cvt_u32_f32_e32 v4, v2 +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2] +; GFX8-NEXT: v_mul_hi_u32 v5, v3, v0 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2] +; GFX8-NEXT: v_mul_lo_u32 v2, v4, v0 +; GFX8-NEXT: v_mul_hi_u32 v0, v4, v0 +; GFX8-NEXT: v_mul_lo_u32 v6, v3, v1 +; GFX8-NEXT: v_mul_lo_u32 v7, v4, v1 +; GFX8-NEXT: v_mul_hi_u32 v8, v3, v1 +; GFX8-NEXT: v_mul_hi_u32 v1, v4, v1 +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6 +; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v7, v0 +; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 -; GFX8-NEXT: v_mul_lo_u32 v3, v1, v4 -; GFX8-NEXT: v_mul_lo_u32 v5, v0, v2 -; GFX8-NEXT: v_mul_hi_u32 v6, v0, v4 -; GFX8-NEXT: v_mul_hi_u32 v4, v1, v4 -; GFX8-NEXT: v_mul_lo_u32 v7, v1, v2 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v5 -; GFX8-NEXT: v_mul_hi_u32 v8, v0, v2 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v6 -; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v5, v3 -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v7, v4 +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v8 ; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v8 -; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v6 -; GFX8-NEXT: v_mul_hi_u32 v2, v1, v2 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v4, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v5, v4 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v4 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v3 -; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc -; GFX8-NEXT: v_mul_lo_u32 v2, s1, v0 -; GFX8-NEXT: v_mul_lo_u32 v3, s0, v1 -; GFX8-NEXT: v_mul_hi_u32 v5, s0, v0 -; GFX8-NEXT: v_mul_lo_u32 v4, s0, v0 -; GFX8-NEXT: v_mov_b32_e32 v6, s11 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v6, v2 +; GFX8-NEXT: v_add_u32_e32 v5, vcc, v7, v5 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 +; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 +; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v0 +; GFX8-NEXT: v_addc_u32_e32 v4, vcc, v4, v1, vcc +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2] +; GFX8-NEXT: v_mul_hi_u32 v6, v3, v0 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2] +; GFX8-NEXT: v_mul_lo_u32 v2, v4, v0 +; GFX8-NEXT: v_mul_hi_u32 v0, v4, v0 +; GFX8-NEXT: v_mul_lo_u32 v5, v3, v1 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 -; GFX8-NEXT: v_mul_lo_u32 v3, v1, v4 -; GFX8-NEXT: v_mul_lo_u32 v5, v0, v2 -; GFX8-NEXT: v_mul_hi_u32 v7, v0, v4 -; GFX8-NEXT: v_mul_hi_u32 v4, v1, v4 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v5 ; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v7 -; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v7, v1, v2 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v5, v3 -; GFX8-NEXT: v_mul_hi_u32 v5, v0, v2 -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v7, v4 -; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v5 +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6 +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX8-NEXT: v_mul_lo_u32 v6, v4, v1 +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 +; GFX8-NEXT: v_mul_hi_u32 v5, v3, v1 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v6, v0 +; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v5 ; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v5, vcc, v7, v5 -; GFX8-NEXT: v_mul_hi_u32 v2, v1, v2 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v4, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v5, v4 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v4 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v3 -; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc +; GFX8-NEXT: v_add_u32_e32 v5, vcc, v6, v5 +; GFX8-NEXT: v_mul_hi_u32 v1, v4, v1 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 +; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v3, v0 +; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc ; GFX8-NEXT: v_mul_lo_u32 v2, s9, v0 ; GFX8-NEXT: v_mul_lo_u32 v3, s8, v1 -; GFX8-NEXT: v_mul_hi_u32 v5, s8, v0 +; GFX8-NEXT: v_mul_hi_u32 v4, s8, v0 ; GFX8-NEXT: v_mul_hi_u32 v0, s9, v0 -; GFX8-NEXT: v_mov_b32_e32 v4, s9 +; GFX8-NEXT: v_mul_hi_u32 v5, s9, v1 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 ; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v4 ; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v5, s9, v1 +; GFX8-NEXT: v_mul_lo_u32 v4, s9, v1 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 ; GFX8-NEXT: v_mul_hi_u32 v3, s8, v1 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v5, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v4, v0 +; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v3 ; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v5, v3 -; GFX8-NEXT: v_mul_hi_u32 v1, s9, v1 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 +; GFX8-NEXT: v_add_u32_e32 v3, vcc, v4, v3 +; GFX8-NEXT: v_add_u32_e32 v4, vcc, v0, v2 +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s10, v4, 0 ; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 -; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 -; GFX8-NEXT: v_mul_lo_u32 v2, s11, v0 -; GFX8-NEXT: v_mul_lo_u32 v3, s10, v1 -; GFX8-NEXT: v_mul_hi_u32 v7, s10, v0 -; GFX8-NEXT: v_mul_lo_u32 v5, s10, v0 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v7 -; GFX8-NEXT: v_sub_u32_e32 v3, vcc, s8, v5 -; GFX8-NEXT: v_subb_u32_e64 v4, s[0:1], v4, v2, vcc -; GFX8-NEXT: v_sub_u32_e64 v2, s[0:1], s9, v2 -; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s11, v4 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[0:1] -; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s10, v3 +; GFX8-NEXT: v_add_u32_e32 v3, vcc, v5, v2 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s10, v3, v[1:2] +; GFX8-NEXT: v_mov_b32_e32 v6, s9 +; GFX8-NEXT: v_mov_b32_e32 v5, s11 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s11, v4, v[1:2] +; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s8, v0 +; GFX8-NEXT: v_subb_u32_e64 v6, s[0:1], v6, v1, vcc +; GFX8-NEXT: v_sub_u32_e64 v0, s[0:1], s9, v1 +; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s11, v6 +; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[0:1] +; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s10, v2 ; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[0:1] -; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s11, v4 -; GFX8-NEXT: v_subb_u32_e32 v2, vcc, v2, v6, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v7, s[0:1] -; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, s10, v3 -; GFX8-NEXT: v_subbrev_u32_e64 v8, s[0:1], 0, v2, vcc -; GFX8-NEXT: v_add_u32_e64 v9, s[0:1], 1, v0 -; GFX8-NEXT: v_addc_u32_e64 v10, s[0:1], 0, v1, s[0:1] +; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s11, v6 +; GFX8-NEXT: v_subb_u32_e32 v0, vcc, v0, v5, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[0:1] +; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, s10, v2 +; GFX8-NEXT: v_subbrev_u32_e64 v8, s[0:1], 0, v0, vcc +; GFX8-NEXT: v_add_u32_e64 v9, s[0:1], 1, v4 +; GFX8-NEXT: v_addc_u32_e64 v10, s[0:1], 0, v3, s[0:1] ; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s11, v8 ; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[0:1] ; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s10, v7 +; GFX8-NEXT: v_subb_u32_e32 v0, vcc, v0, v5, vcc ; GFX8-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[0:1] ; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s11, v8 -; GFX8-NEXT: v_subb_u32_e32 v2, vcc, v2, v6, vcc +; GFX8-NEXT: v_subrev_u32_e32 v5, vcc, s10, v7 ; GFX8-NEXT: v_cndmask_b32_e64 v11, v11, v12, s[0:1] ; GFX8-NEXT: v_add_u32_e64 v12, s[0:1], 1, v9 -; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, s10, v7 +; GFX8-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v0, vcc ; GFX8-NEXT: v_addc_u32_e64 v13, s[0:1], 0, v10, s[0:1] -; GFX8-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v2, vcc ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v9, v12, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v9, v10, v13, vcc +; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 ; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v11 -; GFX8-NEXT: v_cndmask_b32_e32 v9, v9, v12, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v10, v10, v13, vcc -; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 -; GFX8-NEXT: v_cndmask_b32_e64 v5, v7, v6, s[0:1] -; GFX8-NEXT: v_cndmask_b32_e64 v6, v8, v2, s[0:1] -; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v5, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v6, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v9, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e64 v4, v8, v14, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v3, v6, v4, vcc ; GFX8-NEXT: v_mov_b32_e32 v4, s4 -; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v10, vcc ; GFX8-NEXT: v_mov_b32_e32 v5, s5 ; GFX8-NEXT: flat_store_dwordx2 v[4:5], v[0:1] ; GFX8-NEXT: v_mov_b32_e32 v0, s6 @@ -265,138 +256,132 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s11 ; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s10 -; GFX9-NEXT: s_sub_u32 s0, 0, s10 -; GFX9-NEXT: s_subb_u32 s1, 0, s11 +; GFX9-NEXT: s_sub_u32 s2, 0, s10 +; GFX9-NEXT: s_subb_u32 s3, 0, s11 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 ; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX9-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 -; GFX9-NEXT: v_trunc_f32_e32 v1, v1 -; GFX9-NEXT: v_mul_f32_e32 v2, 0xcf800000, v1 -; GFX9-NEXT: v_add_f32_e32 v0, v2, v0 -; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX9-NEXT: v_mul_lo_u32 v2, s0, v1 -; GFX9-NEXT: v_mul_lo_u32 v3, s1, v0 -; GFX9-NEXT: v_mul_hi_u32 v4, s0, v0 -; GFX9-NEXT: v_mul_lo_u32 v5, s0, v0 -; GFX9-NEXT: v_add3_u32 v2, v3, v2, v4 -; GFX9-NEXT: v_mul_lo_u32 v3, v1, v5 -; GFX9-NEXT: v_mul_lo_u32 v6, v0, v2 -; GFX9-NEXT: v_mul_hi_u32 v4, v0, v5 -; GFX9-NEXT: v_mul_hi_u32 v5, v1, v5 -; GFX9-NEXT: v_mul_lo_u32 v7, v1, v2 -; GFX9-NEXT: v_mul_hi_u32 v8, v0, v2 -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v6 +; GFX9-NEXT: v_trunc_f32_e32 v2, v1 +; GFX9-NEXT: v_mul_f32_e32 v1, 0xcf800000, v2 +; GFX9-NEXT: v_add_f32_e32 v0, v1, v0 +; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v0 +; GFX9-NEXT: v_cvt_u32_f32_e32 v4, v2 +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2] +; GFX9-NEXT: v_mul_hi_u32 v5, v3, v0 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2] +; GFX9-NEXT: v_mul_lo_u32 v2, v4, v0 +; GFX9-NEXT: v_mul_hi_u32 v0, v4, v0 +; GFX9-NEXT: v_mul_lo_u32 v6, v3, v1 +; GFX9-NEXT: v_mul_lo_u32 v7, v4, v1 +; GFX9-NEXT: v_mul_hi_u32 v8, v3, v1 +; GFX9-NEXT: v_mul_hi_u32 v1, v4, v1 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6 ; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v7, v5 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v7, v0 ; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v4 -; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX9-NEXT: v_mul_hi_u32 v2, v1, v2 -; GFX9-NEXT: v_add_u32_e32 v3, v6, v3 -; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v5, v8 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v8 +; GFX9-NEXT: v_add_u32_e32 v2, v6, v2 ; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v4, v3 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 ; GFX9-NEXT: v_add_u32_e32 v5, v7, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v2, v5, v4, v2 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3 -; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc -; GFX9-NEXT: v_mul_lo_u32 v2, s1, v0 -; GFX9-NEXT: v_mul_lo_u32 v3, s0, v1 -; GFX9-NEXT: v_mul_hi_u32 v4, s0, v0 -; GFX9-NEXT: v_mul_lo_u32 v5, s0, v0 -; GFX9-NEXT: v_mov_b32_e32 v8, s9 -; GFX9-NEXT: v_add3_u32 v2, v2, v3, v4 -; GFX9-NEXT: v_mul_lo_u32 v3, v1, v5 -; GFX9-NEXT: v_mul_lo_u32 v4, v0, v2 -; GFX9-NEXT: v_mul_hi_u32 v6, v0, v5 -; GFX9-NEXT: v_mul_hi_u32 v5, v1, v5 -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v4 -; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v6 -; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX9-NEXT: v_mul_lo_u32 v6, v1, v2 -; GFX9-NEXT: v_add_u32_e32 v3, v4, v3 -; GFX9-NEXT: v_mul_hi_u32 v4, v0, v2 -; GFX9-NEXT: v_mul_hi_u32 v2, v1, v2 -; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v6, v5 +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX9-NEXT: v_add3_u32 v1, v5, v2, v1 +; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v4, v1, vcc +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0 +; GFX9-NEXT: v_mov_b32_e32 v7, s11 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2] +; GFX9-NEXT: v_mul_hi_u32 v6, v3, v0 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2] +; GFX9-NEXT: v_mul_lo_u32 v2, v4, v0 +; GFX9-NEXT: v_mul_hi_u32 v0, v4, v0 +; GFX9-NEXT: v_mul_lo_u32 v5, v3, v1 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 +; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6 +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX9-NEXT: v_mul_lo_u32 v6, v4, v1 +; GFX9-NEXT: v_add_u32_e32 v2, v5, v2 +; GFX9-NEXT: v_mul_hi_u32 v5, v3, v1 +; GFX9-NEXT: v_mul_hi_u32 v1, v4, v1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v6, v0 ; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v5, v4 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v5 ; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v4, v3 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 ; GFX9-NEXT: v_add_u32_e32 v5, v6, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v2, v5, v4, v2 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3 -; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX9-NEXT: v_add3_u32 v1, v5, v2, v1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v3, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v4, v1, vcc ; GFX9-NEXT: v_mul_lo_u32 v2, s9, v0 ; GFX9-NEXT: v_mul_lo_u32 v3, s8, v1 -; GFX9-NEXT: v_mul_hi_u32 v5, s8, v0 +; GFX9-NEXT: v_mul_hi_u32 v4, s8, v0 ; GFX9-NEXT: v_mul_hi_u32 v0, s9, v0 -; GFX9-NEXT: v_mov_b32_e32 v4, s11 +; GFX9-NEXT: v_mul_hi_u32 v6, s9, v1 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3 ; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 ; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_mul_lo_u32 v5, s9, v1 +; GFX9-NEXT: v_mul_lo_u32 v4, s9, v1 ; GFX9-NEXT: v_add_u32_e32 v2, v3, v2 ; GFX9-NEXT: v_mul_hi_u32 v3, s8, v1 -; GFX9-NEXT: v_mul_hi_u32 v1, s9, v1 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v5, v0 -; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v4, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3 ; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-NEXT: v_add_u32_e32 v3, v5, v3 +; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v0, v2 +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s10, v5, 0 ; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v1, v3, v2, v1 -; GFX9-NEXT: v_mul_lo_u32 v2, s11, v0 -; GFX9-NEXT: v_mul_lo_u32 v3, s10, v1 -; GFX9-NEXT: v_mul_hi_u32 v5, s10, v0 -; GFX9-NEXT: v_mul_lo_u32 v7, s10, v0 -; GFX9-NEXT: v_mov_b32_e32 v6, 0 -; GFX9-NEXT: v_add3_u32 v2, v2, v3, v5 -; GFX9-NEXT: v_sub_co_u32_e32 v3, vcc, s8, v7 -; GFX9-NEXT: v_subb_co_u32_e64 v5, s[0:1], v8, v2, vcc -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s11, v5 -; GFX9-NEXT: v_sub_u32_e32 v2, s9, v2 -; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s10, v3 +; GFX9-NEXT: v_add_u32_e32 v3, v4, v3 +; GFX9-NEXT: v_add3_u32 v3, v3, v2, v6 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s10, v3, v[1:2] +; GFX9-NEXT: v_mov_b32_e32 v6, s9 +; GFX9-NEXT: v_mov_b32_e32 v4, 0 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s11, v5, v[1:2] +; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, s8, v0 +; GFX9-NEXT: v_subb_co_u32_e64 v6, s[0:1], v6, v1, vcc +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s11, v6 +; GFX9-NEXT: v_sub_u32_e32 v0, s9, v1 +; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[0:1] +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s10, v2 ; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s11, v5 -; GFX9-NEXT: v_subb_co_u32_e32 v2, vcc, v2, v4, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v8, s[0:1] -; GFX9-NEXT: v_subrev_co_u32_e32 v8, vcc, s10, v3 -; GFX9-NEXT: v_subbrev_co_u32_e64 v9, s[0:1], 0, v2, vcc -; GFX9-NEXT: v_add_co_u32_e64 v10, s[0:1], 1, v0 -; GFX9-NEXT: v_addc_co_u32_e64 v11, s[0:1], 0, v1, s[0:1] +; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s11, v6 +; GFX9-NEXT: v_subb_co_u32_e32 v0, vcc, v0, v7, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[0:1] +; GFX9-NEXT: v_subrev_co_u32_e32 v8, vcc, s10, v2 +; GFX9-NEXT: v_subbrev_co_u32_e64 v9, s[0:1], 0, v0, vcc +; GFX9-NEXT: v_add_co_u32_e64 v10, s[0:1], 1, v5 +; GFX9-NEXT: v_addc_co_u32_e64 v11, s[0:1], 0, v3, s[0:1] ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s11, v9 ; GFX9-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[0:1] ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s10, v8 -; GFX9-NEXT: v_subb_co_u32_e32 v2, vcc, v2, v4, vcc +; GFX9-NEXT: v_subb_co_u32_e32 v0, vcc, v0, v7, vcc ; GFX9-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[0:1] ; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s11, v9 -; GFX9-NEXT: v_subrev_co_u32_e32 v4, vcc, s10, v8 +; GFX9-NEXT: v_subrev_co_u32_e32 v7, vcc, s10, v8 ; GFX9-NEXT: v_cndmask_b32_e64 v12, v12, v13, s[0:1] ; GFX9-NEXT: v_add_co_u32_e64 v13, s[0:1], 1, v10 -; GFX9-NEXT: v_subbrev_co_u32_e32 v2, vcc, 0, v2, vcc +; GFX9-NEXT: v_subbrev_co_u32_e32 v15, vcc, 0, v0, vcc ; GFX9-NEXT: v_addc_co_u32_e64 v14, s[0:1], 0, v11, s[0:1] ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 -; GFX9-NEXT: v_cndmask_b32_e32 v10, v10, v13, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v11, v11, v14, vcc -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v13, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v10, v11, v14, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 ; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v12 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[0:1] -; GFX9-NEXT: v_cndmask_b32_e64 v7, v9, v2, s[0:1] -; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v7, vcc -; GFX9-NEXT: global_store_dwordx2 v6, v[0:1], s[4:5] -; GFX9-NEXT: global_store_dwordx2 v6, v[2:3], s[6:7] +; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v10, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v3, v8, v7, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v5, v9, v15, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v3, v6, v5, vcc +; GFX9-NEXT: global_store_dwordx2 v4, v[0:1], s[4:5] +; GFX9-NEXT: global_store_dwordx2 v4, v[2:3], s[6:7] ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: udivrem_i64: @@ -414,64 +399,62 @@ ; GFX10-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX10-NEXT: v_trunc_f32_e32 v1, v1 ; GFX10-NEXT: v_mul_f32_e32 v2, 0xcf800000, v1 -; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX10-NEXT: v_add_f32_e32 v0, v2, v0 -; GFX10-NEXT: v_mul_lo_u32 v2, s0, v1 -; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX10-NEXT: v_mul_lo_u32 v3, s1, v0 -; GFX10-NEXT: v_mul_hi_u32 v4, s0, v0 -; GFX10-NEXT: v_mul_lo_u32 v5, s0, v0 -; GFX10-NEXT: v_add3_u32 v2, v3, v2, v4 -; GFX10-NEXT: v_mul_lo_u32 v3, v1, v5 -; GFX10-NEXT: v_mul_hi_u32 v6, v1, v5 -; GFX10-NEXT: v_mul_hi_u32 v5, v0, v5 -; GFX10-NEXT: v_mul_lo_u32 v4, v0, v2 -; GFX10-NEXT: v_mul_lo_u32 v7, v1, v2 -; GFX10-NEXT: v_mul_hi_u32 v8, v0, v2 -; GFX10-NEXT: v_mul_hi_u32 v2, v1, v2 -; GFX10-NEXT: v_add_co_u32 v3, s2, v3, v4 -; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s2 +; GFX10-NEXT: v_cvt_u32_f32_e32 v2, v1 +; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v0 +; GFX10-NEXT: v_mul_lo_u32 v4, s0, v2 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s2, s0, v3, 0 +; GFX10-NEXT: v_mul_lo_u32 v5, s1, v3 +; GFX10-NEXT: v_mul_hi_u32 v6, v2, v0 +; GFX10-NEXT: v_add3_u32 v1, v1, v4, v5 +; GFX10-NEXT: v_mul_lo_u32 v4, v2, v0 +; GFX10-NEXT: v_mul_hi_u32 v0, v3, v0 +; GFX10-NEXT: v_mul_lo_u32 v5, v3, v1 +; GFX10-NEXT: v_mul_lo_u32 v7, v2, v1 +; GFX10-NEXT: v_mul_hi_u32 v8, v3, v1 +; GFX10-NEXT: v_mul_hi_u32 v1, v2, v1 +; GFX10-NEXT: v_add_co_u32 v4, s2, v4, v5 +; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s2 ; GFX10-NEXT: v_add_co_u32 v6, s2, v7, v6 ; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s2 -; GFX10-NEXT: v_add_co_u32 v3, s2, v3, v5 -; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s2 -; GFX10-NEXT: v_add_co_u32 v5, s2, v6, v8 +; GFX10-NEXT: v_add_co_u32 v0, s2, v4, v0 +; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2 +; GFX10-NEXT: v_add_co_u32 v4, s2, v6, v8 ; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s2 -; GFX10-NEXT: v_add_nc_u32_e32 v3, v4, v3 -; GFX10-NEXT: v_add_nc_u32_e32 v4, v7, v6 -; GFX10-NEXT: v_add_co_u32 v3, s2, v5, v3 -; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s2 -; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v3 -; GFX10-NEXT: v_add3_u32 v2, v4, v5, v2 -; GFX10-NEXT: v_mul_hi_u32 v3, s0, v0 -; GFX10-NEXT: v_mul_lo_u32 v5, s0, v0 -; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v2, vcc_lo -; GFX10-NEXT: v_mul_lo_u32 v2, s1, v0 -; GFX10-NEXT: v_mul_lo_u32 v4, s0, v1 -; GFX10-NEXT: v_mul_hi_u32 v6, v1, v5 -; GFX10-NEXT: v_add3_u32 v2, v2, v4, v3 -; GFX10-NEXT: v_mul_lo_u32 v3, v1, v5 -; GFX10-NEXT: v_mul_hi_u32 v5, v0, v5 -; GFX10-NEXT: v_mul_lo_u32 v4, v0, v2 -; GFX10-NEXT: v_mul_lo_u32 v7, v1, v2 -; GFX10-NEXT: v_mul_hi_u32 v8, v0, v2 -; GFX10-NEXT: v_mul_hi_u32 v2, v1, v2 -; GFX10-NEXT: v_add_co_u32 v3, s0, v3, v4 -; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s0 +; GFX10-NEXT: v_add_nc_u32_e32 v0, v5, v0 +; GFX10-NEXT: v_add_nc_u32_e32 v5, v7, v6 +; GFX10-NEXT: v_add_co_u32 v0, s2, v4, v0 +; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s2 +; GFX10-NEXT: v_add_co_u32 v3, vcc_lo, v3, v0 +; GFX10-NEXT: v_add3_u32 v1, v5, v4, v1 +; GFX10-NEXT: v_mul_lo_u32 v4, s1, v3 +; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v2, v1, vcc_lo +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s2, s0, v3, 0 +; GFX10-NEXT: v_mul_lo_u32 v5, s0, v2 +; GFX10-NEXT: v_mul_hi_u32 v6, v2, v0 +; GFX10-NEXT: v_add3_u32 v1, v1, v5, v4 +; GFX10-NEXT: v_mul_lo_u32 v4, v2, v0 +; GFX10-NEXT: v_mul_hi_u32 v0, v3, v0 +; GFX10-NEXT: v_mul_lo_u32 v5, v3, v1 +; GFX10-NEXT: v_mul_lo_u32 v7, v2, v1 +; GFX10-NEXT: v_mul_hi_u32 v8, v3, v1 +; GFX10-NEXT: v_mul_hi_u32 v1, v2, v1 +; GFX10-NEXT: v_add_co_u32 v4, s0, v4, v5 +; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s0 ; GFX10-NEXT: v_add_co_u32 v6, s0, v7, v6 ; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v3, s0, v3, v5 -; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v5, s0, v6, v8 +; GFX10-NEXT: v_add_co_u32 v0, s0, v4, v0 +; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX10-NEXT: v_add_co_u32 v4, s0, v6, v8 ; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0 -; GFX10-NEXT: v_add_nc_u32_e32 v3, v4, v3 -; GFX10-NEXT: v_add_nc_u32_e32 v4, v7, v6 -; GFX10-NEXT: v_add_co_u32 v3, s0, v5, v3 -; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v3 -; GFX10-NEXT: v_add3_u32 v2, v4, v5, v2 +; GFX10-NEXT: v_add_nc_u32_e32 v0, v5, v0 +; GFX10-NEXT: v_add_nc_u32_e32 v5, v7, v6 +; GFX10-NEXT: v_add_co_u32 v0, s0, v4, v0 +; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v3, v0 +; GFX10-NEXT: v_add3_u32 v1, v5, v4, v1 ; GFX10-NEXT: v_mul_hi_u32 v4, s9, v0 -; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v2, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v2, v1, vcc_lo ; GFX10-NEXT: v_mul_lo_u32 v2, s9, v0 ; GFX10-NEXT: v_mul_hi_u32 v0, s8, v0 ; GFX10-NEXT: v_mul_lo_u32 v3, s8, v1 @@ -488,53 +471,52 @@ ; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s0 ; GFX10-NEXT: v_add_nc_u32_e32 v0, v3, v0 ; GFX10-NEXT: v_add_nc_u32_e32 v3, v5, v4 -; GFX10-NEXT: v_add_co_u32 v0, s0, v2, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 -; GFX10-NEXT: v_mul_lo_u32 v5, s10, v0 -; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v0, 1 -; GFX10-NEXT: v_add3_u32 v1, v3, v2, v1 -; GFX10-NEXT: v_mul_lo_u32 v2, s11, v0 -; GFX10-NEXT: v_mul_hi_u32 v3, s10, v0 -; GFX10-NEXT: v_mul_lo_u32 v4, s10, v1 -; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, 0, v1, vcc_lo -; GFX10-NEXT: v_add3_u32 v2, v2, v4, v3 -; GFX10-NEXT: v_add_co_u32 v3, vcc_lo, v6, 1 -; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, 0, v7, vcc_lo -; GFX10-NEXT: v_sub_nc_u32_e32 v8, s9, v2 -; GFX10-NEXT: v_sub_co_u32 v5, vcc_lo, s8, v5 -; GFX10-NEXT: v_sub_co_ci_u32_e64 v9, s0, s9, v2, vcc_lo -; GFX10-NEXT: v_subrev_co_ci_u32_e32 v2, vcc_lo, s11, v8, vcc_lo -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s10, v5 -; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc_lo -; GFX10-NEXT: v_sub_co_u32 v10, vcc_lo, v5, s10 -; GFX10-NEXT: v_subrev_co_ci_u32_e64 v11, s0, 0, v2, vcc_lo -; GFX10-NEXT: v_cmp_le_u32_e64 s0, s11, v9 -; GFX10-NEXT: v_subrev_co_ci_u32_e32 v2, vcc_lo, s11, v2, vcc_lo +; GFX10-NEXT: v_add_co_u32 v2, s0, v2, v0 +; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX10-NEXT: v_mul_lo_u32 v4, s11, v2 +; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v2, 1 +; GFX10-NEXT: v_add3_u32 v3, v3, v0, v1 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s10, v2, 0 +; GFX10-NEXT: v_mul_lo_u32 v5, s10, v3 +; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, 0, v3, vcc_lo +; GFX10-NEXT: v_add3_u32 v1, v1, v5, v4 +; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v6, 1 +; GFX10-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, 0, v7, vcc_lo +; GFX10-NEXT: v_sub_nc_u32_e32 v8, s9, v1 +; GFX10-NEXT: v_sub_co_u32 v9, vcc_lo, s8, v0 +; GFX10-NEXT: v_sub_co_ci_u32_e64 v10, s0, s9, v1, vcc_lo +; GFX10-NEXT: v_subrev_co_ci_u32_e32 v0, vcc_lo, s11, v8, vcc_lo +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s10, v9 +; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX10-NEXT: v_sub_co_u32 v8, vcc_lo, v9, s10 +; GFX10-NEXT: v_subrev_co_ci_u32_e64 v11, s0, 0, v0, vcc_lo +; GFX10-NEXT: v_cmp_le_u32_e64 s0, s11, v10 +; GFX10-NEXT: v_subrev_co_ci_u32_e32 v0, vcc_lo, s11, v0, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, s11, v11 ; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, -1, s0 -; GFX10-NEXT: v_cmp_le_u32_e64 s0, s10, v10 +; GFX10-NEXT: v_cmp_le_u32_e64 s0, s10, v8 ; GFX10-NEXT: v_cndmask_b32_e64 v13, 0, -1, s0 ; GFX10-NEXT: v_cmp_le_u32_e64 s0, s11, v11 ; GFX10-NEXT: v_cndmask_b32_e64 v14, 0, -1, s0 -; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s11, v9 -; GFX10-NEXT: v_cndmask_b32_e64 v8, v12, v8, s0 +; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s11, v10 +; GFX10-NEXT: v_cndmask_b32_e64 v1, v12, v1, s0 ; GFX10-NEXT: v_cndmask_b32_e32 v12, v14, v13, vcc_lo -; GFX10-NEXT: v_sub_co_u32 v13, vcc_lo, v10, s10 -; GFX10-NEXT: v_subrev_co_ci_u32_e32 v2, vcc_lo, 0, v2, vcc_lo +; GFX10-NEXT: v_sub_co_u32 v13, vcc_lo, v8, s10 +; GFX10-NEXT: v_subrev_co_ci_u32_e32 v0, vcc_lo, 0, v0, vcc_lo ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v12 -; GFX10-NEXT: v_cmp_ne_u32_e64 s1, 0, v8 -; GFX10-NEXT: v_mov_b32_e32 v8, 0 -; GFX10-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v6, v10, v13, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v7, v11, v2, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v3, s1 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v4, s1 -; GFX10-NEXT: v_cndmask_b32_e64 v2, v5, v6, s1 -; GFX10-NEXT: v_cndmask_b32_e64 v3, v9, v7, s1 -; GFX10-NEXT: global_store_dwordx2 v8, v[0:1], s[4:5] -; GFX10-NEXT: global_store_dwordx2 v8, v[2:3], s[6:7] +; GFX10-NEXT: v_cmp_ne_u32_e64 s1, 0, v1 +; GFX10-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v6, v8, v13, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v5, v11, v0, s0 +; GFX10-NEXT: v_mov_b32_e32 v7, 0 +; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, v4, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, v1, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v2, v9, v6, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v3, v10, v5, s1 +; GFX10-NEXT: global_store_dwordx2 v7, v[0:1], s[4:5] +; GFX10-NEXT: global_store_dwordx2 v7, v[2:3], s[6:7] ; GFX10-NEXT: s_endpgm %div = udiv i64 %x, %y store i64 %div, i64 addrspace(1)* %out0 @@ -1016,278 +998,261 @@ ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s13 ; GFX8-NEXT: v_cvt_f32_u32_e32 v1, s12 -; GFX8-NEXT: s_sub_u32 s0, 0, s12 -; GFX8-NEXT: s_subb_u32 s1, 0, s13 +; GFX8-NEXT: s_sub_u32 s2, 0, s12 +; GFX8-NEXT: s_subb_u32 s3, 0, s13 ; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 ; GFX8-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX8-NEXT: s_sub_u32 s2, 0, s14 -; GFX8-NEXT: s_subb_u32 s3, 0, s15 ; GFX8-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX8-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 -; GFX8-NEXT: v_trunc_f32_e32 v1, v1 -; GFX8-NEXT: v_mul_f32_e32 v2, 0xcf800000, v1 -; GFX8-NEXT: v_add_f32_e32 v0, v2, v0 -; GFX8-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX8-NEXT: v_mul_lo_u32 v2, s0, v1 -; GFX8-NEXT: v_mul_lo_u32 v3, s1, v0 -; GFX8-NEXT: v_mul_hi_u32 v5, s0, v0 -; GFX8-NEXT: v_mul_lo_u32 v4, s0, v0 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 +; GFX8-NEXT: v_trunc_f32_e32 v2, v1 +; GFX8-NEXT: v_mul_f32_e32 v1, 0xcf800000, v2 +; GFX8-NEXT: v_add_f32_e32 v0, v1, v0 +; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v0 +; GFX8-NEXT: v_cvt_u32_f32_e32 v4, v2 +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2] +; GFX8-NEXT: v_mul_hi_u32 v5, v3, v0 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2] +; GFX8-NEXT: v_mul_lo_u32 v2, v4, v0 +; GFX8-NEXT: v_mul_hi_u32 v0, v4, v0 +; GFX8-NEXT: v_mul_lo_u32 v6, v3, v1 +; GFX8-NEXT: v_mul_lo_u32 v7, v4, v1 +; GFX8-NEXT: v_mul_hi_u32 v8, v3, v1 +; GFX8-NEXT: v_mul_hi_u32 v1, v4, v1 +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6 +; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v7, v0 +; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 -; GFX8-NEXT: v_mul_lo_u32 v3, v1, v4 -; GFX8-NEXT: v_mul_lo_u32 v5, v0, v2 -; GFX8-NEXT: v_mul_hi_u32 v6, v0, v4 -; GFX8-NEXT: v_mul_hi_u32 v4, v1, v4 -; GFX8-NEXT: v_mul_lo_u32 v7, v1, v2 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v5 -; GFX8-NEXT: v_mul_hi_u32 v8, v0, v2 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v6 -; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v5, v3 -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v7, v4 +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v8 ; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v8 -; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v6 -; GFX8-NEXT: v_mul_hi_u32 v2, v1, v2 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v4, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v5, v4 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v4 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v3 -; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc -; GFX8-NEXT: v_mul_lo_u32 v2, s1, v0 -; GFX8-NEXT: v_mul_lo_u32 v3, s0, v1 -; GFX8-NEXT: v_mul_hi_u32 v5, s0, v0 -; GFX8-NEXT: v_mul_lo_u32 v4, s0, v0 -; GFX8-NEXT: v_mov_b32_e32 v6, s13 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v6, v2 +; GFX8-NEXT: v_add_u32_e32 v5, vcc, v7, v5 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 +; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 +; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v0 +; GFX8-NEXT: v_addc_u32_e32 v4, vcc, v4, v1, vcc +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2] +; GFX8-NEXT: v_mul_hi_u32 v6, v3, v0 +; GFX8-NEXT: s_sub_u32 s2, 0, s14 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2] +; GFX8-NEXT: v_mul_lo_u32 v2, v4, v0 +; GFX8-NEXT: v_mul_hi_u32 v0, v4, v0 +; GFX8-NEXT: v_mul_lo_u32 v5, v3, v1 +; GFX8-NEXT: s_subb_u32 s3, 0, s15 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 -; GFX8-NEXT: v_mul_lo_u32 v3, v1, v4 -; GFX8-NEXT: v_mul_lo_u32 v5, v0, v2 -; GFX8-NEXT: v_mul_hi_u32 v7, v0, v4 -; GFX8-NEXT: v_mul_hi_u32 v4, v1, v4 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v5 ; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v7 -; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v7, v1, v2 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v5, v3 -; GFX8-NEXT: v_mul_hi_u32 v5, v0, v2 -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v7, v4 -; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v5 +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6 +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX8-NEXT: v_mul_lo_u32 v6, v4, v1 +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 +; GFX8-NEXT: v_mul_hi_u32 v5, v3, v1 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v6, v0 +; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v5 ; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v5, vcc, v7, v5 -; GFX8-NEXT: v_mul_hi_u32 v2, v1, v2 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v4, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v4, vcc, v5, v4 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v4 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v3 -; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc +; GFX8-NEXT: v_add_u32_e32 v5, vcc, v6, v5 +; GFX8-NEXT: v_mul_hi_u32 v1, v4, v1 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 +; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v3, v0 +; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc ; GFX8-NEXT: v_mul_lo_u32 v2, s9, v0 ; GFX8-NEXT: v_mul_lo_u32 v3, s8, v1 -; GFX8-NEXT: v_mul_hi_u32 v5, s8, v0 +; GFX8-NEXT: v_mul_hi_u32 v4, s8, v0 ; GFX8-NEXT: v_mul_hi_u32 v0, s9, v0 -; GFX8-NEXT: v_mov_b32_e32 v4, s9 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 ; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v4 ; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v5, s9, v1 +; GFX8-NEXT: v_mul_lo_u32 v4, s9, v1 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 ; GFX8-NEXT: v_mul_hi_u32 v3, s8, v1 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v5, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v4, v0 +; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v3 ; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v5, v3 -; GFX8-NEXT: v_mul_hi_u32 v1, s9, v1 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 +; GFX8-NEXT: v_add_u32_e32 v3, vcc, v4, v3 +; GFX8-NEXT: v_add_u32_e32 v5, vcc, v0, v2 +; GFX8-NEXT: v_mul_hi_u32 v4, s9, v1 +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s12, v5, 0 ; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 -; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 -; GFX8-NEXT: v_mul_lo_u32 v2, s13, v0 -; GFX8-NEXT: v_mul_lo_u32 v3, s12, v1 -; GFX8-NEXT: v_mul_hi_u32 v7, s12, v0 -; GFX8-NEXT: v_mul_lo_u32 v5, s12, v0 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v7 -; GFX8-NEXT: v_sub_u32_e32 v3, vcc, s8, v5 -; GFX8-NEXT: v_subb_u32_e64 v5, s[0:1], v4, v2, vcc -; GFX8-NEXT: v_sub_u32_e64 v2, s[0:1], s9, v2 -; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s13, v5 -; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[0:1] -; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s12, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[0:1] -; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s13, v5 -; GFX8-NEXT: v_subb_u32_e32 v2, vcc, v2, v6, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v7, s[0:1] -; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, s12, v3 -; GFX8-NEXT: v_subbrev_u32_e64 v8, s[0:1], 0, v2, vcc -; GFX8-NEXT: v_add_u32_e64 v9, s[0:1], 1, v0 -; GFX8-NEXT: v_addc_u32_e64 v10, s[0:1], 0, v1, s[0:1] +; GFX8-NEXT: v_add_u32_e32 v6, vcc, v4, v2 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s12, v6, v[1:2] +; GFX8-NEXT: v_mov_b32_e32 v4, s9 +; GFX8-NEXT: v_sub_u32_e32 v7, vcc, s8, v0 +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s13, v5, v[1:2] +; GFX8-NEXT: v_mov_b32_e32 v3, s13 +; GFX8-NEXT: v_subb_u32_e64 v8, s[0:1], v4, v1, vcc +; GFX8-NEXT: v_sub_u32_e64 v0, s[0:1], s9, v1 ; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s13, v8 -; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[0:1] ; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s12, v7 -; GFX8-NEXT: v_subb_u32_e32 v2, vcc, v2, v6, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[0:1] ; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s13, v8 -; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, s12, v7 -; GFX8-NEXT: v_cndmask_b32_e64 v11, v11, v12, s[0:1] -; GFX8-NEXT: v_add_u32_e64 v12, s[0:1], 1, v9 -; GFX8-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v2, vcc -; GFX8-NEXT: v_addc_u32_e64 v13, s[0:1], 0, v10, s[0:1] -; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 -; GFX8-NEXT: v_cndmask_b32_e32 v10, v10, v13, vcc -; GFX8-NEXT: v_cvt_f32_u32_e32 v13, s15 -; GFX8-NEXT: v_cndmask_b32_e32 v9, v9, v12, vcc -; GFX8-NEXT: v_cvt_f32_u32_e32 v12, s14 -; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; GFX8-NEXT: v_mul_f32_e32 v4, 0x4f800000, v13 -; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc -; GFX8-NEXT: v_add_f32_e32 v4, v4, v12 -; GFX8-NEXT: v_rcp_iflag_f32_e32 v9, v4 -; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v11 -; GFX8-NEXT: v_cndmask_b32_e64 v4, v7, v6, s[0:1] -; GFX8-NEXT: v_cndmask_b32_e32 v4, v3, v4, vcc -; GFX8-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v9 -; GFX8-NEXT: v_mul_f32_e32 v6, 0x2f800000, v3 -; GFX8-NEXT: v_trunc_f32_e32 v6, v6 -; GFX8-NEXT: v_mul_f32_e32 v7, 0xcf800000, v6 -; GFX8-NEXT: v_add_f32_e32 v3, v7, v3 -; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v3 -; GFX8-NEXT: v_cvt_u32_f32_e32 v6, v6 -; GFX8-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[0:1] -; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v10, vcc -; GFX8-NEXT: v_mul_lo_u32 v7, s3, v3 -; GFX8-NEXT: v_mul_lo_u32 v8, s2, v6 -; GFX8-NEXT: v_mul_hi_u32 v10, s2, v3 -; GFX8-NEXT: v_mul_lo_u32 v9, s2, v3 -; GFX8-NEXT: v_cndmask_b32_e32 v5, v5, v2, vcc -; GFX8-NEXT: v_add_u32_e64 v7, s[0:1], v7, v8 -; GFX8-NEXT: v_add_u32_e64 v7, s[0:1], v7, v10 -; GFX8-NEXT: v_mul_lo_u32 v8, v6, v9 -; GFX8-NEXT: v_mul_lo_u32 v10, v3, v7 -; GFX8-NEXT: v_mul_hi_u32 v2, v3, v9 -; GFX8-NEXT: v_mul_hi_u32 v9, v6, v9 -; GFX8-NEXT: v_add_u32_e32 v8, vcc, v8, v10 -; GFX8-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v8, v2 +; GFX8-NEXT: v_cndmask_b32_e64 v9, v1, v2, s[0:1] +; GFX8-NEXT: v_cvt_f32_u32_e32 v1, s15 +; GFX8-NEXT: v_cvt_f32_u32_e32 v2, s14 +; GFX8-NEXT: v_subb_u32_e32 v4, vcc, v0, v3, vcc +; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f800000, v1 +; GFX8-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX8-NEXT: v_subrev_u32_e32 v10, vcc, s12, v7 +; GFX8-NEXT: v_subbrev_u32_e64 v11, s[0:1], 0, v4, vcc +; GFX8-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 +; GFX8-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 +; GFX8-NEXT: v_trunc_f32_e32 v2, v1 +; GFX8-NEXT: v_mul_f32_e32 v1, 0xcf800000, v2 +; GFX8-NEXT: v_add_f32_e32 v0, v1, v0 +; GFX8-NEXT: v_cvt_u32_f32_e32 v12, v0 +; GFX8-NEXT: v_add_u32_e64 v13, s[0:1], 1, v5 +; GFX8-NEXT: v_addc_u32_e64 v14, s[0:1], 0, v6, s[0:1] +; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v12, 0 +; GFX8-NEXT: v_cvt_u32_f32_e32 v15, v2 +; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s13, v11 +; GFX8-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[0:1] +; GFX8-NEXT: v_subb_u32_e32 v3, vcc, v4, v3, vcc +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v15, v[1:2] +; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s12, v10 +; GFX8-NEXT: v_cndmask_b32_e64 v17, 0, -1, s[0:1] +; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v12, v[1:2] +; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s13, v11 +; GFX8-NEXT: v_cndmask_b32_e64 v16, v16, v17, s[0:1] +; GFX8-NEXT: v_mul_lo_u32 v2, v15, v0 +; GFX8-NEXT: v_mul_lo_u32 v17, v12, v1 +; GFX8-NEXT: v_mul_hi_u32 v4, v12, v0 +; GFX8-NEXT: v_mul_hi_u32 v0, v15, v0 +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v17 +; GFX8-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v4 ; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v8, v6, v7 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v10, v2 -; GFX8-NEXT: v_mul_hi_u32 v10, v3, v7 -; GFX8-NEXT: v_add_u32_e32 v8, vcc, v8, v9 -; GFX8-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v8, vcc, v8, v10 -; GFX8-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v9, vcc, v9, v10 -; GFX8-NEXT: v_mul_hi_u32 v7, v6, v7 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v8, v2 -; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v8, vcc, v9, v8 -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v8 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 -; GFX8-NEXT: v_addc_u32_e32 v3, vcc, v6, v7, vcc -; GFX8-NEXT: v_mul_lo_u32 v6, s3, v2 -; GFX8-NEXT: v_mul_lo_u32 v7, s2, v3 -; GFX8-NEXT: v_mul_hi_u32 v9, s2, v2 -; GFX8-NEXT: v_mul_lo_u32 v8, s2, v2 -; GFX8-NEXT: v_mov_b32_e32 v10, s15 -; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v7 -; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v9 -; GFX8-NEXT: v_mul_lo_u32 v7, v3, v8 -; GFX8-NEXT: v_mul_lo_u32 v9, v2, v6 -; GFX8-NEXT: v_mul_hi_u32 v11, v2, v8 -; GFX8-NEXT: v_mul_hi_u32 v8, v3, v8 -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v9 -; GFX8-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v11 -; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v11, v3, v6 -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v9, v7 -; GFX8-NEXT: v_mul_hi_u32 v9, v2, v6 -; GFX8-NEXT: v_add_u32_e32 v8, vcc, v11, v8 -; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v8, vcc, v8, v9 -; GFX8-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v9, vcc, v11, v9 -; GFX8-NEXT: v_mul_hi_u32 v6, v3, v6 -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v8, v7 -; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v8, vcc, v9, v8 -; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v8 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v7 -; GFX8-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc +; GFX8-NEXT: v_mul_lo_u32 v4, v15, v1 +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v17, v2 +; GFX8-NEXT: v_mul_hi_u32 v17, v12, v1 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v4, v0 +; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v17 +; GFX8-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v17 +; GFX8-NEXT: v_add_u32_e32 v17, vcc, 1, v13 +; GFX8-NEXT: v_addc_u32_e32 v18, vcc, 0, v14, vcc +; GFX8-NEXT: v_subrev_u32_e32 v19, vcc, s12, v10 +; GFX8-NEXT: v_mul_hi_u32 v1, v15, v1 +; GFX8-NEXT: v_subbrev_u32_e32 v20, vcc, 0, v3, vcc +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v4, v2 +; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 +; GFX8-NEXT: v_add_u32_e32 v12, vcc, v12, v0 +; GFX8-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s2, v12, 0 +; GFX8-NEXT: v_addc_u32_e32 v15, vcc, v15, v1, vcc +; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 +; GFX8-NEXT: v_cndmask_b32_e32 v1, v13, v17, vcc +; GFX8-NEXT: v_mov_b32_e32 v0, v3 +; GFX8-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s2, v15, v[0:1] +; GFX8-NEXT: v_cndmask_b32_e32 v13, v14, v18, vcc +; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; GFX8-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s3, v12, v[3:4] +; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v16 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v5, v1, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v6, v13, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v4, v10, v19, s[0:1] +; GFX8-NEXT: v_mul_lo_u32 v6, v15, v2 +; GFX8-NEXT: v_mul_lo_u32 v9, v12, v3 +; GFX8-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc +; GFX8-NEXT: v_mul_hi_u32 v7, v12, v2 +; GFX8-NEXT: v_cndmask_b32_e64 v5, v11, v20, s[0:1] +; GFX8-NEXT: v_add_u32_e64 v6, s[0:1], v6, v9 +; GFX8-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[0:1] +; GFX8-NEXT: v_add_u32_e64 v6, s[0:1], v6, v7 +; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[0:1] +; GFX8-NEXT: v_mul_lo_u32 v7, v15, v3 +; GFX8-NEXT: v_mul_hi_u32 v2, v15, v2 +; GFX8-NEXT: v_add_u32_e64 v6, s[0:1], v9, v6 +; GFX8-NEXT: v_mul_hi_u32 v9, v12, v3 +; GFX8-NEXT: v_add_u32_e64 v2, s[0:1], v7, v2 +; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[0:1] +; GFX8-NEXT: v_add_u32_e64 v2, s[0:1], v2, v9 +; GFX8-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[0:1] +; GFX8-NEXT: v_add_u32_e64 v7, s[0:1], v7, v9 +; GFX8-NEXT: v_mul_hi_u32 v3, v15, v3 +; GFX8-NEXT: v_add_u32_e64 v2, s[0:1], v2, v6 +; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[0:1] +; GFX8-NEXT: v_add_u32_e64 v6, s[0:1], v7, v6 +; GFX8-NEXT: v_add_u32_e64 v3, s[0:1], v3, v6 +; GFX8-NEXT: v_add_u32_e64 v2, s[0:1], v12, v2 +; GFX8-NEXT: v_addc_u32_e64 v3, s[0:1], v15, v3, s[0:1] ; GFX8-NEXT: v_mul_lo_u32 v6, s11, v2 ; GFX8-NEXT: v_mul_lo_u32 v7, s10, v3 -; GFX8-NEXT: v_mul_hi_u32 v9, s10, v2 +; GFX8-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc +; GFX8-NEXT: v_mul_hi_u32 v8, s10, v2 ; GFX8-NEXT: v_mul_hi_u32 v2, s11, v2 -; GFX8-NEXT: v_mov_b32_e32 v8, s11 ; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v7 ; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v9 +; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v8 ; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX8-NEXT: v_mul_lo_u32 v9, s11, v3 +; GFX8-NEXT: v_mul_lo_u32 v8, s11, v3 ; GFX8-NEXT: v_add_u32_e32 v6, vcc, v7, v6 ; GFX8-NEXT: v_mul_hi_u32 v7, s10, v3 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v9, v2 -; GFX8-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v8, v2 +; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v7 ; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v9, v7 -; GFX8-NEXT: v_mul_hi_u32 v3, s11, v3 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6 +; GFX8-NEXT: v_add_u32_e32 v7, vcc, v8, v7 +; GFX8-NEXT: v_add_u32_e32 v8, vcc, v2, v6 +; GFX8-NEXT: v_mul_hi_u32 v9, s11, v3 +; GFX8-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s14, v8, 0 ; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GFX8-NEXT: v_add_u32_e32 v6, vcc, v7, v6 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v6 -; GFX8-NEXT: v_mul_lo_u32 v6, s15, v2 -; GFX8-NEXT: v_mul_lo_u32 v7, s14, v3 -; GFX8-NEXT: v_mul_hi_u32 v11, s14, v2 -; GFX8-NEXT: v_mul_lo_u32 v9, s14, v2 -; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v7 -; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v11 -; GFX8-NEXT: v_sub_u32_e32 v7, vcc, s10, v9 -; GFX8-NEXT: v_subb_u32_e64 v8, s[0:1], v8, v6, vcc -; GFX8-NEXT: v_sub_u32_e64 v6, s[0:1], s11, v6 -; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s15, v8 -; GFX8-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[0:1] +; GFX8-NEXT: v_add_u32_e32 v9, vcc, v9, v6 +; GFX8-NEXT: v_mad_u64_u32 v[6:7], s[0:1], s14, v9, v[3:4] +; GFX8-NEXT: v_mov_b32_e32 v10, s11 +; GFX8-NEXT: v_mov_b32_e32 v3, s15 +; GFX8-NEXT: v_mad_u64_u32 v[6:7], s[0:1], s15, v8, v[6:7] +; GFX8-NEXT: v_sub_u32_e32 v7, vcc, s10, v2 +; GFX8-NEXT: v_subb_u32_e64 v10, s[0:1], v10, v6, vcc +; GFX8-NEXT: v_sub_u32_e64 v2, s[0:1], s11, v6 +; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s15, v10 +; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[0:1] ; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s14, v7 ; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[0:1] -; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s15, v8 -; GFX8-NEXT: v_subb_u32_e32 v6, vcc, v6, v10, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v9, v9, v11, s[0:1] +; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s15, v10 +; GFX8-NEXT: v_subb_u32_e32 v2, vcc, v2, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v11, s[0:1] ; GFX8-NEXT: v_subrev_u32_e32 v11, vcc, s14, v7 -; GFX8-NEXT: v_subbrev_u32_e64 v12, s[0:1], 0, v6, vcc +; GFX8-NEXT: v_subbrev_u32_e64 v12, s[0:1], 0, v2, vcc ; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s15, v12 ; GFX8-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[0:1] ; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s14, v11 ; GFX8-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[0:1] ; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s15, v12 ; GFX8-NEXT: v_cndmask_b32_e64 v13, v13, v14, s[0:1] -; GFX8-NEXT: v_add_u32_e64 v14, s[0:1], 1, v2 -; GFX8-NEXT: v_subb_u32_e32 v6, vcc, v6, v10, vcc -; GFX8-NEXT: v_addc_u32_e64 v15, s[0:1], 0, v3, s[0:1] -; GFX8-NEXT: v_add_u32_e32 v10, vcc, 1, v14 +; GFX8-NEXT: v_add_u32_e64 v14, s[0:1], 1, v8 +; GFX8-NEXT: v_subb_u32_e32 v2, vcc, v2, v3, vcc +; GFX8-NEXT: v_addc_u32_e64 v15, s[0:1], 0, v9, s[0:1] +; GFX8-NEXT: v_add_u32_e32 v3, vcc, 1, v14 ; GFX8-NEXT: v_addc_u32_e32 v16, vcc, 0, v15, vcc ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 ; GFX8-NEXT: v_subrev_u32_e64 v13, s[0:1], s14, v11 -; GFX8-NEXT: v_subbrev_u32_e64 v6, s[0:1], 0, v6, s[0:1] -; GFX8-NEXT: v_cndmask_b32_e32 v10, v14, v10, vcc -; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v9 -; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[0:1] -; GFX8-NEXT: v_cndmask_b32_e32 v9, v11, v13, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v10, v12, v6, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v14, v15, v16, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v6, v7, v9, s[0:1] -; GFX8-NEXT: v_cndmask_b32_e64 v7, v8, v10, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e32 v3, v14, v3, vcc +; GFX8-NEXT: v_subbrev_u32_e64 v14, s[0:1], 0, v2, s[0:1] +; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v6 +; GFX8-NEXT: v_cndmask_b32_e32 v15, v15, v16, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v2, v8, v3, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e32 v6, v11, v13, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v8, v12, v14, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v3, v9, v15, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e64 v6, v7, v6, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e64 v7, v10, v8, s[0:1] ; GFX8-NEXT: v_mov_b32_e32 v9, s5 -; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v14, s[0:1] ; GFX8-NEXT: v_mov_b32_e32 v8, s4 ; GFX8-NEXT: flat_store_dwordx4 v[8:9], v[0:3] ; GFX8-NEXT: s_nop 0 @@ -1302,83 +1267,76 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s13 ; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s12 -; GFX9-NEXT: s_sub_u32 s0, 0, s12 -; GFX9-NEXT: s_subb_u32 s1, 0, s13 +; GFX9-NEXT: s_sub_u32 s2, 0, s12 +; GFX9-NEXT: s_subb_u32 s3, 0, s13 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 ; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX9-NEXT: v_cvt_f32_u32_e32 v14, s15 -; GFX9-NEXT: s_sub_u32 s2, 0, s14 -; GFX9-NEXT: s_subb_u32 s3, 0, s15 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX9-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 -; GFX9-NEXT: v_trunc_f32_e32 v1, v1 -; GFX9-NEXT: v_mul_f32_e32 v2, 0xcf800000, v1 -; GFX9-NEXT: v_add_f32_e32 v0, v2, v0 -; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX9-NEXT: v_mul_f32_e32 v14, 0x4f800000, v14 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 -; GFX9-NEXT: v_mul_lo_u32 v2, s0, v1 -; GFX9-NEXT: v_mul_lo_u32 v3, s1, v0 -; GFX9-NEXT: v_mul_hi_u32 v4, s0, v0 -; GFX9-NEXT: v_mul_lo_u32 v5, s0, v0 -; GFX9-NEXT: v_add3_u32 v2, v3, v2, v4 -; GFX9-NEXT: v_mul_lo_u32 v3, v1, v5 -; GFX9-NEXT: v_mul_lo_u32 v6, v0, v2 -; GFX9-NEXT: v_mul_hi_u32 v4, v0, v5 -; GFX9-NEXT: v_mul_hi_u32 v5, v1, v5 -; GFX9-NEXT: v_mul_lo_u32 v7, v1, v2 -; GFX9-NEXT: v_mul_hi_u32 v8, v0, v2 -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v6 +; GFX9-NEXT: v_trunc_f32_e32 v2, v1 +; GFX9-NEXT: v_mul_f32_e32 v1, 0xcf800000, v2 +; GFX9-NEXT: v_add_f32_e32 v0, v1, v0 +; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v0 +; GFX9-NEXT: v_cvt_u32_f32_e32 v4, v2 +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2] +; GFX9-NEXT: v_mul_hi_u32 v5, v3, v0 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2] +; GFX9-NEXT: v_mul_lo_u32 v2, v4, v0 +; GFX9-NEXT: v_mul_hi_u32 v0, v4, v0 +; GFX9-NEXT: v_mul_lo_u32 v6, v3, v1 +; GFX9-NEXT: v_mul_lo_u32 v7, v4, v1 +; GFX9-NEXT: v_mul_hi_u32 v8, v3, v1 +; GFX9-NEXT: v_mul_hi_u32 v1, v4, v1 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6 ; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v7, v5 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v7, v0 ; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v4 -; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX9-NEXT: v_mul_hi_u32 v2, v1, v2 -; GFX9-NEXT: v_add_u32_e32 v3, v6, v3 -; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v5, v8 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v8 +; GFX9-NEXT: v_add_u32_e32 v2, v6, v2 ; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v4, v3 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 ; GFX9-NEXT: v_add_u32_e32 v5, v7, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v2, v5, v4, v2 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3 -; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc -; GFX9-NEXT: v_mul_lo_u32 v2, s1, v0 -; GFX9-NEXT: v_mul_lo_u32 v3, s0, v1 -; GFX9-NEXT: v_mul_hi_u32 v4, s0, v0 -; GFX9-NEXT: v_mul_lo_u32 v5, s0, v0 -; GFX9-NEXT: v_mov_b32_e32 v7, s9 -; GFX9-NEXT: v_add3_u32 v2, v2, v3, v4 -; GFX9-NEXT: v_mul_lo_u32 v3, v1, v5 -; GFX9-NEXT: v_mul_lo_u32 v4, v0, v2 -; GFX9-NEXT: v_mul_hi_u32 v6, v0, v5 -; GFX9-NEXT: v_mul_hi_u32 v5, v1, v5 -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v4 -; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v6 -; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX9-NEXT: v_mul_lo_u32 v6, v1, v2 -; GFX9-NEXT: v_add_u32_e32 v3, v4, v3 -; GFX9-NEXT: v_mul_hi_u32 v4, v0, v2 -; GFX9-NEXT: v_mul_hi_u32 v2, v1, v2 -; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v6, v5 +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX9-NEXT: v_add3_u32 v1, v5, v2, v1 +; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v4, v1, vcc +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2] +; GFX9-NEXT: v_mul_hi_u32 v6, v3, v0 +; GFX9-NEXT: s_sub_u32 s2, 0, s14 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2] +; GFX9-NEXT: v_mul_lo_u32 v2, v4, v0 +; GFX9-NEXT: v_mul_hi_u32 v0, v4, v0 +; GFX9-NEXT: v_mul_lo_u32 v5, v3, v1 +; GFX9-NEXT: s_subb_u32 s3, 0, s15 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 +; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6 +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX9-NEXT: v_mul_lo_u32 v6, v4, v1 +; GFX9-NEXT: v_add_u32_e32 v2, v5, v2 +; GFX9-NEXT: v_mul_hi_u32 v5, v3, v1 +; GFX9-NEXT: v_mul_hi_u32 v1, v4, v1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v6, v0 ; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v5, v4 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v5 ; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v4, v3 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 ; GFX9-NEXT: v_add_u32_e32 v5, v6, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v2, v5, v4, v2 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3 -; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX9-NEXT: v_add3_u32 v1, v5, v2, v1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v3, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v4, v1, vcc ; GFX9-NEXT: v_mul_lo_u32 v2, s9, v0 ; GFX9-NEXT: v_mul_lo_u32 v3, s8, v1 ; GFX9-NEXT: v_mul_hi_u32 v4, s8, v0 ; GFX9-NEXT: v_mul_hi_u32 v0, s9, v0 -; GFX9-NEXT: v_mov_b32_e32 v5, s13 +; GFX9-NEXT: v_mul_hi_u32 v6, s9, v1 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3 ; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 @@ -1386,183 +1344,179 @@ ; GFX9-NEXT: v_mul_lo_u32 v4, s9, v1 ; GFX9-NEXT: v_add_u32_e32 v2, v3, v2 ; GFX9-NEXT: v_mul_hi_u32 v3, s8, v1 -; GFX9-NEXT: v_mul_hi_u32 v1, s9, v1 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v4, v0 ; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3 ; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-NEXT: v_add_u32_e32 v3, v4, v3 +; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v0, v2 +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s12, v5, 0 ; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v1, v3, v2, v1 -; GFX9-NEXT: v_mul_lo_u32 v2, s13, v0 -; GFX9-NEXT: v_mul_lo_u32 v3, s12, v1 -; GFX9-NEXT: v_mul_hi_u32 v4, s12, v0 -; GFX9-NEXT: v_mul_lo_u32 v6, s12, v0 -; GFX9-NEXT: v_add3_u32 v2, v2, v3, v4 -; GFX9-NEXT: v_sub_co_u32_e32 v3, vcc, s8, v6 -; GFX9-NEXT: v_subb_co_u32_e64 v6, s[0:1], v7, v2, vcc -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s13, v6 -; GFX9-NEXT: v_sub_u32_e32 v2, s9, v2 -; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s12, v3 -; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s13, v6 -; GFX9-NEXT: v_subb_co_u32_e32 v2, vcc, v2, v5, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, v7, s[0:1] -; GFX9-NEXT: v_subrev_co_u32_e32 v7, vcc, s12, v3 -; GFX9-NEXT: v_subbrev_co_u32_e64 v8, s[0:1], 0, v2, vcc -; GFX9-NEXT: v_subb_co_u32_e32 v2, vcc, v2, v5, vcc -; GFX9-NEXT: v_cvt_f32_u32_e32 v5, s14 -; GFX9-NEXT: v_add_co_u32_e64 v9, s[0:1], 1, v0 -; GFX9-NEXT: v_addc_co_u32_e64 v10, s[0:1], 0, v1, s[0:1] -; GFX9-NEXT: v_add_f32_e32 v5, v14, v5 +; GFX9-NEXT: v_add_u32_e32 v3, v4, v3 +; GFX9-NEXT: v_add3_u32 v6, v3, v2, v6 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s12, v6, v[1:2] +; GFX9-NEXT: v_mov_b32_e32 v4, s9 +; GFX9-NEXT: v_sub_co_u32_e32 v7, vcc, s8, v0 +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s13, v5, v[1:2] +; GFX9-NEXT: v_mov_b32_e32 v3, s13 +; GFX9-NEXT: v_subb_co_u32_e64 v8, s[0:1], v4, v1, vcc ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s13, v8 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v5, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[0:1] +; GFX9-NEXT: v_sub_u32_e32 v0, s9, v1 +; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[0:1] ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s12, v7 -; GFX9-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[0:1] ; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s13, v8 -; GFX9-NEXT: v_subrev_co_u32_e32 v15, vcc, s12, v7 -; GFX9-NEXT: v_cndmask_b32_e64 v11, v11, v12, s[0:1] -; GFX9-NEXT: v_subbrev_co_u32_e32 v2, vcc, 0, v2, vcc -; GFX9-NEXT: v_add_co_u32_e64 v12, s[0:1], 1, v9 -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 -; GFX9-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 -; GFX9-NEXT: v_cndmask_b32_e32 v9, v9, v12, vcc -; GFX9-NEXT: v_mul_f32_e32 v12, 0x2f800000, v5 -; GFX9-NEXT: v_addc_co_u32_e64 v13, s[0:1], 0, v10, s[0:1] -; GFX9-NEXT: v_trunc_f32_e32 v12, v12 -; GFX9-NEXT: v_cndmask_b32_e32 v10, v10, v13, vcc -; GFX9-NEXT: v_mul_f32_e32 v13, 0xcf800000, v12 -; GFX9-NEXT: v_add_f32_e32 v5, v13, v5 -; GFX9-NEXT: v_cvt_u32_f32_e32 v5, v5 -; GFX9-NEXT: v_cvt_u32_f32_e32 v12, v12 -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc -; GFX9-NEXT: v_mul_lo_u32 v13, s3, v5 -; GFX9-NEXT: v_mul_lo_u32 v14, s2, v12 -; GFX9-NEXT: v_mul_hi_u32 v16, s2, v5 -; GFX9-NEXT: v_mul_lo_u32 v17, s2, v5 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v10, vcc -; GFX9-NEXT: v_add3_u32 v4, v13, v14, v16 -; GFX9-NEXT: v_mul_lo_u32 v9, v12, v17 -; GFX9-NEXT: v_mul_lo_u32 v13, v5, v4 -; GFX9-NEXT: v_mul_hi_u32 v10, v5, v17 -; GFX9-NEXT: v_mul_hi_u32 v14, v12, v17 -; GFX9-NEXT: v_add_co_u32_e64 v9, s[0:1], v9, v13 -; GFX9-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[0:1] -; GFX9-NEXT: v_add_co_u32_e64 v9, s[0:1], v9, v10 -; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[0:1] -; GFX9-NEXT: v_mul_lo_u32 v10, v12, v4 -; GFX9-NEXT: v_add_u32_e32 v9, v13, v9 -; GFX9-NEXT: v_mul_hi_u32 v13, v5, v4 -; GFX9-NEXT: v_mul_hi_u32 v4, v12, v4 -; GFX9-NEXT: v_add_co_u32_e64 v10, s[0:1], v10, v14 -; GFX9-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[0:1] -; GFX9-NEXT: v_add_co_u32_e64 v10, s[0:1], v10, v13 -; GFX9-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[0:1] -; GFX9-NEXT: v_add_co_u32_e64 v9, s[0:1], v10, v9 -; GFX9-NEXT: v_add_u32_e32 v13, v14, v13 -; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[0:1] -; GFX9-NEXT: v_add3_u32 v4, v13, v10, v4 -; GFX9-NEXT: v_add_co_u32_e64 v5, s[0:1], v5, v9 -; GFX9-NEXT: v_addc_co_u32_e64 v9, s[0:1], v12, v4, s[0:1] -; GFX9-NEXT: v_mul_lo_u32 v4, s3, v5 -; GFX9-NEXT: v_mul_lo_u32 v12, s2, v9 -; GFX9-NEXT: v_mul_hi_u32 v13, s2, v5 -; GFX9-NEXT: v_mul_lo_u32 v10, s2, v5 -; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v11 -; GFX9-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[0:1] -; GFX9-NEXT: v_add3_u32 v8, v4, v12, v13 -; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v15, s[0:1] -; GFX9-NEXT: v_mul_lo_u32 v11, v9, v10 -; GFX9-NEXT: v_mul_lo_u32 v12, v5, v8 -; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v7, vcc -; GFX9-NEXT: v_mul_hi_u32 v3, v5, v10 -; GFX9-NEXT: v_mul_hi_u32 v10, v9, v10 -; GFX9-NEXT: v_add_co_u32_e64 v7, s[0:1], v11, v12 -; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[0:1] -; GFX9-NEXT: v_add_co_u32_e64 v3, s[0:1], v7, v3 -; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[0:1] -; GFX9-NEXT: v_mul_lo_u32 v7, v9, v8 -; GFX9-NEXT: v_add_u32_e32 v3, v11, v3 -; GFX9-NEXT: v_mul_hi_u32 v11, v5, v8 -; GFX9-NEXT: v_mul_hi_u32 v8, v9, v8 -; GFX9-NEXT: v_add_co_u32_e64 v7, s[0:1], v7, v10 +; GFX9-NEXT: v_cndmask_b32_e64 v9, v1, v2, s[0:1] +; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s15 +; GFX9-NEXT: v_cvt_f32_u32_e32 v2, s14 +; GFX9-NEXT: v_subb_co_u32_e32 v4, vcc, v0, v3, vcc +; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f800000, v1 +; GFX9-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX9-NEXT: v_subrev_co_u32_e32 v10, vcc, s12, v7 +; GFX9-NEXT: v_subbrev_co_u32_e64 v11, s[0:1], 0, v4, vcc +; GFX9-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 +; GFX9-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 +; GFX9-NEXT: v_trunc_f32_e32 v2, v1 +; GFX9-NEXT: v_mul_f32_e32 v1, 0xcf800000, v2 +; GFX9-NEXT: v_add_f32_e32 v0, v1, v0 +; GFX9-NEXT: v_cvt_u32_f32_e32 v12, v0 +; GFX9-NEXT: v_add_co_u32_e64 v13, s[0:1], 1, v5 +; GFX9-NEXT: v_addc_co_u32_e64 v14, s[0:1], 0, v6, s[0:1] +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v12, 0 +; GFX9-NEXT: v_cvt_u32_f32_e32 v15, v2 +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s13, v11 +; GFX9-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[0:1] +; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v4, v3, vcc +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v15, v[1:2] +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s12, v10 +; GFX9-NEXT: v_cndmask_b32_e64 v17, 0, -1, s[0:1] +; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v12, v[1:2] +; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s13, v11 +; GFX9-NEXT: v_cndmask_b32_e64 v16, v16, v17, s[0:1] +; GFX9-NEXT: v_mul_lo_u32 v2, v15, v0 +; GFX9-NEXT: v_mul_lo_u32 v17, v12, v1 +; GFX9-NEXT: v_mul_hi_u32 v4, v12, v0 +; GFX9-NEXT: v_mul_hi_u32 v0, v15, v0 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v17 +; GFX9-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX9-NEXT: v_mul_lo_u32 v4, v15, v1 +; GFX9-NEXT: v_add_u32_e32 v2, v17, v2 +; GFX9-NEXT: v_mul_hi_u32 v17, v12, v1 +; GFX9-NEXT: v_mul_hi_u32 v1, v15, v1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v4, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v17 +; GFX9-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; GFX9-NEXT: v_add_u32_e32 v4, v4, v17 +; GFX9-NEXT: v_add_co_u32_e32 v17, vcc, 1, v13 +; GFX9-NEXT: v_addc_co_u32_e32 v18, vcc, 0, v14, vcc +; GFX9-NEXT: v_subrev_co_u32_e32 v19, vcc, s12, v10 +; GFX9-NEXT: v_subbrev_co_u32_e32 v20, vcc, 0, v3, vcc +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX9-NEXT: v_add_co_u32_e32 v12, vcc, v12, v0 +; GFX9-NEXT: v_add3_u32 v1, v4, v2, v1 +; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s2, v12, 0 +; GFX9-NEXT: v_addc_co_u32_e32 v15, vcc, v15, v1, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v13, v17, vcc +; GFX9-NEXT: v_mov_b32_e32 v0, v3 +; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s2, v15, v[0:1] +; GFX9-NEXT: v_cndmask_b32_e32 v13, v14, v18, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s3, v12, v[3:4] +; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v13, vcc +; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v16 +; GFX9-NEXT: v_mul_lo_u32 v5, v15, v2 +; GFX9-NEXT: v_mul_lo_u32 v6, v12, v3 +; GFX9-NEXT: v_cndmask_b32_e64 v4, v10, v19, s[0:1] +; GFX9-NEXT: v_mul_hi_u32 v10, v12, v2 +; GFX9-NEXT: v_cndmask_b32_e64 v9, v11, v20, s[0:1] +; GFX9-NEXT: v_add_co_u32_e64 v5, s[0:1], v5, v6 +; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[0:1] +; GFX9-NEXT: v_add_co_u32_e64 v5, s[0:1], v5, v10 +; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[0:1] +; GFX9-NEXT: v_mul_lo_u32 v10, v15, v3 +; GFX9-NEXT: v_mul_hi_u32 v2, v15, v2 +; GFX9-NEXT: v_add_u32_e32 v5, v6, v5 +; GFX9-NEXT: v_mul_hi_u32 v6, v12, v3 +; GFX9-NEXT: v_mul_hi_u32 v3, v15, v3 +; GFX9-NEXT: v_add_co_u32_e64 v2, s[0:1], v10, v2 ; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[0:1] -; GFX9-NEXT: v_add_co_u32_e64 v7, s[0:1], v7, v11 -; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[0:1] -; GFX9-NEXT: v_add_co_u32_e64 v3, s[0:1], v7, v3 -; GFX9-NEXT: v_add_u32_e32 v10, v10, v11 +; GFX9-NEXT: v_add_co_u32_e64 v2, s[0:1], v2, v6 +; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[0:1] +; GFX9-NEXT: v_add_co_u32_e64 v2, s[0:1], v2, v5 +; GFX9-NEXT: v_add_u32_e32 v6, v10, v6 +; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[0:1] +; GFX9-NEXT: v_add3_u32 v3, v6, v5, v3 +; GFX9-NEXT: v_add_co_u32_e64 v2, s[0:1], v12, v2 +; GFX9-NEXT: v_addc_co_u32_e64 v3, s[0:1], v15, v3, s[0:1] +; GFX9-NEXT: v_mul_lo_u32 v5, s11, v2 +; GFX9-NEXT: v_mul_lo_u32 v6, s10, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc +; GFX9-NEXT: v_mul_hi_u32 v7, s10, v2 +; GFX9-NEXT: v_mul_hi_u32 v2, s11, v2 +; GFX9-NEXT: v_add_co_u32_e64 v5, s[0:1], v5, v6 +; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[0:1] +; GFX9-NEXT: v_add_co_u32_e64 v5, s[0:1], v5, v7 +; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[0:1] +; GFX9-NEXT: v_mul_lo_u32 v7, s11, v3 +; GFX9-NEXT: v_add_u32_e32 v5, v6, v5 +; GFX9-NEXT: v_mul_hi_u32 v6, s10, v3 +; GFX9-NEXT: v_mul_hi_u32 v12, s11, v3 +; GFX9-NEXT: v_add_co_u32_e64 v2, s[0:1], v7, v2 ; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[0:1] -; GFX9-NEXT: v_add3_u32 v7, v10, v7, v8 -; GFX9-NEXT: v_add_co_u32_e64 v3, s[0:1], v5, v3 -; GFX9-NEXT: v_addc_co_u32_e64 v7, s[0:1], v9, v7, s[0:1] -; GFX9-NEXT: v_mul_lo_u32 v8, s11, v3 -; GFX9-NEXT: v_mul_lo_u32 v9, s10, v7 -; GFX9-NEXT: v_cndmask_b32_e32 v5, v6, v2, vcc -; GFX9-NEXT: v_mul_hi_u32 v2, s10, v3 -; GFX9-NEXT: v_mul_hi_u32 v3, s11, v3 -; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v8, v9 -; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v6, v2 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_mul_lo_u32 v6, s11, v7 -; GFX9-NEXT: v_add_u32_e32 v2, v8, v2 -; GFX9-NEXT: v_mul_hi_u32 v8, s10, v7 -; GFX9-NEXT: v_mul_hi_u32 v7, s11, v7 -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v6, v3 -; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v8 -; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v3, v2 -; GFX9-NEXT: v_add_u32_e32 v6, v6, v8 -; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GFX9-NEXT: v_add3_u32 v3, v6, v3, v7 -; GFX9-NEXT: v_mul_lo_u32 v6, s15, v2 -; GFX9-NEXT: v_mul_lo_u32 v7, s14, v3 -; GFX9-NEXT: v_mul_hi_u32 v8, s14, v2 -; GFX9-NEXT: v_mul_lo_u32 v10, s14, v2 -; GFX9-NEXT: v_mov_b32_e32 v11, s11 -; GFX9-NEXT: v_mov_b32_e32 v9, s15 -; GFX9-NEXT: v_add3_u32 v6, v6, v7, v8 -; GFX9-NEXT: v_sub_co_u32_e32 v7, vcc, s10, v10 -; GFX9-NEXT: v_subb_co_u32_e64 v8, s[0:1], v11, v6, vcc -; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s15, v8 -; GFX9-NEXT: v_sub_u32_e32 v6, s11, v6 -; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[0:1] +; GFX9-NEXT: v_add_co_u32_e64 v2, s[0:1], v2, v6 +; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[0:1] +; GFX9-NEXT: v_add_co_u32_e64 v10, s[0:1], v2, v5 +; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[0:1] +; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s14, v10, 0 +; GFX9-NEXT: v_add_u32_e32 v6, v7, v6 +; GFX9-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc +; GFX9-NEXT: v_add3_u32 v8, v6, v11, v12 +; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[0:1], s14, v8, v[3:4] +; GFX9-NEXT: v_mov_b32_e32 v9, s11 +; GFX9-NEXT: v_mov_b32_e32 v3, s15 +; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[0:1], s15, v10, v[6:7] +; GFX9-NEXT: v_sub_co_u32_e32 v7, vcc, s10, v2 +; GFX9-NEXT: v_subb_co_u32_e64 v9, s[0:1], v9, v6, vcc +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s15, v9 +; GFX9-NEXT: v_sub_u32_e32 v2, s11, v6 +; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[0:1] ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s14, v7 ; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s15, v8 -; GFX9-NEXT: v_subb_co_u32_e32 v6, vcc, v6, v9, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v10, v10, v11, s[0:1] +; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s15, v9 +; GFX9-NEXT: v_subb_co_u32_e32 v2, vcc, v2, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v11, s[0:1] ; GFX9-NEXT: v_subrev_co_u32_e32 v11, vcc, s14, v7 -; GFX9-NEXT: v_subbrev_co_u32_e64 v12, s[0:1], 0, v6, vcc +; GFX9-NEXT: v_subbrev_co_u32_e64 v12, s[0:1], 0, v2, vcc ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s15, v12 ; GFX9-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[0:1] ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s14, v11 ; GFX9-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[0:1] ; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s15, v12 ; GFX9-NEXT: v_cndmask_b32_e64 v13, v13, v14, s[0:1] -; GFX9-NEXT: v_add_co_u32_e64 v14, s[0:1], 1, v2 -; GFX9-NEXT: v_subb_co_u32_e32 v6, vcc, v6, v9, vcc -; GFX9-NEXT: v_addc_co_u32_e64 v15, s[0:1], 0, v3, s[0:1] -; GFX9-NEXT: v_add_co_u32_e32 v9, vcc, 1, v14 +; GFX9-NEXT: v_add_co_u32_e64 v14, s[0:1], 1, v10 +; GFX9-NEXT: v_subb_co_u32_e32 v2, vcc, v2, v3, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v15, s[0:1], 0, v8, s[0:1] +; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, 1, v14 ; GFX9-NEXT: v_addc_co_u32_e32 v16, vcc, 0, v15, vcc ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 -; GFX9-NEXT: v_cndmask_b32_e32 v9, v14, v9, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v3, v14, v3, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v14, v15, v16, vcc ; GFX9-NEXT: v_subrev_co_u32_e64 v15, s[0:1], s14, v11 -; GFX9-NEXT: v_subbrev_co_u32_e64 v6, s[0:1], 0, v6, s[0:1] -; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v10 +; GFX9-NEXT: v_subbrev_co_u32_e64 v16, s[0:1], 0, v2, s[0:1] +; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v6 ; GFX9-NEXT: v_mov_b32_e32 v13, 0 -; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v9, s[0:1] -; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v14, s[0:1] -; GFX9-NEXT: v_cndmask_b32_e32 v9, v11, v15, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v10, v12, v6, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v6, v7, v9, s[0:1] -; GFX9-NEXT: v_cndmask_b32_e64 v7, v8, v10, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v2, v10, v3, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v3, v8, v14, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e32 v6, v11, v15, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v8, v12, v16, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v6, v7, v6, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v7, v9, v8, s[0:1] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_dwordx4 v13, v[0:3], s[4:5] ; GFX9-NEXT: global_store_dwordx4 v13, v[4:7], s[6:7] @@ -1573,17 +1527,17 @@ ; GFX10-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x10 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s13 -; GFX10-NEXT: v_cvt_f32_u32_e32 v2, s15 -; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s12 +; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s15 +; GFX10-NEXT: v_cvt_f32_u32_e32 v2, s12 ; GFX10-NEXT: v_cvt_f32_u32_e32 v3, s14 ; GFX10-NEXT: s_sub_u32 s0, 0, s12 ; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 -; GFX10-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 +; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 ; GFX10-NEXT: s_subb_u32 s1, 0, s13 ; GFX10-NEXT: s_sub_u32 s2, 0, s14 ; GFX10-NEXT: s_subb_u32 s3, 0, s15 -; GFX10-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX10-NEXT: v_add_f32_e32 v1, v2, v3 +; GFX10-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX10-NEXT: v_add_f32_e32 v1, v1, v3 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v1, v1 ; GFX10-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 @@ -1594,246 +1548,240 @@ ; GFX10-NEXT: v_trunc_f32_e32 v3, v3 ; GFX10-NEXT: v_mul_f32_e32 v4, 0xcf800000, v2 ; GFX10-NEXT: v_mul_f32_e32 v5, 0xcf800000, v3 -; GFX10-NEXT: v_cvt_u32_f32_e32 v2, v2 -; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v3 +; GFX10-NEXT: v_cvt_u32_f32_e32 v6, v3 ; GFX10-NEXT: v_add_f32_e32 v0, v4, v0 ; GFX10-NEXT: v_add_f32_e32 v1, v5, v1 -; GFX10-NEXT: v_mul_lo_u32 v4, s0, v2 -; GFX10-NEXT: v_mul_lo_u32 v8, s2, v3 -; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX10-NEXT: v_mul_lo_u32 v5, s1, v0 -; GFX10-NEXT: v_mul_hi_u32 v6, s0, v0 -; GFX10-NEXT: v_mul_lo_u32 v9, s3, v1 -; GFX10-NEXT: v_mul_hi_u32 v10, s2, v1 -; GFX10-NEXT: v_mul_lo_u32 v7, s0, v0 -; GFX10-NEXT: v_mul_lo_u32 v11, s2, v1 -; GFX10-NEXT: v_add3_u32 v4, v5, v4, v6 -; GFX10-NEXT: v_add3_u32 v8, v9, v8, v10 -; GFX10-NEXT: v_mul_lo_u32 v5, v2, v7 -; GFX10-NEXT: v_mul_hi_u32 v6, v0, v7 -; GFX10-NEXT: v_mul_lo_u32 v12, v0, v4 -; GFX10-NEXT: v_mul_hi_u32 v7, v2, v7 -; GFX10-NEXT: v_mul_lo_u32 v13, v2, v4 -; GFX10-NEXT: v_mul_lo_u32 v9, v3, v11 -; GFX10-NEXT: v_mul_lo_u32 v15, v1, v8 -; GFX10-NEXT: v_mul_hi_u32 v10, v1, v11 -; GFX10-NEXT: v_mul_hi_u32 v11, v3, v11 -; GFX10-NEXT: v_mul_lo_u32 v16, v3, v8 -; GFX10-NEXT: v_add_co_u32 v5, s6, v5, v12 -; GFX10-NEXT: v_mul_hi_u32 v14, v0, v4 +; GFX10-NEXT: v_cvt_u32_f32_e32 v4, v2 +; GFX10-NEXT: v_mul_lo_u32 v10, s2, v6 +; GFX10-NEXT: v_cvt_u32_f32_e32 v5, v0 +; GFX10-NEXT: v_cvt_u32_f32_e32 v8, v1 +; GFX10-NEXT: v_mul_lo_u32 v7, s0, v4 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s6, s0, v5, 0 +; GFX10-NEXT: v_mul_lo_u32 v9, s1, v5 +; GFX10-NEXT: v_mad_u64_u32 v[2:3], s6, s2, v8, 0 +; GFX10-NEXT: v_mul_lo_u32 v11, s3, v8 +; GFX10-NEXT: v_add3_u32 v1, v1, v7, v9 +; GFX10-NEXT: v_mul_lo_u32 v7, v4, v0 +; GFX10-NEXT: v_mul_hi_u32 v9, v5, v0 +; GFX10-NEXT: v_add3_u32 v3, v3, v10, v11 +; GFX10-NEXT: v_mul_hi_u32 v0, v4, v0 +; GFX10-NEXT: v_mul_lo_u32 v12, v5, v1 +; GFX10-NEXT: v_mul_lo_u32 v13, v4, v1 +; GFX10-NEXT: v_mul_lo_u32 v10, v6, v2 +; GFX10-NEXT: v_mul_lo_u32 v15, v8, v3 +; GFX10-NEXT: v_mul_hi_u32 v11, v8, v2 +; GFX10-NEXT: v_mul_hi_u32 v2, v6, v2 +; GFX10-NEXT: v_mul_lo_u32 v16, v6, v3 +; GFX10-NEXT: v_mul_hi_u32 v14, v5, v1 +; GFX10-NEXT: v_add_co_u32 v7, s6, v7, v12 ; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, 1, s6 -; GFX10-NEXT: v_add_co_u32 v7, s6, v13, v7 +; GFX10-NEXT: v_add_co_u32 v0, s6, v13, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v13, 0, 1, s6 -; GFX10-NEXT: v_add_co_u32 v9, s6, v9, v15 +; GFX10-NEXT: v_add_co_u32 v10, s6, v10, v15 ; GFX10-NEXT: v_cndmask_b32_e64 v15, 0, 1, s6 -; GFX10-NEXT: v_add_co_u32 v11, s6, v16, v11 +; GFX10-NEXT: v_add_co_u32 v2, s6, v16, v2 ; GFX10-NEXT: v_cndmask_b32_e64 v16, 0, 1, s6 -; GFX10-NEXT: v_add_co_u32 v5, s6, v5, v6 -; GFX10-NEXT: v_mul_hi_u32 v17, v1, v8 -; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s6 -; GFX10-NEXT: v_add_co_u32 v6, s6, v7, v14 +; GFX10-NEXT: v_add_co_u32 v7, s6, v7, v9 +; GFX10-NEXT: v_mul_hi_u32 v17, v8, v3 ; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s6 -; GFX10-NEXT: v_add_co_u32 v9, s6, v9, v10 +; GFX10-NEXT: v_add_co_u32 v0, s6, v0, v14 ; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, 1, s6 -; GFX10-NEXT: v_add_nc_u32_e32 v5, v12, v5 -; GFX10-NEXT: v_add_co_u32 v10, s6, v11, v17 -; GFX10-NEXT: v_mul_hi_u32 v4, v2, v4 -; GFX10-NEXT: v_add_nc_u32_e32 v9, v15, v9 +; GFX10-NEXT: v_add_co_u32 v10, s6, v10, v11 +; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, 1, s6 +; GFX10-NEXT: v_add_nc_u32_e32 v7, v12, v7 +; GFX10-NEXT: v_add_co_u32 v2, s6, v2, v17 +; GFX10-NEXT: v_mul_hi_u32 v1, v4, v1 +; GFX10-NEXT: v_add_nc_u32_e32 v10, v15, v10 ; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, 1, s6 -; GFX10-NEXT: v_add_co_u32 v5, s6, v6, v5 -; GFX10-NEXT: v_add_nc_u32_e32 v7, v13, v7 -; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s6 -; GFX10-NEXT: v_mul_hi_u32 v8, v3, v8 -; GFX10-NEXT: v_add_co_u32 v9, s6, v10, v9 +; GFX10-NEXT: v_add_co_u32 v0, s6, v0, v7 +; GFX10-NEXT: v_add_nc_u32_e32 v9, v13, v9 +; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s6 +; GFX10-NEXT: v_mul_hi_u32 v3, v6, v3 +; GFX10-NEXT: v_add_co_u32 v2, s6, v2, v10 ; GFX10-NEXT: v_add_nc_u32_e32 v11, v16, v11 ; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, 1, s6 -; GFX10-NEXT: v_add3_u32 v4, v7, v6, v4 -; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v5 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 -; GFX10-NEXT: v_add3_u32 v5, v11, v10, v8 -; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v2, v4, vcc_lo -; GFX10-NEXT: v_add_co_u32 v1, vcc_lo, v1, v9 -; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v5, vcc_lo -; GFX10-NEXT: v_mul_lo_u32 v6, s1, v0 -; GFX10-NEXT: v_mul_hi_u32 v7, s0, v0 -; GFX10-NEXT: v_mul_lo_u32 v5, s0, v2 -; GFX10-NEXT: v_mul_lo_u32 v9, s3, v1 -; GFX10-NEXT: v_mul_hi_u32 v10, s2, v1 -; GFX10-NEXT: v_mul_lo_u32 v11, s2, v3 -; GFX10-NEXT: v_mul_lo_u32 v4, s0, v0 -; GFX10-NEXT: v_mul_lo_u32 v8, s2, v1 -; GFX10-NEXT: v_mov_b32_e32 v12, 0 -; GFX10-NEXT: v_add3_u32 v5, v6, v5, v7 -; GFX10-NEXT: v_add3_u32 v9, v9, v11, v10 -; GFX10-NEXT: v_mul_lo_u32 v13, v2, v4 -; GFX10-NEXT: v_mul_lo_u32 v10, v0, v5 -; GFX10-NEXT: v_mul_hi_u32 v14, v0, v4 -; GFX10-NEXT: v_mul_hi_u32 v4, v2, v4 -; GFX10-NEXT: v_mul_lo_u32 v11, v2, v5 -; GFX10-NEXT: v_mul_lo_u32 v6, v3, v8 -; GFX10-NEXT: v_mul_lo_u32 v16, v1, v9 -; GFX10-NEXT: v_mul_hi_u32 v7, v1, v8 -; GFX10-NEXT: v_mul_hi_u32 v8, v3, v8 -; GFX10-NEXT: v_mul_lo_u32 v17, v3, v9 -; GFX10-NEXT: v_add_co_u32 v10, s0, v13, v10 -; GFX10-NEXT: v_cndmask_b32_e64 v13, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v4, s0, v11, v4 -; GFX10-NEXT: v_mul_hi_u32 v15, v0, v5 +; GFX10-NEXT: v_add3_u32 v1, v9, v7, v1 +; GFX10-NEXT: v_add_co_u32 v5, vcc_lo, v5, v0 +; GFX10-NEXT: v_add3_u32 v3, v11, v10, v3 +; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, v4, v1, vcc_lo +; GFX10-NEXT: v_add_co_u32 v8, vcc_lo, v8, v2 +; GFX10-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, v6, v3, vcc_lo +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s6, s0, v5, 0 +; GFX10-NEXT: v_mul_lo_u32 v7, s1, v5 +; GFX10-NEXT: v_mul_lo_u32 v9, s0, v4 +; GFX10-NEXT: v_mad_u64_u32 v[2:3], s0, s2, v8, 0 +; GFX10-NEXT: v_mul_lo_u32 v10, s3, v8 +; GFX10-NEXT: v_mul_lo_u32 v11, s2, v6 +; GFX10-NEXT: v_mul_lo_u32 v12, v4, v0 +; GFX10-NEXT: v_mul_hi_u32 v13, v5, v0 +; GFX10-NEXT: v_mul_hi_u32 v0, v4, v0 +; GFX10-NEXT: v_add3_u32 v1, v1, v9, v7 +; GFX10-NEXT: v_mul_lo_u32 v7, v6, v2 +; GFX10-NEXT: v_mul_hi_u32 v9, v8, v2 +; GFX10-NEXT: v_mul_hi_u32 v2, v6, v2 +; GFX10-NEXT: v_add3_u32 v3, v3, v11, v10 +; GFX10-NEXT: v_mul_lo_u32 v10, v5, v1 +; GFX10-NEXT: v_mul_lo_u32 v11, v4, v1 +; GFX10-NEXT: v_mul_hi_u32 v14, v5, v1 +; GFX10-NEXT: v_mul_hi_u32 v1, v4, v1 +; GFX10-NEXT: v_mul_lo_u32 v15, v8, v3 +; GFX10-NEXT: v_mul_lo_u32 v16, v6, v3 +; GFX10-NEXT: v_mul_hi_u32 v17, v8, v3 +; GFX10-NEXT: v_mul_hi_u32 v3, v6, v3 +; GFX10-NEXT: v_add_co_u32 v10, s0, v12, v10 +; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, 1, s0 +; GFX10-NEXT: v_add_co_u32 v0, s0, v11, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v6, s0, v6, v16 +; GFX10-NEXT: v_add_co_u32 v7, s0, v7, v15 +; GFX10-NEXT: v_cndmask_b32_e64 v15, 0, 1, s0 +; GFX10-NEXT: v_add_co_u32 v2, s0, v16, v2 ; GFX10-NEXT: v_cndmask_b32_e64 v16, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v8, s0, v17, v8 -; GFX10-NEXT: v_cndmask_b32_e64 v17, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v10, s0, v10, v14 -; GFX10-NEXT: v_mul_hi_u32 v18, v1, v9 +; GFX10-NEXT: v_add_co_u32 v10, s0, v10, v13 ; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v4, s0, v4, v15 -; GFX10-NEXT: v_cndmask_b32_e64 v14, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v6, s0, v6, v7 -; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0 -; GFX10-NEXT: v_add_nc_u32_e32 v7, v13, v10 -; GFX10-NEXT: v_add_co_u32 v8, s0, v8, v18 -; GFX10-NEXT: v_mul_hi_u32 v5, v2, v5 +; GFX10-NEXT: v_add_co_u32 v0, s0, v0, v14 +; GFX10-NEXT: v_cndmask_b32_e64 v13, 0, 1, s0 +; GFX10-NEXT: v_add_co_u32 v7, s0, v7, v9 +; GFX10-NEXT: v_add_nc_u32_e32 v9, v12, v10 +; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s0 +; GFX10-NEXT: v_add_co_u32 v2, s0, v2, v17 ; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, 1, s0 -; GFX10-NEXT: v_add_nc_u32_e32 v6, v16, v6 -; GFX10-NEXT: v_add_co_u32 v4, s0, v4, v7 -; GFX10-NEXT: v_add_nc_u32_e32 v11, v11, v14 +; GFX10-NEXT: v_add_co_u32 v0, s0, v0, v9 +; GFX10-NEXT: v_add_nc_u32_e32 v11, v11, v13 +; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, 1, s0 +; GFX10-NEXT: v_add_nc_u32_e32 v7, v15, v7 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v5, v0 +; GFX10-NEXT: v_add_nc_u32_e32 v10, v16, v10 +; GFX10-NEXT: v_add3_u32 v1, v11, v9, v1 +; GFX10-NEXT: v_add_co_u32 v2, s0, v2, v7 ; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s0 -; GFX10-NEXT: v_mul_hi_u32 v9, v3, v9 -; GFX10-NEXT: v_add_co_u32 v6, s0, v8, v6 -; GFX10-NEXT: v_add_nc_u32_e32 v10, v17, v10 -; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, 1, s0 -; GFX10-NEXT: v_add3_u32 v5, v11, v7, v5 -; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4 -; GFX10-NEXT: v_add3_u32 v4, v10, v8, v9 -; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v2, v5, vcc_lo -; GFX10-NEXT: v_add_co_u32 v1, vcc_lo, v1, v6 -; GFX10-NEXT: v_mul_lo_u32 v5, s9, v0 -; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo -; GFX10-NEXT: v_mul_lo_u32 v4, s8, v2 -; GFX10-NEXT: v_mul_hi_u32 v7, s8, v0 +; GFX10-NEXT: v_mul_hi_u32 v5, s8, v0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v4, v1, vcc_lo +; GFX10-NEXT: v_mul_lo_u32 v4, s9, v0 +; GFX10-NEXT: v_add3_u32 v3, v10, v7, v3 ; GFX10-NEXT: v_mul_hi_u32 v0, s9, v0 -; GFX10-NEXT: v_mul_lo_u32 v9, s9, v2 -; GFX10-NEXT: v_mul_hi_u32 v10, s8, v2 -; GFX10-NEXT: v_mul_hi_u32 v2, s9, v2 -; GFX10-NEXT: v_mul_lo_u32 v6, s11, v1 -; GFX10-NEXT: v_mul_hi_u32 v8, s10, v1 -; GFX10-NEXT: v_add_co_u32 v4, s0, v5, v4 -; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v0, s0, v9, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, 1, s0 +; GFX10-NEXT: v_mul_lo_u32 v7, s8, v1 +; GFX10-NEXT: v_mul_lo_u32 v10, s9, v1 +; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v8, v2 +; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v6, v3, vcc_lo +; GFX10-NEXT: v_mul_hi_u32 v6, s8, v1 +; GFX10-NEXT: v_mul_hi_u32 v1, s9, v1 ; GFX10-NEXT: v_add_co_u32 v4, s0, v4, v7 -; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v0, s0, v0, v10 ; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s0 -; GFX10-NEXT: v_mul_lo_u32 v10, s10, v3 -; GFX10-NEXT: v_add_nc_u32_e32 v4, v5, v4 -; GFX10-NEXT: v_mul_hi_u32 v1, s11, v1 -; GFX10-NEXT: v_mul_lo_u32 v5, s11, v3 -; GFX10-NEXT: v_add_nc_u32_e32 v7, v9, v7 -; GFX10-NEXT: v_mul_hi_u32 v11, s10, v3 -; GFX10-NEXT: v_add_co_u32 v0, s0, v0, v4 -; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v6, s0, v6, v10 -; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v1, s0, v5, v1 -; GFX10-NEXT: v_add3_u32 v2, v7, v4, v2 -; GFX10-NEXT: v_mul_lo_u32 v5, s13, v0 -; GFX10-NEXT: v_mul_hi_u32 v7, s12, v0 +; GFX10-NEXT: v_add_co_u32 v0, s0, v10, v0 +; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, 1, s0 +; GFX10-NEXT: v_add_co_u32 v4, s0, v4, v5 ; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s0 -; GFX10-NEXT: v_mul_lo_u32 v13, s12, v0 -; GFX10-NEXT: v_mul_lo_u32 v10, s12, v2 +; GFX10-NEXT: v_add_co_u32 v0, s0, v0, v6 +; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s0 +; GFX10-NEXT: v_mul_lo_u32 v6, s11, v2 +; GFX10-NEXT: v_add_nc_u32_e32 v4, v7, v4 +; GFX10-NEXT: v_mul_lo_u32 v7, s10, v3 +; GFX10-NEXT: v_mul_lo_u32 v10, s11, v3 +; GFX10-NEXT: v_add_nc_u32_e32 v5, v8, v5 +; GFX10-NEXT: v_mul_hi_u32 v8, s10, v2 +; GFX10-NEXT: v_add_co_u32 v4, s0, v0, v4 +; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX10-NEXT: v_mul_hi_u32 v2, s11, v2 +; GFX10-NEXT: v_mul_hi_u32 v11, s10, v3 +; GFX10-NEXT: v_add_co_u32 v6, s0, v6, v7 +; GFX10-NEXT: v_add3_u32 v5, v5, v0, v1 +; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s12, v4, 0 +; GFX10-NEXT: v_mul_lo_u32 v12, s13, v4 +; GFX10-NEXT: v_mul_lo_u32 v13, s12, v5 +; GFX10-NEXT: v_add_co_u32 v2, s0, v10, v2 +; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, 1, s0 ; GFX10-NEXT: v_add_co_u32 v6, s0, v6, v8 ; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v1, s0, v1, v11 +; GFX10-NEXT: v_add_co_u32 v2, s0, v2, v11 ; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, 1, s0 +; GFX10-NEXT: v_add3_u32 v1, v1, v13, v12 +; GFX10-NEXT: v_add_nc_u32_e32 v6, v7, v6 ; GFX10-NEXT: v_mul_hi_u32 v3, s11, v3 -; GFX10-NEXT: v_add3_u32 v5, v5, v10, v7 -; GFX10-NEXT: v_sub_co_u32 v7, vcc_lo, s8, v13 -; GFX10-NEXT: v_add_nc_u32_e32 v4, v4, v8 -; GFX10-NEXT: v_add_nc_u32_e32 v6, v9, v6 -; GFX10-NEXT: v_sub_nc_u32_e32 v8, s9, v5 -; GFX10-NEXT: v_sub_co_ci_u32_e64 v5, s0, s9, v5, vcc_lo -; GFX10-NEXT: v_cmp_le_u32_e64 s0, s12, v7 -; GFX10-NEXT: v_subrev_co_ci_u32_e32 v8, vcc_lo, s13, v8, vcc_lo -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s13, v5 -; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, -1, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc_lo -; GFX10-NEXT: v_sub_co_u32 v11, vcc_lo, v7, s12 -; GFX10-NEXT: v_subrev_co_ci_u32_e64 v13, s0, 0, v8, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s13, v5 -; GFX10-NEXT: v_subrev_co_ci_u32_e32 v8, vcc_lo, s13, v8, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v9, v10, v9, s0 -; GFX10-NEXT: v_cmp_le_u32_e64 s0, s12, v11 -; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, -1, s0 -; GFX10-NEXT: v_cmp_le_u32_e64 s0, s13, v13 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX10-NEXT: v_add_nc_u32_e32 v7, v10, v8 +; GFX10-NEXT: v_sub_nc_u32_e32 v8, s9, v1 +; GFX10-NEXT: v_sub_co_u32 v10, vcc_lo, s8, v0 +; GFX10-NEXT: v_sub_co_ci_u32_e64 v11, s0, s9, v1, vcc_lo +; GFX10-NEXT: v_subrev_co_ci_u32_e32 v0, vcc_lo, s13, v8, vcc_lo +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s12, v10 +; GFX10-NEXT: v_mov_b32_e32 v9, 0 +; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX10-NEXT: v_sub_co_u32 v8, vcc_lo, v10, s12 +; GFX10-NEXT: v_subrev_co_ci_u32_e64 v12, s0, 0, v0, vcc_lo +; GFX10-NEXT: v_cmp_le_u32_e64 s0, s13, v11 +; GFX10-NEXT: v_subrev_co_ci_u32_e32 v0, vcc_lo, s13, v0, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v13, 0, -1, s0 +; GFX10-NEXT: v_cmp_le_u32_e64 s0, s12, v8 ; GFX10-NEXT: v_cndmask_b32_e64 v14, 0, -1, s0 -; GFX10-NEXT: v_add_co_u32 v6, s0, v1, v6 +; GFX10-NEXT: v_cmp_le_u32_e64 s0, s13, v12 +; GFX10-NEXT: v_cndmask_b32_e64 v15, 0, -1, s0 +; GFX10-NEXT: v_add_co_u32 v16, s0, v4, 1 +; GFX10-NEXT: v_add_co_ci_u32_e64 v17, s0, 0, v5, s0 +; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s13, v11 +; GFX10-NEXT: v_cndmask_b32_e64 v13, v13, v1, s0 +; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s13, v12 +; GFX10-NEXT: v_cndmask_b32_e64 v14, v15, v14, s0 +; GFX10-NEXT: v_add_co_u32 v6, s0, v2, v6 ; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 -; GFX10-NEXT: v_add_co_u32 v15, s0, v0, 1 -; GFX10-NEXT: v_add_co_ci_u32_e64 v16, s0, 0, v2, s0 -; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s13, v13 -; GFX10-NEXT: v_add3_u32 v3, v4, v1, v3 -; GFX10-NEXT: v_mul_hi_u32 v18, s14, v6 -; GFX10-NEXT: v_cndmask_b32_e64 v10, v14, v10, s0 -; GFX10-NEXT: v_mul_lo_u32 v14, s15, v6 -; GFX10-NEXT: v_mul_lo_u32 v17, s14, v3 -; GFX10-NEXT: v_add_co_u32 v1, s0, v15, 1 -; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 -; GFX10-NEXT: v_add_co_ci_u32_e64 v4, s0, 0, v16, s0 -; GFX10-NEXT: v_sub_co_u32 v19, s0, v11, s12 -; GFX10-NEXT: v_cndmask_b32_e32 v1, v15, v1, vcc_lo -; GFX10-NEXT: v_mul_lo_u32 v15, s14, v6 -; GFX10-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc_lo -; GFX10-NEXT: v_add3_u32 v14, v14, v17, v18 -; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v9 -; GFX10-NEXT: v_subrev_co_ci_u32_e64 v8, s0, 0, v8, s0 -; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo -; GFX10-NEXT: v_sub_nc_u32_e32 v2, s11, v14 -; GFX10-NEXT: v_sub_co_u32 v9, s0, s10, v15 -; GFX10-NEXT: v_sub_co_ci_u32_e64 v15, s1, s11, v14, s0 +; GFX10-NEXT: v_add_co_u32 v15, s0, v16, 1 +; GFX10-NEXT: v_add_co_ci_u32_e64 v18, s0, 0, v17, s0 +; GFX10-NEXT: v_add3_u32 v3, v7, v1, v3 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s14, v6, 0 +; GFX10-NEXT: v_mul_lo_u32 v19, s15, v6 +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 +; GFX10-NEXT: v_mul_lo_u32 v7, s14, v3 +; GFX10-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc_lo +; GFX10-NEXT: v_sub_co_u32 v16, s0, v8, s12 +; GFX10-NEXT: v_subrev_co_ci_u32_e64 v20, s0, 0, v0, s0 +; GFX10-NEXT: v_add3_u32 v2, v2, v7, v19 +; GFX10-NEXT: v_sub_co_u32 v7, s0, s10, v1 +; GFX10-NEXT: v_cndmask_b32_e32 v17, v17, v18, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v13 +; GFX10-NEXT: v_sub_co_ci_u32_e64 v13, s1, s11, v2, s0 +; GFX10-NEXT: v_sub_nc_u32_e32 v2, s11, v2 +; GFX10-NEXT: v_cmp_ne_u32_e64 s1, 0, v14 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v15, vcc_lo +; GFX10-NEXT: v_cmp_le_u32_e64 s2, s15, v13 +; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v17, vcc_lo ; GFX10-NEXT: v_subrev_co_ci_u32_e64 v2, s0, s15, v2, s0 -; GFX10-NEXT: v_cmp_ne_u32_e64 s1, 0, v10 -; GFX10-NEXT: v_cmp_le_u32_e64 s0, s14, v9 -; GFX10-NEXT: v_cmp_le_u32_e64 s2, s15, v15 -; GFX10-NEXT: v_cndmask_b32_e64 v4, v11, v19, s1 -; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, -1, s0 -; GFX10-NEXT: v_sub_co_u32 v14, s0, v9, s14 -; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, -1, s2 -; GFX10-NEXT: v_subrev_co_ci_u32_e64 v16, s2, 0, v2, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v8, v13, v8, s1 -; GFX10-NEXT: v_cmp_eq_u32_e64 s1, s15, v15 -; GFX10-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc_lo +; GFX10-NEXT: v_cmp_le_u32_e64 s0, s14, v7 +; GFX10-NEXT: v_cndmask_b32_e64 v4, v8, v16, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, -1, s2 +; GFX10-NEXT: v_cndmask_b32_e64 v12, v12, v20, s1 +; GFX10-NEXT: v_cmp_eq_u32_e64 s1, s15, v13 +; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, -1, s0 +; GFX10-NEXT: v_sub_co_u32 v14, s0, v7, s14 +; GFX10-NEXT: v_subrev_co_ci_u32_e64 v15, s2, 0, v2, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v8, s1 +; GFX10-NEXT: v_cndmask_b32_e32 v4, v10, v4, vcc_lo ; GFX10-NEXT: v_subrev_co_ci_u32_e64 v2, s0, s15, v2, s0 -; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v7, v10, v11, s1 -; GFX10-NEXT: v_cmp_le_u32_e64 s1, s15, v16 -; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, -1, s1 +; GFX10-NEXT: v_cmp_le_u32_e64 s1, s15, v15 +; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, -1, s1 ; GFX10-NEXT: v_cmp_le_u32_e64 s1, s14, v14 -; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, -1, s1 -; GFX10-NEXT: v_add_co_u32 v13, s1, v6, 1 +; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, -1, s1 +; GFX10-NEXT: v_add_co_u32 v16, s1, v6, 1 ; GFX10-NEXT: v_add_co_ci_u32_e64 v17, s1, 0, v3, s1 -; GFX10-NEXT: v_cmp_eq_u32_e64 s1, s15, v16 -; GFX10-NEXT: v_cndmask_b32_e64 v10, v10, v11, s1 -; GFX10-NEXT: v_add_co_u32 v11, s1, v13, 1 +; GFX10-NEXT: v_cmp_eq_u32_e64 s1, s15, v15 +; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v10, s1 +; GFX10-NEXT: v_add_co_u32 v10, s1, v16, 1 ; GFX10-NEXT: v_add_co_ci_u32_e64 v18, s1, 0, v17, s1 -; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v10 -; GFX10-NEXT: v_sub_co_u32 v10, s1, v14, s14 +; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v8 +; GFX10-NEXT: v_sub_co_u32 v8, s1, v14, s14 ; GFX10-NEXT: v_subrev_co_ci_u32_e64 v2, s1, 0, v2, s1 -; GFX10-NEXT: v_cndmask_b32_e64 v11, v13, v11, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v13, v17, v18, s0 -; GFX10-NEXT: v_cmp_ne_u32_e64 s1, 0, v7 -; GFX10-NEXT: v_cndmask_b32_e64 v7, v14, v10, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v10, v16, v2, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v2, v6, v11, s1 -; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v13, s1 -; GFX10-NEXT: v_cndmask_b32_e64 v6, v9, v7, s1 -; GFX10-NEXT: v_cndmask_b32_e64 v7, v15, v10, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v10, v16, v10, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v16, v17, v18, s0 +; GFX10-NEXT: v_cmp_ne_u32_e64 s1, 0, v5 +; GFX10-NEXT: v_cndmask_b32_e64 v8, v14, v8, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v14, v15, v2, s0 +; GFX10-NEXT: v_cndmask_b32_e32 v5, v11, v12, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v2, v6, v10, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v16, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v6, v7, v8, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v7, v13, v14, s1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: global_store_dwordx4 v12, v[0:3], s[4:5] -; GFX10-NEXT: global_store_dwordx4 v12, v[4:7], s[6:7] +; GFX10-NEXT: global_store_dwordx4 v9, v[0:3], s[4:5] +; GFX10-NEXT: global_store_dwordx4 v9, v[4:7], s[6:7] ; GFX10-NEXT: s_endpgm %div = udiv <2 x i64> %x, %y store <2 x i64> %div, <2 x i64> addrspace(1)* %out0