Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -111,9 +111,11 @@ bool IsRem) const; bool legalizeUDIV_UREM32(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; - bool legalizeSDIV_SREM32(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; + + bool legalizeUDIV_UREM64(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const; bool legalizeSDIV_SREM(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -2531,11 +2531,176 @@ return true; } +// Build integer reciprocal sequence around V_RCP_IFLAG_F32 +// +// Return lo, hi of result +// +// %cvt.lo = G_UITOFP Val.lo +// %cvt.hi = G_UITOFP Val.hi +// %mad = G_FMAD %cvt.hi, 2**32, %cvt.lo +// %rcp = G_AMDGPU_RCP_IFLAG %mad +// %mul1 = G_FMUL %rcp, 0x5f7ffffc +// %mul2 = G_FMUL %mul1, 2**(-32) +// %trunc = G_INTRINSIC_TRUNC %mul2 +// %mad2 = G_FMAD %trunc, -(2**32), %mul1 +// return {G_FPTOUI %mad2, G_FPTOUI %trunc} +static std::pair<Register, Register> emitReciprocalU64(MachineIRBuilder &B, + Register Val) { + const LLT S32 = LLT::scalar(32); + auto Unmerge = B.buildUnmerge(S32, Val); + + auto CvtLo = B.buildUITOFP(S32, Unmerge.getReg(0)); + auto CvtHi = B.buildUITOFP(S32, Unmerge.getReg(1)); + + auto Mad = B.buildFMAD(S32, CvtHi, // 2**32 + B.buildFConstant(S32, BitsToFloat(0x4f800000)), CvtLo); + + auto Rcp = B.buildInstr(AMDGPU::G_AMDGPU_RCP_IFLAG, {S32}, {Mad}); + auto Mul1 = + B.buildFMul(S32, Rcp, B.buildFConstant(S32, BitsToFloat(0x5f7ffffc))); + + // 2**(-32) + auto Mul2 = + B.buildFMul(S32, Mul1, B.buildFConstant(S32, BitsToFloat(0x2f800000))); + auto Trunc = 
B.buildIntrinsicTrunc(S32, Mul2); + + // -(2**32) + auto Mad2 = B.buildFMAD(S32, Trunc, + B.buildFConstant(S32, BitsToFloat(0xcf800000)), Mul1); + + auto ResultLo = B.buildFPTOUI(S32, Mad2); + auto ResultHi = B.buildFPTOUI(S32, Trunc); + + return {ResultLo.getReg(0), ResultHi.getReg(0)}; +} + +bool AMDGPULegalizerInfo::legalizeUDIV_UREM64(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { + B.setInstr(MI); + + const bool IsDiv = MI.getOpcode() == TargetOpcode::G_UDIV; + const LLT S32 = LLT::scalar(32); + const LLT S64 = LLT::scalar(64); + const LLT S1 = LLT::scalar(1); + Register Numer = MI.getOperand(1).getReg(); + Register Denom = MI.getOperand(2).getReg(); + Register RcpLo, RcpHi; + + std::tie(RcpLo, RcpHi) = emitReciprocalU64(B, Denom); + + auto Rcp = B.buildMerge(S64, {RcpLo, RcpHi}); + + auto Zero64 = B.buildConstant(S64, 0); + auto NegDenom = B.buildSub(S64, Zero64, Denom); + + auto MulLo1 = B.buildMul(S64, NegDenom, Rcp); + auto MulHi1 = B.buildUMulH(S64, Rcp, MulLo1); + + auto UnmergeMulHi1 = B.buildUnmerge(S32, MulHi1); + Register MulHi1_Lo = UnmergeMulHi1.getReg(0); + Register MulHi1_Hi = UnmergeMulHi1.getReg(1); + + auto Add1_Lo = B.buildUAddo(S32, S1, RcpLo, MulHi1_Lo); + auto Add1_Hi = B.buildUAdde(S32, S1, RcpHi, MulHi1_Hi, Add1_Lo.getReg(1)); + auto Add1_HiNc = B.buildAdd(S32, RcpHi, MulHi1_Hi); + auto Add1 = B.buildMerge(S64, {Add1_Lo, Add1_Hi}); + + auto MulLo2 = B.buildMul(S64, NegDenom, Add1); + auto MulHi2 = B.buildUMulH(S64, Add1, MulLo2); + auto UnmergeMulHi2 = B.buildUnmerge(S32, MulHi2); + Register MulHi2_Lo = UnmergeMulHi2.getReg(0); + Register MulHi2_Hi = UnmergeMulHi2.getReg(1); + + auto Zero32 = B.buildConstant(S32, 0); + auto Add2_Lo = B.buildUAddo(S32, S1, Add1_Lo, MulHi2_Lo); + auto Add2_HiC = + B.buildUAdde(S32, S1, Add1_HiNc, MulHi2_Hi, Add1_Lo.getReg(1)); + auto Add2_Hi = B.buildUAdde(S32, S1, Add2_HiC, Zero32, Add2_Lo.getReg(1)); + auto Add2 = B.buildMerge(S64, {Add2_Lo, Add2_Hi}); + + auto UnmergeNumer 
= B.buildUnmerge(S32, Numer); + Register NumerLo = UnmergeNumer.getReg(0); + Register NumerHi = UnmergeNumer.getReg(1); + + auto MulHi3 = B.buildUMulH(S64, Numer, Add2); + auto Mul3 = B.buildMul(S64, Denom, MulHi3); + auto UnmergeMul3 = B.buildUnmerge(S32, Mul3); + Register Mul3_Lo = UnmergeMul3.getReg(0); + Register Mul3_Hi = UnmergeMul3.getReg(1); + auto Sub1_Lo = B.buildUSubo(S32, S1, NumerLo, Mul3_Lo); + auto Sub1_Hi = B.buildUSube(S32, S1, NumerHi, Mul3_Hi, Sub1_Lo.getReg(1)); + auto Sub1_Mi = B.buildSub(S32, NumerHi, Mul3_Hi); + auto Sub1 = B.buildMerge(S64, {Sub1_Lo, Sub1_Hi}); + + auto UnmergeDenom = B.buildUnmerge(S32, Denom); + Register DenomLo = UnmergeDenom.getReg(0); + Register DenomHi = UnmergeDenom.getReg(1); + + auto CmpHi = B.buildICmp(CmpInst::ICMP_UGE, S1, Sub1_Hi, DenomHi); + auto C1 = B.buildSExt(S32, CmpHi); + + auto CmpLo = B.buildICmp(CmpInst::ICMP_UGE, S1, Sub1_Lo, DenomLo); + auto C2 = B.buildSExt(S32, CmpLo); + + auto CmpEq = B.buildICmp(CmpInst::ICMP_EQ, S1, Sub1_Hi, DenomHi); + auto C3 = B.buildSelect(S32, CmpEq, C2, C1); + + // TODO: Here and below portions of the code can be enclosed into if/endif. + // Currently control flow is unconditional and we have 4 selects after + // potential endif to substitute PHIs. + + // if C3 != 0 ... 
+ auto Sub2_Lo = B.buildUSubo(S32, S1, Sub1_Lo, DenomLo); + auto Sub2_Mi = B.buildUSube(S32, S1, Sub1_Mi, DenomHi, Sub1_Lo.getReg(1)); + auto Sub2_Hi = B.buildUSube(S32, S1, Sub2_Mi, Zero32, Sub2_Lo.getReg(1)); + auto Sub2 = B.buildMerge(S64, {Sub2_Lo, Sub2_Hi}); + + auto One64 = B.buildConstant(S64, 1); + auto Add3 = B.buildAdd(S64, MulHi3, One64); + + auto C4 = + B.buildSExt(S32, B.buildICmp(CmpInst::ICMP_UGE, S1, Sub2_Hi, DenomHi)); + auto C5 = + B.buildSExt(S32, B.buildICmp(CmpInst::ICMP_UGE, S1, Sub2_Lo, DenomLo)); + auto C6 = B.buildSelect( + S32, B.buildICmp(CmpInst::ICMP_EQ, S1, Sub2_Hi, DenomHi), C5, C4); + + // if (C6 != 0) + auto Add4 = B.buildAdd(S64, Add3, One64); + auto Sub3_Lo = B.buildUSubo(S32, S1, Sub2_Lo, DenomLo); + + auto Sub3_Mi = B.buildUSube(S32, S1, Sub2_Mi, DenomHi, Sub2_Lo.getReg(1)); + auto Sub3_Hi = B.buildUSube(S32, S1, Sub3_Mi, Zero32, Sub3_Lo.getReg(1)); + auto Sub3 = B.buildMerge(S64, {Sub3_Lo, Sub3_Hi}); + + // endif C6 + // endif C3 + + if (IsDiv) { + auto Sel1 = B.buildSelect( + S64, B.buildICmp(CmpInst::ICMP_NE, S1, C6, Zero32), Add4, Add3); + B.buildSelect(MI.getOperand(0), + B.buildICmp(CmpInst::ICMP_NE, S1, C3, Zero32), Sel1, MulHi3); + } else { + auto Sel2 = B.buildSelect( + S64, B.buildICmp(CmpInst::ICMP_NE, S1, C6, Zero32), Sub3, Sub2); + B.buildSelect(MI.getOperand(0), + B.buildICmp(CmpInst::ICMP_NE, S1, C3, Zero32), Sel2, Sub1); + } + + MI.eraseFromParent(); + return true; +} + bool AMDGPULegalizerInfo::legalizeUDIV_UREM(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - if (MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(32)) + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + if (Ty == LLT::scalar(32)) return legalizeUDIV_UREM32(MI, MRI, B); + if (Ty == LLT::scalar(64)) + return legalizeUDIV_UREM64(MI, MRI, B); return false; } Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir =================================================================== --- 
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX6 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX8 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s --- name: test_udiv_s32 @@ -297,18 +297,462 @@ ; GFX6-LABEL: name: test_udiv_s64 ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV [[COPY]], [[COPY1]] - ; GFX6: $vgpr0_vgpr1 = COPY [[UDIV]](s64) + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) + ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %8(s32) + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C2]] + ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]] + ; GFX6: 
[[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %16(s32) + ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX6: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) + ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX6: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] + ; GFX6: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX6: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX6: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX6: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX6: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX6: 
[[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX6: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX6: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX6: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]] + ; GFX6: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] + ; GFX6: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX6: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] + ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]] + ; GFX6: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] + ; GFX6: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD8]] + ; GFX6: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] + ; GFX6: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX6: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX6: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) + ; GFX6: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX6: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD8]] + ; GFX6: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] + ; GFX6: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD8]] + ; GFX6: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6: 
[[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX6: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX6: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX6: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX6: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD9]] + ; GFX6: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX6: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]] + ; GFX6: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD8]] + ; GFX6: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]] + ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX6: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO11]] + ; GFX6: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDE2]], [[C5]], [[UADDO23]] + ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX6: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] + ; GFX6: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE4]] + ; GFX6: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] + ; GFX6: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX6: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX6: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) + ; GFX6: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX6: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE4]] + ; GFX6: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] + ; GFX6: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE4]] + ; GFX6: [[UADDO28:%[0-9]+]]:_(s32), 
[[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX6: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX6: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX6: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX6: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD13]] + ; GFX6: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX6: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]] + ; GFX6: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE4]] + ; GFX6: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]] + ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD16]](s32) + ; GFX6: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX6: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] + ; GFX6: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] + ; GFX6: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD16]] + ; GFX6: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] + ; GFX6: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]] + ; GFX6: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] + ; GFX6: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD18]], [[USUBO3]] + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD18]] + ; GFX6: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] + ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]] + ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]] + ; GFX6: 
[[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX6: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]] + ; GFX6: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] + ; GFX6: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] + ; GFX6: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX6: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX6: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV14]] + ; GFX6: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD16]], [[UV15]], [[UADDO35]] + ; GFX6: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE6]](s32) + ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] + ; GFX6: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]] + ; GFX6: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX6: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX6: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV16]] + ; GFX6: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[UV17]], [[UADDO37]] + ; GFX6: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE8]](s32) + ; GFX6: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV12]] + ; GFX6: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]] + ; GFX6: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]] + ; GFX6: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), 
[[C5]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] + ; GFX6: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] + ; GFX6: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]] + ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UITOFP]] ; GFX8-LABEL: name: test_udiv_s64 ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV [[COPY]], [[COPY1]] - ; GFX8: $vgpr0_vgpr1 = COPY [[UDIV]](s64) + ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) + ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %8(s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C2]] + ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]] + ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %16(s32) + ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX8: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) + ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX8: [[USUBO:%[0-9]+]]:_(s32), 
[[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] + ; GFX8: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX8: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX8: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX8: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX8: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] 
+ ; GFX8: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]] + ; GFX8: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] + ; GFX8: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX8: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] + ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX8: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]] + ; GFX8: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] + ; GFX8: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD8]] + ; GFX8: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] + ; GFX8: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX8: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX8: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX8: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) + ; GFX8: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD8]] + ; GFX8: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] + ; GFX8: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD8]] + ; GFX8: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX8: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX8: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX8: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX8: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD9]] + ; GFX8: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX8: [[ADD11:%[0-9]+]]:_(s32) = G_ADD 
[[ADD10]], [[ZEXT9]] + ; GFX8: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD8]] + ; GFX8: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]] + ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX8: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO11]] + ; GFX8: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDE2]], [[C5]], [[UADDO23]] + ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX8: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] + ; GFX8: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE4]] + ; GFX8: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] + ; GFX8: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX8: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX8: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX8: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) + ; GFX8: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX8: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE4]] + ; GFX8: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] + ; GFX8: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE4]] + ; GFX8: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX8: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX8: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX8: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX8: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX8: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD13]] + ; GFX8: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT 
[[UADDO33]](s1) + ; GFX8: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]] + ; GFX8: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE4]] + ; GFX8: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]] + ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD16]](s32) + ; GFX8: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX8: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] + ; GFX8: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] + ; GFX8: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD16]] + ; GFX8: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] + ; GFX8: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX8: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]] + ; GFX8: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] + ; GFX8: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD18]], [[USUBO3]] + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD18]] + ; GFX8: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] + ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]] + ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX8: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]] + ; GFX8: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] + ; GFX8: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] + ; GFX8: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX8: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[C6]](s64) + ; GFX8: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV14]] + ; GFX8: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD16]], [[UV15]], [[UADDO35]] + ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE6]](s32) + ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] + ; GFX8: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]] + ; GFX8: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX8: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX8: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV16]] + ; GFX8: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[UV17]], [[UADDO37]] + ; GFX8: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE8]](s32) + ; GFX8: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV12]] + ; GFX8: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]] + ; GFX8: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]] + ; GFX8: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] + ; GFX8: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] + ; GFX8: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; GFX8: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]] + ; GFX8: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] + ; 
GFX8: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UITOFP]] ; GFX9-LABEL: name: test_udiv_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV [[COPY]], [[COPY1]] - ; GFX9: $vgpr0_vgpr1 = COPY [[UDIV]](s64) + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) + ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %8(s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C2]] + ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]] + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %16(s32) + ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) + ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX9: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] + ; GFX9: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD 
[[ADD]], [[UMULH]] + ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX9: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX9: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX9: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX9: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX9: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]] + ; GFX9: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] + ; GFX9: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX9: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH 
[[USUBO]], [[UADDO10]] + ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX9: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]] + ; GFX9: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] + ; GFX9: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD8]] + ; GFX9: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] + ; GFX9: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX9: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) + ; GFX9: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD8]] + ; GFX9: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] + ; GFX9: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD8]] + ; GFX9: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX9: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX9: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD9]] + ; GFX9: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX9: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]] + ; GFX9: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD8]] + ; GFX9: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]] + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX9: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO11]] + ; GFX9: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDE2]], [[C5]], 
[[UADDO23]] + ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX9: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] + ; GFX9: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE4]] + ; GFX9: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] + ; GFX9: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX9: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX9: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX9: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) + ; GFX9: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX9: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE4]] + ; GFX9: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] + ; GFX9: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE4]] + ; GFX9: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX9: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX9: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX9: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX9: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX9: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD13]] + ; GFX9: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX9: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]] + ; GFX9: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE4]] + ; GFX9: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD16]](s32) + ; GFX9: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX9: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] + ; GFX9: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], 
[[UADDO32]] + ; GFX9: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD16]] + ; GFX9: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] + ; GFX9: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX9: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]] + ; GFX9: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] + ; GFX9: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD18]], [[USUBO3]] + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD18]] + ; GFX9: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] + ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]] + ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX9: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]] + ; GFX9: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] + ; GFX9: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] + ; GFX9: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX9: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX9: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV14]] + ; GFX9: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD16]], [[UV15]], [[UADDO35]] + ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE6]](s32) + ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] + ; GFX9: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), 
[[USUBO4]](s32), [[UV12]] + ; GFX9: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX9: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX9: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV16]] + ; GFX9: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[UV17]], [[UADDO37]] + ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE8]](s32) + ; GFX9: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV12]] + ; GFX9: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]] + ; GFX9: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]] + ; GFX9: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] + ; GFX9: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] + ; GFX9: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]] + ; GFX9: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX9: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UITOFP]] %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_UDIV %0, %1 @@ -326,28 +770,907 @@ ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX6: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) ; GFX6: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX6: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV [[UV]], [[UV2]] - ; 
GFX6: [[UDIV1:%[0-9]+]]:_(s64) = G_UDIV [[UV1]], [[UV3]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[UDIV]](s64), [[UDIV1]](s64) + ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV4]](s32) + ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV5]](s32) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %250(s32) + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C2]] + ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]] + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %258(s32) + ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX6: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) + ; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX6: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]] + ; GFX6: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX6: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX6: 
[[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX6: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX6: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX6: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX6: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX6: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX6: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX6: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]] + ; GFX6: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] + ; GFX6: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX6: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] + ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]] + ; GFX6: 
[[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] + ; GFX6: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD8]] + ; GFX6: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] + ; GFX6: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX6: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX6: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) + ; GFX6: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX6: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD8]] + ; GFX6: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] + ; GFX6: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD8]] + ; GFX6: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX6: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX6: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX6: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX6: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD9]] + ; GFX6: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX6: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]] + ; GFX6: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD8]] + ; GFX6: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]] + ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX6: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO11]] + ; GFX6: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDE2]], [[C5]], [[UADDO23]] + ; GFX6: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX6: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[UV]](s64) + ; GFX6: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]] + ; GFX6: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE4]] + ; GFX6: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]] + ; GFX6: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX6: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX6: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) + ; GFX6: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX6: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE4]] + ; GFX6: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]] + ; GFX6: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE4]] + ; GFX6: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX6: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX6: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX6: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX6: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD13]] + ; GFX6: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX6: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]] + ; GFX6: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE4]] + ; GFX6: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]] + ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD16]](s32) + ; GFX6: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX6: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[UADDO32]] + ; GFX6: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]] + ; GFX6: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD16]] + ; GFX6: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV14]], [[UADDO32]] + ; GFX6: 
[[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]] + ; GFX6: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[MUL15]] + ; GFX6: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD18]], [[USUBO3]] + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD18]] + ; GFX6: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV17]] + ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV16]] + ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV17]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX6: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV16]] + ; GFX6: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV17]], [[USUBO3]] + ; GFX6: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] + ; GFX6: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX6: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX6: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV18]] + ; GFX6: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD16]], [[UV19]], [[UADDO35]] + ; GFX6: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE6]](s32) + ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV17]] + ; GFX6: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV16]] + ; GFX6: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV17]] 
+ ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX6: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX6: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV20]] + ; GFX6: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[UV21]], [[UADDO37]] + ; GFX6: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE8]](s32) + ; GFX6: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV16]] + ; GFX6: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV17]], [[USUBO5]] + ; GFX6: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]] + ; GFX6: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] + ; GFX6: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] + ; GFX6: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX6: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV22]](s32) + ; GFX6: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV23]](s32) + ; GFX6: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %14(s32) + ; GFX6: [[C8:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C8]] + ; GFX6: [[C9:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FMUL2]], [[C9]] + ; GFX6: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL3]] + ; GFX6: [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX6: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI %22(s32) + ; GFX6: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI 
[[INTRINSIC_TRUNC1]](s32) + ; GFX6: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) + ; GFX6: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX6: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV24]], [[UV26]] + ; GFX6: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV25]], [[UV27]], [[USUBO9]] + ; GFX6: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI2]] + ; GFX6: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[FPTOUI2]] + ; GFX6: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]] + ; GFX6: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[FPTOUI2]] + ; GFX6: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] + ; GFX6: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ADD19]], [[UMULH15]] + ; GFX6: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] + ; GFX6: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD20]] + ; GFX6: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] + ; GFX6: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX6: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) + ; GFX6: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH16]] + ; GFX6: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1) + ; GFX6: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX6: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD20]] + ; GFX6: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] + ; GFX6: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD20]] + ; GFX6: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX6: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) + ; GFX6: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH18]] + ; GFX6: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) + ; GFX6: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX6: 
[[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[ADD21]] + ; GFX6: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) + ; GFX6: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[ADD22]], [[ZEXT19]] + ; GFX6: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD20]] + ; GFX6: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD23]] + ; GFX6: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO46]] + ; GFX6: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD24]], [[UADDO49]] + ; GFX6: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI3]], [[ADD24]] + ; GFX6: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDO48]] + ; GFX6: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[UADDO48]] + ; GFX6: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE10]] + ; GFX6: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[UADDO48]] + ; GFX6: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] + ; GFX6: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ADD26]], [[UMULH20]] + ; GFX6: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE10]], [[MUL24]] + ; GFX6: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO48]], [[ADD27]] + ; GFX6: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[MUL24]] + ; GFX6: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX6: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) + ; GFX6: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH21]] + ; GFX6: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) + ; GFX6: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX6: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE10]], [[ADD27]] + ; GFX6: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE10]], [[MUL24]] + ; GFX6: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[ADD27]] + ; GFX6: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX6: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) + ; GFX6: 
[[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH23]] + ; GFX6: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) + ; GFX6: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX6: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[ADD28]] + ; GFX6: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) + ; GFX6: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ADD29]], [[ZEXT24]] + ; GFX6: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE10]], [[ADD27]] + ; GFX6: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD30]] + ; GFX6: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[UADDO58]] + ; GFX6: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[ADD25]], [[ADD31]], [[UADDO49]] + ; GFX6: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[C5]], [[UADDO61]] + ; GFX6: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX6: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX6: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV31]], [[UADDO60]] + ; GFX6: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV30]], [[UADDE14]] + ; GFX6: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV30]], [[UADDO60]] + ; GFX6: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX6: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) + ; GFX6: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH25]] + ; GFX6: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) + ; GFX6: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX6: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV31]], [[UADDE14]] + ; GFX6: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV31]], [[UADDO60]] + ; GFX6: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV30]], [[UADDE14]] + ; GFX6: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX6: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT 
[[UADDO67]](s1) + ; GFX6: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH27]] + ; GFX6: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) + ; GFX6: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX6: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[UADDO68]], [[ADD32]] + ; GFX6: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) + ; GFX6: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[ADD33]], [[ZEXT29]] + ; GFX6: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV31]], [[UADDE14]] + ; GFX6: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD34]] + ; GFX6: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO70]](s32), [[ADD35]](s32) + ; GFX6: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX6: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV32]], [[UADDO70]] + ; GFX6: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV33]], [[UADDO70]] + ; GFX6: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV32]], [[ADD35]] + ; GFX6: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV32]], [[UADDO70]] + ; GFX6: [[ADD36:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] + ; GFX6: [[ADD37:%[0-9]+]]:_(s32) = G_ADD [[ADD36]], [[UMULH29]] + ; GFX6: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV28]], [[MUL33]] + ; GFX6: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV29]], [[ADD37]], [[USUBO11]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV29]], [[ADD37]] + ; GFX6: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX6: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV35]] + ; GFX6: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) + ; GFX6: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV34]] + ; GFX6: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) + ; GFX6: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV35]] + ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] + ; GFX6: 
[[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[USUBO10]], [[UV34]] + ; GFX6: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV35]], [[USUBO11]] + ; GFX6: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[C5]], [[USUBO13]] + ; GFX6: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX6: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UV36]] + ; GFX6: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[ADD35]], [[UV37]], [[UADDO73]] + ; GFX6: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO72]](s32), [[UADDE16]](s32) + ; GFX6: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE18]](s32), [[UV35]] + ; GFX6: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) + ; GFX6: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV34]] + ; GFX6: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) + ; GFX6: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE18]](s32), [[UV35]] + ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] + ; GFX6: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX6: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[UV38]] + ; GFX6: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[UV39]], [[UADDO75]] + ; GFX6: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO74]](s32), [[UADDE18]](s32) + ; GFX6: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV34]] + ; GFX6: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[UV35]], [[USUBO13]] + ; GFX6: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE20]], [[C5]], [[USUBO15]] + ; GFX6: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]] + ; GFX6: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]] + ; GFX6: 
[[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C5]] + ; GFX6: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64) ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX6: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C10]] + ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[FMUL2]] + ; GFX6: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C7]] + ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL5]], [[UITOFP2]] + ; GFX6: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX6: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL6]], [[FMUL]] + ; GFX6: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX6: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[UITOFP]] ; GFX8-LABEL: name: test_udiv_v2s64 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) ; GFX8: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX8: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV [[UV]], [[UV2]] - ; GFX8: [[UDIV1:%[0-9]+]]:_(s64) = G_UDIV [[UV1]], [[UV3]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[UDIV]](s64), [[UDIV1]](s64) + ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV4]](s32) + ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV5]](s32) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %250(s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 
0x3DF0000000000000 + ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C2]] + ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]] + ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %258(s32) + ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX8: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) + ; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX8: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]] + ; GFX8: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX8: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX8: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + 
; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX8: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX8: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX8: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]] + ; GFX8: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] + ; GFX8: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX8: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] + ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX8: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]] + ; GFX8: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] + ; GFX8: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD8]] + ; GFX8: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] + ; GFX8: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX8: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX8: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX8: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) + ; GFX8: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD8]] + ; GFX8: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] + ; GFX8: 
[[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD8]] + ; GFX8: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX8: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX8: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX8: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX8: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD9]] + ; GFX8: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX8: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]] + ; GFX8: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD8]] + ; GFX8: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]] + ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX8: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO11]] + ; GFX8: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDE2]], [[C5]], [[UADDO23]] + ; GFX8: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX8: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX8: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]] + ; GFX8: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE4]] + ; GFX8: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]] + ; GFX8: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX8: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX8: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX8: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) + ; GFX8: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX8: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE4]] + ; GFX8: 
[[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]] + ; GFX8: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE4]] + ; GFX8: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX8: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX8: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX8: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX8: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX8: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD13]] + ; GFX8: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX8: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]] + ; GFX8: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE4]] + ; GFX8: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]] + ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD16]](s32) + ; GFX8: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX8: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[UADDO32]] + ; GFX8: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]] + ; GFX8: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD16]] + ; GFX8: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV14]], [[UADDO32]] + ; GFX8: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX8: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]] + ; GFX8: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[MUL15]] + ; GFX8: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD18]], [[USUBO3]] + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD18]] + ; GFX8: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV17]] + ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), 
[[UV16]] + ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV17]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX8: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV16]] + ; GFX8: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV17]], [[USUBO3]] + ; GFX8: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] + ; GFX8: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX8: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX8: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV18]] + ; GFX8: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD16]], [[UV19]], [[UADDO35]] + ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE6]](s32) + ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV17]] + ; GFX8: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV16]] + ; GFX8: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV17]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX8: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX8: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV20]] + ; GFX8: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[UV21]], [[UADDO37]] + ; GFX8: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE8]](s32) + ; GFX8: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV16]] + ; GFX8: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV17]], [[USUBO5]] + ; GFX8: 
[[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]] + ; GFX8: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] + ; GFX8: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] + ; GFX8: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX8: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV22]](s32) + ; GFX8: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV23]](s32) + ; GFX8: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %14(s32) + ; GFX8: [[C8:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX8: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C8]] + ; GFX8: [[C9:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX8: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FMUL2]], [[C9]] + ; GFX8: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL3]] + ; GFX8: [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX8: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI %22(s32) + ; GFX8: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) + ; GFX8: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) + ; GFX8: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX8: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV24]], [[UV26]] + ; GFX8: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV25]], [[UV27]], [[USUBO9]] + ; GFX8: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI2]] + ; GFX8: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[FPTOUI2]] + ; GFX8: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]] + ; GFX8: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[FPTOUI2]] + ; GFX8: 
[[ADD19:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] + ; GFX8: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ADD19]], [[UMULH15]] + ; GFX8: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] + ; GFX8: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD20]] + ; GFX8: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] + ; GFX8: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX8: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) + ; GFX8: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH16]] + ; GFX8: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1) + ; GFX8: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX8: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD20]] + ; GFX8: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] + ; GFX8: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD20]] + ; GFX8: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX8: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) + ; GFX8: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH18]] + ; GFX8: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) + ; GFX8: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX8: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[ADD21]] + ; GFX8: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) + ; GFX8: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[ADD22]], [[ZEXT19]] + ; GFX8: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD20]] + ; GFX8: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD23]] + ; GFX8: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO46]] + ; GFX8: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD24]], [[UADDO49]] + ; GFX8: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI3]], [[ADD24]] + ; GFX8: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDO48]] + ; 
GFX8: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[UADDO48]] + ; GFX8: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE10]] + ; GFX8: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[UADDO48]] + ; GFX8: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] + ; GFX8: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ADD26]], [[UMULH20]] + ; GFX8: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE10]], [[MUL24]] + ; GFX8: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO48]], [[ADD27]] + ; GFX8: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[MUL24]] + ; GFX8: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX8: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) + ; GFX8: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH21]] + ; GFX8: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) + ; GFX8: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX8: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE10]], [[ADD27]] + ; GFX8: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE10]], [[MUL24]] + ; GFX8: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[ADD27]] + ; GFX8: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX8: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) + ; GFX8: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH23]] + ; GFX8: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) + ; GFX8: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX8: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[ADD28]] + ; GFX8: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) + ; GFX8: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ADD29]], [[ZEXT24]] + ; GFX8: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE10]], [[ADD27]] + ; GFX8: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD30]] + ; GFX8: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[UADDO58]] + ; GFX8: 
[[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[ADD25]], [[ADD31]], [[UADDO49]] + ; GFX8: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[C5]], [[UADDO61]] + ; GFX8: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX8: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX8: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV31]], [[UADDO60]] + ; GFX8: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV30]], [[UADDE14]] + ; GFX8: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV30]], [[UADDO60]] + ; GFX8: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX8: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) + ; GFX8: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH25]] + ; GFX8: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) + ; GFX8: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX8: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV31]], [[UADDE14]] + ; GFX8: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV31]], [[UADDO60]] + ; GFX8: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV30]], [[UADDE14]] + ; GFX8: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX8: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) + ; GFX8: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH27]] + ; GFX8: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) + ; GFX8: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX8: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[UADDO68]], [[ADD32]] + ; GFX8: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) + ; GFX8: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[ADD33]], [[ZEXT29]] + ; GFX8: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV31]], [[UADDE14]] + ; GFX8: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD34]] + ; GFX8: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO70]](s32), [[ADD35]](s32) + ; 
GFX8: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX8: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV32]], [[UADDO70]] + ; GFX8: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV33]], [[UADDO70]] + ; GFX8: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV32]], [[ADD35]] + ; GFX8: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV32]], [[UADDO70]] + ; GFX8: [[ADD36:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] + ; GFX8: [[ADD37:%[0-9]+]]:_(s32) = G_ADD [[ADD36]], [[UMULH29]] + ; GFX8: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV28]], [[MUL33]] + ; GFX8: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV29]], [[ADD37]], [[USUBO11]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV29]], [[ADD37]] + ; GFX8: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX8: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV35]] + ; GFX8: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) + ; GFX8: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV34]] + ; GFX8: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) + ; GFX8: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV35]] + ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] + ; GFX8: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[USUBO10]], [[UV34]] + ; GFX8: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV35]], [[USUBO11]] + ; GFX8: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[C5]], [[USUBO13]] + ; GFX8: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX8: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UV36]] + ; GFX8: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[ADD35]], [[UV37]], [[UADDO73]] + ; GFX8: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO72]](s32), [[UADDE16]](s32) + ; GFX8: 
[[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE18]](s32), [[UV35]] + ; GFX8: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) + ; GFX8: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV34]] + ; GFX8: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) + ; GFX8: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE18]](s32), [[UV35]] + ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] + ; GFX8: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX8: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[UV38]] + ; GFX8: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[UV39]], [[UADDO75]] + ; GFX8: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO74]](s32), [[UADDE18]](s32) + ; GFX8: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV34]] + ; GFX8: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[UV35]], [[USUBO13]] + ; GFX8: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE20]], [[C5]], [[USUBO15]] + ; GFX8: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]] + ; GFX8: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]] + ; GFX8: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C5]] + ; GFX8: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64) ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX8: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C10]] + ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[FMUL2]] + ; GFX8: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C7]] + ; GFX8: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL5]], [[UITOFP2]] + ; GFX8: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX8: 
[[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL6]], [[FMUL]] + ; GFX8: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX8: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[UITOFP]] ; GFX9-LABEL: name: test_udiv_v2s64 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV [[UV]], [[UV2]] - ; GFX9: [[UDIV1:%[0-9]+]]:_(s64) = G_UDIV [[UV1]], [[UV3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[UDIV]](s64), [[UDIV1]](s64) + ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV4]](s32) + ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV5]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %250(s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C2]] + ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]] + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %258(s32) + ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) + ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX9: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]] + ; GFX9: 
[[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX9: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX9: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX9: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX9: [[UADDO10:%[0-9]+]]:_(s32), 
[[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX9: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]] + ; GFX9: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] + ; GFX9: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX9: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] + ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX9: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]] + ; GFX9: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] + ; GFX9: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD8]] + ; GFX9: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] + ; GFX9: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX9: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) + ; GFX9: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD8]] + ; GFX9: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] + ; GFX9: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD8]] + ; GFX9: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX9: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX9: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD9]] + ; GFX9: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX9: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]] + ; GFX9: 
[[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD8]] + ; GFX9: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]] + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX9: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO11]] + ; GFX9: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDE2]], [[C5]], [[UADDO23]] + ; GFX9: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX9: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX9: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]] + ; GFX9: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE4]] + ; GFX9: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]] + ; GFX9: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX9: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX9: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX9: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) + ; GFX9: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX9: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE4]] + ; GFX9: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]] + ; GFX9: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE4]] + ; GFX9: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX9: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX9: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX9: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX9: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX9: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD13]] + ; GFX9: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX9: 
[[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]] + ; GFX9: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE4]] + ; GFX9: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD16]](s32) + ; GFX9: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX9: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[UADDO32]] + ; GFX9: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]] + ; GFX9: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD16]] + ; GFX9: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV14]], [[UADDO32]] + ; GFX9: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX9: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]] + ; GFX9: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[MUL15]] + ; GFX9: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD18]], [[USUBO3]] + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD18]] + ; GFX9: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV17]] + ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV16]] + ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV17]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX9: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV16]] + ; GFX9: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV17]], [[USUBO3]] + ; GFX9: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] + ; GFX9: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX9: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX9: 
[[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV18]] + ; GFX9: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD16]], [[UV19]], [[UADDO35]] + ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE6]](s32) + ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV17]] + ; GFX9: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV16]] + ; GFX9: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV17]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX9: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX9: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV20]] + ; GFX9: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[UV21]], [[UADDO37]] + ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE8]](s32) + ; GFX9: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV16]] + ; GFX9: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV17]], [[USUBO5]] + ; GFX9: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]] + ; GFX9: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] + ; GFX9: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] + ; GFX9: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX9: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV22]](s32) + ; GFX9: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV23]](s32) + ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 
0x41F0000000000000 + ; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %14(s32) + ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C8]] + ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX9: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FMUL2]], [[C9]] + ; GFX9: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL3]] + ; GFX9: [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX9: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI %22(s32) + ; GFX9: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) + ; GFX9: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) + ; GFX9: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX9: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV24]], [[UV26]] + ; GFX9: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV25]], [[UV27]], [[USUBO9]] + ; GFX9: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI2]] + ; GFX9: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[FPTOUI2]] + ; GFX9: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]] + ; GFX9: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[FPTOUI2]] + ; GFX9: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] + ; GFX9: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ADD19]], [[UMULH15]] + ; GFX9: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] + ; GFX9: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD20]] + ; GFX9: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] + ; GFX9: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX9: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) + ; GFX9: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH16]] + ; GFX9: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1) + ; GFX9: [[ADD21:%[0-9]+]]:_(s32) = G_ADD 
[[ZEXT15]], [[ZEXT16]] + ; GFX9: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD20]] + ; GFX9: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] + ; GFX9: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD20]] + ; GFX9: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX9: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) + ; GFX9: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH18]] + ; GFX9: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) + ; GFX9: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX9: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[ADD21]] + ; GFX9: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) + ; GFX9: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[ADD22]], [[ZEXT19]] + ; GFX9: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD20]] + ; GFX9: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD23]] + ; GFX9: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO46]] + ; GFX9: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD24]], [[UADDO49]] + ; GFX9: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI3]], [[ADD24]] + ; GFX9: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDO48]] + ; GFX9: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[UADDO48]] + ; GFX9: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE10]] + ; GFX9: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[UADDO48]] + ; GFX9: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] + ; GFX9: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ADD26]], [[UMULH20]] + ; GFX9: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE10]], [[MUL24]] + ; GFX9: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO48]], [[ADD27]] + ; GFX9: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[MUL24]] + ; GFX9: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX9: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT 
[[UADDO51]](s1) + ; GFX9: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH21]] + ; GFX9: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) + ; GFX9: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX9: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE10]], [[ADD27]] + ; GFX9: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE10]], [[MUL24]] + ; GFX9: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[ADD27]] + ; GFX9: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX9: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) + ; GFX9: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH23]] + ; GFX9: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) + ; GFX9: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX9: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[ADD28]] + ; GFX9: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) + ; GFX9: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ADD29]], [[ZEXT24]] + ; GFX9: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE10]], [[ADD27]] + ; GFX9: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD30]] + ; GFX9: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[UADDO58]] + ; GFX9: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[ADD25]], [[ADD31]], [[UADDO49]] + ; GFX9: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[C5]], [[UADDO61]] + ; GFX9: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX9: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX9: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV31]], [[UADDO60]] + ; GFX9: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV30]], [[UADDE14]] + ; GFX9: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV30]], [[UADDO60]] + ; GFX9: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX9: 
[[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) + ; GFX9: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH25]] + ; GFX9: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) + ; GFX9: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX9: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV31]], [[UADDE14]] + ; GFX9: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV31]], [[UADDO60]] + ; GFX9: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV30]], [[UADDE14]] + ; GFX9: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX9: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) + ; GFX9: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH27]] + ; GFX9: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) + ; GFX9: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX9: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[UADDO68]], [[ADD32]] + ; GFX9: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) + ; GFX9: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[ADD33]], [[ZEXT29]] + ; GFX9: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV31]], [[UADDE14]] + ; GFX9: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD34]] + ; GFX9: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO70]](s32), [[ADD35]](s32) + ; GFX9: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX9: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV32]], [[UADDO70]] + ; GFX9: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV33]], [[UADDO70]] + ; GFX9: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV32]], [[ADD35]] + ; GFX9: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV32]], [[UADDO70]] + ; GFX9: [[ADD36:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] + ; GFX9: [[ADD37:%[0-9]+]]:_(s32) = G_ADD [[ADD36]], [[UMULH29]] + ; GFX9: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV28]], [[MUL33]] + ; GFX9: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV29]], [[ADD37]], [[USUBO11]] + ; GFX9: 
[[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV29]], [[ADD37]] + ; GFX9: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX9: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV35]] + ; GFX9: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) + ; GFX9: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV34]] + ; GFX9: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) + ; GFX9: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV35]] + ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] + ; GFX9: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[USUBO10]], [[UV34]] + ; GFX9: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV35]], [[USUBO11]] + ; GFX9: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[C5]], [[USUBO13]] + ; GFX9: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX9: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UV36]] + ; GFX9: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[ADD35]], [[UV37]], [[UADDO73]] + ; GFX9: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO72]](s32), [[UADDE16]](s32) + ; GFX9: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE18]](s32), [[UV35]] + ; GFX9: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) + ; GFX9: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV34]] + ; GFX9: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) + ; GFX9: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE18]](s32), [[UV35]] + ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] + ; GFX9: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX9: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[UV38]] + ; GFX9: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE 
[[UADDE16]], [[UV39]], [[UADDO75]] + ; GFX9: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO74]](s32), [[UADDE18]](s32) + ; GFX9: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV34]] + ; GFX9: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[UV35]], [[USUBO13]] + ; GFX9: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE20]], [[C5]], [[USUBO15]] + ; GFX9: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]] + ; GFX9: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]] + ; GFX9: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C5]] + ; GFX9: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C10]] + ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[FMUL2]] + ; GFX9: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C7]] + ; GFX9: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL5]], [[UITOFP2]] + ; GFX9: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX9: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL6]], [[FMUL]] + ; GFX9: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX9: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[UITOFP]] %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>) = G_UDIV %0, %1 @@ -968,36 +2291,480 @@ ; GFX6-LABEL: name: test_udiv_s33 ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]](s64) - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[COPY1]](s64) - ; GFX6: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s33) - ; GFX6: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC1]](s33) - ; 
GFX6: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV [[ZEXT]], [[ZEXT1]] - ; GFX6: [[TRUNC2:%[0-9]+]]:_(s33) = G_TRUNC [[UDIV]](s64) - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s33) - ; GFX6: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 + ; GFX6: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) + ; GFX6: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] + ; GFX6: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) + ; GFX6: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY3]], [[C]] + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) + ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %14(s32) + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C3]] + ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]] + ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %22(s32) + ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX6: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX6: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] + ; GFX6: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX6: 
[[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX6: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX6: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX6: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX6: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX6: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX6: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX6: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX6: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]] + ; GFX6: 
[[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] + ; GFX6: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX6: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] + ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]] + ; GFX6: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] + ; GFX6: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD8]] + ; GFX6: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] + ; GFX6: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX6: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX6: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) + ; GFX6: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX6: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD8]] + ; GFX6: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] + ; GFX6: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD8]] + ; GFX6: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX6: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX6: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX6: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX6: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD9]] + ; GFX6: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX6: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]] + ; GFX6: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD8]] + ; GFX6: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]] + ; GFX6: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO 
[[UADDO10]], [[UADDO20]] + ; GFX6: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO11]] + ; GFX6: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDE2]], [[C6]], [[UADDO23]] + ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; GFX6: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] + ; GFX6: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE4]] + ; GFX6: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] + ; GFX6: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX6: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX6: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) + ; GFX6: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX6: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE4]] + ; GFX6: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] + ; GFX6: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE4]] + ; GFX6: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX6: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX6: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX6: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX6: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD13]] + ; GFX6: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX6: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]] + ; GFX6: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE4]] + ; GFX6: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]] + ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), 
[[ADD16]](s32) + ; GFX6: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX6: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] + ; GFX6: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] + ; GFX6: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD16]] + ; GFX6: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] + ; GFX6: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]] + ; GFX6: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] + ; GFX6: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD18]], [[USUBO3]] + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD18]] + ; GFX6: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] + ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]] + ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX6: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]] + ; GFX6: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] + ; GFX6: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX6: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX6: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX6: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV14]] + ; GFX6: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD16]], [[UV15]], [[UADDO35]] + ; GFX6: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), 
[[UADDE6]](s32) + ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] + ; GFX6: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]] + ; GFX6: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX6: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX6: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV16]] + ; GFX6: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[UV17]], [[UADDO37]] + ; GFX6: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE8]](s32) + ; GFX6: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV12]] + ; GFX6: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]] + ; GFX6: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX6: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] + ; GFX6: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] + ; GFX6: [[COPY4:%[0-9]+]]:_(s64) = COPY [[SELECT3]](s64) + ; GFX6: $vgpr0_vgpr1 = COPY [[COPY4]](s64) + ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]] + ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UITOFP]] ; GFX8-LABEL: name: test_udiv_s33 ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8: 
[[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]](s64) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[COPY1]](s64) - ; GFX8: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s33) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC1]](s33) - ; GFX8: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV [[ZEXT]], [[ZEXT1]] - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s33) = G_TRUNC [[UDIV]](s64) - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s33) - ; GFX8: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 + ; GFX8: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) + ; GFX8: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] + ; GFX8: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) + ; GFX8: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY3]], [[C]] + ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) + ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %14(s32) + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C3]] + ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]] + ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %22(s32) + ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX8: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX8: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] + ; GFX8: 
[[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX8: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX8: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX8: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX8: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX8: [[UADDO10:%[0-9]+]]:_(s32), 
[[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]] + ; GFX8: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] + ; GFX8: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX8: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] + ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX8: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]] + ; GFX8: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] + ; GFX8: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD8]] + ; GFX8: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] + ; GFX8: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX8: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX8: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX8: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) + ; GFX8: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD8]] + ; GFX8: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] + ; GFX8: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD8]] + ; GFX8: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX8: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX8: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX8: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX8: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD9]] + ; GFX8: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX8: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]] + ; GFX8: 
[[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD8]] + ; GFX8: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]] + ; GFX8: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX8: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO11]] + ; GFX8: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDE2]], [[C6]], [[UADDO23]] + ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; GFX8: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] + ; GFX8: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE4]] + ; GFX8: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] + ; GFX8: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX8: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX8: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX8: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) + ; GFX8: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX8: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE4]] + ; GFX8: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] + ; GFX8: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE4]] + ; GFX8: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX8: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX8: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX8: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX8: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX8: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD13]] + ; GFX8: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX8: 
[[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]] + ; GFX8: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE4]] + ; GFX8: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]] + ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD16]](s32) + ; GFX8: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX8: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] + ; GFX8: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] + ; GFX8: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD16]] + ; GFX8: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] + ; GFX8: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX8: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]] + ; GFX8: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] + ; GFX8: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD18]], [[USUBO3]] + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD18]] + ; GFX8: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] + ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]] + ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX8: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]] + ; GFX8: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] + ; GFX8: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX8: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX8: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX8: 
[[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV14]] + ; GFX8: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD16]], [[UV15]], [[UADDO35]] + ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE6]](s32) + ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] + ; GFX8: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]] + ; GFX8: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX8: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX8: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV16]] + ; GFX8: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[UV17]], [[UADDO37]] + ; GFX8: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE8]](s32) + ; GFX8: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV12]] + ; GFX8: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]] + ; GFX8: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX8: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] + ; GFX8: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] + ; GFX8: [[COPY4:%[0-9]+]]:_(s64) = COPY [[SELECT3]](s64) + ; GFX8: $vgpr0_vgpr1 = COPY [[COPY4]](s64) + ; GFX8: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]] + ; GFX8: [[FMUL3:%[0-9]+]]:_(s32) 
= G_FMUL [[UITOFP1]], [[C1]] + ; GFX8: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UITOFP]] ; GFX9-LABEL: name: test_udiv_s33 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]](s64) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[COPY1]](s64) - ; GFX9: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s33) - ; GFX9: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC1]](s33) - ; GFX9: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV [[ZEXT]], [[ZEXT1]] - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s33) = G_TRUNC [[UDIV]](s64) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s33) - ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 + ; GFX9: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) + ; GFX9: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] + ; GFX9: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) + ; GFX9: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY3]], [[C]] + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) + ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %14(s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C3]] + ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]] + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %22(s32) + ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9: [[UV2:%[0-9]+]]:_(s32), 
[[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX9: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] + ; GFX9: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX9: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX9: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX9: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX9: 
[[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX9: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX9: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]] + ; GFX9: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] + ; GFX9: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX9: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] + ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX9: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]] + ; GFX9: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] + ; GFX9: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD8]] + ; GFX9: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] + ; GFX9: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX9: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) + ; GFX9: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD8]] + ; GFX9: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] + ; GFX9: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD8]] + ; GFX9: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX9: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX9: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9: 
[[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD9]] + ; GFX9: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX9: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]] + ; GFX9: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD8]] + ; GFX9: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]] + ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX9: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO11]] + ; GFX9: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDE2]], [[C6]], [[UADDO23]] + ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; GFX9: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] + ; GFX9: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE4]] + ; GFX9: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] + ; GFX9: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX9: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX9: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX9: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) + ; GFX9: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX9: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE4]] + ; GFX9: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] + ; GFX9: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE4]] + ; GFX9: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX9: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX9: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX9: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX9: 
[[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX9: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD13]] + ; GFX9: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX9: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]] + ; GFX9: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE4]] + ; GFX9: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD16]](s32) + ; GFX9: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX9: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] + ; GFX9: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] + ; GFX9: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD16]] + ; GFX9: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] + ; GFX9: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX9: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]] + ; GFX9: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] + ; GFX9: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD18]], [[USUBO3]] + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD18]] + ; GFX9: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] + ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]] + ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX9: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]] + ; GFX9: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] + ; GFX9: [[USUBE6:%[0-9]+]]:_(s32), 
[[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX9: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX9: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX9: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV14]] + ; GFX9: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD16]], [[UV15]], [[UADDO35]] + ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE6]](s32) + ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] + ; GFX9: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]] + ; GFX9: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX9: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX9: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV16]] + ; GFX9: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[UV17]], [[UADDO37]] + ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE8]](s32) + ; GFX9: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV12]] + ; GFX9: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]] + ; GFX9: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX9: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] + ; GFX9: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s64) = COPY 
[[SELECT3]](s64) + ; GFX9: $vgpr0_vgpr1 = COPY [[COPY4]](s64) + ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]] + ; GFX9: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX9: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UITOFP]] %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s33) = G_TRUNC %0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX6 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX8 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s --- name: test_urem_s32 @@ -291,18 +291,441 @@ ; GFX6-LABEL: name: test_urem_s64 ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6: [[UREM:%[0-9]+]]:_(s64) = G_UREM [[COPY]], [[COPY1]] - ; GFX6: $vgpr0_vgpr1 = COPY [[UREM]](s64) + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) + ; GFX6: 
[[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %8(s32) + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C2]] + ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]] + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %16(s32) + ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX6: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) + ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX6: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] + ; GFX6: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX6: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX6: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX6: 
[[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX6: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX6: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX6: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX6: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX6: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX6: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]] + ; GFX6: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] + ; GFX6: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX6: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] + ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]] + ; GFX6: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] + ; GFX6: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD8]] + ; GFX6: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] + ; GFX6: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6: [[ZEXT5:%[0-9]+]]:_(s32) 
= G_ZEXT [[UADDO13]](s1) + ; GFX6: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX6: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) + ; GFX6: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX6: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD8]] + ; GFX6: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] + ; GFX6: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD8]] + ; GFX6: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX6: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX6: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX6: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX6: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD9]] + ; GFX6: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX6: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]] + ; GFX6: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD8]] + ; GFX6: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]] + ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX6: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO11]] + ; GFX6: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDE2]], [[C5]], [[UADDO23]] + ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX6: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] + ; GFX6: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE4]] + ; GFX6: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] + ; GFX6: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], 
[[MUL13]] + ; GFX6: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX6: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX6: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) + ; GFX6: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX6: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE4]] + ; GFX6: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] + ; GFX6: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE4]] + ; GFX6: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX6: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX6: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX6: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX6: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD13]] + ; GFX6: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX6: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]] + ; GFX6: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE4]] + ; GFX6: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]] + ; GFX6: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX6: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] + ; GFX6: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] + ; GFX6: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD16]] + ; GFX6: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] + ; GFX6: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]] + ; GFX6: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] + ; GFX6: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD18]], [[USUBO3]] + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD18]] + ; GFX6: 
[[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX6: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] + ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]] + ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX6: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]] + ; GFX6: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] + ; GFX6: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] + ; GFX6: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) + ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] + ; GFX6: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]] + ; GFX6: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX6: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV12]] + ; GFX6: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]] + ; GFX6: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]] + ; GFX6: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) + ; GFX6: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] + ; GFX6: 
[[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] + ; GFX6: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]] + ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UITOFP]] ; GFX8-LABEL: name: test_urem_s64 ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8: [[UREM:%[0-9]+]]:_(s64) = G_UREM [[COPY]], [[COPY1]] - ; GFX8: $vgpr0_vgpr1 = COPY [[UREM]](s64) + ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) + ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %8(s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C2]] + ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]] + ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %16(s32) + ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX8: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) + ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX8: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] + ; GFX8: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = 
G_USUBE [[UV3]], [[UV5]], [[USUBO1]] + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX8: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX8: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX8: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX8: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX8: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX8: 
[[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]] + ; GFX8: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] + ; GFX8: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX8: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] + ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX8: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]] + ; GFX8: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] + ; GFX8: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD8]] + ; GFX8: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] + ; GFX8: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX8: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX8: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX8: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) + ; GFX8: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD8]] + ; GFX8: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] + ; GFX8: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD8]] + ; GFX8: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX8: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX8: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX8: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX8: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD9]] + ; GFX8: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX8: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]] + ; GFX8: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD8]] + ; GFX8: 
[[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]] + ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX8: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO11]] + ; GFX8: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDE2]], [[C5]], [[UADDO23]] + ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX8: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] + ; GFX8: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE4]] + ; GFX8: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] + ; GFX8: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX8: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX8: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX8: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) + ; GFX8: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX8: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE4]] + ; GFX8: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] + ; GFX8: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE4]] + ; GFX8: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX8: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX8: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX8: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX8: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX8: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD13]] + ; GFX8: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX8: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]] + ; GFX8: 
[[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE4]] + ; GFX8: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]] + ; GFX8: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX8: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] + ; GFX8: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] + ; GFX8: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD16]] + ; GFX8: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] + ; GFX8: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX8: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]] + ; GFX8: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] + ; GFX8: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD18]], [[USUBO3]] + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD18]] + ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX8: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] + ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]] + ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX8: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]] + ; GFX8: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] + ; GFX8: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] + ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) + ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] + ; GFX8: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT 
[[ICMP3]](s1) + ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]] + ; GFX8: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX8: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV12]] + ; GFX8: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]] + ; GFX8: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]] + ; GFX8: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) + ; GFX8: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] + ; GFX8: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] + ; GFX8: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; GFX8: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]] + ; GFX8: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX8: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UITOFP]] ; GFX9-LABEL: name: test_urem_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[UREM:%[0-9]+]]:_(s64) = G_UREM [[COPY]], [[COPY1]] - ; GFX9: $vgpr0_vgpr1 = COPY [[UREM]](s64) + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) + ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %8(s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 
0x43EFFFFF80000000 + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C2]] + ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]] + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %16(s32) + ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) + ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX9: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] + ; GFX9: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX9: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX9: 
[[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX9: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX9: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX9: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX9: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]] + ; GFX9: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] + ; GFX9: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX9: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] + ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX9: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]] + ; GFX9: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] + ; GFX9: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD8]] + ; GFX9: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] + ; GFX9: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX9: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) + ; GFX9: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; 
GFX9: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD8]] + ; GFX9: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] + ; GFX9: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD8]] + ; GFX9: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX9: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX9: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD9]] + ; GFX9: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX9: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]] + ; GFX9: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD8]] + ; GFX9: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]] + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX9: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO11]] + ; GFX9: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDE2]], [[C5]], [[UADDO23]] + ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX9: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] + ; GFX9: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE4]] + ; GFX9: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] + ; GFX9: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX9: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX9: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX9: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) + ; GFX9: 
[[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX9: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE4]] + ; GFX9: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] + ; GFX9: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE4]] + ; GFX9: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX9: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX9: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX9: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX9: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX9: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD13]] + ; GFX9: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX9: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]] + ; GFX9: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE4]] + ; GFX9: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]] + ; GFX9: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX9: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] + ; GFX9: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] + ; GFX9: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD16]] + ; GFX9: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] + ; GFX9: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX9: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]] + ; GFX9: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] + ; GFX9: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD18]], [[USUBO3]] + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD18]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX9: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] + ; GFX9: 
[[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]] + ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX9: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]] + ; GFX9: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] + ; GFX9: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] + ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) + ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] + ; GFX9: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]] + ; GFX9: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX9: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV12]] + ; GFX9: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]] + ; GFX9: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]] + ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) + ; GFX9: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] + ; GFX9: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] + ; GFX9: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL 
[[INTRINSIC_TRUNC]], [[C3]] + ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]] + ; GFX9: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX9: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UITOFP]] %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_UREM %0, %1 @@ -320,28 +743,868 @@ ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX6: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) ; GFX6: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX6: [[UREM:%[0-9]+]]:_(s64) = G_UREM [[UV]], [[UV2]] - ; GFX6: [[UREM1:%[0-9]+]]:_(s64) = G_UREM [[UV1]], [[UV3]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[UREM]](s64), [[UREM1]](s64) + ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV4]](s32) + ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV5]](s32) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %234(s32) + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C2]] + ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]] + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %242(s32) + ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX6: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) + ; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX6: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) 
= G_USUBO [[UV6]], [[UV8]] + ; GFX6: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX6: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX6: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX6: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX6: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX6: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX6: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX6: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX6: 
[[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]] + ; GFX6: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] + ; GFX6: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX6: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] + ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]] + ; GFX6: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] + ; GFX6: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD8]] + ; GFX6: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] + ; GFX6: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX6: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX6: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) + ; GFX6: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX6: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD8]] + ; GFX6: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] + ; GFX6: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD8]] + ; GFX6: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX6: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX6: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX6: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX6: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD9]] + ; GFX6: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX6: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], 
[[ZEXT9]] + ; GFX6: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD8]] + ; GFX6: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]] + ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX6: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO11]] + ; GFX6: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDE2]], [[C5]], [[UADDO23]] + ; GFX6: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX6: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX6: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]] + ; GFX6: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE4]] + ; GFX6: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]] + ; GFX6: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX6: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX6: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) + ; GFX6: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX6: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE4]] + ; GFX6: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]] + ; GFX6: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE4]] + ; GFX6: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX6: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX6: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX6: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX6: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD13]] + ; GFX6: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT 
[[UADDO33]](s1) + ; GFX6: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]] + ; GFX6: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE4]] + ; GFX6: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]] + ; GFX6: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX6: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[UADDO32]] + ; GFX6: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]] + ; GFX6: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD16]] + ; GFX6: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV14]], [[UADDO32]] + ; GFX6: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]] + ; GFX6: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[MUL15]] + ; GFX6: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD18]], [[USUBO3]] + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD18]] + ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX6: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV17]] + ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV16]] + ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV17]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX6: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV16]] + ; GFX6: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV17]], [[USUBO3]] + ; GFX6: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] + ; GFX6: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) + ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP 
intpred(uge), [[USUBE6]](s32), [[UV17]] + ; GFX6: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV16]] + ; GFX6: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV17]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX6: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV16]] + ; GFX6: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV17]], [[USUBO5]] + ; GFX6: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]] + ; GFX6: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) + ; GFX6: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] + ; GFX6: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] + ; GFX6: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX6: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV18]](s32) + ; GFX6: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV19]](s32) + ; GFX6: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %14(s32) + ; GFX6: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C7]] + ; GFX6: [[C8:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FMUL2]], [[C8]] + ; GFX6: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL3]] + ; GFX6: [[C9:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX6: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI %22(s32) + ; GFX6: [[FPTOUI3:%[0-9]+]]:_(s32) = 
G_FPTOUI [[INTRINSIC_TRUNC1]](s32) + ; GFX6: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) + ; GFX6: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX6: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV20]], [[UV22]] + ; GFX6: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV21]], [[UV23]], [[USUBO9]] + ; GFX6: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI2]] + ; GFX6: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[FPTOUI2]] + ; GFX6: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]] + ; GFX6: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[FPTOUI2]] + ; GFX6: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] + ; GFX6: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ADD19]], [[UMULH15]] + ; GFX6: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] + ; GFX6: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD20]] + ; GFX6: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] + ; GFX6: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX6: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) + ; GFX6: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UMULH16]] + ; GFX6: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) + ; GFX6: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX6: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD20]] + ; GFX6: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] + ; GFX6: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD20]] + ; GFX6: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX6: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) + ; GFX6: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH18]] + ; GFX6: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1) + ; GFX6: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX6: 
[[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UADDO40]], [[ADD21]] + ; GFX6: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) + ; GFX6: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[ADD22]], [[ZEXT19]] + ; GFX6: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD20]] + ; GFX6: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD23]] + ; GFX6: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO42]] + ; GFX6: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD24]], [[UADDO45]] + ; GFX6: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI3]], [[ADD24]] + ; GFX6: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDO44]] + ; GFX6: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[UADDO44]] + ; GFX6: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE6]] + ; GFX6: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[UADDO44]] + ; GFX6: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] + ; GFX6: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ADD26]], [[UMULH20]] + ; GFX6: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE6]], [[MUL24]] + ; GFX6: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO44]], [[ADD27]] + ; GFX6: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[MUL24]] + ; GFX6: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX6: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) + ; GFX6: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH21]] + ; GFX6: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) + ; GFX6: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX6: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE6]], [[ADD27]] + ; GFX6: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE6]], [[MUL24]] + ; GFX6: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[ADD27]] + ; GFX6: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX6: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) + ; GFX6: 
[[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH23]] + ; GFX6: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) + ; GFX6: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX6: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD28]] + ; GFX6: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) + ; GFX6: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ADD29]], [[ZEXT24]] + ; GFX6: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE6]], [[ADD27]] + ; GFX6: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD30]] + ; GFX6: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UADDO54]] + ; GFX6: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD25]], [[ADD31]], [[UADDO45]] + ; GFX6: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[C5]], [[UADDO57]] + ; GFX6: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX6: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX6: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDO56]] + ; GFX6: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV26]], [[UADDE10]] + ; GFX6: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV26]], [[UADDO56]] + ; GFX6: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX6: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) + ; GFX6: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH25]] + ; GFX6: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) + ; GFX6: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX6: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDE10]] + ; GFX6: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV27]], [[UADDO56]] + ; GFX6: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV26]], [[UADDE10]] + ; GFX6: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX6: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT 
[[UADDO63]](s1) + ; GFX6: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH27]] + ; GFX6: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) + ; GFX6: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX6: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD32]] + ; GFX6: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) + ; GFX6: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[ADD33]], [[ZEXT29]] + ; GFX6: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV27]], [[UADDE10]] + ; GFX6: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD34]] + ; GFX6: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX6: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDO66]] + ; GFX6: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO66]] + ; GFX6: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[ADD35]] + ; GFX6: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO66]] + ; GFX6: [[ADD36:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] + ; GFX6: [[ADD37:%[0-9]+]]:_(s32) = G_ADD [[ADD36]], [[UMULH29]] + ; GFX6: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV24]], [[MUL33]] + ; GFX6: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV25]], [[ADD37]], [[USUBO11]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV25]], [[ADD37]] + ; GFX6: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO10]](s32), [[USUBE14]](s32) + ; GFX6: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX6: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV31]] + ; GFX6: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) + ; GFX6: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV30]] + ; GFX6: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) + ; GFX6: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV31]] + ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] + ; GFX6: 
[[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[USUBO10]], [[UV30]] + ; GFX6: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV31]], [[USUBO11]] + ; GFX6: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[C5]], [[USUBO13]] + ; GFX6: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO12]](s32), [[USUBE18]](s32) + ; GFX6: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE18]](s32), [[UV31]] + ; GFX6: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) + ; GFX6: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV30]] + ; GFX6: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) + ; GFX6: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE18]](s32), [[UV31]] + ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] + ; GFX6: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV30]] + ; GFX6: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[UV31]], [[USUBO13]] + ; GFX6: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE20]], [[C5]], [[USUBO15]] + ; GFX6: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE22]](s32) + ; GFX6: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]] + ; GFX6: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]] + ; GFX6: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C5]] + ; GFX6: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64) ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX6: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C9]] + ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[FMUL2]] + ; GFX6: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C6]] + ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL5]], 
[[UITOFP2]] + ; GFX6: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX6: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL6]], [[FMUL]] + ; GFX6: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX6: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[UITOFP]] ; GFX8-LABEL: name: test_urem_v2s64 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) ; GFX8: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX8: [[UREM:%[0-9]+]]:_(s64) = G_UREM [[UV]], [[UV2]] - ; GFX8: [[UREM1:%[0-9]+]]:_(s64) = G_UREM [[UV1]], [[UV3]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[UREM]](s64), [[UREM1]](s64) + ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV4]](s32) + ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV5]](s32) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %234(s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C2]] + ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]] + ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %242(s32) + ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX8: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) + ; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; 
GFX8: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]] + ; GFX8: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX8: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX8: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX8: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX8: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX8: 
[[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX8: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]] + ; GFX8: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] + ; GFX8: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX8: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] + ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX8: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]] + ; GFX8: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] + ; GFX8: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD8]] + ; GFX8: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] + ; GFX8: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX8: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX8: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX8: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) + ; GFX8: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD8]] + ; GFX8: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] + ; GFX8: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD8]] + ; GFX8: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX8: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX8: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX8: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX8: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD9]] + ; GFX8: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT 
[[UADDO21]](s1) + ; GFX8: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]] + ; GFX8: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD8]] + ; GFX8: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]] + ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX8: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO11]] + ; GFX8: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDE2]], [[C5]], [[UADDO23]] + ; GFX8: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX8: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX8: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]] + ; GFX8: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE4]] + ; GFX8: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]] + ; GFX8: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX8: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX8: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX8: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) + ; GFX8: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX8: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE4]] + ; GFX8: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]] + ; GFX8: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE4]] + ; GFX8: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX8: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX8: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX8: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX8: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX8: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO 
[[UADDO30]], [[ADD13]] + ; GFX8: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX8: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]] + ; GFX8: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE4]] + ; GFX8: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]] + ; GFX8: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX8: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[UADDO32]] + ; GFX8: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]] + ; GFX8: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD16]] + ; GFX8: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV14]], [[UADDO32]] + ; GFX8: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX8: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]] + ; GFX8: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[MUL15]] + ; GFX8: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD18]], [[USUBO3]] + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD18]] + ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX8: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV17]] + ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV16]] + ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV17]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX8: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV16]] + ; GFX8: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV17]], [[USUBO3]] + ; GFX8: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] + ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES 
[[USUBO4]](s32), [[USUBE6]](s32) + ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV17]] + ; GFX8: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV16]] + ; GFX8: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV17]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX8: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV16]] + ; GFX8: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV17]], [[USUBO5]] + ; GFX8: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]] + ; GFX8: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) + ; GFX8: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] + ; GFX8: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] + ; GFX8: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX8: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV18]](s32) + ; GFX8: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV19]](s32) + ; GFX8: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %14(s32) + ; GFX8: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX8: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C7]] + ; GFX8: [[C8:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX8: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FMUL2]], [[C8]] + ; GFX8: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL3]] + ; GFX8: [[C9:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX8: 
[[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI %22(s32) + ; GFX8: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) + ; GFX8: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) + ; GFX8: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX8: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV20]], [[UV22]] + ; GFX8: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV21]], [[UV23]], [[USUBO9]] + ; GFX8: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI2]] + ; GFX8: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[FPTOUI2]] + ; GFX8: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]] + ; GFX8: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[FPTOUI2]] + ; GFX8: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] + ; GFX8: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ADD19]], [[UMULH15]] + ; GFX8: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] + ; GFX8: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD20]] + ; GFX8: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] + ; GFX8: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX8: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) + ; GFX8: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UMULH16]] + ; GFX8: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) + ; GFX8: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX8: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD20]] + ; GFX8: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] + ; GFX8: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD20]] + ; GFX8: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX8: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) + ; GFX8: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH18]] + ; GFX8: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT 
[[UADDO41]](s1) + ; GFX8: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX8: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UADDO40]], [[ADD21]] + ; GFX8: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) + ; GFX8: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[ADD22]], [[ZEXT19]] + ; GFX8: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD20]] + ; GFX8: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD23]] + ; GFX8: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO42]] + ; GFX8: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD24]], [[UADDO45]] + ; GFX8: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI3]], [[ADD24]] + ; GFX8: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDO44]] + ; GFX8: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[UADDO44]] + ; GFX8: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE6]] + ; GFX8: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[UADDO44]] + ; GFX8: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] + ; GFX8: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ADD26]], [[UMULH20]] + ; GFX8: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE6]], [[MUL24]] + ; GFX8: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO44]], [[ADD27]] + ; GFX8: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[MUL24]] + ; GFX8: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX8: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) + ; GFX8: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH21]] + ; GFX8: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) + ; GFX8: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX8: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE6]], [[ADD27]] + ; GFX8: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE6]], [[MUL24]] + ; GFX8: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[ADD27]] + ; GFX8: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] 
+ ; GFX8: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) + ; GFX8: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH23]] + ; GFX8: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) + ; GFX8: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX8: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD28]] + ; GFX8: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) + ; GFX8: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ADD29]], [[ZEXT24]] + ; GFX8: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE6]], [[ADD27]] + ; GFX8: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD30]] + ; GFX8: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UADDO54]] + ; GFX8: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD25]], [[ADD31]], [[UADDO45]] + ; GFX8: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[C5]], [[UADDO57]] + ; GFX8: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX8: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX8: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDO56]] + ; GFX8: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV26]], [[UADDE10]] + ; GFX8: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV26]], [[UADDO56]] + ; GFX8: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX8: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) + ; GFX8: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH25]] + ; GFX8: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) + ; GFX8: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX8: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDE10]] + ; GFX8: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV27]], [[UADDO56]] + ; GFX8: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV26]], [[UADDE10]] + ; GFX8: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO 
[[MUL32]], [[UMULH26]] + ; GFX8: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) + ; GFX8: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH27]] + ; GFX8: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) + ; GFX8: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX8: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD32]] + ; GFX8: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) + ; GFX8: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[ADD33]], [[ZEXT29]] + ; GFX8: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV27]], [[UADDE10]] + ; GFX8: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD34]] + ; GFX8: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX8: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDO66]] + ; GFX8: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO66]] + ; GFX8: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[ADD35]] + ; GFX8: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO66]] + ; GFX8: [[ADD36:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] + ; GFX8: [[ADD37:%[0-9]+]]:_(s32) = G_ADD [[ADD36]], [[UMULH29]] + ; GFX8: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV24]], [[MUL33]] + ; GFX8: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV25]], [[ADD37]], [[USUBO11]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV25]], [[ADD37]] + ; GFX8: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO10]](s32), [[USUBE14]](s32) + ; GFX8: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX8: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV31]] + ; GFX8: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) + ; GFX8: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV30]] + ; GFX8: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) + ; GFX8: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV31]] + ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = 
G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] + ; GFX8: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[USUBO10]], [[UV30]] + ; GFX8: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV31]], [[USUBO11]] + ; GFX8: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[C5]], [[USUBO13]] + ; GFX8: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO12]](s32), [[USUBE18]](s32) + ; GFX8: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE18]](s32), [[UV31]] + ; GFX8: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) + ; GFX8: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV30]] + ; GFX8: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) + ; GFX8: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE18]](s32), [[UV31]] + ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] + ; GFX8: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV30]] + ; GFX8: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[UV31]], [[USUBO13]] + ; GFX8: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE20]], [[C5]], [[USUBO15]] + ; GFX8: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE22]](s32) + ; GFX8: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]] + ; GFX8: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]] + ; GFX8: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C5]] + ; GFX8: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64) ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX8: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C9]] + ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[FMUL2]] + ; GFX8: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C6]] + ; 
GFX8: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL5]], [[UITOFP2]] + ; GFX8: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX8: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL6]], [[FMUL]] + ; GFX8: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX8: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[UITOFP]] ; GFX9-LABEL: name: test_urem_v2s64 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9: [[UREM:%[0-9]+]]:_(s64) = G_UREM [[UV]], [[UV2]] - ; GFX9: [[UREM1:%[0-9]+]]:_(s64) = G_UREM [[UV1]], [[UV3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[UREM]](s64), [[UREM1]](s64) + ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV4]](s32) + ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV5]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %234(s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C2]] + ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]] + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %242(s32) + ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) + ; GFX9: [[UV8:%[0-9]+]]:_(s32), 
[[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX9: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]] + ; GFX9: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX9: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX9: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX9: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = 
G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX9: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX9: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]] + ; GFX9: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] + ; GFX9: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX9: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] + ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX9: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]] + ; GFX9: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] + ; GFX9: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD8]] + ; GFX9: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] + ; GFX9: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX9: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) + ; GFX9: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD8]] + ; GFX9: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] + ; GFX9: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD8]] + ; GFX9: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX9: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX9: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD9]] + ; GFX9: 
[[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX9: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]] + ; GFX9: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD8]] + ; GFX9: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]] + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX9: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO11]] + ; GFX9: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDE2]], [[C5]], [[UADDO23]] + ; GFX9: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX9: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX9: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]] + ; GFX9: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE4]] + ; GFX9: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]] + ; GFX9: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX9: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX9: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX9: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) + ; GFX9: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX9: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE4]] + ; GFX9: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]] + ; GFX9: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE4]] + ; GFX9: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX9: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX9: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX9: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX9: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX9: [[UADDO32:%[0-9]+]]:_(s32), 
[[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD13]] + ; GFX9: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX9: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]] + ; GFX9: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE4]] + ; GFX9: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]] + ; GFX9: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX9: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[UADDO32]] + ; GFX9: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]] + ; GFX9: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD16]] + ; GFX9: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV14]], [[UADDO32]] + ; GFX9: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX9: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]] + ; GFX9: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[MUL15]] + ; GFX9: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD18]], [[USUBO3]] + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD18]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX9: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV17]] + ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV16]] + ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV17]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX9: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV16]] + ; GFX9: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV17]], [[USUBO3]] + ; GFX9: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] + ; GFX9: 
[[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) + ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV17]] + ; GFX9: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV16]] + ; GFX9: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV17]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX9: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV16]] + ; GFX9: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV17]], [[USUBO5]] + ; GFX9: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]] + ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) + ; GFX9: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] + ; GFX9: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] + ; GFX9: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX9: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV18]](s32) + ; GFX9: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV19]](s32) + ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %14(s32) + ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C7]] + ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX9: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FMUL2]], [[C8]] + ; GFX9: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL3]] + ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_FCONSTANT 
float 0xC1F0000000000000 + ; GFX9: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI %22(s32) + ; GFX9: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) + ; GFX9: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) + ; GFX9: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX9: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV20]], [[UV22]] + ; GFX9: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV21]], [[UV23]], [[USUBO9]] + ; GFX9: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI2]] + ; GFX9: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[FPTOUI2]] + ; GFX9: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]] + ; GFX9: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[FPTOUI2]] + ; GFX9: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] + ; GFX9: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ADD19]], [[UMULH15]] + ; GFX9: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] + ; GFX9: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD20]] + ; GFX9: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] + ; GFX9: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX9: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) + ; GFX9: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UMULH16]] + ; GFX9: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) + ; GFX9: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX9: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD20]] + ; GFX9: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] + ; GFX9: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD20]] + ; GFX9: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX9: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) + ; GFX9: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH18]] + ; GFX9: 
[[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1) + ; GFX9: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX9: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UADDO40]], [[ADD21]] + ; GFX9: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) + ; GFX9: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[ADD22]], [[ZEXT19]] + ; GFX9: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD20]] + ; GFX9: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD23]] + ; GFX9: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO42]] + ; GFX9: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD24]], [[UADDO45]] + ; GFX9: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI3]], [[ADD24]] + ; GFX9: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDO44]] + ; GFX9: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[UADDO44]] + ; GFX9: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE6]] + ; GFX9: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[UADDO44]] + ; GFX9: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] + ; GFX9: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ADD26]], [[UMULH20]] + ; GFX9: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE6]], [[MUL24]] + ; GFX9: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO44]], [[ADD27]] + ; GFX9: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[MUL24]] + ; GFX9: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX9: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) + ; GFX9: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH21]] + ; GFX9: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) + ; GFX9: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX9: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE6]], [[ADD27]] + ; GFX9: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE6]], [[MUL24]] + ; GFX9: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[ADD27]] + ; GFX9: [[UADDO50:%[0-9]+]]:_(s32), 
[[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX9: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) + ; GFX9: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH23]] + ; GFX9: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) + ; GFX9: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX9: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD28]] + ; GFX9: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) + ; GFX9: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ADD29]], [[ZEXT24]] + ; GFX9: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE6]], [[ADD27]] + ; GFX9: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD30]] + ; GFX9: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UADDO54]] + ; GFX9: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD25]], [[ADD31]], [[UADDO45]] + ; GFX9: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[C5]], [[UADDO57]] + ; GFX9: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX9: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX9: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDO56]] + ; GFX9: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV26]], [[UADDE10]] + ; GFX9: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV26]], [[UADDO56]] + ; GFX9: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX9: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) + ; GFX9: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH25]] + ; GFX9: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) + ; GFX9: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX9: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDE10]] + ; GFX9: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV27]], [[UADDO56]] + ; GFX9: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV26]], [[UADDE10]] + ; GFX9: 
[[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX9: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) + ; GFX9: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH27]] + ; GFX9: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) + ; GFX9: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX9: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD32]] + ; GFX9: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) + ; GFX9: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[ADD33]], [[ZEXT29]] + ; GFX9: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV27]], [[UADDE10]] + ; GFX9: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD34]] + ; GFX9: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX9: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDO66]] + ; GFX9: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO66]] + ; GFX9: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[ADD35]] + ; GFX9: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO66]] + ; GFX9: [[ADD36:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] + ; GFX9: [[ADD37:%[0-9]+]]:_(s32) = G_ADD [[ADD36]], [[UMULH29]] + ; GFX9: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV24]], [[MUL33]] + ; GFX9: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV25]], [[ADD37]], [[USUBO11]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV25]], [[ADD37]] + ; GFX9: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO10]](s32), [[USUBE14]](s32) + ; GFX9: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX9: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV31]] + ; GFX9: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) + ; GFX9: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV30]] + ; GFX9: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) + ; GFX9: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), 
[[USUBE14]](s32), [[UV31]] + ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] + ; GFX9: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[USUBO10]], [[UV30]] + ; GFX9: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV31]], [[USUBO11]] + ; GFX9: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[C5]], [[USUBO13]] + ; GFX9: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO12]](s32), [[USUBE18]](s32) + ; GFX9: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE18]](s32), [[UV31]] + ; GFX9: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) + ; GFX9: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV30]] + ; GFX9: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) + ; GFX9: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE18]](s32), [[UV31]] + ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] + ; GFX9: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV30]] + ; GFX9: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[UV31]], [[USUBO13]] + ; GFX9: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE20]], [[C5]], [[USUBO15]] + ; GFX9: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE22]](s32) + ; GFX9: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]] + ; GFX9: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]] + ; GFX9: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C5]] + ; GFX9: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C9]] + ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[FMUL2]] + ; 
GFX9: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C6]] + ; GFX9: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL5]], [[UITOFP2]] + ; GFX9: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX9: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL6]], [[FMUL]] + ; GFX9: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX9: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[UITOFP]] %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>) = G_UREM %0, %1 @@ -950,36 +2213,459 @@ ; GFX6-LABEL: name: test_urem_s33 ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]](s64) - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[COPY1]](s64) - ; GFX6: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s33) - ; GFX6: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC1]](s33) - ; GFX6: [[UREM:%[0-9]+]]:_(s64) = G_UREM [[ZEXT]], [[ZEXT1]] - ; GFX6: [[TRUNC2:%[0-9]+]]:_(s33) = G_TRUNC [[UREM]](s64) - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s33) - ; GFX6: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 + ; GFX6: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) + ; GFX6: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] + ; GFX6: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) + ; GFX6: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY3]], [[C]] + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) + ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %14(s32) + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 
0x3DF0000000000000 + ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C3]] + ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]] + ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %22(s32) + ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX6: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX6: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] + ; GFX6: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX6: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX6: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX6: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + 
; GFX6: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX6: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX6: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX6: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX6: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX6: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]] + ; GFX6: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] + ; GFX6: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX6: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] + ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]] + ; GFX6: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] + ; GFX6: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD8]] + ; GFX6: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] + ; GFX6: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX6: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX6: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) + ; GFX6: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX6: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD8]] + ; GFX6: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] + ; GFX6: 
[[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD8]] + ; GFX6: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX6: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX6: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX6: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX6: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD9]] + ; GFX6: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX6: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]] + ; GFX6: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD8]] + ; GFX6: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]] + ; GFX6: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX6: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO11]] + ; GFX6: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDE2]], [[C6]], [[UADDO23]] + ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; GFX6: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] + ; GFX6: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE4]] + ; GFX6: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] + ; GFX6: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX6: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX6: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) + ; GFX6: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX6: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE4]] + ; GFX6: 
[[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] + ; GFX6: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE4]] + ; GFX6: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX6: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX6: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX6: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX6: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD13]] + ; GFX6: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX6: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]] + ; GFX6: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE4]] + ; GFX6: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]] + ; GFX6: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX6: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] + ; GFX6: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] + ; GFX6: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD16]] + ; GFX6: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] + ; GFX6: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]] + ; GFX6: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] + ; GFX6: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD18]], [[USUBO3]] + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD18]] + ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX6: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] + ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]] + 
; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX6: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]] + ; GFX6: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] + ; GFX6: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX6: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) + ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] + ; GFX6: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]] + ; GFX6: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX6: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV12]] + ; GFX6: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]] + ; GFX6: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX6: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) + ; GFX6: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] + ; GFX6: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] + ; GFX6: [[COPY4:%[0-9]+]]:_(s64) = COPY [[SELECT3]](s64) + ; GFX6: $vgpr0_vgpr1 = COPY [[COPY4]](s64) + ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]] 
+ ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UITOFP]] ; GFX8-LABEL: name: test_urem_s33 ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]](s64) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[COPY1]](s64) - ; GFX8: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s33) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC1]](s33) - ; GFX8: [[UREM:%[0-9]+]]:_(s64) = G_UREM [[ZEXT]], [[ZEXT1]] - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s33) = G_TRUNC [[UREM]](s64) - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s33) - ; GFX8: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 + ; GFX8: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) + ; GFX8: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] + ; GFX8: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) + ; GFX8: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY3]], [[C]] + ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) + ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %14(s32) + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C3]] + ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]] + ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %22(s32) + ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX8: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8: 
[[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX8: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] + ; GFX8: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX8: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX8: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX8: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX8: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT 
[[UADDO9]](s1) + ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX8: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]] + ; GFX8: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] + ; GFX8: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX8: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] + ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX8: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]] + ; GFX8: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] + ; GFX8: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD8]] + ; GFX8: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] + ; GFX8: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX8: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX8: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX8: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) + ; GFX8: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD8]] + ; GFX8: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] + ; GFX8: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD8]] + ; GFX8: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX8: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX8: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX8: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX8: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], 
[[ZEXT8]] + ; GFX8: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD9]] + ; GFX8: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX8: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]] + ; GFX8: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD8]] + ; GFX8: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]] + ; GFX8: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX8: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO11]] + ; GFX8: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDE2]], [[C6]], [[UADDO23]] + ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; GFX8: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] + ; GFX8: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE4]] + ; GFX8: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] + ; GFX8: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX8: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX8: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX8: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) + ; GFX8: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX8: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE4]] + ; GFX8: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] + ; GFX8: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE4]] + ; GFX8: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX8: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX8: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX8: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; 
GFX8: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX8: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD13]] + ; GFX8: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX8: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]] + ; GFX8: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE4]] + ; GFX8: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]] + ; GFX8: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX8: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] + ; GFX8: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] + ; GFX8: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD16]] + ; GFX8: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] + ; GFX8: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX8: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]] + ; GFX8: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] + ; GFX8: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD18]], [[USUBO3]] + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD18]] + ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX8: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] + ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]] + ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX8: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]] + ; GFX8: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] + ; GFX8: 
[[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) + ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] + ; GFX8: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]] + ; GFX8: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX8: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV12]] + ; GFX8: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]] + ; GFX8: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX8: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) + ; GFX8: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] + ; GFX8: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] + ; GFX8: [[COPY4:%[0-9]+]]:_(s64) = COPY [[SELECT3]](s64) + ; GFX8: $vgpr0_vgpr1 = COPY [[COPY4]](s64) + ; GFX8: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]] + ; GFX8: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX8: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UITOFP]] ; GFX9-LABEL: name: test_urem_s33 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]](s64) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[COPY1]](s64) - ; GFX9: [[ZEXT:%[0-9]+]]:_(s64) = 
G_ZEXT [[TRUNC]](s33) - ; GFX9: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC1]](s33) - ; GFX9: [[UREM:%[0-9]+]]:_(s64) = G_UREM [[ZEXT]], [[ZEXT1]] - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s33) = G_TRUNC [[UREM]](s64) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s33) - ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 + ; GFX9: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) + ; GFX9: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] + ; GFX9: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) + ; GFX9: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY3]], [[C]] + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) + ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %14(s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C3]] + ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]] + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %22(s32) + ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX9: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] + ; GFX9: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX9: 
[[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX9: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX9: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX9: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX9: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX9: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX9: 
[[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]] + ; GFX9: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] + ; GFX9: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX9: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] + ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX9: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]] + ; GFX9: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] + ; GFX9: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD8]] + ; GFX9: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] + ; GFX9: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX9: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) + ; GFX9: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD8]] + ; GFX9: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] + ; GFX9: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD8]] + ; GFX9: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX9: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX9: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD9]] + ; GFX9: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX9: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]] + ; GFX9: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD8]] + ; GFX9: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]] + ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: 
[[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX9: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO11]] + ; GFX9: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDE2]], [[C6]], [[UADDO23]] + ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; GFX9: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] + ; GFX9: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE4]] + ; GFX9: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] + ; GFX9: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX9: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX9: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX9: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) + ; GFX9: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX9: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE4]] + ; GFX9: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] + ; GFX9: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE4]] + ; GFX9: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX9: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX9: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX9: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX9: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX9: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD13]] + ; GFX9: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX9: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]] + ; GFX9: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE4]] + ; GFX9: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]] + ; 
GFX9: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX9: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] + ; GFX9: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] + ; GFX9: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD16]] + ; GFX9: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] + ; GFX9: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX9: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]] + ; GFX9: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] + ; GFX9: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD18]], [[USUBO3]] + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD18]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX9: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] + ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]] + ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX9: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]] + ; GFX9: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] + ; GFX9: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) + ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] + ; GFX9: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]] + ; GFX9: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT 
[[ICMP4]](s1) + ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX9: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV12]] + ; GFX9: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]] + ; GFX9: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) + ; GFX9: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] + ; GFX9: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s64) = COPY [[SELECT3]](s64) + ; GFX9: $vgpr0_vgpr1 = COPY [[COPY4]](s64) + ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]] + ; GFX9: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX9: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UITOFP]] %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s33) = G_TRUNC %0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll @@ -0,0 +1,3456 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=CHECK,GISEL %s +; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=CHECK,CGP %s + +; The same 32-bit expansion is implemented in the legalizer and in 
AMDGPUCodeGenPrepare. + +define i64 @v_udiv_i64(i64 %num, i64 %den) { +; CHECK-LABEL: v_udiv_i64: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_or_b32_e32 v5, v1, v3 +; CHECK-NEXT: v_mov_b32_e32 v4, 0 +; CHECK-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5] +; CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1 +; CHECK-NEXT: s_xor_b64 s[4:5], vcc, s[4:5] +; CHECK-NEXT: ; implicit-def: $vgpr4_vgpr5 +; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; CHECK-NEXT: s_xor_b64 s[8:9], exec, s[6:7] +; CHECK-NEXT: s_cbranch_execnz BB0_2 +; CHECK-NEXT: ; %bb.1: +; CHECK-NEXT: v_cvt_f32_u32_e32 v4, v2 +; CHECK-NEXT: v_cvt_f32_u32_e32 v5, v3 +; CHECK-NEXT: v_sub_i32_e32 v6, vcc, 0, v2 +; CHECK-NEXT: v_subb_u32_e32 v7, vcc, 0, v3, vcc +; CHECK-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; CHECK-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; CHECK-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 +; CHECK-NEXT: v_trunc_f32_e32 v5, v5 +; CHECK-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 +; CHECK-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 +; CHECK-NEXT: v_mul_lo_u32 v8, v6, v5 +; CHECK-NEXT: v_mul_lo_u32 v9, v6, v4 +; CHECK-NEXT: v_mul_lo_u32 v10, v7, v4 +; CHECK-NEXT: v_mul_hi_u32 v11, v6, v4 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CHECK-NEXT: v_mul_lo_u32 v10, v5, v9 +; CHECK-NEXT: v_mul_hi_u32 v12, v4, v9 +; CHECK-NEXT: v_mul_hi_u32 v9, v5, v9 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CHECK-NEXT: v_mul_lo_u32 v11, v4, v8 +; CHECK-NEXT: v_mul_lo_u32 v13, v5, v8 +; CHECK-NEXT: v_mul_hi_u32 v14, v4, v8 +; CHECK-NEXT: v_mul_hi_u32 v8, v5, v8 +; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v13, v9 +; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v12 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v14 +; CHECK-NEXT: 
v_cndmask_b32_e64 v12, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CHECK-NEXT: v_add_i32_e32 v11, vcc, v13, v12 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v9 +; CHECK-NEXT: v_addc_u32_e64 v9, s[4:5], v5, v8, vcc +; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v5, v8 +; CHECK-NEXT: v_mul_lo_u32 v8, v6, v4 +; CHECK-NEXT: v_mul_lo_u32 v7, v7, v4 +; CHECK-NEXT: v_mul_hi_u32 v10, v6, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, v6, v9 +; CHECK-NEXT: v_mul_lo_u32 v11, v9, v8 +; CHECK-NEXT: v_mul_hi_u32 v12, v4, v8 +; CHECK-NEXT: v_mul_hi_u32 v8, v9, v8 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v7, v6 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v10 +; CHECK-NEXT: v_mul_lo_u32 v7, v4, v6 +; CHECK-NEXT: v_mul_lo_u32 v10, v9, v6 +; CHECK-NEXT: v_mul_hi_u32 v13, v4, v6 +; CHECK-NEXT: v_mul_hi_u32 v6, v9, v6 +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v11, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v10, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v12 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v8, v13 +; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v9, v7 +; CHECK-NEXT: v_add_i32_e64 v9, s[4:5], v10, v11 +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v8, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v9, v8 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8 +; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v5, v6, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; CHECK-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc +; CHECK-NEXT: v_mul_lo_u32 v6, v1, v4 +; CHECK-NEXT: v_mul_hi_u32 v7, v0, v4 +; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4 +; CHECK-NEXT: v_mul_lo_u32 v8, v0, v5 +; 
CHECK-NEXT: v_mul_lo_u32 v9, v1, v5 +; CHECK-NEXT: v_mul_hi_u32 v10, v0, v5 +; CHECK-NEXT: v_mul_hi_u32 v5, v1, v5 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v9, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CHECK-NEXT: v_mul_lo_u32 v7, v2, v4 +; CHECK-NEXT: v_mul_lo_u32 v8, v3, v4 +; CHECK-NEXT: v_mul_hi_u32 v9, v2, v4 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; CHECK-NEXT: v_mul_lo_u32 v6, v2, v5 +; CHECK-NEXT: v_add_i32_e32 v10, vcc, 1, v4 +; CHECK-NEXT: v_addc_u32_e32 v11, vcc, 0, v5, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, 1, v10 +; CHECK-NEXT: v_addc_u32_e32 v12, vcc, 0, v11, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v9 +; CHECK-NEXT: v_sub_i32_e32 v7, vcc, v0, v7 +; CHECK-NEXT: v_subb_u32_e64 v9, s[4:5], v1, v6, vcc +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v6 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] +; CHECK-NEXT: v_sub_i32_e64 v7, s[4:5], v7, v2 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[6:7], v9, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[6:7] +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v7, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v9, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v6, v13, v6, vcc +; CHECK-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5] +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc +; CHECK-NEXT: 
v_cmp_eq_u32_e32 vcc, v1, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v9, v7, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v10, v8, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v3, v11, v12, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 +; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v1, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v5, v5, v3, vcc +; CHECK-NEXT: BB0_2: ; %Flow +; CHECK-NEXT: s_or_saveexec_b64 s[6:7], s[8:9] +; CHECK-NEXT: s_xor_b64 exec, exec, s[6:7] +; CHECK-NEXT: s_cbranch_execz BB0_4 +; CHECK-NEXT: ; %bb.3: +; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v2 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 +; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_lo_u32 v3, v1, v2 +; CHECK-NEXT: v_mul_hi_u32 v4, v1, v2 +; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; CHECK-NEXT: v_mul_hi_u32 v3, v3, v1 +; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v1, v3 +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0 +; CHECK-NEXT: v_mul_lo_u32 v3, v1, v2 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v1 +; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, 1, v1 +; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v0, v3 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v2 +; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc +; CHECK-NEXT: v_cndmask_b32_e64 v0, v4, v1, s[4:5] +; CHECK-NEXT: v_cndmask_b32_e32 v4, v5, v0, vcc +; CHECK-NEXT: v_mov_b32_e32 v5, 0 +; CHECK-NEXT: BB0_4: +; CHECK-NEXT: s_or_b64 exec, exec, s[6:7] +; CHECK-NEXT: v_mov_b32_e32 v0, v4 +; CHECK-NEXT: v_mov_b32_e32 v1, v5 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %result = udiv i64 %num, %den + ret i64 %result +} + +; FIXME: This is a workaround for not handling uniform VGPR case. 
+declare i32 @llvm.amdgcn.readfirstlane(i32) + +define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) { +; CHECK-LABEL: s_udiv_i64: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_mov_b32 s4, 0 +; CHECK-NEXT: s_mov_b32 s5, -1 +; CHECK-NEXT: s_or_b64 s[6:7], s[0:1], s[2:3] +; CHECK-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5] +; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2 +; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[4:5], 0 +; CHECK-NEXT: s_cbranch_vccnz BB1_2 +; CHECK-NEXT: ; %bb.1: +; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0 +; CHECK-NEXT: v_mul_lo_u32 v1, v0, s2 +; CHECK-NEXT: v_mul_hi_u32 v2, v0, s2 +; CHECK-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0 +; CHECK-NEXT: v_add_i32_e64 v2, s[4:5], v0, v1 +; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; CHECK-NEXT: v_mul_hi_u32 v0, v0, s0 +; CHECK-NEXT: v_mul_lo_u32 v1, v0, s2 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v0 +; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, 1, v0 +; CHECK-NEXT: v_sub_i32_e32 v4, vcc, s0, v1 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, s0, v1 +; CHECK-NEXT: v_cmp_le_u32_e64 s[0:1], s2, v4 +; CHECK-NEXT: s_and_b64 s[0:1], s[0:1], vcc +; CHECK-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[0:1] +; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc +; CHECK-NEXT: s_branch BB1_3 +; CHECK-NEXT: BB1_2: +; CHECK-NEXT: v_mov_b32_e32 v1, s3 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s3 +; CHECK-NEXT: s_sub_u32 s6, 0, s2 +; CHECK-NEXT: s_cselect_b32 s4, 1, 0 +; CHECK-NEXT: v_mov_b32_e32 v3, s1 +; CHECK-NEXT: v_mac_f32_e32 v0, 0x4f800000, v2 +; CHECK-NEXT: s_and_b32 s4, s4, 1 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; CHECK-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 +; CHECK-NEXT: s_cmp_lg_u32 s4, 0 +; CHECK-NEXT: s_subb_u32 s7, 0, s3 +; CHECK-NEXT: v_mul_f32_e32 v2, 0x2f800000, v0 +; CHECK-NEXT: 
v_trunc_f32_e32 v2, v2 +; CHECK-NEXT: v_mac_f32_e32 v0, 0xcf800000, v2 +; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 +; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0 +; CHECK-NEXT: v_mul_lo_u32 v4, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v5, s6, v0 +; CHECK-NEXT: v_mul_lo_u32 v6, s7, v0 +; CHECK-NEXT: v_mul_hi_u32 v7, s6, v0 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, v2, v5 +; CHECK-NEXT: v_mul_hi_u32 v8, v0, v5 +; CHECK-NEXT: v_mul_hi_u32 v5, v2, v5 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; CHECK-NEXT: v_mul_lo_u32 v7, v0, v4 +; CHECK-NEXT: v_mul_lo_u32 v9, v2, v4 +; CHECK-NEXT: v_mul_hi_u32 v10, v0, v4 +; CHECK-NEXT: v_mul_hi_u32 v4, v2, v4 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v9, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v8 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v5 +; CHECK-NEXT: v_addc_u32_e64 v5, s[4:5], v2, v4, vcc +; CHECK-NEXT: v_add_i32_e64 v2, s[4:5], v2, v4 +; CHECK-NEXT: v_mul_lo_u32 v4, s6, v0 +; CHECK-NEXT: v_mul_lo_u32 v6, s7, v0 +; CHECK-NEXT: v_mul_hi_u32 v7, s6, v0 +; CHECK-NEXT: v_mul_lo_u32 v8, s6, v5 +; CHECK-NEXT: v_mul_lo_u32 v9, v5, v4 +; CHECK-NEXT: v_mul_hi_u32 v10, v0, v4 +; CHECK-NEXT: v_mul_hi_u32 v4, v5, v4 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v7 +; CHECK-NEXT: v_mul_lo_u32 v7, v0, v6 +; CHECK-NEXT: v_mul_lo_u32 v8, v5, v6 +; CHECK-NEXT: v_mul_hi_u32 v11, v0, v6 +; CHECK-NEXT: v_mul_hi_u32 v5, v5, v6 +; 
CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v9, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v8, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v7, v6 +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v8, v9 +; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v7, v6 +; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v5, v6 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v5, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v4 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc +; CHECK-NEXT: v_mul_lo_u32 v4, s1, v0 +; CHECK-NEXT: v_mul_hi_u32 v5, s0, v0 +; CHECK-NEXT: v_mul_hi_u32 v0, s1, v0 +; CHECK-NEXT: v_mul_lo_u32 v6, s0, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, s1, v2 +; CHECK-NEXT: v_mul_hi_u32 v8, s0, v2 +; CHECK-NEXT: v_mul_hi_u32 v2, s1, v2 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 +; CHECK-NEXT: v_mul_lo_u32 v5, s2, v0 +; CHECK-NEXT: v_mul_lo_u32 v6, s3, v0 +; CHECK-NEXT: v_mul_hi_u32 v7, s2, v0 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, 1, v0 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v8 +; CHECK-NEXT: v_mul_lo_u32 v2, s2, v2 +; CHECK-NEXT: 
v_add_i32_e32 v2, vcc, v6, v2 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7 +; CHECK-NEXT: v_sub_i32_e32 v5, vcc, s0, v5 +; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v2, vcc +; CHECK-NEXT: v_sub_i32_e64 v2, s[0:1], s1, v2 +; CHECK-NEXT: v_cmp_le_u32_e64 s[0:1], s2, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[0:1] +; CHECK-NEXT: v_subrev_i32_e64 v5, s[0:1], s2, v5 +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s3, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v2, v1, vcc +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s3, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v3, v7, v6, vcc +; CHECK-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[0:1] +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s3, v1 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s3, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v8, v4, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; CHECK-NEXT: BB1_3: +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: s_mov_b32 s1, s0 +; CHECK-NEXT: ; return to shader part epilog + %result = udiv i64 %num, %den + %cast = bitcast i64 %result to <2 x i32> + %elt.0 = extractelement <2 x i32> %cast, i32 0 + %elt.1 = extractelement <2 x i32> %cast, i32 1 + %res.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.0) + %res.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.1) + %ins.0 = insertelement <2 x i32> undef, i32 %res.0, i32 0 + %ins.1 = insertelement <2 x i32> %ins.0, i32 %res.0, i32 1 + %cast.back = bitcast <2 x i32> %ins.1 to i64 + ret i64 %cast.back +} + +define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) { +; GISEL-LABEL: v_udiv_v2i64: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v4 +; GISEL-NEXT: v_cvt_f32_u32_e32 
v9, v5 +; GISEL-NEXT: v_mac_f32_e32 v8, 0x4f800000, v9 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v8, v8 +; GISEL-NEXT: v_mul_f32_e32 v8, 0x5f7ffffc, v8 +; GISEL-NEXT: v_mul_f32_e32 v9, 0x2f800000, v8 +; GISEL-NEXT: v_trunc_f32_e32 v9, v9 +; GISEL-NEXT: v_mac_f32_e32 v8, 0xcf800000, v9 +; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8 +; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v9 +; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v4 +; GISEL-NEXT: v_subb_u32_e32 v11, vcc, 0, v5, vcc +; GISEL-NEXT: v_mul_lo_u32 v12, v10, v8 +; GISEL-NEXT: v_mul_lo_u32 v13, v11, v8 +; GISEL-NEXT: v_mul_lo_u32 v14, v10, v9 +; GISEL-NEXT: v_mul_hi_u32 v15, v10, v8 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 +; GISEL-NEXT: v_mul_lo_u32 v14, v9, v12 +; GISEL-NEXT: v_mul_lo_u32 v15, v8, v13 +; GISEL-NEXT: v_mul_hi_u32 v16, v8, v12 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 +; GISEL-NEXT: v_mul_lo_u32 v15, v9, v13 +; GISEL-NEXT: v_mul_hi_u32 v12, v9, v12 +; GISEL-NEXT: v_mul_hi_u32 v16, v8, v13 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 +; GISEL-NEXT: v_mul_hi_u32 v13, v9, v13 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 +; GISEL-NEXT: v_addc_u32_e64 v12, s[4:5], v9, v13, vcc +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v13 +; GISEL-NEXT: v_mul_lo_u32 v13, v10, v8 +; GISEL-NEXT: v_mul_lo_u32 v11, v11, v8 +; GISEL-NEXT: v_mul_lo_u32 v14, v10, v12 +; GISEL-NEXT: v_mul_hi_u32 v10, v10, v8 +; 
GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v11, v10 +; GISEL-NEXT: v_mul_lo_u32 v11, v12, v13 +; GISEL-NEXT: v_mul_lo_u32 v14, v8, v10 +; GISEL-NEXT: v_mul_hi_u32 v15, v8, v13 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v14, v11 +; GISEL-NEXT: v_mul_lo_u32 v14, v12, v10 +; GISEL-NEXT: v_mul_hi_u32 v13, v12, v13 +; GISEL-NEXT: v_mul_hi_u32 v15, v8, v10 +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v13, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v14, v15 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v13, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 +; GISEL-NEXT: v_mul_hi_u32 v10, v12, v10 +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v13 +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v8, v11 +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v10, vcc +; GISEL-NEXT: v_addc_u32_e64 v9, vcc, 0, v9, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v10, v1, v8 +; GISEL-NEXT: v_mul_lo_u32 v11, v0, v9 +; GISEL-NEXT: v_mul_hi_u32 v12, v0, v8 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; GISEL-NEXT: v_mul_lo_u32 v11, v1, v9 +; GISEL-NEXT: v_mul_hi_u32 v8, v1, v8 +; GISEL-NEXT: v_mul_hi_u32 v12, v0, v9 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; GISEL-NEXT: 
v_add_i32_e32 v8, vcc, v8, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; GISEL-NEXT: v_mul_hi_u32 v9, v1, v9 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; GISEL-NEXT: v_mul_lo_u32 v10, v4, v8 +; GISEL-NEXT: v_mul_lo_u32 v11, v5, v8 +; GISEL-NEXT: v_mul_lo_u32 v12, v4, v9 +; GISEL-NEXT: v_mul_hi_u32 v13, v4, v8 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 +; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], v1, v11, vcc +; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v11 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v10, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v10, v11, v12, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v4 +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc +; GISEL-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v11, vcc, 1, v8 +; GISEL-NEXT: v_addc_u32_e32 v12, vcc, 0, v9, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v5 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v13, v0, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v11 +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, 0, v12, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v11, v1, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v1, v12, v4, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc +; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v6 +; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v7 +; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, 
v4 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 +; GISEL-NEXT: v_trunc_f32_e32 v5, v5 +; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 +; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 +; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 +; GISEL-NEXT: v_sub_i32_e32 v8, vcc, 0, v6 +; GISEL-NEXT: v_subb_u32_e32 v9, vcc, 0, v7, vcc +; GISEL-NEXT: v_mul_lo_u32 v10, v8, v4 +; GISEL-NEXT: v_mul_lo_u32 v11, v9, v4 +; GISEL-NEXT: v_mul_lo_u32 v12, v8, v5 +; GISEL-NEXT: v_mul_hi_u32 v13, v8, v4 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; GISEL-NEXT: v_mul_lo_u32 v12, v5, v10 +; GISEL-NEXT: v_mul_lo_u32 v13, v4, v11 +; GISEL-NEXT: v_mul_hi_u32 v14, v4, v10 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; GISEL-NEXT: v_mul_lo_u32 v13, v5, v11 +; GISEL-NEXT: v_mul_hi_u32 v10, v5, v10 +; GISEL-NEXT: v_mul_hi_u32 v14, v4, v11 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; GISEL-NEXT: v_mul_hi_u32 v11, v5, v11 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10 +; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], v5, v11, vcc +; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v11 +; GISEL-NEXT: v_mul_lo_u32 v11, v8, v4 +; GISEL-NEXT: v_mul_lo_u32 v9, v9, v4 +; GISEL-NEXT: v_mul_lo_u32 v12, v8, v10 +; GISEL-NEXT: v_mul_hi_u32 v8, v8, v4 +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v12 +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v9, v8 +; GISEL-NEXT: 
v_mul_lo_u32 v9, v10, v11 +; GISEL-NEXT: v_mul_lo_u32 v12, v4, v8 +; GISEL-NEXT: v_mul_hi_u32 v13, v4, v11 +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v12, v9 +; GISEL-NEXT: v_mul_lo_u32 v12, v10, v8 +; GISEL-NEXT: v_mul_hi_u32 v11, v10, v11 +; GISEL-NEXT: v_mul_hi_u32 v13, v4, v8 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v13 +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v11, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 +; GISEL-NEXT: v_mul_hi_u32 v8, v10, v8 +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v8, v11 +; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v4, v9 +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v8, vcc +; GISEL-NEXT: v_addc_u32_e64 v5, vcc, 0, v5, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v8, v3, v4 +; GISEL-NEXT: v_mul_lo_u32 v9, v2, v5 +; GISEL-NEXT: v_mul_hi_u32 v10, v2, v4 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; GISEL-NEXT: v_mul_lo_u32 v9, v3, v5 +; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4 +; GISEL-NEXT: v_mul_hi_u32 v10, v2, v5 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v9, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; GISEL-NEXT: v_mul_hi_u32 v5, v3, v5 
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; GISEL-NEXT: v_mul_lo_u32 v8, v6, v4 +; GISEL-NEXT: v_mul_lo_u32 v9, v7, v4 +; GISEL-NEXT: v_mul_lo_u32 v10, v6, v5 +; GISEL-NEXT: v_mul_hi_u32 v11, v6, v4 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v8 +; GISEL-NEXT: v_subb_u32_e64 v8, s[4:5], v3, v9, vcc +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v9 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v8, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v8, v9, v10, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v6 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc +; GISEL-NEXT: v_subbrev_u32_e64 v3, vcc, 0, v3, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v4 +; GISEL-NEXT: v_addc_u32_e32 v10, vcc, 0, v5, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v3, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v11, v2, vcc +; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v9 +; GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v10, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v9, v3, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v10, v6, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: v_udiv_v2i64: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: v_mov_b32_e32 v8, v0 +; CGP-NEXT: v_mov_b32_e32 v9, v1 +; CGP-NEXT: v_or_b32_e32 v1, v9, v5 +; CGP-NEXT: v_mov_b32_e32 v0, 0 +; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] +; CGP-NEXT: v_cmp_ne_u32_e64 
s[4:5], 0, 1 +; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5] +; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1 +; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; CGP-NEXT: s_xor_b64 s[8:9], exec, s[6:7] +; CGP-NEXT: s_cbranch_execnz BB2_2 +; CGP-NEXT: ; %bb.1: +; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4 +; CGP-NEXT: v_cvt_f32_u32_e32 v1, v5 +; CGP-NEXT: v_sub_i32_e32 v10, vcc, 0, v4 +; CGP-NEXT: v_subb_u32_e32 v11, vcc, 0, v5, vcc +; CGP-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 +; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 +; CGP-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 +; CGP-NEXT: v_trunc_f32_e32 v1, v1 +; CGP-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 +; CGP-NEXT: v_cvt_u32_f32_e32 v1, v1 +; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 +; CGP-NEXT: v_mul_lo_u32 v12, v10, v1 +; CGP-NEXT: v_mul_lo_u32 v13, v10, v0 +; CGP-NEXT: v_mul_lo_u32 v14, v11, v0 +; CGP-NEXT: v_mul_hi_u32 v15, v10, v0 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12 +; CGP-NEXT: v_mul_lo_u32 v14, v1, v13 +; CGP-NEXT: v_mul_hi_u32 v16, v0, v13 +; CGP-NEXT: v_mul_hi_u32 v13, v1, v13 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v15 +; CGP-NEXT: v_mul_lo_u32 v15, v0, v12 +; CGP-NEXT: v_mul_lo_u32 v17, v1, v12 +; CGP-NEXT: v_mul_hi_u32 v18, v0, v12 +; CGP-NEXT: v_mul_hi_u32 v12, v1, v12 +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v17, v13 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v16 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v18 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 +; CGP-NEXT: v_add_i32_e32 v15, vcc, v17, v16 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v13 +; CGP-NEXT: v_addc_u32_e64 v13, 
s[4:5], v1, v12, vcc +; CGP-NEXT: v_add_i32_e64 v1, s[4:5], v1, v12 +; CGP-NEXT: v_mul_lo_u32 v12, v10, v0 +; CGP-NEXT: v_mul_lo_u32 v11, v11, v0 +; CGP-NEXT: v_mul_hi_u32 v14, v10, v0 +; CGP-NEXT: v_mul_lo_u32 v10, v10, v13 +; CGP-NEXT: v_mul_lo_u32 v15, v13, v12 +; CGP-NEXT: v_mul_hi_u32 v16, v0, v12 +; CGP-NEXT: v_mul_hi_u32 v12, v13, v12 +; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v11, v10 +; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v14 +; CGP-NEXT: v_mul_lo_u32 v11, v0, v10 +; CGP-NEXT: v_mul_lo_u32 v14, v13, v10 +; CGP-NEXT: v_mul_hi_u32 v17, v0, v10 +; CGP-NEXT: v_mul_hi_u32 v10, v13, v10 +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v15, v11 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v14, v12 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v16 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v12, v17 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v13, v11 +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v14, v15 +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v13, v12 +; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v12 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v10, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v11 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: v_mul_lo_u32 v10, v9, v0 +; CGP-NEXT: v_mul_hi_u32 v11, v8, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v9, v0 +; CGP-NEXT: v_mul_lo_u32 v12, v8, v1 +; CGP-NEXT: v_mul_lo_u32 v13, v9, v1 +; CGP-NEXT: v_mul_hi_u32 v14, v8, v1 +; CGP-NEXT: v_mul_hi_u32 v1, v9, v1 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v13, v0 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v0, 
vcc, v0, v14 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_mul_lo_u32 v11, v4, v0 +; CGP-NEXT: v_mul_lo_u32 v12, v5, v0 +; CGP-NEXT: v_mul_hi_u32 v13, v4, v0 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v10 +; CGP-NEXT: v_mul_lo_u32 v10, v4, v1 +; CGP-NEXT: v_add_i32_e32 v14, vcc, 1, v0 +; CGP-NEXT: v_addc_u32_e32 v15, vcc, 0, v1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 +; CGP-NEXT: v_add_i32_e32 v12, vcc, 1, v14 +; CGP-NEXT: v_addc_u32_e32 v16, vcc, 0, v15, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 +; CGP-NEXT: v_sub_i32_e32 v11, vcc, v8, v11 +; CGP-NEXT: v_subb_u32_e64 v13, s[4:5], v9, v10, vcc +; CGP-NEXT: v_sub_i32_e64 v9, s[4:5], v9, v10 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v11, v4 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] +; CGP-NEXT: v_sub_i32_e64 v11, s[4:5], v11, v4 +; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v13, v5 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, -1, s[6:7] +; CGP-NEXT: v_subb_u32_e32 v9, vcc, v9, v5, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v11, v4 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v13, v5 +; CGP-NEXT: v_cndmask_b32_e32 v10, v17, v10, vcc +; CGP-NEXT: v_subbrev_u32_e64 v9, vcc, 0, v9, s[4:5] +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v9, v5 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v9, v5 +; CGP-NEXT: v_cndmask_b32_e32 v5, v13, v11, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 +; CGP-NEXT: v_cndmask_b32_e32 v5, v14, v12, vcc +; CGP-NEXT: v_cndmask_b32_e32 v9, v15, v16, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc +; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc +; CGP-NEXT: BB2_2: ; %Flow1 +; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[8:9] +; CGP-NEXT: s_xor_b64 exec, exec, 
s[6:7] +; CGP-NEXT: s_cbranch_execz BB2_4 +; CGP-NEXT: ; %bb.3: +; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4 +; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 +; CGP-NEXT: v_mul_lo_u32 v1, v0, v4 +; CGP-NEXT: v_mul_hi_u32 v5, v0, v4 +; CGP-NEXT: v_sub_i32_e32 v9, vcc, 0, v1 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc +; CGP-NEXT: v_mul_hi_u32 v1, v1, v0 +; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v0, v1 +; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc +; CGP-NEXT: v_mul_hi_u32 v0, v0, v8 +; CGP-NEXT: v_mul_lo_u32 v1, v0, v4 +; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v0 +; CGP-NEXT: v_subrev_i32_e32 v9, vcc, 1, v0 +; CGP-NEXT: v_sub_i32_e32 v10, vcc, v8, v1 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v1 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v4 +; CGP-NEXT: s_and_b64 s[4:5], s[4:5], vcc +; CGP-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v0, v9, v0, vcc +; CGP-NEXT: v_mov_b32_e32 v1, 0 +; CGP-NEXT: BB2_4: +; CGP-NEXT: s_or_b64 exec, exec, s[6:7] +; CGP-NEXT: v_or_b32_e32 v5, v3, v7 +; CGP-NEXT: v_mov_b32_e32 v4, 0 +; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5] +; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1 +; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5] +; CGP-NEXT: ; implicit-def: $vgpr4_vgpr5 +; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; CGP-NEXT: s_xor_b64 s[8:9], exec, s[6:7] +; CGP-NEXT: s_cbranch_execnz BB2_6 +; CGP-NEXT: ; %bb.5: +; CGP-NEXT: v_cvt_f32_u32_e32 v4, v6 +; CGP-NEXT: v_cvt_f32_u32_e32 v5, v7 +; CGP-NEXT: v_sub_i32_e32 v8, vcc, 0, v6 +; CGP-NEXT: v_subb_u32_e32 v9, vcc, 0, v7, vcc +; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; CGP-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 +; CGP-NEXT: v_trunc_f32_e32 v5, v5 +; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 +; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 +; 
CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 +; CGP-NEXT: v_mul_lo_u32 v10, v8, v5 +; CGP-NEXT: v_mul_lo_u32 v11, v8, v4 +; CGP-NEXT: v_mul_lo_u32 v12, v9, v4 +; CGP-NEXT: v_mul_hi_u32 v13, v8, v4 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 +; CGP-NEXT: v_mul_lo_u32 v12, v5, v11 +; CGP-NEXT: v_mul_hi_u32 v14, v4, v11 +; CGP-NEXT: v_mul_hi_u32 v11, v5, v11 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 +; CGP-NEXT: v_mul_lo_u32 v13, v4, v10 +; CGP-NEXT: v_mul_lo_u32 v15, v5, v10 +; CGP-NEXT: v_mul_hi_u32 v16, v4, v10 +; CGP-NEXT: v_mul_hi_u32 v10, v5, v10 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v11 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v16 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v14 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11 +; CGP-NEXT: v_addc_u32_e64 v11, s[4:5], v5, v10, vcc +; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v5, v10 +; CGP-NEXT: v_mul_lo_u32 v10, v8, v4 +; CGP-NEXT: v_mul_lo_u32 v9, v9, v4 +; CGP-NEXT: v_mul_hi_u32 v12, v8, v4 +; CGP-NEXT: v_mul_lo_u32 v8, v8, v11 +; CGP-NEXT: v_mul_lo_u32 v13, v11, v10 +; CGP-NEXT: v_mul_hi_u32 v14, v4, v10 +; CGP-NEXT: v_mul_hi_u32 v10, v11, v10 +; CGP-NEXT: v_add_i32_e64 v8, s[4:5], v9, v8 +; CGP-NEXT: v_add_i32_e64 v8, s[4:5], v8, v12 +; CGP-NEXT: v_mul_lo_u32 v9, v4, v8 +; CGP-NEXT: v_mul_lo_u32 v12, v11, v8 +; CGP-NEXT: v_mul_hi_u32 v15, v4, v8 +; CGP-NEXT: v_mul_hi_u32 v8, v11, v8 +; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v13, v9 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v12, 
v10 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v9, v14 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v15 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v11, v9 +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v12, v13 +; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v10, v9 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v11, v10 +; CGP-NEXT: v_add_i32_e64 v8, s[4:5], v8, v10 +; CGP-NEXT: v_addc_u32_e32 v5, vcc, v5, v8, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9 +; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc +; CGP-NEXT: v_mul_lo_u32 v8, v3, v4 +; CGP-NEXT: v_mul_hi_u32 v9, v2, v4 +; CGP-NEXT: v_mul_hi_u32 v4, v3, v4 +; CGP-NEXT: v_mul_lo_u32 v10, v2, v5 +; CGP-NEXT: v_mul_lo_u32 v11, v3, v5 +; CGP-NEXT: v_mul_hi_u32 v12, v2, v5 +; CGP-NEXT: v_mul_hi_u32 v5, v3, v5 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v11, v4 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v12 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v8 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_mul_lo_u32 v9, v6, v4 +; CGP-NEXT: v_mul_lo_u32 v10, v7, v4 +; CGP-NEXT: v_mul_hi_u32 v11, v6, v4 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; CGP-NEXT: v_mul_lo_u32 v8, v6, v5 +; CGP-NEXT: v_add_i32_e32 v12, vcc, 1, v4 +; CGP-NEXT: v_addc_u32_e32 v13, vcc, 0, v5, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v12 +; CGP-NEXT: v_addc_u32_e32 v14, vcc, 0, v13, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CGP-NEXT: 
v_sub_i32_e32 v9, vcc, v2, v9 +; CGP-NEXT: v_subb_u32_e64 v11, s[4:5], v3, v8, vcc +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v8 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v6 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; CGP-NEXT: v_sub_i32_e64 v9, s[4:5], v9, v6 +; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v11, v7 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[6:7] +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v9, v6 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v11, v7 +; CGP-NEXT: v_cndmask_b32_e32 v8, v15, v8, vcc +; CGP-NEXT: v_subbrev_u32_e64 v3, vcc, 0, v3, s[4:5] +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v7 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 +; CGP-NEXT: v_cndmask_b32_e32 v3, v11, v9, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; CGP-NEXT: v_cndmask_b32_e32 v3, v12, v10, vcc +; CGP-NEXT: v_cndmask_b32_e32 v7, v13, v14, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 +; CGP-NEXT: v_cndmask_b32_e32 v4, v4, v3, vcc +; CGP-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc +; CGP-NEXT: BB2_6: ; %Flow +; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[8:9] +; CGP-NEXT: s_xor_b64 exec, exec, s[6:7] +; CGP-NEXT: s_cbranch_execz BB2_8 +; CGP-NEXT: ; %bb.7: +; CGP-NEXT: v_cvt_f32_u32_e32 v3, v6 +; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; CGP-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3 +; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3 +; CGP-NEXT: v_mul_lo_u32 v4, v3, v6 +; CGP-NEXT: v_mul_hi_u32 v5, v3, v6 +; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v4 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; CGP-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; CGP-NEXT: v_mul_hi_u32 v4, v4, v3 +; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4 +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 +; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; CGP-NEXT: v_mul_hi_u32 v3, v3, v2 +; CGP-NEXT: v_mul_lo_u32 v4, v3, v6 +; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v3 +; CGP-NEXT: v_subrev_i32_e32 v7, vcc, 1, v3 +; CGP-NEXT: 
v_sub_i32_e32 v8, vcc, v2, v4 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v6 +; CGP-NEXT: s_and_b64 s[4:5], s[4:5], vcc +; CGP-NEXT: v_cndmask_b32_e64 v2, v5, v3, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v4, v7, v2, vcc +; CGP-NEXT: v_mov_b32_e32 v5, 0 +; CGP-NEXT: BB2_8: +; CGP-NEXT: s_or_b64 exec, exec, s[6:7] +; CGP-NEXT: v_mov_b32_e32 v2, v4 +; CGP-NEXT: v_mov_b32_e32 v3, v5 +; CGP-NEXT: s_setpc_b64 s[30:31] + %result = udiv <2 x i64> %num, %den + ret <2 x i64> %result +} + +define i64 @v_udiv_i64_pow2k_denom(i64 %num) { +; CHECK-LABEL: v_udiv_i64_pow2k_denom: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_movk_i32 s8, 0x1000 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0 +; CHECK-NEXT: s_mov_b32 s6, 0xfffff000 +; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s8 +; CHECK-NEXT: v_mac_f32_e32 v3, 0x4f800000, v2 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v3 +; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 +; CHECK-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 +; CHECK-NEXT: v_trunc_f32_e32 v3, v3 +; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3 +; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 +; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 +; CHECK-NEXT: v_mul_lo_u32 v4, s6, v3 +; CHECK-NEXT: v_mul_lo_u32 v5, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v6, -1, v2 +; CHECK-NEXT: v_mul_hi_u32 v7, s6, v2 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, v3, v5 +; CHECK-NEXT: v_mul_hi_u32 v8, v2, v5 +; CHECK-NEXT: v_mul_hi_u32 v5, v3, v5 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; CHECK-NEXT: v_mul_lo_u32 v7, v2, v4 +; CHECK-NEXT: v_mul_lo_u32 v9, v3, v4 +; CHECK-NEXT: v_mul_hi_u32 v10, v2, v4 +; CHECK-NEXT: v_mul_hi_u32 v4, v3, v4 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v9, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; 
CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v8 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 +; CHECK-NEXT: v_addc_u32_e64 v5, s[4:5], v3, v4, vcc +; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v3, v4 +; CHECK-NEXT: v_mul_lo_u32 v4, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v6, -1, v2 +; CHECK-NEXT: v_mul_hi_u32 v7, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, s6, v5 +; CHECK-NEXT: v_mul_lo_u32 v9, v5, v4 +; CHECK-NEXT: v_mul_hi_u32 v10, v2, v4 +; CHECK-NEXT: v_mul_hi_u32 v4, v5, v4 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v7 +; CHECK-NEXT: v_mul_lo_u32 v7, v2, v6 +; CHECK-NEXT: v_mul_lo_u32 v8, v5, v6 +; CHECK-NEXT: v_mul_hi_u32 v11, v2, v6 +; CHECK-NEXT: v_mul_hi_u32 v5, v5, v6 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v9, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v8, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v7, v6 +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v8, v9 +; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v7, v6 +; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v5, v6 +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; CHECK-NEXT: v_mul_lo_u32 v4, v1, v2 +; CHECK-NEXT: v_mul_hi_u32 v5, v0, v2 +; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 +; CHECK-NEXT: 
v_mul_lo_u32 v6, v0, v3 +; CHECK-NEXT: v_mul_lo_u32 v7, v1, v3 +; CHECK-NEXT: v_mul_hi_u32 v8, v0, v3 +; CHECK-NEXT: v_mul_hi_u32 v3, v1, v3 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 +; CHECK-NEXT: v_mul_lo_u32 v5, s8, v2 +; CHECK-NEXT: v_mul_lo_u32 v6, 0, v2 +; CHECK-NEXT: v_mul_hi_u32 v7, s8, v2 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CHECK-NEXT: v_mul_lo_u32 v4, s8, v3 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, 1, v2 +; CHECK-NEXT: v_addc_u32_e32 v9, vcc, 0, v3, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, 1, v8 +; CHECK-NEXT: v_addc_u32_e32 v10, vcc, 0, v9, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 +; CHECK-NEXT: v_subb_u32_e64 v5, s[4:5], v1, v4, vcc +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v4 +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] +; CHECK-NEXT: v_subrev_i32_e64 v0, s[4:5], s8, v0 +; CHECK-NEXT: v_cmp_le_u32_e64 s[6:7], 0, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[6:7] +; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s8, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc +; CHECK-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5] +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc +; 
CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v8, v6, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v9, v10, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; CHECK-NEXT: s_setpc_b64 s[30:31] + %result = udiv i64 %num, 4096 + ret i64 %result +} + +define <2 x i64> @v_udiv_v2i64_pow2k_denom(<2 x i64> %num) { +; GISEL-LABEL: v_udiv_v2i64_pow2k_denom: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_movk_i32 s12, 0x1000 +; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s12 +; GISEL-NEXT: s_sub_u32 s8, 0, s12 +; GISEL-NEXT: s_cselect_b32 s4, 1, 0 +; GISEL-NEXT: v_cvt_f32_u32_e32 v5, 0 +; GISEL-NEXT: v_mov_b32_e32 v6, v4 +; GISEL-NEXT: s_and_b32 s4, s4, 1 +; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v6 +; GISEL-NEXT: s_cmp_lg_u32 s4, 0 +; GISEL-NEXT: s_subb_u32 s9, 0, 0 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 +; GISEL-NEXT: v_mul_f32_e32 v6, 0x2f800000, v4 +; GISEL-NEXT: s_sub_u32 s10, 0, s12 +; GISEL-NEXT: s_cselect_b32 s4, 1, 0 +; GISEL-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 +; GISEL-NEXT: v_trunc_f32_e32 v6, v6 +; GISEL-NEXT: s_and_b32 s4, s4, 1 +; GISEL-NEXT: v_trunc_f32_e32 v7, v7 +; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6 +; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 +; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7 +; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 +; GISEL-NEXT: s_cmp_lg_u32 s4, 0 +; GISEL-NEXT: s_subb_u32 s6, 0, 0 +; GISEL-NEXT: v_mul_lo_u32 v8, s10, v6 +; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 +; GISEL-NEXT: v_mul_lo_u32 v9, s8, v7 +; GISEL-NEXT: v_mul_lo_u32 v10, s10, v4 +; GISEL-NEXT: 
v_mul_lo_u32 v11, s6, v4 +; GISEL-NEXT: v_mul_hi_u32 v12, s10, v4 +; GISEL-NEXT: v_mul_lo_u32 v13, s8, v5 +; GISEL-NEXT: v_mul_lo_u32 v14, s9, v5 +; GISEL-NEXT: v_mul_hi_u32 v15, s8, v5 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v8 +; GISEL-NEXT: v_mul_lo_u32 v11, v6, v10 +; GISEL-NEXT: v_mul_hi_u32 v16, v4, v10 +; GISEL-NEXT: v_mul_hi_u32 v10, v6, v10 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v14, v9 +; GISEL-NEXT: v_mul_lo_u32 v14, v7, v13 +; GISEL-NEXT: v_mul_hi_u32 v17, v5, v13 +; GISEL-NEXT: v_mul_hi_u32 v13, v7, v13 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15 +; GISEL-NEXT: v_mul_lo_u32 v12, v4, v8 +; GISEL-NEXT: v_mul_lo_u32 v15, v6, v8 +; GISEL-NEXT: v_mul_hi_u32 v18, v4, v8 +; GISEL-NEXT: v_mul_hi_u32 v8, v6, v8 +; GISEL-NEXT: v_mul_lo_u32 v19, v5, v9 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v19 +; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17 +; GISEL-NEXT: v_mul_lo_u32 v14, v7, v9 +; GISEL-NEXT: v_mul_hi_u32 v17, v5, v9 +; GISEL-NEXT: v_mul_hi_u32 v9, v7, v9 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v15, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v16 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v19, v18 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; GISEL-NEXT: 
v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v14, v15 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v12 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10 +; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], v6, v8, vcc +; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8 +; GISEL-NEXT: v_mul_lo_u32 v8, s10, v4 +; GISEL-NEXT: v_mul_lo_u32 v11, s6, v4 +; GISEL-NEXT: v_mul_hi_u32 v12, s10, v4 +; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v13 +; GISEL-NEXT: v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v7, s[6:7], v7, v9 +; GISEL-NEXT: v_mul_lo_u32 v9, s8, v5 +; GISEL-NEXT: v_mul_lo_u32 v14, s9, v5 +; GISEL-NEXT: v_mul_hi_u32 v15, s8, v5 +; GISEL-NEXT: v_mul_lo_u32 v16, s10, v10 +; GISEL-NEXT: v_mul_lo_u32 v17, v10, v8 +; GISEL-NEXT: v_mul_hi_u32 v18, v4, v8 +; GISEL-NEXT: v_mul_hi_u32 v8, v10, v8 +; GISEL-NEXT: v_mul_lo_u32 v19, s8, v13 +; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v16 +; GISEL-NEXT: v_mul_lo_u32 v16, v13, v9 +; GISEL-NEXT: v_add_i32_e64 v14, s[6:7], v14, v19 +; GISEL-NEXT: v_mul_hi_u32 v19, v5, v9 +; GISEL-NEXT: v_mul_hi_u32 v9, v13, v9 +; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v12 +; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v14, v15 +; GISEL-NEXT: v_mul_lo_u32 v14, v4, v11 +; GISEL-NEXT: v_mul_lo_u32 v15, v5, v12 +; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v16, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v15, v19 +; GISEL-NEXT: v_mul_lo_u32 v15, v10, v11 +; GISEL-NEXT: v_mul_hi_u32 v19, v4, v11 +; GISEL-NEXT: v_mul_hi_u32 v10, v10, v11 +; GISEL-NEXT: v_mul_lo_u32 v11, v13, v12 +; GISEL-NEXT: v_mul_hi_u32 v13, v13, v12 +; GISEL-NEXT: v_mul_hi_u32 v12, v5, v12 +; GISEL-NEXT: v_add_i32_e64 v14, s[8:9], v17, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9] +; GISEL-NEXT: 
v_add_i32_e64 v8, s[8:9], v15, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v9, s[8:9], v11, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v14, s[8:9], v14, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v8, s[8:9], v8, v19 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9] +; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v14, s[6:7], v17, v14 +; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v15, v18 +; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v19 +; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v12 +; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v8, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v15, v12 +; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v14 +; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v12 +; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v13, v11 +; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v6, v10, vcc +; GISEL-NEXT: v_addc_u32_e64 v7, vcc, v7, v11, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 +; GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc +; GISEL-NEXT: v_mul_lo_u32 v8, v3, v4 +; GISEL-NEXT: v_mul_hi_u32 v10, v2, v4 +; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc +; GISEL-NEXT: v_mul_lo_u32 v9, v1, v5 +; GISEL-NEXT: v_mul_hi_u32 v11, v0, v5 +; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5 +; GISEL-NEXT: v_mul_lo_u32 v12, v2, v6 +; GISEL-NEXT: v_mul_lo_u32 v13, v3, v6 +; GISEL-NEXT: v_mul_hi_u32 v14, v2, v6 +; GISEL-NEXT: v_mul_hi_u32 v6, v3, v6 +; GISEL-NEXT: v_mul_lo_u32 v15, v0, v7 +; GISEL-NEXT: v_mul_lo_u32 v16, v1, v7 +; GISEL-NEXT: v_mul_hi_u32 v17, v0, v7 +; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7 +; 
GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v13, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v16, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v12, v8 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v15, v9 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v16, v11 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; GISEL-NEXT: v_mul_lo_u32 v10, s12, v4 +; GISEL-NEXT: v_mul_lo_u32 v12, 0, v4 +; GISEL-NEXT: v_mul_hi_u32 v13, s12, v4 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; GISEL-NEXT: v_mul_lo_u32 v11, s12, v5 +; GISEL-NEXT: v_mul_lo_u32 v14, 0, v5 +; GISEL-NEXT: v_mul_hi_u32 v15, s12, v5 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; GISEL-NEXT: v_mul_lo_u32 v8, s12, v6 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v4 +; GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v6, vcc +; GISEL-NEXT: v_mul_lo_u32 v17, s12, v7 +; GISEL-NEXT: v_add_i32_e32 v18, vcc, 1, v5 +; GISEL-NEXT: v_addc_u32_e32 v19, vcc, 0, v7, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v12, v8 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v14, v17 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, 1, v9 +; GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, 
v16, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v13 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, 1, v18 +; GISEL-NEXT: v_addc_u32_e32 v15, vcc, 0, v19, vcc +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 +; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], v3, v8, vcc +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v8 +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s12, v2 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; GISEL-NEXT: v_subrev_i32_e64 v2, s[4:5], s12, v2 +; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v0, v11 +; GISEL-NEXT: v_subb_u32_e64 v11, s[8:9], v1, v12, s[6:7] +; GISEL-NEXT: v_sub_i32_e64 v1, s[8:9], v1, v12 +; GISEL-NEXT: v_cmp_le_u32_e64 s[8:9], 0, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[8:9] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[8:9], 0, v10 +; GISEL-NEXT: v_cmp_le_u32_e64 s[10:11], s12, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[10:11] +; GISEL-NEXT: v_subrev_i32_e64 v0, s[10:11], s12, v0 +; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s12, v2 +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v8, v12, v8, s[8:9] +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, 0, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc +; GISEL-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[6:7] +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s12, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc +; GISEL-NEXT: v_subbrev_u32_e64 v3, vcc, 0, v3, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v11 +; GISEL-NEXT: v_cndmask_b32_e32 v10, v12, v10, vcc +; GISEL-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[10:11] +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, 0, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v11, v2, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc +; GISEL-NEXT: 
v_cmp_ne_u32_e32 vcc, 0, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v9, v14, vcc +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v0, v18, v13, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v3, v16, v17, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v1, v19, v15, s[4:5] +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v1, v7, v1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: v_udiv_v2i64_pow2k_denom: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: s_movk_i32 s12, 0x1000 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0 +; CGP-NEXT: v_cvt_f32_u32_e32 v5, s12 +; CGP-NEXT: s_mov_b32 s8, 0xfffff000 +; CGP-NEXT: v_mov_b32_e32 v6, v5 +; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v4 +; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v4 +; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v6 +; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 +; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v4 +; CGP-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 +; CGP-NEXT: v_trunc_f32_e32 v6, v6 +; CGP-NEXT: v_trunc_f32_e32 v7, v7 +; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 +; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 +; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 +; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 +; CGP-NEXT: v_mul_lo_u32 v8, s8, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CGP-NEXT: v_mul_lo_u32 v9, s8, v7 +; CGP-NEXT: v_mul_lo_u32 v10, s8, v4 +; CGP-NEXT: v_mul_lo_u32 v11, -1, v4 +; CGP-NEXT: v_mul_hi_u32 v12, s8, v4 +; CGP-NEXT: v_mul_lo_u32 v13, s8, v5 +; CGP-NEXT: v_mul_lo_u32 v14, -1, v5 +; CGP-NEXT: v_mul_hi_u32 v15, s8, v5 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v8 +; CGP-NEXT: v_mul_lo_u32 v11, v6, v10 +; CGP-NEXT: v_mul_hi_u32 v16, v4, 
v10 +; CGP-NEXT: v_mul_hi_u32 v10, v6, v10 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v14, v9 +; CGP-NEXT: v_mul_lo_u32 v14, v7, v13 +; CGP-NEXT: v_mul_hi_u32 v17, v5, v13 +; CGP-NEXT: v_mul_hi_u32 v13, v7, v13 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v12 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v15 +; CGP-NEXT: v_mul_lo_u32 v12, v4, v8 +; CGP-NEXT: v_mul_lo_u32 v15, v6, v8 +; CGP-NEXT: v_mul_hi_u32 v18, v4, v8 +; CGP-NEXT: v_mul_hi_u32 v8, v6, v8 +; CGP-NEXT: v_mul_lo_u32 v19, v5, v9 +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v19 +; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v17 +; CGP-NEXT: v_mul_lo_u32 v14, v7, v9 +; CGP-NEXT: v_mul_hi_u32 v17, v5, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v7, v9 +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v12 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v15, v10 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v16 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v18 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v17 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v16 +; CGP-NEXT: v_add_i32_e32 v15, vcc, v19, v18 +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v17 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v15 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10 +; CGP-NEXT: v_addc_u32_e64 v10, s[4:5], v6, 
v8, vcc +; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8 +; CGP-NEXT: v_mul_lo_u32 v8, s8, v4 +; CGP-NEXT: v_mul_lo_u32 v11, -1, v4 +; CGP-NEXT: v_mul_hi_u32 v12, s8, v4 +; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v5, v13 +; CGP-NEXT: v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5] +; CGP-NEXT: v_add_i32_e64 v7, s[6:7], v7, v9 +; CGP-NEXT: v_mul_lo_u32 v9, s8, v5 +; CGP-NEXT: v_mul_lo_u32 v14, -1, v5 +; CGP-NEXT: v_mul_hi_u32 v15, s8, v5 +; CGP-NEXT: v_mul_lo_u32 v16, s8, v10 +; CGP-NEXT: v_mul_lo_u32 v17, v10, v8 +; CGP-NEXT: v_mul_hi_u32 v18, v4, v8 +; CGP-NEXT: v_mul_hi_u32 v8, v10, v8 +; CGP-NEXT: v_mul_lo_u32 v19, s8, v13 +; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v11, v16 +; CGP-NEXT: v_mul_lo_u32 v16, v13, v9 +; CGP-NEXT: v_add_i32_e64 v14, s[6:7], v14, v19 +; CGP-NEXT: v_mul_hi_u32 v19, v5, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v13, v9 +; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v11, v12 +; CGP-NEXT: v_add_i32_e64 v12, s[6:7], v14, v15 +; CGP-NEXT: v_mul_lo_u32 v14, v4, v11 +; CGP-NEXT: v_mul_lo_u32 v15, v5, v12 +; CGP-NEXT: v_add_i32_e64 v15, s[6:7], v16, v15 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v15, s[6:7], v15, v19 +; CGP-NEXT: v_mul_lo_u32 v15, v10, v11 +; CGP-NEXT: v_mul_hi_u32 v19, v4, v11 +; CGP-NEXT: v_mul_hi_u32 v10, v10, v11 +; CGP-NEXT: v_mul_lo_u32 v11, v13, v12 +; CGP-NEXT: v_mul_hi_u32 v13, v13, v12 +; CGP-NEXT: v_mul_hi_u32 v12, v5, v12 +; CGP-NEXT: v_add_i32_e64 v14, s[8:9], v17, v14 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9] +; CGP-NEXT: v_add_i32_e64 v8, s[8:9], v15, v8 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[8:9] +; CGP-NEXT: v_add_i32_e64 v9, s[8:9], v11, v9 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[8:9] +; CGP-NEXT: v_add_i32_e64 v14, s[8:9], v14, v18 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[8:9] +; CGP-NEXT: v_add_i32_e64 v8, s[8:9], v8, v19 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9] +; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v9, s[6:7], v9, v12 +; CGP-NEXT: 
v_cndmask_b32_e64 v12, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v14, s[6:7], v17, v14 +; CGP-NEXT: v_add_i32_e64 v15, s[6:7], v15, v18 +; CGP-NEXT: v_add_i32_e64 v16, s[6:7], v16, v19 +; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v11, v12 +; CGP-NEXT: v_add_i32_e64 v8, s[6:7], v8, v14 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v9, s[6:7], v9, v16 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v12, s[6:7], v15, v12 +; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v11, v14 +; CGP-NEXT: v_add_i32_e64 v10, s[6:7], v10, v12 +; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v13, v11 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v10, vcc +; CGP-NEXT: v_addc_u32_e64 v7, vcc, v7, v11, s[4:5] +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v8 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc +; CGP-NEXT: v_mul_lo_u32 v8, v3, v4 +; CGP-NEXT: v_mul_hi_u32 v10, v2, v4 +; CGP-NEXT: v_mul_hi_u32 v4, v3, v4 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; CGP-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc +; CGP-NEXT: v_mul_lo_u32 v9, v1, v5 +; CGP-NEXT: v_mul_hi_u32 v11, v0, v5 +; CGP-NEXT: v_mul_hi_u32 v5, v1, v5 +; CGP-NEXT: v_mul_lo_u32 v12, v2, v6 +; CGP-NEXT: v_mul_lo_u32 v13, v3, v6 +; CGP-NEXT: v_mul_hi_u32 v14, v2, v6 +; CGP-NEXT: v_mul_hi_u32 v6, v3, v6 +; CGP-NEXT: v_mul_lo_u32 v15, v0, v7 +; CGP-NEXT: v_mul_lo_u32 v16, v1, v7 +; CGP-NEXT: v_mul_hi_u32 v17, v0, v7 +; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v12 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v15 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v16, v5 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v14 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 
v9, vcc, v9, v11 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v17 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v12, v8 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v13, v10 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v15, v9 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v16, v11 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v8 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_mul_lo_u32 v10, s12, v4 +; CGP-NEXT: v_mul_lo_u32 v12, 0, v4 +; CGP-NEXT: v_mul_hi_u32 v13, s12, v4 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CGP-NEXT: v_mul_lo_u32 v11, s12, v5 +; CGP-NEXT: v_mul_lo_u32 v14, 0, v5 +; CGP-NEXT: v_mul_hi_u32 v15, s12, v5 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_mul_lo_u32 v8, s12, v6 +; CGP-NEXT: v_add_i32_e32 v9, vcc, 1, v4 +; CGP-NEXT: v_addc_u32_e32 v16, vcc, 0, v6, vcc +; CGP-NEXT: v_mul_lo_u32 v17, s12, v7 +; CGP-NEXT: v_add_i32_e32 v18, vcc, 1, v5 +; CGP-NEXT: v_addc_u32_e32 v19, vcc, 0, v7, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v12, v8 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v17 +; CGP-NEXT: v_add_i32_e32 v14, vcc, 1, v9 +; CGP-NEXT: v_addc_u32_e32 v17, vcc, 0, v16, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v13 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v15 +; CGP-NEXT: v_add_i32_e32 v13, vcc, 1, v18 +; CGP-NEXT: v_addc_u32_e32 v15, vcc, 0, v19, vcc +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 +; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v3, v8, vcc +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v8 +; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s12, v2 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; CGP-NEXT: v_subrev_i32_e64 v2, s[4:5], s12, v2 +; CGP-NEXT: v_sub_i32_e64 v0, s[6:7], v0, v11 +; CGP-NEXT: v_subb_u32_e64 v11, s[8:9], v1, v12, s[6:7] +; CGP-NEXT: v_sub_i32_e64 v1, s[8:9], v1, v12 +; CGP-NEXT: 
v_cmp_le_u32_e64 s[8:9], 0, v10 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[8:9] +; CGP-NEXT: v_cmp_eq_u32_e64 s[8:9], 0, v10 +; CGP-NEXT: v_cmp_le_u32_e64 s[10:11], s12, v0 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[10:11] +; CGP-NEXT: v_subrev_i32_e64 v0, s[10:11], s12, v0 +; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s12, v2 +; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc +; CGP-NEXT: v_cndmask_b32_e64 v8, v12, v8, s[8:9] +; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v11 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc +; CGP-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[6:7] +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s12, v0 +; CGP-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc +; CGP-NEXT: v_subbrev_u32_e64 v3, vcc, 0, v3, s[4:5] +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v11 +; CGP-NEXT: v_cndmask_b32_e32 v10, v12, v10, vcc +; CGP-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[10:11] +; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v3 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc +; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; CGP-NEXT: v_cndmask_b32_e32 v2, v11, v2, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 +; CGP-NEXT: v_cndmask_b32_e32 v1, v9, v14, vcc +; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0 +; CGP-NEXT: v_cndmask_b32_e64 v0, v18, v13, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v3, v16, v17, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 +; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v1, vcc +; CGP-NEXT: v_cndmask_b32_e64 v1, v19, v15, s[4:5] +; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v10 +; CGP-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[4:5] +; CGP-NEXT: v_cndmask_b32_e64 v1, v7, v1, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc +; CGP-NEXT: s_setpc_b64 s[30:31] + %result = udiv <2 x i64> %num, + ret <2 x i64> %result +} + +define i64 @v_udiv_i64_oddk_denom(i64 %num) { +; CHECK-LABEL: 
v_udiv_i64_oddk_denom: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_mov_b32 s8, 0x12d8fb +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0 +; CHECK-NEXT: s_mov_b32 s6, 0xffed2705 +; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s8 +; CHECK-NEXT: v_mac_f32_e32 v3, 0x4f800000, v2 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v3 +; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 +; CHECK-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 +; CHECK-NEXT: v_trunc_f32_e32 v3, v3 +; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3 +; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 +; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 +; CHECK-NEXT: v_mul_lo_u32 v4, s6, v3 +; CHECK-NEXT: v_mul_lo_u32 v5, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v6, -1, v2 +; CHECK-NEXT: v_mul_hi_u32 v7, s6, v2 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, v3, v5 +; CHECK-NEXT: v_mul_hi_u32 v8, v2, v5 +; CHECK-NEXT: v_mul_hi_u32 v5, v3, v5 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; CHECK-NEXT: v_mul_lo_u32 v7, v2, v4 +; CHECK-NEXT: v_mul_lo_u32 v9, v3, v4 +; CHECK-NEXT: v_mul_hi_u32 v10, v2, v4 +; CHECK-NEXT: v_mul_hi_u32 v4, v3, v4 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v9, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v8 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 +; CHECK-NEXT: v_addc_u32_e64 v5, s[4:5], v3, v4, vcc +; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v3, v4 +; CHECK-NEXT: v_mul_lo_u32 v4, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v6, -1, v2 +; 
CHECK-NEXT: v_mul_hi_u32 v7, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, s6, v5 +; CHECK-NEXT: v_mul_lo_u32 v9, v5, v4 +; CHECK-NEXT: v_mul_hi_u32 v10, v2, v4 +; CHECK-NEXT: v_mul_hi_u32 v4, v5, v4 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v7 +; CHECK-NEXT: v_mul_lo_u32 v7, v2, v6 +; CHECK-NEXT: v_mul_lo_u32 v8, v5, v6 +; CHECK-NEXT: v_mul_hi_u32 v11, v2, v6 +; CHECK-NEXT: v_mul_hi_u32 v5, v5, v6 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v9, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v8, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v7, v6 +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v8, v9 +; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v7, v6 +; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v5, v6 +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; CHECK-NEXT: v_mul_lo_u32 v4, v1, v2 +; CHECK-NEXT: v_mul_hi_u32 v5, v0, v2 +; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 +; CHECK-NEXT: v_mul_lo_u32 v6, v0, v3 +; CHECK-NEXT: v_mul_lo_u32 v7, v1, v3 +; CHECK-NEXT: v_mul_hi_u32 v8, v0, v3 +; CHECK-NEXT: v_mul_hi_u32 v3, v1, v3 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, 
v7, v5 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 +; CHECK-NEXT: v_mul_lo_u32 v5, s8, v2 +; CHECK-NEXT: v_mul_lo_u32 v6, 0, v2 +; CHECK-NEXT: v_mul_hi_u32 v7, s8, v2 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CHECK-NEXT: v_mul_lo_u32 v4, s8, v3 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, 1, v2 +; CHECK-NEXT: v_addc_u32_e32 v9, vcc, 0, v3, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, 1, v8 +; CHECK-NEXT: v_addc_u32_e32 v10, vcc, 0, v9, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 +; CHECK-NEXT: v_subb_u32_e64 v5, s[4:5], v1, v4, vcc +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v4 +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] +; CHECK-NEXT: v_subrev_i32_e64 v0, s[4:5], s8, v0 +; CHECK-NEXT: v_cmp_le_u32_e64 s[6:7], 0, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[6:7] +; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s8, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc +; CHECK-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5] +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v8, v6, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v9, v10, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; CHECK-NEXT: s_setpc_b64 s[30:31] + %result = udiv i64 %num, 1235195 + ret i64 %result +} + +define <2 x i64> @v_udiv_v2i64_oddk_denom(<2 x i64> %num) { +; GISEL-LABEL: v_udiv_v2i64_oddk_denom: +; GISEL: ; %bb.0: +; 
GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_mov_b32 s12, 0x12d8fb +; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s12 +; GISEL-NEXT: s_sub_u32 s8, 0, s12 +; GISEL-NEXT: s_cselect_b32 s4, 1, 0 +; GISEL-NEXT: v_cvt_f32_u32_e32 v5, 0 +; GISEL-NEXT: v_mov_b32_e32 v6, v4 +; GISEL-NEXT: s_and_b32 s4, s4, 1 +; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v6 +; GISEL-NEXT: s_cmp_lg_u32 s4, 0 +; GISEL-NEXT: s_subb_u32 s9, 0, 0 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 +; GISEL-NEXT: v_mul_f32_e32 v6, 0x2f800000, v4 +; GISEL-NEXT: s_sub_u32 s10, 0, s12 +; GISEL-NEXT: s_cselect_b32 s4, 1, 0 +; GISEL-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 +; GISEL-NEXT: v_trunc_f32_e32 v6, v6 +; GISEL-NEXT: s_and_b32 s4, s4, 1 +; GISEL-NEXT: v_trunc_f32_e32 v7, v7 +; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6 +; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 +; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7 +; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 +; GISEL-NEXT: s_cmp_lg_u32 s4, 0 +; GISEL-NEXT: s_subb_u32 s6, 0, 0 +; GISEL-NEXT: v_mul_lo_u32 v8, s10, v6 +; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 +; GISEL-NEXT: v_mul_lo_u32 v9, s8, v7 +; GISEL-NEXT: v_mul_lo_u32 v10, s10, v4 +; GISEL-NEXT: v_mul_lo_u32 v11, s6, v4 +; GISEL-NEXT: v_mul_hi_u32 v12, s10, v4 +; GISEL-NEXT: v_mul_lo_u32 v13, s8, v5 +; GISEL-NEXT: v_mul_lo_u32 v14, s9, v5 +; GISEL-NEXT: v_mul_hi_u32 v15, s8, v5 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v8 +; GISEL-NEXT: v_mul_lo_u32 v11, v6, v10 +; GISEL-NEXT: v_mul_hi_u32 v16, v4, v10 +; GISEL-NEXT: v_mul_hi_u32 v10, v6, v10 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v14, v9 +; GISEL-NEXT: v_mul_lo_u32 v14, v7, v13 +; GISEL-NEXT: v_mul_hi_u32 v17, v5, v13 +; GISEL-NEXT: v_mul_hi_u32 v13, v7, v13 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 +; GISEL-NEXT: 
v_add_i32_e32 v9, vcc, v9, v15 +; GISEL-NEXT: v_mul_lo_u32 v12, v4, v8 +; GISEL-NEXT: v_mul_lo_u32 v15, v6, v8 +; GISEL-NEXT: v_mul_hi_u32 v18, v4, v8 +; GISEL-NEXT: v_mul_hi_u32 v8, v6, v8 +; GISEL-NEXT: v_mul_lo_u32 v19, v5, v9 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v19 +; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17 +; GISEL-NEXT: v_mul_lo_u32 v14, v7, v9 +; GISEL-NEXT: v_mul_hi_u32 v17, v5, v9 +; GISEL-NEXT: v_mul_hi_u32 v9, v7, v9 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v15, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v16 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v19, v18 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v14, v15 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v12 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10 +; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], v6, v8, vcc +; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8 +; GISEL-NEXT: v_mul_lo_u32 v8, s10, v4 +; GISEL-NEXT: v_mul_lo_u32 v11, s6, v4 +; GISEL-NEXT: v_mul_hi_u32 v12, s10, v4 +; 
GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v13 +; GISEL-NEXT: v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v7, s[6:7], v7, v9 +; GISEL-NEXT: v_mul_lo_u32 v9, s8, v5 +; GISEL-NEXT: v_mul_lo_u32 v14, s9, v5 +; GISEL-NEXT: v_mul_hi_u32 v15, s8, v5 +; GISEL-NEXT: v_mul_lo_u32 v16, s10, v10 +; GISEL-NEXT: v_mul_lo_u32 v17, v10, v8 +; GISEL-NEXT: v_mul_hi_u32 v18, v4, v8 +; GISEL-NEXT: v_mul_hi_u32 v8, v10, v8 +; GISEL-NEXT: v_mul_lo_u32 v19, s8, v13 +; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v16 +; GISEL-NEXT: v_mul_lo_u32 v16, v13, v9 +; GISEL-NEXT: v_add_i32_e64 v14, s[6:7], v14, v19 +; GISEL-NEXT: v_mul_hi_u32 v19, v5, v9 +; GISEL-NEXT: v_mul_hi_u32 v9, v13, v9 +; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v12 +; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v14, v15 +; GISEL-NEXT: v_mul_lo_u32 v14, v4, v11 +; GISEL-NEXT: v_mul_lo_u32 v15, v5, v12 +; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v16, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v15, v19 +; GISEL-NEXT: v_mul_lo_u32 v15, v10, v11 +; GISEL-NEXT: v_mul_hi_u32 v19, v4, v11 +; GISEL-NEXT: v_mul_hi_u32 v10, v10, v11 +; GISEL-NEXT: v_mul_lo_u32 v11, v13, v12 +; GISEL-NEXT: v_mul_hi_u32 v13, v13, v12 +; GISEL-NEXT: v_mul_hi_u32 v12, v5, v12 +; GISEL-NEXT: v_add_i32_e64 v14, s[8:9], v17, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v8, s[8:9], v15, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v9, s[8:9], v11, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v14, s[8:9], v14, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v8, s[8:9], v8, v19 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9] +; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v14, s[6:7], v17, v14 +; 
GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v15, v18 +; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v19 +; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v12 +; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v8, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v15, v12 +; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v14 +; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v12 +; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v13, v11 +; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v6, v10, vcc +; GISEL-NEXT: v_addc_u32_e64 v7, vcc, v7, v11, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 +; GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc +; GISEL-NEXT: v_mul_lo_u32 v8, v3, v4 +; GISEL-NEXT: v_mul_hi_u32 v10, v2, v4 +; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc +; GISEL-NEXT: v_mul_lo_u32 v9, v1, v5 +; GISEL-NEXT: v_mul_hi_u32 v11, v0, v5 +; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5 +; GISEL-NEXT: v_mul_lo_u32 v12, v2, v6 +; GISEL-NEXT: v_mul_lo_u32 v13, v3, v6 +; GISEL-NEXT: v_mul_hi_u32 v14, v2, v6 +; GISEL-NEXT: v_mul_hi_u32 v6, v3, v6 +; GISEL-NEXT: v_mul_lo_u32 v15, v0, v7 +; GISEL-NEXT: v_mul_lo_u32 v16, v1, v7 +; GISEL-NEXT: v_mul_hi_u32 v17, v0, v7 +; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v13, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v16, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 
v9, vcc, v9, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v12, v8 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v15, v9 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v16, v11 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; GISEL-NEXT: v_mul_lo_u32 v10, s12, v4 +; GISEL-NEXT: v_mul_lo_u32 v12, 0, v4 +; GISEL-NEXT: v_mul_hi_u32 v13, s12, v4 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; GISEL-NEXT: v_mul_lo_u32 v11, s12, v5 +; GISEL-NEXT: v_mul_lo_u32 v14, 0, v5 +; GISEL-NEXT: v_mul_hi_u32 v15, s12, v5 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; GISEL-NEXT: v_mul_lo_u32 v8, s12, v6 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v4 +; GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v6, vcc +; GISEL-NEXT: v_mul_lo_u32 v17, s12, v7 +; GISEL-NEXT: v_add_i32_e32 v18, vcc, 1, v5 +; GISEL-NEXT: v_addc_u32_e32 v19, vcc, 0, v7, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v12, v8 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v14, v17 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, 1, v9 +; GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v16, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v13 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, 1, v18 +; GISEL-NEXT: v_addc_u32_e32 v15, vcc, 0, v19, vcc +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 +; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], v3, v8, vcc +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v8 +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s12, v2 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; GISEL-NEXT: v_subrev_i32_e64 v2, s[4:5], s12, v2 +; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v0, v11 +; GISEL-NEXT: v_subb_u32_e64 v11, 
s[8:9], v1, v12, s[6:7] +; GISEL-NEXT: v_sub_i32_e64 v1, s[8:9], v1, v12 +; GISEL-NEXT: v_cmp_le_u32_e64 s[8:9], 0, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[8:9] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[8:9], 0, v10 +; GISEL-NEXT: v_cmp_le_u32_e64 s[10:11], s12, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[10:11] +; GISEL-NEXT: v_subrev_i32_e64 v0, s[10:11], s12, v0 +; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s12, v2 +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v8, v12, v8, s[8:9] +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, 0, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc +; GISEL-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[6:7] +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s12, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc +; GISEL-NEXT: v_subbrev_u32_e64 v3, vcc, 0, v3, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v11 +; GISEL-NEXT: v_cndmask_b32_e32 v10, v12, v10, vcc +; GISEL-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[10:11] +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, 0, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v11, v2, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v9, v14, vcc +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v0, v18, v13, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v3, v16, v17, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v1, v19, v15, s[4:5] +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v1, v7, v1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc +; 
GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: v_udiv_v2i64_oddk_denom: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: s_mov_b32 s12, 0x12d8fb +; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0 +; CGP-NEXT: v_cvt_f32_u32_e32 v5, s12 +; CGP-NEXT: s_mov_b32 s8, 0xffed2705 +; CGP-NEXT: v_mov_b32_e32 v6, v5 +; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v4 +; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v4 +; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v6 +; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 +; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v4 +; CGP-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 +; CGP-NEXT: v_trunc_f32_e32 v6, v6 +; CGP-NEXT: v_trunc_f32_e32 v7, v7 +; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 +; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 +; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 +; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 +; CGP-NEXT: v_mul_lo_u32 v8, s8, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CGP-NEXT: v_mul_lo_u32 v9, s8, v7 +; CGP-NEXT: v_mul_lo_u32 v10, s8, v4 +; CGP-NEXT: v_mul_lo_u32 v11, -1, v4 +; CGP-NEXT: v_mul_hi_u32 v12, s8, v4 +; CGP-NEXT: v_mul_lo_u32 v13, s8, v5 +; CGP-NEXT: v_mul_lo_u32 v14, -1, v5 +; CGP-NEXT: v_mul_hi_u32 v15, s8, v5 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v8 +; CGP-NEXT: v_mul_lo_u32 v11, v6, v10 +; CGP-NEXT: v_mul_hi_u32 v16, v4, v10 +; CGP-NEXT: v_mul_hi_u32 v10, v6, v10 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v14, v9 +; CGP-NEXT: v_mul_lo_u32 v14, v7, v13 +; CGP-NEXT: v_mul_hi_u32 v17, v5, v13 +; CGP-NEXT: v_mul_hi_u32 v13, v7, v13 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v12 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v15 +; CGP-NEXT: v_mul_lo_u32 v12, v4, v8 +; CGP-NEXT: v_mul_lo_u32 v15, v6, v8 +; CGP-NEXT: v_mul_hi_u32 v18, v4, v8 +; CGP-NEXT: v_mul_hi_u32 v8, v6, v8 +; CGP-NEXT: v_mul_lo_u32 v19, v5, v9 +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v19 +; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc +; 
CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v17 +; CGP-NEXT: v_mul_lo_u32 v14, v7, v9 +; CGP-NEXT: v_mul_hi_u32 v17, v5, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v7, v9 +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v12 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v15, v10 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v16 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v18 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v17 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v16 +; CGP-NEXT: v_add_i32_e32 v15, vcc, v19, v18 +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v17 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v15 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10 +; CGP-NEXT: v_addc_u32_e64 v10, s[4:5], v6, v8, vcc +; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8 +; CGP-NEXT: v_mul_lo_u32 v8, s8, v4 +; CGP-NEXT: v_mul_lo_u32 v11, -1, v4 +; CGP-NEXT: v_mul_hi_u32 v12, s8, v4 +; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v5, v13 +; CGP-NEXT: v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5] +; CGP-NEXT: v_add_i32_e64 v7, s[6:7], v7, v9 +; CGP-NEXT: v_mul_lo_u32 v9, s8, v5 +; CGP-NEXT: v_mul_lo_u32 v14, -1, v5 +; CGP-NEXT: v_mul_hi_u32 v15, s8, v5 +; CGP-NEXT: v_mul_lo_u32 v16, s8, v10 +; CGP-NEXT: v_mul_lo_u32 v17, v10, v8 +; CGP-NEXT: v_mul_hi_u32 v18, v4, v8 +; CGP-NEXT: v_mul_hi_u32 v8, 
v10, v8 +; CGP-NEXT: v_mul_lo_u32 v19, s8, v13 +; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v11, v16 +; CGP-NEXT: v_mul_lo_u32 v16, v13, v9 +; CGP-NEXT: v_add_i32_e64 v14, s[6:7], v14, v19 +; CGP-NEXT: v_mul_hi_u32 v19, v5, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v13, v9 +; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v11, v12 +; CGP-NEXT: v_add_i32_e64 v12, s[6:7], v14, v15 +; CGP-NEXT: v_mul_lo_u32 v14, v4, v11 +; CGP-NEXT: v_mul_lo_u32 v15, v5, v12 +; CGP-NEXT: v_add_i32_e64 v15, s[6:7], v16, v15 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v15, s[6:7], v15, v19 +; CGP-NEXT: v_mul_lo_u32 v15, v10, v11 +; CGP-NEXT: v_mul_hi_u32 v19, v4, v11 +; CGP-NEXT: v_mul_hi_u32 v10, v10, v11 +; CGP-NEXT: v_mul_lo_u32 v11, v13, v12 +; CGP-NEXT: v_mul_hi_u32 v13, v13, v12 +; CGP-NEXT: v_mul_hi_u32 v12, v5, v12 +; CGP-NEXT: v_add_i32_e64 v14, s[8:9], v17, v14 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9] +; CGP-NEXT: v_add_i32_e64 v8, s[8:9], v15, v8 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[8:9] +; CGP-NEXT: v_add_i32_e64 v9, s[8:9], v11, v9 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[8:9] +; CGP-NEXT: v_add_i32_e64 v14, s[8:9], v14, v18 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[8:9] +; CGP-NEXT: v_add_i32_e64 v8, s[8:9], v8, v19 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9] +; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v9, s[6:7], v9, v12 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v14, s[6:7], v17, v14 +; CGP-NEXT: v_add_i32_e64 v15, s[6:7], v15, v18 +; CGP-NEXT: v_add_i32_e64 v16, s[6:7], v16, v19 +; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v11, v12 +; CGP-NEXT: v_add_i32_e64 v8, s[6:7], v8, v14 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v9, s[6:7], v9, v16 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v12, s[6:7], v15, v12 +; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v11, v14 +; CGP-NEXT: v_add_i32_e64 v10, s[6:7], v10, v12 +; CGP-NEXT: 
v_add_i32_e64 v11, s[6:7], v13, v11 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v10, vcc +; CGP-NEXT: v_addc_u32_e64 v7, vcc, v7, v11, s[4:5] +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v8 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc +; CGP-NEXT: v_mul_lo_u32 v8, v3, v4 +; CGP-NEXT: v_mul_hi_u32 v10, v2, v4 +; CGP-NEXT: v_mul_hi_u32 v4, v3, v4 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; CGP-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc +; CGP-NEXT: v_mul_lo_u32 v9, v1, v5 +; CGP-NEXT: v_mul_hi_u32 v11, v0, v5 +; CGP-NEXT: v_mul_hi_u32 v5, v1, v5 +; CGP-NEXT: v_mul_lo_u32 v12, v2, v6 +; CGP-NEXT: v_mul_lo_u32 v13, v3, v6 +; CGP-NEXT: v_mul_hi_u32 v14, v2, v6 +; CGP-NEXT: v_mul_hi_u32 v6, v3, v6 +; CGP-NEXT: v_mul_lo_u32 v15, v0, v7 +; CGP-NEXT: v_mul_lo_u32 v16, v1, v7 +; CGP-NEXT: v_mul_hi_u32 v17, v0, v7 +; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v12 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v15 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v16, v5 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v14 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v17 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v12, v8 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v13, v10 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v15, v9 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v16, v11 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v8 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_mul_lo_u32 v10, 
s12, v4 +; CGP-NEXT: v_mul_lo_u32 v12, 0, v4 +; CGP-NEXT: v_mul_hi_u32 v13, s12, v4 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CGP-NEXT: v_mul_lo_u32 v11, s12, v5 +; CGP-NEXT: v_mul_lo_u32 v14, 0, v5 +; CGP-NEXT: v_mul_hi_u32 v15, s12, v5 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_mul_lo_u32 v8, s12, v6 +; CGP-NEXT: v_add_i32_e32 v9, vcc, 1, v4 +; CGP-NEXT: v_addc_u32_e32 v16, vcc, 0, v6, vcc +; CGP-NEXT: v_mul_lo_u32 v17, s12, v7 +; CGP-NEXT: v_add_i32_e32 v18, vcc, 1, v5 +; CGP-NEXT: v_addc_u32_e32 v19, vcc, 0, v7, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v12, v8 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v17 +; CGP-NEXT: v_add_i32_e32 v14, vcc, 1, v9 +; CGP-NEXT: v_addc_u32_e32 v17, vcc, 0, v16, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v13 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v15 +; CGP-NEXT: v_add_i32_e32 v13, vcc, 1, v18 +; CGP-NEXT: v_addc_u32_e32 v15, vcc, 0, v19, vcc +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 +; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v3, v8, vcc +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v8 +; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s12, v2 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; CGP-NEXT: v_subrev_i32_e64 v2, s[4:5], s12, v2 +; CGP-NEXT: v_sub_i32_e64 v0, s[6:7], v0, v11 +; CGP-NEXT: v_subb_u32_e64 v11, s[8:9], v1, v12, s[6:7] +; CGP-NEXT: v_sub_i32_e64 v1, s[8:9], v1, v12 +; CGP-NEXT: v_cmp_le_u32_e64 s[8:9], 0, v10 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[8:9] +; CGP-NEXT: v_cmp_eq_u32_e64 s[8:9], 0, v10 +; CGP-NEXT: v_cmp_le_u32_e64 s[10:11], s12, v0 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[10:11] +; CGP-NEXT: v_subrev_i32_e64 v0, s[10:11], s12, v0 +; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s12, v2 +; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc +; CGP-NEXT: v_cndmask_b32_e64 v8, v12, v8, s[8:9] +; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v11 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc +; CGP-NEXT: v_subbrev_u32_e64 
v1, vcc, 0, v1, s[6:7] +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s12, v0 +; CGP-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc +; CGP-NEXT: v_subbrev_u32_e64 v3, vcc, 0, v3, s[4:5] +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v11 +; CGP-NEXT: v_cndmask_b32_e32 v10, v12, v10, vcc +; CGP-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[10:11] +; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v3 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc +; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; CGP-NEXT: v_cndmask_b32_e32 v2, v11, v2, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 +; CGP-NEXT: v_cndmask_b32_e32 v1, v9, v14, vcc +; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0 +; CGP-NEXT: v_cndmask_b32_e64 v0, v18, v13, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v3, v16, v17, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 +; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v1, vcc +; CGP-NEXT: v_cndmask_b32_e64 v1, v19, v15, s[4:5] +; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v10 +; CGP-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[4:5] +; CGP-NEXT: v_cndmask_b32_e64 v1, v7, v1, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc +; CGP-NEXT: s_setpc_b64 s[30:31] + %result = udiv <2 x i64> %num, + ret <2 x i64> %result +} + +define i64 @v_udiv_i64_pow2_shl_denom(i64 %x, i64 %y) { +; CHECK-LABEL: v_udiv_i64_pow2_shl_denom: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_movk_i32 s4, 0x1000 +; CHECK-NEXT: s_mov_b32 s5, 0 +; CHECK-NEXT: v_mov_b32_e32 v6, 0 +; CHECK-NEXT: v_lshl_b64 v[4:5], s[4:5], v2 +; CHECK-NEXT: v_or_b32_e32 v7, v1, v5 +; CHECK-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7] +; CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1 +; CHECK-NEXT: s_xor_b64 s[4:5], vcc, s[4:5] +; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3 +; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; CHECK-NEXT: s_xor_b64 s[8:9], exec, s[6:7] +; CHECK-NEXT: s_cbranch_execnz 
BB7_2 +; CHECK-NEXT: ; %bb.1: +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, v4 +; CHECK-NEXT: v_cvt_f32_u32_e32 v3, v5 +; CHECK-NEXT: v_sub_i32_e32 v6, vcc, 0, v4 +; CHECK-NEXT: v_subb_u32_e32 v7, vcc, 0, v5, vcc +; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 +; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 +; CHECK-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 +; CHECK-NEXT: v_trunc_f32_e32 v3, v3 +; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3 +; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 +; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, v6, v3 +; CHECK-NEXT: v_mul_lo_u32 v9, v6, v2 +; CHECK-NEXT: v_mul_lo_u32 v10, v7, v2 +; CHECK-NEXT: v_mul_hi_u32 v11, v6, v2 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CHECK-NEXT: v_mul_lo_u32 v10, v3, v9 +; CHECK-NEXT: v_mul_hi_u32 v12, v2, v9 +; CHECK-NEXT: v_mul_hi_u32 v9, v3, v9 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CHECK-NEXT: v_mul_lo_u32 v11, v2, v8 +; CHECK-NEXT: v_mul_lo_u32 v13, v3, v8 +; CHECK-NEXT: v_mul_hi_u32 v14, v2, v8 +; CHECK-NEXT: v_mul_hi_u32 v8, v3, v8 +; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v13, v9 +; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v12 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v14 +; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CHECK-NEXT: v_add_i32_e32 v11, vcc, v13, v12 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v9 +; CHECK-NEXT: v_addc_u32_e64 v9, s[4:5], v3, v8, vcc +; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v3, v8 +; CHECK-NEXT: v_mul_lo_u32 v8, v6, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, v7, v2 +; CHECK-NEXT: 
v_mul_hi_u32 v10, v6, v2 +; CHECK-NEXT: v_mul_lo_u32 v6, v6, v9 +; CHECK-NEXT: v_mul_lo_u32 v11, v9, v8 +; CHECK-NEXT: v_mul_hi_u32 v12, v2, v8 +; CHECK-NEXT: v_mul_hi_u32 v8, v9, v8 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v7, v6 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v10 +; CHECK-NEXT: v_mul_lo_u32 v7, v2, v6 +; CHECK-NEXT: v_mul_lo_u32 v10, v9, v6 +; CHECK-NEXT: v_mul_hi_u32 v13, v2, v6 +; CHECK-NEXT: v_mul_hi_u32 v6, v9, v6 +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v11, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v10, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v12 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v8, v13 +; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v9, v7 +; CHECK-NEXT: v_add_i32_e64 v9, s[4:5], v10, v11 +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v8, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v9, v8 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8 +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7 +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; CHECK-NEXT: v_mul_lo_u32 v6, v1, v2 +; CHECK-NEXT: v_mul_hi_u32 v7, v0, v2 +; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, v0, v3 +; CHECK-NEXT: v_mul_lo_u32 v9, v1, v3 +; CHECK-NEXT: v_mul_hi_u32 v10, v0, v3 +; CHECK-NEXT: v_mul_hi_u32 v3, v1, v3 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v9, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, 
v9, v7 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CHECK-NEXT: v_mul_lo_u32 v7, v4, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, v5, v2 +; CHECK-NEXT: v_mul_hi_u32 v9, v4, v2 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v6 +; CHECK-NEXT: v_mul_lo_u32 v6, v4, v3 +; CHECK-NEXT: v_add_i32_e32 v10, vcc, 1, v2 +; CHECK-NEXT: v_addc_u32_e32 v11, vcc, 0, v3, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, 1, v10 +; CHECK-NEXT: v_addc_u32_e32 v12, vcc, 0, v11, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v9 +; CHECK-NEXT: v_sub_i32_e32 v7, vcc, v0, v7 +; CHECK-NEXT: v_subb_u32_e64 v9, s[4:5], v1, v6, vcc +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v6 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] +; CHECK-NEXT: v_sub_i32_e64 v7, s[4:5], v7, v4 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[6:7], v9, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[6:7] +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v7, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v9, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v6, v13, v6, vcc +; CHECK-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5] +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v1, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v9, v7, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v10, v8, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v5, v11, v12, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v1, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; CHECK-NEXT: BB7_2: ; %Flow +; CHECK-NEXT: s_or_saveexec_b64 s[6:7], s[8:9] +; CHECK-NEXT: s_xor_b64 exec, exec, s[6:7] +; CHECK-NEXT: s_cbranch_execz BB7_4 +; CHECK-NEXT: ; %bb.3: +; CHECK-NEXT: v_cvt_f32_u32_e32 
v1, v4 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 +; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_lo_u32 v2, v1, v4 +; CHECK-NEXT: v_mul_hi_u32 v3, v1, v4 +; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v2 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc +; CHECK-NEXT: v_mul_hi_u32 v2, v2, v1 +; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v1, v2 +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0 +; CHECK-NEXT: v_mul_lo_u32 v2, v1, v4 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v1 +; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, 1, v1 +; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v0, v2 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v4 +; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc +; CHECK-NEXT: v_cndmask_b32_e64 v0, v3, v1, s[4:5] +; CHECK-NEXT: v_cndmask_b32_e32 v2, v5, v0, vcc +; CHECK-NEXT: v_mov_b32_e32 v3, 0 +; CHECK-NEXT: BB7_4: +; CHECK-NEXT: s_or_b64 exec, exec, s[6:7] +; CHECK-NEXT: v_mov_b32_e32 v0, v2 +; CHECK-NEXT: v_mov_b32_e32 v1, v3 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %shl.y = shl i64 4096, %y + %r = udiv i64 %x, %shl.y + ret i64 %r +} + +define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { +; GISEL-LABEL: v_udiv_v2i64_pow2_shl_denom: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_movk_i32 s4, 0x1000 +; GISEL-NEXT: s_mov_b32 s5, 0 +; GISEL-NEXT: v_lshl_b64 v[4:5], s[4:5], v4 +; GISEL-NEXT: v_lshl_b64 v[6:7], s[4:5], v6 +; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v4 +; GISEL-NEXT: v_cvt_f32_u32_e32 v9, v5 +; GISEL-NEXT: v_mac_f32_e32 v8, 0x4f800000, v9 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v8, v8 +; GISEL-NEXT: v_mul_f32_e32 v8, 0x5f7ffffc, v8 +; GISEL-NEXT: v_mul_f32_e32 v9, 0x2f800000, v8 +; GISEL-NEXT: v_trunc_f32_e32 v9, v9 +; GISEL-NEXT: v_mac_f32_e32 v8, 0xcf800000, v9 +; GISEL-NEXT: 
v_cvt_u32_f32_e32 v8, v8 +; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v9 +; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v4 +; GISEL-NEXT: v_subb_u32_e32 v11, vcc, 0, v5, vcc +; GISEL-NEXT: v_mul_lo_u32 v12, v10, v8 +; GISEL-NEXT: v_mul_lo_u32 v13, v11, v8 +; GISEL-NEXT: v_mul_lo_u32 v14, v10, v9 +; GISEL-NEXT: v_mul_hi_u32 v15, v10, v8 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 +; GISEL-NEXT: v_mul_lo_u32 v14, v9, v12 +; GISEL-NEXT: v_mul_lo_u32 v15, v8, v13 +; GISEL-NEXT: v_mul_hi_u32 v16, v8, v12 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 +; GISEL-NEXT: v_mul_lo_u32 v15, v9, v13 +; GISEL-NEXT: v_mul_hi_u32 v12, v9, v12 +; GISEL-NEXT: v_mul_hi_u32 v16, v8, v13 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 +; GISEL-NEXT: v_mul_hi_u32 v13, v9, v13 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 +; GISEL-NEXT: v_addc_u32_e64 v12, s[4:5], v9, v13, vcc +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v13 +; GISEL-NEXT: v_mul_lo_u32 v13, v10, v8 +; GISEL-NEXT: v_mul_lo_u32 v11, v11, v8 +; GISEL-NEXT: v_mul_lo_u32 v14, v10, v12 +; GISEL-NEXT: v_mul_hi_u32 v10, v10, v8 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v11, v10 +; GISEL-NEXT: v_mul_lo_u32 v11, v12, v13 +; GISEL-NEXT: v_mul_lo_u32 v14, v8, v10 +; GISEL-NEXT: v_mul_hi_u32 v15, v8, v13 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 +; GISEL-NEXT: 
v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v14, v11 +; GISEL-NEXT: v_mul_lo_u32 v14, v12, v10 +; GISEL-NEXT: v_mul_hi_u32 v13, v12, v13 +; GISEL-NEXT: v_mul_hi_u32 v15, v8, v10 +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v13, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v14, v15 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v13, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 +; GISEL-NEXT: v_mul_hi_u32 v10, v12, v10 +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v13 +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v8, v11 +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v10, vcc +; GISEL-NEXT: v_addc_u32_e64 v9, vcc, 0, v9, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v10, v1, v8 +; GISEL-NEXT: v_mul_lo_u32 v11, v0, v9 +; GISEL-NEXT: v_mul_hi_u32 v12, v0, v8 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; GISEL-NEXT: v_mul_lo_u32 v11, v1, v9 +; GISEL-NEXT: v_mul_hi_u32 v8, v1, v8 +; GISEL-NEXT: v_mul_hi_u32 v12, v0, v9 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; GISEL-NEXT: v_mul_hi_u32 v9, v1, v9 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; GISEL-NEXT: v_mul_lo_u32 v10, v4, v8 +; GISEL-NEXT: v_mul_lo_u32 v11, v5, v8 +; 
GISEL-NEXT: v_mul_lo_u32 v12, v4, v9 +; GISEL-NEXT: v_mul_hi_u32 v13, v4, v8 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 +; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], v1, v11, vcc +; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v11 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v10, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v10, v11, v12, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v4 +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc +; GISEL-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v11, vcc, 1, v8 +; GISEL-NEXT: v_addc_u32_e32 v12, vcc, 0, v9, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v5 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v13, v0, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v11 +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, 0, v12, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v11, v1, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v1, v12, v4, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc +; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v6 +; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v7 +; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 +; GISEL-NEXT: v_trunc_f32_e32 v5, v5 +; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 +; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 +; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 +; GISEL-NEXT: v_sub_i32_e32 
v8, vcc, 0, v6 +; GISEL-NEXT: v_subb_u32_e32 v9, vcc, 0, v7, vcc +; GISEL-NEXT: v_mul_lo_u32 v10, v8, v4 +; GISEL-NEXT: v_mul_lo_u32 v11, v9, v4 +; GISEL-NEXT: v_mul_lo_u32 v12, v8, v5 +; GISEL-NEXT: v_mul_hi_u32 v13, v8, v4 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; GISEL-NEXT: v_mul_lo_u32 v12, v5, v10 +; GISEL-NEXT: v_mul_lo_u32 v13, v4, v11 +; GISEL-NEXT: v_mul_hi_u32 v14, v4, v10 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; GISEL-NEXT: v_mul_lo_u32 v13, v5, v11 +; GISEL-NEXT: v_mul_hi_u32 v10, v5, v10 +; GISEL-NEXT: v_mul_hi_u32 v14, v4, v11 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; GISEL-NEXT: v_mul_hi_u32 v11, v5, v11 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10 +; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], v5, v11, vcc +; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v11 +; GISEL-NEXT: v_mul_lo_u32 v11, v8, v4 +; GISEL-NEXT: v_mul_lo_u32 v9, v9, v4 +; GISEL-NEXT: v_mul_lo_u32 v12, v8, v10 +; GISEL-NEXT: v_mul_hi_u32 v8, v8, v4 +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v12 +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v9, v8 +; GISEL-NEXT: v_mul_lo_u32 v9, v10, v11 +; GISEL-NEXT: v_mul_lo_u32 v12, v4, v8 +; GISEL-NEXT: v_mul_hi_u32 v13, v4, v11 +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 
1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v12, v9 +; GISEL-NEXT: v_mul_lo_u32 v12, v10, v8 +; GISEL-NEXT: v_mul_hi_u32 v11, v10, v11 +; GISEL-NEXT: v_mul_hi_u32 v13, v4, v8 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v13 +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v11, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 +; GISEL-NEXT: v_mul_hi_u32 v8, v10, v8 +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v8, v11 +; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v4, v9 +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v8, vcc +; GISEL-NEXT: v_addc_u32_e64 v5, vcc, 0, v5, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v8, v3, v4 +; GISEL-NEXT: v_mul_lo_u32 v9, v2, v5 +; GISEL-NEXT: v_mul_hi_u32 v10, v2, v4 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; GISEL-NEXT: v_mul_lo_u32 v9, v3, v5 +; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4 +; GISEL-NEXT: v_mul_hi_u32 v10, v2, v5 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v9, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; GISEL-NEXT: v_mul_hi_u32 v5, v3, v5 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; GISEL-NEXT: v_mul_lo_u32 v8, v6, v4 +; GISEL-NEXT: v_mul_lo_u32 v9, v7, v4 +; GISEL-NEXT: v_mul_lo_u32 v10, v6, v5 +; GISEL-NEXT: v_mul_hi_u32 v11, v6, v4 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 
+; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v8 +; GISEL-NEXT: v_subb_u32_e64 v8, s[4:5], v3, v9, vcc +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v9 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v8, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v8, v9, v10, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v6 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc +; GISEL-NEXT: v_subbrev_u32_e64 v3, vcc, 0, v3, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v4 +; GISEL-NEXT: v_addc_u32_e32 v10, vcc, 0, v5, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v3, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v11, v2, vcc +; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v9 +; GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v10, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v9, v3, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v10, v6, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: v_udiv_v2i64_pow2_shl_denom: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: v_mov_b32_e32 v5, v0 +; CGP-NEXT: v_mov_b32_e32 v7, v1 +; CGP-NEXT: s_movk_i32 s4, 0x1000 +; CGP-NEXT: s_mov_b32 s5, 0 +; CGP-NEXT: v_mov_b32_e32 v0, 0 +; CGP-NEXT: v_lshl_b64 v[10:11], s[4:5], v4 +; CGP-NEXT: v_lshl_b64 v[8:9], s[4:5], v6 +; CGP-NEXT: v_or_b32_e32 v1, v7, v11 +; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] +; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1 +; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5] +; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1 +; CGP-NEXT: s_and_saveexec_b64 
s[6:7], s[4:5] +; CGP-NEXT: s_xor_b64 s[8:9], exec, s[6:7] +; CGP-NEXT: s_cbranch_execnz BB8_2 +; CGP-NEXT: ; %bb.1: +; CGP-NEXT: v_cvt_f32_u32_e32 v0, v10 +; CGP-NEXT: v_cvt_f32_u32_e32 v1, v11 +; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v10 +; CGP-NEXT: v_subb_u32_e32 v6, vcc, 0, v11, vcc +; CGP-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 +; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 +; CGP-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 +; CGP-NEXT: v_trunc_f32_e32 v1, v1 +; CGP-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 +; CGP-NEXT: v_cvt_u32_f32_e32 v1, v1 +; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 +; CGP-NEXT: v_mul_lo_u32 v12, v4, v1 +; CGP-NEXT: v_mul_lo_u32 v13, v4, v0 +; CGP-NEXT: v_mul_lo_u32 v14, v6, v0 +; CGP-NEXT: v_mul_hi_u32 v15, v4, v0 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12 +; CGP-NEXT: v_mul_lo_u32 v14, v1, v13 +; CGP-NEXT: v_mul_hi_u32 v16, v0, v13 +; CGP-NEXT: v_mul_hi_u32 v13, v1, v13 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v15 +; CGP-NEXT: v_mul_lo_u32 v15, v0, v12 +; CGP-NEXT: v_mul_lo_u32 v17, v1, v12 +; CGP-NEXT: v_mul_hi_u32 v18, v0, v12 +; CGP-NEXT: v_mul_hi_u32 v12, v1, v12 +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v17, v13 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v16 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v18 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 +; CGP-NEXT: v_add_i32_e32 v15, vcc, v17, v16 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v13 +; CGP-NEXT: v_addc_u32_e64 v13, s[4:5], v1, v12, vcc +; CGP-NEXT: v_add_i32_e64 v1, s[4:5], v1, v12 +; CGP-NEXT: v_mul_lo_u32 v12, v4, v0 +; CGP-NEXT: v_mul_lo_u32 v6, 
v6, v0 +; CGP-NEXT: v_mul_hi_u32 v14, v4, v0 +; CGP-NEXT: v_mul_lo_u32 v4, v4, v13 +; CGP-NEXT: v_mul_lo_u32 v15, v13, v12 +; CGP-NEXT: v_mul_hi_u32 v16, v0, v12 +; CGP-NEXT: v_mul_hi_u32 v12, v13, v12 +; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v6, v4 +; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v4, v14 +; CGP-NEXT: v_mul_lo_u32 v6, v0, v4 +; CGP-NEXT: v_mul_lo_u32 v14, v13, v4 +; CGP-NEXT: v_mul_hi_u32 v17, v0, v4 +; CGP-NEXT: v_mul_hi_u32 v4, v13, v4 +; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v15, v6 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v14, v12 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v6, v16 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v12, v17 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v13, v6 +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v14, v15 +; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v12, v6 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v13, v12 +; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v4, v12 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v6 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: v_mul_lo_u32 v4, v7, v0 +; CGP-NEXT: v_mul_hi_u32 v6, v5, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v7, v0 +; CGP-NEXT: v_mul_lo_u32 v12, v5, v1 +; CGP-NEXT: v_mul_lo_u32 v13, v7, v1 +; CGP-NEXT: v_mul_hi_u32 v14, v5, v1 +; CGP-NEXT: v_mul_hi_u32 v1, v7, v1 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v12 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v13, v0 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v14 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v12, v4 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v13, v6 +; CGP-NEXT: v_add_i32_e32 v0, 
vcc, v0, v4 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CGP-NEXT: v_mul_lo_u32 v6, v10, v0 +; CGP-NEXT: v_mul_lo_u32 v12, v11, v0 +; CGP-NEXT: v_mul_hi_u32 v13, v10, v0 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4 +; CGP-NEXT: v_mul_lo_u32 v4, v10, v1 +; CGP-NEXT: v_add_i32_e32 v14, vcc, 1, v0 +; CGP-NEXT: v_addc_u32_e32 v15, vcc, 0, v1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v12, v4 +; CGP-NEXT: v_add_i32_e32 v12, vcc, 1, v14 +; CGP-NEXT: v_addc_u32_e32 v16, vcc, 0, v15, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v13 +; CGP-NEXT: v_sub_i32_e32 v6, vcc, v5, v6 +; CGP-NEXT: v_subb_u32_e64 v13, s[4:5], v7, v4, vcc +; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v7, v4 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v10 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] +; CGP-NEXT: v_sub_i32_e64 v6, s[4:5], v6, v10 +; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v13, v11 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, -1, s[6:7] +; CGP-NEXT: v_subb_u32_e32 v4, vcc, v4, v11, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v6, v10 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v13, v11 +; CGP-NEXT: v_cndmask_b32_e32 v7, v17, v7, vcc +; CGP-NEXT: v_subbrev_u32_e64 v4, vcc, 0, v4, s[4:5] +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v11 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v4, v11 +; CGP-NEXT: v_cndmask_b32_e32 v4, v13, v6, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; CGP-NEXT: v_cndmask_b32_e32 v4, v14, v12, vcc +; CGP-NEXT: v_cndmask_b32_e32 v6, v15, v16, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc +; CGP-NEXT: BB8_2: ; %Flow1 +; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[8:9] +; CGP-NEXT: s_xor_b64 exec, exec, s[6:7] +; CGP-NEXT: s_cbranch_execz BB8_4 +; CGP-NEXT: ; %bb.3: +; CGP-NEXT: v_cvt_f32_u32_e32 v0, v10 +; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; 
CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 +; CGP-NEXT: v_mul_lo_u32 v1, v0, v10 +; CGP-NEXT: v_mul_hi_u32 v4, v0, v10 +; CGP-NEXT: v_sub_i32_e32 v6, vcc, 0, v1 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc +; CGP-NEXT: v_mul_hi_u32 v1, v1, v0 +; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v0, v1 +; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; CGP-NEXT: v_mul_hi_u32 v0, v0, v5 +; CGP-NEXT: v_mul_lo_u32 v1, v0, v10 +; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v0 +; CGP-NEXT: v_subrev_i32_e32 v6, vcc, 1, v0 +; CGP-NEXT: v_sub_i32_e32 v7, vcc, v5, v1 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v5, v1 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v10 +; CGP-NEXT: s_and_b64 s[4:5], s[4:5], vcc +; CGP-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc +; CGP-NEXT: v_mov_b32_e32 v1, 0 +; CGP-NEXT: BB8_4: +; CGP-NEXT: s_or_b64 exec, exec, s[6:7] +; CGP-NEXT: v_or_b32_e32 v5, v3, v9 +; CGP-NEXT: v_mov_b32_e32 v4, 0 +; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5] +; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1 +; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5] +; CGP-NEXT: ; implicit-def: $vgpr4_vgpr5 +; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; CGP-NEXT: s_xor_b64 s[8:9], exec, s[6:7] +; CGP-NEXT: s_cbranch_execnz BB8_6 +; CGP-NEXT: ; %bb.5: +; CGP-NEXT: v_cvt_f32_u32_e32 v4, v8 +; CGP-NEXT: v_cvt_f32_u32_e32 v5, v9 +; CGP-NEXT: v_sub_i32_e32 v6, vcc, 0, v8 +; CGP-NEXT: v_subb_u32_e32 v7, vcc, 0, v9, vcc +; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; CGP-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 +; CGP-NEXT: v_trunc_f32_e32 v5, v5 +; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 +; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 +; CGP-NEXT: v_mul_lo_u32 v10, v6, v5 +; CGP-NEXT: v_mul_lo_u32 v11, v6, v4 +; CGP-NEXT: v_mul_lo_u32 v12, v7, v4 +; CGP-NEXT: v_mul_hi_u32 v13, v6, v4 +; 
CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 +; CGP-NEXT: v_mul_lo_u32 v12, v5, v11 +; CGP-NEXT: v_mul_hi_u32 v14, v4, v11 +; CGP-NEXT: v_mul_hi_u32 v11, v5, v11 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 +; CGP-NEXT: v_mul_lo_u32 v13, v4, v10 +; CGP-NEXT: v_mul_lo_u32 v15, v5, v10 +; CGP-NEXT: v_mul_hi_u32 v16, v4, v10 +; CGP-NEXT: v_mul_hi_u32 v10, v5, v10 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v11 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v16 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v14 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11 +; CGP-NEXT: v_addc_u32_e64 v11, s[4:5], v5, v10, vcc +; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v5, v10 +; CGP-NEXT: v_mul_lo_u32 v10, v6, v4 +; CGP-NEXT: v_mul_lo_u32 v7, v7, v4 +; CGP-NEXT: v_mul_hi_u32 v12, v6, v4 +; CGP-NEXT: v_mul_lo_u32 v6, v6, v11 +; CGP-NEXT: v_mul_lo_u32 v13, v11, v10 +; CGP-NEXT: v_mul_hi_u32 v14, v4, v10 +; CGP-NEXT: v_mul_hi_u32 v10, v11, v10 +; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v7, v6 +; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v6, v12 +; CGP-NEXT: v_mul_lo_u32 v7, v4, v6 +; CGP-NEXT: v_mul_lo_u32 v12, v11, v6 +; CGP-NEXT: v_mul_hi_u32 v15, v4, v6 +; CGP-NEXT: v_mul_hi_u32 v6, v11, v6 +; CGP-NEXT: v_add_i32_e64 v7, s[4:5], v13, v7 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v12, v10 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v7, s[4:5], v7, v14 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, 
v15 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v7, s[4:5], v11, v7 +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v12, v13 +; CGP-NEXT: v_add_i32_e64 v7, s[4:5], v10, v7 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v11, v10 +; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v6, v10 +; CGP-NEXT: v_addc_u32_e32 v5, vcc, v5, v6, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc +; CGP-NEXT: v_mul_lo_u32 v6, v3, v4 +; CGP-NEXT: v_mul_hi_u32 v7, v2, v4 +; CGP-NEXT: v_mul_hi_u32 v4, v3, v4 +; CGP-NEXT: v_mul_lo_u32 v10, v2, v5 +; CGP-NEXT: v_mul_lo_u32 v11, v3, v5 +; CGP-NEXT: v_mul_hi_u32 v12, v2, v5 +; CGP-NEXT: v_mul_hi_u32 v5, v3, v5 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v11, v4 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v12 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v10, v6 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v11, v7 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CGP-NEXT: v_mul_lo_u32 v7, v8, v4 +; CGP-NEXT: v_mul_lo_u32 v10, v9, v4 +; CGP-NEXT: v_mul_hi_u32 v11, v8, v4 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; CGP-NEXT: v_mul_lo_u32 v6, v8, v5 +; CGP-NEXT: v_add_i32_e32 v12, vcc, 1, v4 +; CGP-NEXT: v_addc_u32_e32 v13, vcc, 0, v5, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v10, v6 +; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v12 +; CGP-NEXT: v_addc_u32_e32 v14, vcc, 0, v13, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v11 +; CGP-NEXT: v_sub_i32_e32 v7, vcc, v2, v7 +; CGP-NEXT: v_subb_u32_e64 v11, s[4:5], v3, v6, vcc +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v6 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v8 +; CGP-NEXT: 
v_cndmask_b32_e64 v6, 0, -1, s[4:5] +; CGP-NEXT: v_sub_i32_e64 v7, s[4:5], v7, v8 +; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v11, v9 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[6:7] +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v9, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v7, v8 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v11, v9 +; CGP-NEXT: v_cndmask_b32_e32 v6, v15, v6, vcc +; CGP-NEXT: v_subbrev_u32_e64 v3, vcc, 0, v3, s[4:5] +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v9 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v9 +; CGP-NEXT: v_cndmask_b32_e32 v3, v11, v7, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; CGP-NEXT: v_cndmask_b32_e32 v3, v12, v10, vcc +; CGP-NEXT: v_cndmask_b32_e32 v7, v13, v14, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 +; CGP-NEXT: v_cndmask_b32_e32 v4, v4, v3, vcc +; CGP-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc +; CGP-NEXT: BB8_6: ; %Flow +; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[8:9] +; CGP-NEXT: s_xor_b64 exec, exec, s[6:7] +; CGP-NEXT: s_cbranch_execz BB8_8 +; CGP-NEXT: ; %bb.7: +; CGP-NEXT: v_cvt_f32_u32_e32 v3, v8 +; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; CGP-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3 +; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3 +; CGP-NEXT: v_mul_lo_u32 v4, v3, v8 +; CGP-NEXT: v_mul_hi_u32 v5, v3, v8 +; CGP-NEXT: v_sub_i32_e32 v6, vcc, 0, v4 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; CGP-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc +; CGP-NEXT: v_mul_hi_u32 v4, v4, v3 +; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4 +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 +; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; CGP-NEXT: v_mul_hi_u32 v3, v3, v2 +; CGP-NEXT: v_mul_lo_u32 v4, v3, v8 +; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v3 +; CGP-NEXT: v_subrev_i32_e32 v6, vcc, 1, v3 +; CGP-NEXT: v_sub_i32_e32 v7, vcc, v2, v4 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v8 +; CGP-NEXT: s_and_b64 s[4:5], s[4:5], vcc +; CGP-NEXT: v_cndmask_b32_e64 v2, 
v5, v3, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc +; CGP-NEXT: v_mov_b32_e32 v5, 0 +; CGP-NEXT: BB8_8: +; CGP-NEXT: s_or_b64 exec, exec, s[6:7] +; CGP-NEXT: v_mov_b32_e32 v2, v4 +; CGP-NEXT: v_mov_b32_e32 v3, v5 +; CGP-NEXT: s_setpc_b64 s[30:31] + %shl.y = shl <2 x i64> , %y + %r = udiv <2 x i64> %x, %shl.y + ret <2 x i64> %r +} + +define i64 @v_udiv_i64_24bit(i64 %num, i64 %den) { +; GISEL-LABEL: v_udiv_i64_24bit: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_mov_b32 s4, 0xffffff +; GISEL-NEXT: v_and_b32_e32 v0, s4, v0 +; GISEL-NEXT: v_and_b32_e32 v1, s4, v2 +; GISEL-NEXT: v_cvt_f32_u32_e32 v2, v1 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2 +; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 +; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1 +; GISEL-NEXT: v_mul_hi_u32 v4, v2, v1 +; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; GISEL-NEXT: v_mul_hi_u32 v3, v3, v2 +; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v2, v3 +; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GISEL-NEXT: v_mul_hi_u32 v2, v2, v0 +; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v2 +; GISEL-NEXT: v_subrev_i32_e32 v5, vcc, 1, v2 +; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v3 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v1 +; GISEL-NEXT: s_and_b64 s[4:5], s[4:5], vcc +; GISEL-NEXT: v_cndmask_b32_e64 v0, v4, v2, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc +; GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: v_udiv_i64_24bit: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: s_mov_b32 s4, 0xffffff +; CGP-NEXT: v_and_b32_e32 v0, s4, v0 +; CGP-NEXT: v_and_b32_e32 v1, s4, v2 +; CGP-NEXT: v_cvt_f32_u32_e32 v0, v0 +; CGP-NEXT: v_cvt_f32_u32_e32 
v1, v1 +; CGP-NEXT: v_rcp_f32_e32 v2, v1 +; CGP-NEXT: v_mul_f32_e32 v2, v0, v2 +; CGP-NEXT: v_trunc_f32_e32 v2, v2 +; CGP-NEXT: v_mad_f32 v0, -v2, v1, v0 +; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2 +; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v0|, v1 +; CGP-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; CGP-NEXT: v_and_b32_e32 v0, 0xffffff, v0 +; CGP-NEXT: v_mov_b32_e32 v1, 0 +; CGP-NEXT: s_setpc_b64 s[30:31] + %num.mask = and i64 %num, 16777215 + %den.mask = and i64 %den, 16777215 + %result = udiv i64 %num.mask, %den.mask + ret i64 %result +} + +define <2 x i64> @v_udiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) { +; GISEL-LABEL: v_udiv_v2i64_24bit: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_mov_b32 s6, 0xffffff +; GISEL-NEXT: v_cvt_f32_u32_e32 v1, 0 +; GISEL-NEXT: v_and_b32_e32 v3, s6, v4 +; GISEL-NEXT: v_and_b32_e32 v4, s6, v6 +; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v3 +; GISEL-NEXT: v_sub_i32_e32 v6, vcc, 0, v3 +; GISEL-NEXT: v_subb_u32_e64 v7, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v4 +; GISEL-NEXT: v_sub_i32_e32 v9, vcc, 0, v4 +; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_mac_f32_e32 v5, 0x4f800000, v1 +; GISEL-NEXT: v_mac_f32_e32 v8, 0x4f800000, v1 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v1, v5 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v8 +; GISEL-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 +; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 +; GISEL-NEXT: v_mul_f32_e32 v8, 0x2f800000, v1 +; GISEL-NEXT: v_mul_f32_e32 v11, 0x2f800000, v5 +; GISEL-NEXT: v_trunc_f32_e32 v8, v8 +; GISEL-NEXT: v_trunc_f32_e32 v11, v11 +; GISEL-NEXT: v_mac_f32_e32 v1, 0xcf800000, v8 +; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8 +; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v11 +; GISEL-NEXT: v_cvt_u32_f32_e32 v11, v11 +; GISEL-NEXT: v_cvt_u32_f32_e32 v1, v1 +; GISEL-NEXT: v_mul_lo_u32 v12, v6, v8 +; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 +; GISEL-NEXT: v_mul_lo_u32 v13, v9, v11 +; GISEL-NEXT: 
v_mul_lo_u32 v14, v6, v1 +; GISEL-NEXT: v_mul_lo_u32 v15, v7, v1 +; GISEL-NEXT: v_mul_hi_u32 v16, v6, v1 +; GISEL-NEXT: v_mul_lo_u32 v17, v9, v5 +; GISEL-NEXT: v_mul_lo_u32 v18, v10, v5 +; GISEL-NEXT: v_mul_hi_u32 v19, v9, v5 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v12 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v18, v13 +; GISEL-NEXT: v_mul_lo_u32 v15, v11, v17 +; GISEL-NEXT: v_mul_hi_u32 v18, v5, v17 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v19 +; GISEL-NEXT: v_mul_lo_u32 v19, v5, v13 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v19 +; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v18 +; GISEL-NEXT: v_mul_lo_u32 v15, v8, v14 +; GISEL-NEXT: v_mul_hi_u32 v18, v1, v14 +; GISEL-NEXT: v_mul_hi_u32 v14, v8, v14 +; GISEL-NEXT: v_mul_hi_u32 v17, v11, v17 +; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v16 +; GISEL-NEXT: v_mul_lo_u32 v16, v1, v12 +; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v18 +; GISEL-NEXT: v_mul_lo_u32 v15, v8, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v16, s[4:5], v16, v18 +; GISEL-NEXT: v_mul_hi_u32 v18, v1, v12 +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v15, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v14, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v18, vcc, v19, v18 +; GISEL-NEXT: v_mul_lo_u32 v19, v11, v13 +; GISEL-NEXT: v_add_i32_e32 v17, vcc, v19, v17 +; GISEL-NEXT: v_mul_hi_u32 v19, v5, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v19 +; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v19, vcc, v20, v19 +; GISEL-NEXT: v_and_b32_e32 v0, s6, v0 +; GISEL-NEXT: v_and_b32_e32 v2, s6, v2 +; 
GISEL-NEXT: v_mul_hi_u32 v12, v8, v12 +; GISEL-NEXT: v_mul_hi_u32 v13, v11, v13 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v18 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v16 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v14 +; GISEL-NEXT: v_addc_u32_e64 v14, s[4:5], v8, v12, vcc +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v8, v12 +; GISEL-NEXT: v_mul_lo_u32 v12, v6, v1 +; GISEL-NEXT: v_mul_lo_u32 v7, v7, v1 +; GISEL-NEXT: v_mul_hi_u32 v15, v6, v1 +; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v17 +; GISEL-NEXT: v_addc_u32_e64 v16, s[6:7], v11, v13, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v13 +; GISEL-NEXT: v_mul_lo_u32 v13, v9, v5 +; GISEL-NEXT: v_mul_lo_u32 v10, v10, v5 +; GISEL-NEXT: v_mul_hi_u32 v17, v9, v5 +; GISEL-NEXT: v_mul_lo_u32 v6, v6, v14 +; GISEL-NEXT: v_mul_lo_u32 v18, v14, v12 +; GISEL-NEXT: v_mul_hi_u32 v19, v1, v12 +; GISEL-NEXT: v_mul_hi_u32 v12, v14, v12 +; GISEL-NEXT: v_mul_lo_u32 v9, v9, v16 +; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v7, v6 +; GISEL-NEXT: v_mul_lo_u32 v7, v16, v13 +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v10, v9 +; GISEL-NEXT: v_mul_hi_u32 v10, v5, v13 +; GISEL-NEXT: v_mul_hi_u32 v13, v16, v13 +; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v6, v15 +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v17 +; GISEL-NEXT: v_mul_lo_u32 v15, v1, v6 +; GISEL-NEXT: v_mul_lo_u32 v17, v5, v9 +; GISEL-NEXT: v_add_i32_e64 v7, s[6:7], v7, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v7, s[6:7], v7, v10 +; GISEL-NEXT: v_mul_lo_u32 v7, v14, v6 +; GISEL-NEXT: v_mul_hi_u32 v10, v1, v6 +; GISEL-NEXT: v_mul_hi_u32 v6, v14, v6 +; GISEL-NEXT: v_mul_lo_u32 v14, v16, v9 +; GISEL-NEXT: v_mul_hi_u32 v16, v16, v9 +; GISEL-NEXT: v_mul_hi_u32 
v9, v5, v9 +; GISEL-NEXT: v_add_i32_e64 v15, s[8:9], v18, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v7, s[8:9], v7, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v13, s[8:9], v14, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v15, s[8:9], v15, v19 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v7, s[8:9], v7, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[8:9] +; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v13, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v18, v15 +; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v12, v10 +; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v17, v19 +; GISEL-NEXT: v_add_i32_e64 v13, s[6:7], v14, v13 +; GISEL-NEXT: v_add_i32_e64 v7, s[6:7], v7, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v14 +; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v13, v12 +; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v6, v10 +; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v16, v12 +; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v8, v6, vcc +; GISEL-NEXT: v_addc_u32_e64 v8, vcc, v11, v10, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v7 +; GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc +; GISEL-NEXT: v_mul_lo_u32 v7, 0, v1 +; GISEL-NEXT: v_mul_hi_u32 v10, v0, v1 +; GISEL-NEXT: v_mul_hi_u32 v1, 0, v1 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v8, vcc +; GISEL-NEXT: v_mul_lo_u32 v9, 0, v5 +; GISEL-NEXT: v_mul_hi_u32 v11, v2, v5 +; GISEL-NEXT: v_mul_hi_u32 v5, 0, v5 +; GISEL-NEXT: v_mul_lo_u32 v12, v0, v6 +; GISEL-NEXT: v_mul_lo_u32 v13, 0, v6 +; GISEL-NEXT: v_mul_hi_u32 v14, v0, v6 +; GISEL-NEXT: v_mul_hi_u32 v6, 0, v6 +; GISEL-NEXT: v_mul_lo_u32 v15, v2, v8 
+; GISEL-NEXT: v_mul_lo_u32 v16, 0, v8 +; GISEL-NEXT: v_mul_hi_u32 v17, v2, v8 +; GISEL-NEXT: v_mul_hi_u32 v8, 0, v8 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v16, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v12, v7 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v15, v9 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v16, v11 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v10, v7 +; GISEL-NEXT: v_mul_lo_u32 v10, v3, v1 +; GISEL-NEXT: v_mul_lo_u32 v12, 0, v1 +; GISEL-NEXT: v_mul_hi_u32 v13, v3, v1 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; GISEL-NEXT: v_mul_lo_u32 v11, v4, v5 +; GISEL-NEXT: v_mul_lo_u32 v14, 0, v5 +; GISEL-NEXT: v_mul_hi_u32 v15, v4, v5 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v9 +; GISEL-NEXT: v_mul_lo_u32 v8, v3, v6 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v1 +; GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v6, vcc +; GISEL-NEXT: v_mul_lo_u32 v17, v4, v7 +; GISEL-NEXT: v_add_i32_e32 v18, vcc, 1, v5 +; GISEL-NEXT: v_addc_u32_e32 v19, vcc, 0, v7, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v12, v8 +; GISEL-NEXT: 
v_add_i32_e32 v12, vcc, v14, v17 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, 1, v9 +; GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v16, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v13 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, 1, v18 +; GISEL-NEXT: v_addc_u32_e32 v15, vcc, 0, v19, vcc +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 +; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], 0, v8, vcc +; GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], 0, v8 +; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v11 +; GISEL-NEXT: v_subb_u32_e64 v11, s[6:7], 0, v12, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v2, v4 +; GISEL-NEXT: v_sub_i32_e64 v2, s[8:9], v2, v4 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[10:11], v2, v4 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[12:13], v0, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[12:13] +; GISEL-NEXT: v_sub_i32_e64 v0, s[12:13], v0, v3 +; GISEL-NEXT: v_sub_i32_e64 v4, s[14:15], 0, v12 +; GISEL-NEXT: v_cmp_le_u32_e64 s[14:15], 0, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[14:15] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[14:15], 0, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[6:7] +; GISEL-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v8, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, 0, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc +; GISEL-NEXT: v_subbrev_u32_e64 v4, vcc, 0, v4, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v2, v12, v2, s[14:15] +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[10:11] +; GISEL-NEXT: v_subbrev_u32_e64 v8, vcc, 0, v8, s[12:13] +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v11 +; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v10, vcc +; GISEL-NEXT: v_subbrev_u32_e64 v4, vcc, 0, v4, s[8:9] +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, 0, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, 0, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v8 +; GISEL-NEXT: v_cndmask_b32_e32 v0, 
v10, v0, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; GISEL-NEXT: v_cndmask_b32_e32 v4, v11, v12, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v9, v14, vcc +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v4, v18, v13, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v8, v16, v17, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v9, v19, v15, s[4:5] +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v2, v5, v4, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v1, v6, v8, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v3, v7, v9, s[4:5] +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: v_udiv_v2i64_24bit: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: s_mov_b32 s6, 0xffffff +; CGP-NEXT: v_mov_b32_e32 v1, 0 +; CGP-NEXT: v_and_b32_e32 v0, s6, v0 +; CGP-NEXT: v_and_b32_e32 v2, s6, v2 +; CGP-NEXT: v_and_b32_e32 v3, s6, v4 +; CGP-NEXT: v_and_b32_e32 v4, s6, v6 +; CGP-NEXT: v_cvt_f32_u32_e32 v0, v0 +; CGP-NEXT: v_cvt_f32_u32_e32 v3, v3 +; CGP-NEXT: v_cvt_f32_u32_e32 v2, v2 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, v4 +; CGP-NEXT: v_rcp_f32_e32 v5, v3 +; CGP-NEXT: v_rcp_f32_e32 v6, v4 +; CGP-NEXT: v_mul_f32_e32 v5, v0, v5 +; CGP-NEXT: v_mul_f32_e32 v6, v2, v6 +; CGP-NEXT: v_trunc_f32_e32 v5, v5 +; CGP-NEXT: v_trunc_f32_e32 v6, v6 +; CGP-NEXT: v_mad_f32 v0, -v5, v3, v0 +; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CGP-NEXT: v_mad_f32 v2, -v6, v4, v2 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 +; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v0|, v3 +; CGP-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v2|, v4 +; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e32 v0, vcc, v5, v0 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v6, v2 +; CGP-NEXT: v_and_b32_e32 v0, s6, v0 +; CGP-NEXT: v_and_b32_e32 v2, s6, v2 +; CGP-NEXT: v_mov_b32_e32 v3, v1 +; CGP-NEXT: s_setpc_b64 
s[30:31] + %num.mask = and <2 x i64> %num, + %den.mask = and <2 x i64> %den, + %result = udiv <2 x i64> %num.mask, %den.mask + ret <2 x i64> %result +} Index: llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll @@ -0,0 +1,3427 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=CHECK,GISEL %s +; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=CHECK,CGP %s + +; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare. + +define i64 @v_urem_i64(i64 %num, i64 %den) { +; CHECK-LABEL: v_urem_i64: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_or_b32_e32 v5, v1, v3 +; CHECK-NEXT: v_mov_b32_e32 v4, 0 +; CHECK-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5] +; CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1 +; CHECK-NEXT: s_xor_b64 s[4:5], vcc, s[4:5] +; CHECK-NEXT: ; implicit-def: $vgpr4_vgpr5 +; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[6:7] +; CHECK-NEXT: s_cbranch_execnz BB0_2 +; CHECK-NEXT: ; %bb.1: +; CHECK-NEXT: v_cvt_f32_u32_e32 v4, v2 +; CHECK-NEXT: v_cvt_f32_u32_e32 v5, v3 +; CHECK-NEXT: v_sub_i32_e32 v6, vcc, 0, v2 +; CHECK-NEXT: v_subb_u32_e32 v7, vcc, 0, v3, vcc +; CHECK-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; CHECK-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; CHECK-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 +; CHECK-NEXT: v_trunc_f32_e32 v5, v5 +; CHECK-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 +; CHECK-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 +; CHECK-NEXT: v_mul_lo_u32 v8, v6, v5 +; CHECK-NEXT: v_mul_lo_u32 v9, v6, v4 +; 
CHECK-NEXT: v_mul_lo_u32 v10, v7, v4 +; CHECK-NEXT: v_mul_hi_u32 v11, v6, v4 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CHECK-NEXT: v_mul_lo_u32 v10, v5, v9 +; CHECK-NEXT: v_mul_hi_u32 v12, v4, v9 +; CHECK-NEXT: v_mul_hi_u32 v9, v5, v9 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CHECK-NEXT: v_mul_lo_u32 v11, v4, v8 +; CHECK-NEXT: v_mul_lo_u32 v13, v5, v8 +; CHECK-NEXT: v_mul_hi_u32 v14, v4, v8 +; CHECK-NEXT: v_mul_hi_u32 v8, v5, v8 +; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v13, v9 +; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v12 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v14 +; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CHECK-NEXT: v_add_i32_e32 v11, vcc, v13, v12 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v9 +; CHECK-NEXT: v_addc_u32_e64 v9, s[4:5], v5, v8, vcc +; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v5, v8 +; CHECK-NEXT: v_mul_lo_u32 v8, v6, v4 +; CHECK-NEXT: v_mul_lo_u32 v7, v7, v4 +; CHECK-NEXT: v_mul_hi_u32 v10, v6, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, v6, v9 +; CHECK-NEXT: v_mul_lo_u32 v11, v9, v8 +; CHECK-NEXT: v_mul_hi_u32 v12, v4, v8 +; CHECK-NEXT: v_mul_hi_u32 v8, v9, v8 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v7, v6 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v10 +; CHECK-NEXT: v_mul_lo_u32 v7, v4, v6 +; CHECK-NEXT: v_mul_lo_u32 v10, v9, v6 +; CHECK-NEXT: v_mul_hi_u32 v13, v4, v6 +; CHECK-NEXT: v_mul_hi_u32 v6, v9, v6 +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v11, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v10, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] +; 
CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v12 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v8, v13 +; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v9, v7 +; CHECK-NEXT: v_add_i32_e64 v9, s[4:5], v10, v11 +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v8, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v9, v8 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8 +; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v5, v6, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; CHECK-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc +; CHECK-NEXT: v_mul_lo_u32 v6, v1, v4 +; CHECK-NEXT: v_mul_hi_u32 v7, v0, v4 +; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4 +; CHECK-NEXT: v_mul_lo_u32 v8, v0, v5 +; CHECK-NEXT: v_mul_lo_u32 v9, v1, v5 +; CHECK-NEXT: v_mul_hi_u32 v10, v0, v5 +; CHECK-NEXT: v_mul_hi_u32 v5, v1, v5 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v9, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CHECK-NEXT: v_mul_lo_u32 v7, v2, v4 +; CHECK-NEXT: v_mul_lo_u32 v8, v3, v4 +; CHECK-NEXT: v_mul_hi_u32 v4, v2, v4 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; CHECK-NEXT: v_mul_lo_u32 v5, v2, v5 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v8, v5 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 +; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v7 +; CHECK-NEXT: v_subb_u32_e64 v6, s[4:5], v1, v4, vcc +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v4 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v2 +; 
CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v6, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc +; CHECK-NEXT: v_sub_i32_e32 v7, vcc, v5, v2 +; CHECK-NEXT: v_subb_u32_e64 v8, s[4:5], v1, v3, vcc +; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v7, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc +; CHECK-NEXT: v_sub_i32_e32 v10, vcc, v7, v2 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] +; CHECK-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v8, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v3, v11, v9, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v3, v7, v10, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; CHECK-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v5, v6, v1, vcc +; CHECK-NEXT: BB0_2: ; %Flow +; CHECK-NEXT: s_or_saveexec_b64 s[8:9], s[6:7] +; CHECK-NEXT: s_xor_b64 exec, exec, s[8:9] +; CHECK-NEXT: s_cbranch_execz BB0_4 +; CHECK-NEXT: ; %bb.3: +; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v2 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 +; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_lo_u32 v3, v1, v2 +; CHECK-NEXT: v_mul_hi_u32 v4, v1, v2 +; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; CHECK-NEXT: v_mul_hi_u32 v3, v3, v1 +; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v1, v3 +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0 +; CHECK-NEXT: v_mul_lo_u32 v1, v1, v2 +; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v0, v1 +; CHECK-NEXT: 
v_cmp_ge_u32_e32 vcc, v3, v2 +; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v3, v2 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v1 +; CHECK-NEXT: v_sub_i32_e64 v0, s[6:7], v3, v2 +; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5] +; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v4, v4, v0, s[4:5] +; CHECK-NEXT: v_mov_b32_e32 v5, 0 +; CHECK-NEXT: BB0_4: +; CHECK-NEXT: s_or_b64 exec, exec, s[8:9] +; CHECK-NEXT: v_mov_b32_e32 v0, v4 +; CHECK-NEXT: v_mov_b32_e32 v1, v5 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %result = urem i64 %num, %den + ret i64 %result +} + +; FIXME: This is a workaround for not handling uniform VGPR case. +declare i32 @llvm.amdgcn.readfirstlane(i32) + +define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) { +; CHECK-LABEL: s_urem_i64: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_mov_b32 s4, 0 +; CHECK-NEXT: s_mov_b32 s5, -1 +; CHECK-NEXT: s_or_b64 s[6:7], s[0:1], s[2:3] +; CHECK-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5] +; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2 +; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[4:5], 0 +; CHECK-NEXT: s_cbranch_vccnz BB1_2 +; CHECK-NEXT: ; %bb.1: +; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0 +; CHECK-NEXT: v_mul_lo_u32 v1, v0, s2 +; CHECK-NEXT: v_mul_hi_u32 v2, v0, s2 +; CHECK-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0 +; CHECK-NEXT: v_add_i32_e64 v2, s[4:5], v0, v1 +; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; CHECK-NEXT: v_mul_hi_u32 v0, v0, s0 +; CHECK-NEXT: v_mul_lo_u32 v0, v0, s2 +; CHECK-NEXT: v_sub_i32_e32 v1, vcc, s0, v0 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v1 +; CHECK-NEXT: v_add_i32_e64 v2, s[4:5], s2, v1 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[0:1], s0, v0 +; CHECK-NEXT: v_subrev_i32_e64 v0, s[2:3], s2, v1 +; CHECK-NEXT: s_and_b64 vcc, vcc, s[0:1] +; 
CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[0:1] +; CHECK-NEXT: s_branch BB1_3 +; CHECK-NEXT: BB1_2: +; CHECK-NEXT: v_mov_b32_e32 v1, s3 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s3 +; CHECK-NEXT: s_sub_u32 s6, 0, s2 +; CHECK-NEXT: s_cselect_b32 s4, 1, 0 +; CHECK-NEXT: v_mov_b32_e32 v3, s1 +; CHECK-NEXT: v_mac_f32_e32 v0, 0x4f800000, v2 +; CHECK-NEXT: s_and_b32 s4, s4, 1 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; CHECK-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 +; CHECK-NEXT: s_cmp_lg_u32 s4, 0 +; CHECK-NEXT: s_subb_u32 s7, 0, s3 +; CHECK-NEXT: v_mul_f32_e32 v2, 0x2f800000, v0 +; CHECK-NEXT: v_trunc_f32_e32 v2, v2 +; CHECK-NEXT: v_mac_f32_e32 v0, 0xcf800000, v2 +; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 +; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0 +; CHECK-NEXT: v_mul_lo_u32 v4, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v5, s6, v0 +; CHECK-NEXT: v_mul_lo_u32 v6, s7, v0 +; CHECK-NEXT: v_mul_hi_u32 v7, s6, v0 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, v2, v5 +; CHECK-NEXT: v_mul_hi_u32 v8, v0, v5 +; CHECK-NEXT: v_mul_hi_u32 v5, v2, v5 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; CHECK-NEXT: v_mul_lo_u32 v7, v0, v4 +; CHECK-NEXT: v_mul_lo_u32 v9, v2, v4 +; CHECK-NEXT: v_mul_hi_u32 v10, v0, v4 +; CHECK-NEXT: v_mul_hi_u32 v4, v2, v4 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v9, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v8 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v5 
+; CHECK-NEXT: v_addc_u32_e64 v5, s[4:5], v2, v4, vcc +; CHECK-NEXT: v_add_i32_e64 v2, s[4:5], v2, v4 +; CHECK-NEXT: v_mul_lo_u32 v4, s6, v0 +; CHECK-NEXT: v_mul_lo_u32 v6, s7, v0 +; CHECK-NEXT: v_mul_hi_u32 v7, s6, v0 +; CHECK-NEXT: v_mul_lo_u32 v8, s6, v5 +; CHECK-NEXT: v_mul_lo_u32 v9, v5, v4 +; CHECK-NEXT: v_mul_hi_u32 v10, v0, v4 +; CHECK-NEXT: v_mul_hi_u32 v4, v5, v4 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v7 +; CHECK-NEXT: v_mul_lo_u32 v7, v0, v6 +; CHECK-NEXT: v_mul_lo_u32 v8, v5, v6 +; CHECK-NEXT: v_mul_hi_u32 v11, v0, v6 +; CHECK-NEXT: v_mul_hi_u32 v5, v5, v6 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v9, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v8, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v7, v6 +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v8, v9 +; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v7, v6 +; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v5, v6 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v5, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v4 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc +; CHECK-NEXT: v_mul_lo_u32 v4, s1, v0 +; CHECK-NEXT: v_mul_hi_u32 v5, s0, v0 +; CHECK-NEXT: v_mul_hi_u32 v0, s1, v0 +; CHECK-NEXT: v_mul_lo_u32 v6, s0, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, s1, v2 +; CHECK-NEXT: v_mul_hi_u32 v8, s0, v2 +; CHECK-NEXT: v_mul_hi_u32 v2, s1, v2 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 
0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 +; CHECK-NEXT: v_mul_lo_u32 v5, s2, v0 +; CHECK-NEXT: v_mul_lo_u32 v6, s3, v0 +; CHECK-NEXT: v_mul_hi_u32 v0, s2, v0 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; CHECK-NEXT: v_mul_lo_u32 v2, s2, v2 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v6, v2 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; CHECK-NEXT: v_sub_i32_e32 v2, vcc, s0, v5 +; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v0, vcc +; CHECK-NEXT: v_sub_i32_e64 v0, s[0:1], s1, v0 +; CHECK-NEXT: v_cmp_le_u32_e64 s[0:1], s2, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[0:1] +; CHECK-NEXT: v_subrev_i32_e64 v5, s[0:1], s2, v2 +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s3, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] +; CHECK-NEXT: v_subb_u32_e32 v0, vcc, v0, v1, vcc +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc +; CHECK-NEXT: v_subrev_i32_e32 v7, vcc, s2, v5 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s3, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v3, v6, v4, vcc +; CHECK-NEXT: v_subbrev_u32_e64 v0, vcc, 0, v0, s[0:1] +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s3, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s3, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v5, v7, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; CHECK-NEXT: BB1_3: +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: s_mov_b32 s1, s0 +; CHECK-NEXT: ; return to shader part epilog + %result = urem i64 %num, %den + %cast = bitcast i64 %result to <2 x i32> + %elt.0 = extractelement <2 x i32> %cast, i32 0 + %elt.1 = 
extractelement <2 x i32> %cast, i32 1 + %res.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.0) + %res.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.1) + %ins.0 = insertelement <2 x i32> undef, i32 %res.0, i32 0 + %ins.1 = insertelement <2 x i32> %ins.0, i32 %res.0, i32 1 + %cast.back = bitcast <2 x i32> %ins.1 to i64 + ret i64 %cast.back +} + +define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) { +; GISEL-LABEL: v_urem_v2i64: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v4 +; GISEL-NEXT: v_cvt_f32_u32_e32 v9, v5 +; GISEL-NEXT: v_mac_f32_e32 v8, 0x4f800000, v9 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v8, v8 +; GISEL-NEXT: v_mul_f32_e32 v8, 0x5f7ffffc, v8 +; GISEL-NEXT: v_mul_f32_e32 v9, 0x2f800000, v8 +; GISEL-NEXT: v_trunc_f32_e32 v9, v9 +; GISEL-NEXT: v_mac_f32_e32 v8, 0xcf800000, v9 +; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8 +; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v9 +; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v4 +; GISEL-NEXT: v_subb_u32_e32 v11, vcc, 0, v5, vcc +; GISEL-NEXT: v_mul_lo_u32 v12, v10, v8 +; GISEL-NEXT: v_mul_lo_u32 v13, v11, v8 +; GISEL-NEXT: v_mul_lo_u32 v14, v10, v9 +; GISEL-NEXT: v_mul_hi_u32 v15, v10, v8 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 +; GISEL-NEXT: v_mul_lo_u32 v14, v9, v12 +; GISEL-NEXT: v_mul_lo_u32 v15, v8, v13 +; GISEL-NEXT: v_mul_hi_u32 v16, v8, v12 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 +; GISEL-NEXT: v_mul_lo_u32 v15, v9, v13 +; GISEL-NEXT: v_mul_hi_u32 v12, v9, v12 +; GISEL-NEXT: v_mul_hi_u32 v16, v8, v13 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; 
GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 +; GISEL-NEXT: v_mul_hi_u32 v13, v9, v13 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 +; GISEL-NEXT: v_addc_u32_e64 v12, s[4:5], v9, v13, vcc +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v13 +; GISEL-NEXT: v_mul_lo_u32 v13, v10, v8 +; GISEL-NEXT: v_mul_lo_u32 v11, v11, v8 +; GISEL-NEXT: v_mul_lo_u32 v14, v10, v12 +; GISEL-NEXT: v_mul_hi_u32 v10, v10, v8 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v11, v10 +; GISEL-NEXT: v_mul_lo_u32 v11, v12, v13 +; GISEL-NEXT: v_mul_lo_u32 v14, v8, v10 +; GISEL-NEXT: v_mul_hi_u32 v15, v8, v13 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v14, v11 +; GISEL-NEXT: v_mul_lo_u32 v14, v12, v10 +; GISEL-NEXT: v_mul_hi_u32 v13, v12, v13 +; GISEL-NEXT: v_mul_hi_u32 v15, v8, v10 +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v13, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v14, v15 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v13, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 +; GISEL-NEXT: v_mul_hi_u32 v10, v12, v10 +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v13 +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v8, v11 +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v10, vcc +; GISEL-NEXT: v_addc_u32_e64 v9, vcc, 0, v9, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v10, v1, v8 +; GISEL-NEXT: v_mul_lo_u32 v11, v0, v9 +; GISEL-NEXT: v_mul_hi_u32 v12, v0, v8 +; GISEL-NEXT: 
v_add_i32_e32 v10, vcc, v10, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; GISEL-NEXT: v_mul_lo_u32 v11, v1, v9 +; GISEL-NEXT: v_mul_hi_u32 v8, v1, v8 +; GISEL-NEXT: v_mul_hi_u32 v12, v0, v9 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; GISEL-NEXT: v_mul_hi_u32 v9, v1, v9 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; GISEL-NEXT: v_mul_lo_u32 v10, v4, v8 +; GISEL-NEXT: v_mul_lo_u32 v11, v5, v8 +; GISEL-NEXT: v_mul_lo_u32 v9, v4, v9 +; GISEL-NEXT: v_mul_hi_u32 v8, v4, v8 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 +; GISEL-NEXT: v_subb_u32_e64 v9, s[4:5], v1, v8, vcc +; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v8 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v9, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v8, v8, v10, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v10, s[4:5], v0, v4 +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc +; GISEL-NEXT: v_subbrev_u32_e64 v11, vcc, 0, v1, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v11, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v10, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v11, v5 +; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v13, vcc +; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v10, v4 +; 
GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v1, v5, s[4:5] +; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 +; GISEL-NEXT: v_cndmask_b32_e32 v4, v10, v4, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v1, v11, v1, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc +; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v6 +; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v7 +; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 +; GISEL-NEXT: v_trunc_f32_e32 v5, v5 +; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 +; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 +; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 +; GISEL-NEXT: v_sub_i32_e32 v8, vcc, 0, v6 +; GISEL-NEXT: v_subb_u32_e32 v9, vcc, 0, v7, vcc +; GISEL-NEXT: v_mul_lo_u32 v10, v8, v4 +; GISEL-NEXT: v_mul_lo_u32 v11, v9, v4 +; GISEL-NEXT: v_mul_lo_u32 v12, v8, v5 +; GISEL-NEXT: v_mul_hi_u32 v13, v8, v4 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; GISEL-NEXT: v_mul_lo_u32 v12, v5, v10 +; GISEL-NEXT: v_mul_lo_u32 v13, v4, v11 +; GISEL-NEXT: v_mul_hi_u32 v14, v4, v10 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; GISEL-NEXT: v_mul_lo_u32 v13, v5, v11 +; GISEL-NEXT: v_mul_hi_u32 v10, v5, v10 +; GISEL-NEXT: v_mul_hi_u32 v14, v4, v11 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; 
GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; GISEL-NEXT: v_mul_hi_u32 v11, v5, v11 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10 +; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], v5, v11, vcc +; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v11 +; GISEL-NEXT: v_mul_lo_u32 v11, v8, v4 +; GISEL-NEXT: v_mul_lo_u32 v9, v9, v4 +; GISEL-NEXT: v_mul_lo_u32 v12, v8, v10 +; GISEL-NEXT: v_mul_hi_u32 v8, v8, v4 +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v12 +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v9, v8 +; GISEL-NEXT: v_mul_lo_u32 v9, v10, v11 +; GISEL-NEXT: v_mul_lo_u32 v12, v4, v8 +; GISEL-NEXT: v_mul_hi_u32 v13, v4, v11 +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v12, v9 +; GISEL-NEXT: v_mul_lo_u32 v12, v10, v8 +; GISEL-NEXT: v_mul_hi_u32 v11, v10, v11 +; GISEL-NEXT: v_mul_hi_u32 v13, v4, v8 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v13 +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v11, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 +; GISEL-NEXT: v_mul_hi_u32 v8, v10, v8 +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v8, v11 +; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v4, v9 +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v8, vcc +; GISEL-NEXT: v_addc_u32_e64 v5, vcc, 0, v5, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v8, v3, v4 +; GISEL-NEXT: v_mul_lo_u32 v9, v2, v5 +; GISEL-NEXT: v_mul_hi_u32 v10, v2, v4 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: 
v_add_i32_e32 v8, vcc, v9, v8 +; GISEL-NEXT: v_mul_lo_u32 v9, v3, v5 +; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4 +; GISEL-NEXT: v_mul_hi_u32 v10, v2, v5 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v9, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; GISEL-NEXT: v_mul_hi_u32 v5, v3, v5 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; GISEL-NEXT: v_mul_lo_u32 v8, v6, v4 +; GISEL-NEXT: v_mul_lo_u32 v9, v7, v4 +; GISEL-NEXT: v_mul_lo_u32 v5, v6, v5 +; GISEL-NEXT: v_mul_hi_u32 v4, v6, v4 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v9, v5 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v5, v4 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v8 +; GISEL-NEXT: v_subb_u32_e64 v5, s[4:5], v3, v4, vcc +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v5, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v4, v4, v8, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], v2, v6 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc +; GISEL-NEXT: v_subbrev_u32_e64 v9, vcc, 0, v3, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v9, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v7 +; GISEL-NEXT: v_cndmask_b32_e32 v10, v10, v11, vcc +; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v8, v6 +; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v7, s[4:5] +; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 +; GISEL-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc +; GISEL-NEXT: 
v_cndmask_b32_e32 v3, v9, v3, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: v_urem_v2i64: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: v_mov_b32_e32 v8, v0 +; CGP-NEXT: v_mov_b32_e32 v9, v1 +; CGP-NEXT: v_or_b32_e32 v1, v9, v5 +; CGP-NEXT: v_mov_b32_e32 v0, 0 +; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] +; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1 +; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5] +; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1 +; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7] +; CGP-NEXT: s_cbranch_execnz BB2_2 +; CGP-NEXT: ; %bb.1: +; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4 +; CGP-NEXT: v_cvt_f32_u32_e32 v1, v5 +; CGP-NEXT: v_sub_i32_e32 v10, vcc, 0, v4 +; CGP-NEXT: v_subb_u32_e32 v11, vcc, 0, v5, vcc +; CGP-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 +; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 +; CGP-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 +; CGP-NEXT: v_trunc_f32_e32 v1, v1 +; CGP-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 +; CGP-NEXT: v_cvt_u32_f32_e32 v1, v1 +; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 +; CGP-NEXT: v_mul_lo_u32 v12, v10, v1 +; CGP-NEXT: v_mul_lo_u32 v13, v10, v0 +; CGP-NEXT: v_mul_lo_u32 v14, v11, v0 +; CGP-NEXT: v_mul_hi_u32 v15, v10, v0 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12 +; CGP-NEXT: v_mul_lo_u32 v14, v1, v13 +; CGP-NEXT: v_mul_hi_u32 v16, v0, v13 +; CGP-NEXT: v_mul_hi_u32 v13, v1, v13 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v15 +; CGP-NEXT: v_mul_lo_u32 v15, v0, v12 +; CGP-NEXT: v_mul_lo_u32 v17, v1, v12 +; CGP-NEXT: v_mul_hi_u32 v18, v0, v12 +; CGP-NEXT: v_mul_hi_u32 v12, v1, v12 +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v17, v13 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; CGP-NEXT: 
v_add_i32_e32 v14, vcc, v14, v16 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v18 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 +; CGP-NEXT: v_add_i32_e32 v15, vcc, v17, v16 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v13 +; CGP-NEXT: v_addc_u32_e64 v13, s[4:5], v1, v12, vcc +; CGP-NEXT: v_add_i32_e64 v1, s[4:5], v1, v12 +; CGP-NEXT: v_mul_lo_u32 v12, v10, v0 +; CGP-NEXT: v_mul_lo_u32 v11, v11, v0 +; CGP-NEXT: v_mul_hi_u32 v14, v10, v0 +; CGP-NEXT: v_mul_lo_u32 v10, v10, v13 +; CGP-NEXT: v_mul_lo_u32 v15, v13, v12 +; CGP-NEXT: v_mul_hi_u32 v16, v0, v12 +; CGP-NEXT: v_mul_hi_u32 v12, v13, v12 +; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v11, v10 +; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v14 +; CGP-NEXT: v_mul_lo_u32 v11, v0, v10 +; CGP-NEXT: v_mul_lo_u32 v14, v13, v10 +; CGP-NEXT: v_mul_hi_u32 v17, v0, v10 +; CGP-NEXT: v_mul_hi_u32 v10, v13, v10 +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v15, v11 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v14, v12 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v16 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v12, v17 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v13, v11 +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v14, v15 +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v13, v12 +; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v12 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v10, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v11 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: v_mul_lo_u32 v10, v9, v0 +; 
CGP-NEXT: v_mul_hi_u32 v11, v8, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v9, v0 +; CGP-NEXT: v_mul_lo_u32 v12, v8, v1 +; CGP-NEXT: v_mul_lo_u32 v13, v9, v1 +; CGP-NEXT: v_mul_hi_u32 v14, v8, v1 +; CGP-NEXT: v_mul_hi_u32 v1, v9, v1 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v13, v0 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v14 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_mul_lo_u32 v11, v4, v0 +; CGP-NEXT: v_mul_lo_u32 v12, v5, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v4, v0 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v10 +; CGP-NEXT: v_mul_lo_u32 v1, v4, v1 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v12, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v1, v0 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v8, v11 +; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v9, v0, vcc +; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v9, v0 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v4 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v5 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] +; CGP-NEXT: v_subb_u32_e32 v0, vcc, v0, v5, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v10, v5 +; CGP-NEXT: v_cndmask_b32_e32 v9, v11, v9, vcc +; CGP-NEXT: v_sub_i32_e32 v11, vcc, v1, v4 +; CGP-NEXT: v_subb_u32_e64 v12, s[4:5], v0, v5, vcc +; CGP-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v11, v4 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc +; CGP-NEXT: v_sub_i32_e32 v14, vcc, v11, v4 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[4:5] +; CGP-NEXT: v_subbrev_u32_e32 v12, vcc, 0, v12, vcc 
+; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 +; CGP-NEXT: v_cndmask_b32_e32 v5, v15, v13, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 +; CGP-NEXT: v_cndmask_b32_e32 v5, v11, v14, vcc +; CGP-NEXT: v_cndmask_b32_e32 v11, v0, v12, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; CGP-NEXT: v_cndmask_b32_e32 v0, v1, v5, vcc +; CGP-NEXT: v_cndmask_b32_e32 v1, v10, v11, vcc +; CGP-NEXT: BB2_2: ; %Flow1 +; CGP-NEXT: s_or_saveexec_b64 s[8:9], s[6:7] +; CGP-NEXT: s_xor_b64 exec, exec, s[8:9] +; CGP-NEXT: s_cbranch_execz BB2_4 +; CGP-NEXT: ; %bb.3: +; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4 +; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 +; CGP-NEXT: v_mul_lo_u32 v1, v0, v4 +; CGP-NEXT: v_mul_hi_u32 v5, v0, v4 +; CGP-NEXT: v_sub_i32_e32 v9, vcc, 0, v1 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc +; CGP-NEXT: v_mul_hi_u32 v1, v1, v0 +; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v0, v1 +; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc +; CGP-NEXT: v_mul_hi_u32 v0, v0, v8 +; CGP-NEXT: v_mul_lo_u32 v0, v0, v4 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v8, v0 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4 +; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v1, v4 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v0 +; CGP-NEXT: v_sub_i32_e64 v0, s[6:7], v1, v4 +; CGP-NEXT: s_and_b64 vcc, vcc, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; CGP-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[4:5] +; CGP-NEXT: v_mov_b32_e32 v1, 0 +; CGP-NEXT: BB2_4: +; CGP-NEXT: s_or_b64 exec, exec, s[8:9] +; CGP-NEXT: v_or_b32_e32 v5, v3, v7 +; CGP-NEXT: v_mov_b32_e32 v4, 0 +; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5] +; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1 +; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5] +; CGP-NEXT: ; implicit-def: $vgpr4_vgpr5 +; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7] +; CGP-NEXT: s_cbranch_execnz BB2_6 +; CGP-NEXT: ; 
%bb.5: +; CGP-NEXT: v_cvt_f32_u32_e32 v4, v6 +; CGP-NEXT: v_cvt_f32_u32_e32 v5, v7 +; CGP-NEXT: v_sub_i32_e32 v8, vcc, 0, v6 +; CGP-NEXT: v_subb_u32_e32 v9, vcc, 0, v7, vcc +; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; CGP-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 +; CGP-NEXT: v_trunc_f32_e32 v5, v5 +; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 +; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 +; CGP-NEXT: v_mul_lo_u32 v10, v8, v5 +; CGP-NEXT: v_mul_lo_u32 v11, v8, v4 +; CGP-NEXT: v_mul_lo_u32 v12, v9, v4 +; CGP-NEXT: v_mul_hi_u32 v13, v8, v4 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 +; CGP-NEXT: v_mul_lo_u32 v12, v5, v11 +; CGP-NEXT: v_mul_hi_u32 v14, v4, v11 +; CGP-NEXT: v_mul_hi_u32 v11, v5, v11 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 +; CGP-NEXT: v_mul_lo_u32 v13, v4, v10 +; CGP-NEXT: v_mul_lo_u32 v15, v5, v10 +; CGP-NEXT: v_mul_hi_u32 v16, v4, v10 +; CGP-NEXT: v_mul_hi_u32 v10, v5, v10 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v11 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v16 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v14 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11 +; CGP-NEXT: v_addc_u32_e64 v11, s[4:5], v5, v10, vcc +; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v5, v10 +; CGP-NEXT: v_mul_lo_u32 v10, v8, v4 +; CGP-NEXT: v_mul_lo_u32 v9, v9, v4 +; CGP-NEXT: v_mul_hi_u32 v12, v8, v4 +; CGP-NEXT: v_mul_lo_u32 v8, v8, v11 +; CGP-NEXT: v_mul_lo_u32 v13, 
v11, v10 +; CGP-NEXT: v_mul_hi_u32 v14, v4, v10 +; CGP-NEXT: v_mul_hi_u32 v10, v11, v10 +; CGP-NEXT: v_add_i32_e64 v8, s[4:5], v9, v8 +; CGP-NEXT: v_add_i32_e64 v8, s[4:5], v8, v12 +; CGP-NEXT: v_mul_lo_u32 v9, v4, v8 +; CGP-NEXT: v_mul_lo_u32 v12, v11, v8 +; CGP-NEXT: v_mul_hi_u32 v15, v4, v8 +; CGP-NEXT: v_mul_hi_u32 v8, v11, v8 +; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v13, v9 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v12, v10 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v9, v14 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v15 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v11, v9 +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v12, v13 +; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v10, v9 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v11, v10 +; CGP-NEXT: v_add_i32_e64 v8, s[4:5], v8, v10 +; CGP-NEXT: v_addc_u32_e32 v5, vcc, v5, v8, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9 +; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc +; CGP-NEXT: v_mul_lo_u32 v8, v3, v4 +; CGP-NEXT: v_mul_hi_u32 v9, v2, v4 +; CGP-NEXT: v_mul_hi_u32 v4, v3, v4 +; CGP-NEXT: v_mul_lo_u32 v10, v2, v5 +; CGP-NEXT: v_mul_lo_u32 v11, v3, v5 +; CGP-NEXT: v_mul_hi_u32 v12, v2, v5 +; CGP-NEXT: v_mul_hi_u32 v5, v3, v5 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v11, v4 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v12 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v8 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: 
v_mul_lo_u32 v9, v6, v4 +; CGP-NEXT: v_mul_lo_u32 v10, v7, v4 +; CGP-NEXT: v_mul_hi_u32 v4, v6, v4 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; CGP-NEXT: v_mul_lo_u32 v5, v6, v5 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v5 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 +; CGP-NEXT: v_sub_i32_e32 v5, vcc, v2, v9 +; CGP-NEXT: v_subb_u32_e64 v8, s[4:5], v3, v4, vcc +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v6 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v7 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v8, v7 +; CGP-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc +; CGP-NEXT: v_sub_i32_e32 v9, vcc, v5, v6 +; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v3, v7, vcc +; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v9, v6 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc +; CGP-NEXT: v_sub_i32_e32 v12, vcc, v9, v6 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v7 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5] +; CGP-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v10, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 +; CGP-NEXT: v_cndmask_b32_e32 v7, v13, v11, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 +; CGP-NEXT: v_cndmask_b32_e32 v7, v9, v12, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v10, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; CGP-NEXT: v_cndmask_b32_e32 v4, v5, v7, vcc +; CGP-NEXT: v_cndmask_b32_e32 v5, v8, v3, vcc +; CGP-NEXT: BB2_6: ; %Flow +; CGP-NEXT: s_or_saveexec_b64 s[8:9], s[6:7] +; CGP-NEXT: s_xor_b64 exec, exec, s[8:9] +; CGP-NEXT: s_cbranch_execz BB2_8 +; CGP-NEXT: ; %bb.7: +; CGP-NEXT: v_cvt_f32_u32_e32 v3, v6 +; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; CGP-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3 +; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3 +; CGP-NEXT: v_mul_lo_u32 v4, v3, v6 +; CGP-NEXT: v_mul_hi_u32 v5, v3, v6 +; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v4 +; CGP-NEXT: 
v_cmp_eq_u32_e32 vcc, 0, v5 +; CGP-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; CGP-NEXT: v_mul_hi_u32 v4, v4, v3 +; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4 +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 +; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; CGP-NEXT: v_mul_hi_u32 v3, v3, v2 +; CGP-NEXT: v_mul_lo_u32 v3, v3, v6 +; CGP-NEXT: v_sub_i32_e32 v4, vcc, v2, v3 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v6 +; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v4, v6 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v3 +; CGP-NEXT: v_sub_i32_e64 v2, s[6:7], v4, v6 +; CGP-NEXT: s_and_b64 vcc, vcc, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc +; CGP-NEXT: v_cndmask_b32_e64 v4, v5, v2, s[4:5] +; CGP-NEXT: v_mov_b32_e32 v5, 0 +; CGP-NEXT: BB2_8: +; CGP-NEXT: s_or_b64 exec, exec, s[8:9] +; CGP-NEXT: v_mov_b32_e32 v2, v4 +; CGP-NEXT: v_mov_b32_e32 v3, v5 +; CGP-NEXT: s_setpc_b64 s[30:31] + %result = urem <2 x i64> %num, %den + ret <2 x i64> %result +} + +define i64 @v_urem_i64_pow2k_denom(i64 %num) { +; CHECK-LABEL: v_urem_i64_pow2k_denom: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_movk_i32 s8, 0x1000 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0 +; CHECK-NEXT: s_mov_b32 s6, 0xfffff000 +; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s8 +; CHECK-NEXT: v_mac_f32_e32 v3, 0x4f800000, v2 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v3 +; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 +; CHECK-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 +; CHECK-NEXT: v_trunc_f32_e32 v3, v3 +; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3 +; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 +; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 +; CHECK-NEXT: v_mul_lo_u32 v4, s6, v3 +; CHECK-NEXT: v_mul_lo_u32 v5, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v6, -1, v2 +; CHECK-NEXT: v_mul_hi_u32 v7, s6, v2 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, v3, v5 +; CHECK-NEXT: v_mul_hi_u32 v8, v2, v5 +; CHECK-NEXT: v_mul_hi_u32 v5, v3, v5 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; CHECK-NEXT: 
v_mul_lo_u32 v7, v2, v4 +; CHECK-NEXT: v_mul_lo_u32 v9, v3, v4 +; CHECK-NEXT: v_mul_hi_u32 v10, v2, v4 +; CHECK-NEXT: v_mul_hi_u32 v4, v3, v4 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v9, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v8 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 +; CHECK-NEXT: v_addc_u32_e64 v5, s[4:5], v3, v4, vcc +; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v3, v4 +; CHECK-NEXT: v_mul_lo_u32 v4, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v6, -1, v2 +; CHECK-NEXT: v_mul_hi_u32 v7, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, s6, v5 +; CHECK-NEXT: v_mul_lo_u32 v9, v5, v4 +; CHECK-NEXT: v_mul_hi_u32 v10, v2, v4 +; CHECK-NEXT: v_mul_hi_u32 v4, v5, v4 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v7 +; CHECK-NEXT: v_mul_lo_u32 v7, v2, v6 +; CHECK-NEXT: v_mul_lo_u32 v8, v5, v6 +; CHECK-NEXT: v_mul_hi_u32 v11, v2, v6 +; CHECK-NEXT: v_mul_hi_u32 v5, v5, v6 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v9, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v8, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v7, v6 +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v8, v9 +; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v6 +; 
CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v7, v6 +; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v5, v6 +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; CHECK-NEXT: v_mul_lo_u32 v4, v1, v2 +; CHECK-NEXT: v_mul_hi_u32 v5, v0, v2 +; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 +; CHECK-NEXT: v_mul_lo_u32 v6, v0, v3 +; CHECK-NEXT: v_mul_lo_u32 v7, v1, v3 +; CHECK-NEXT: v_mul_hi_u32 v8, v0, v3 +; CHECK-NEXT: v_mul_hi_u32 v3, v1, v3 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 +; CHECK-NEXT: v_mul_lo_u32 v5, s8, v2 +; CHECK-NEXT: v_mul_lo_u32 v6, 0, v2 +; CHECK-NEXT: v_mul_hi_u32 v2, s8, v2 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CHECK-NEXT: v_mul_lo_u32 v3, s8, v3 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v6, v3 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 +; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v1, v2, vcc +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2 +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[4:5] +; CHECK-NEXT: v_subrev_i32_e64 v4, s[4:5], s8, v0 +; CHECK-NEXT: v_cmp_le_u32_e64 s[6:7], 0, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[6:7] +; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s8, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; CHECK-NEXT: 
v_subrev_i32_e32 v7, vcc, s8, v4 +; CHECK-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v2, v5, v2, s[6:7] +; CHECK-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5] +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v1 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] +; CHECK-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v1, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v5, v5, v6, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; CHECK-NEXT: s_setpc_b64 s[30:31] + %result = urem i64 %num, 4096 + ret i64 %result +} + +define <2 x i64> @v_urem_v2i64_pow2k_denom(<2 x i64> %num) { +; GISEL-LABEL: v_urem_v2i64_pow2k_denom: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_movk_i32 s12, 0x1000 +; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s12 +; GISEL-NEXT: s_sub_u32 s8, 0, s12 +; GISEL-NEXT: s_cselect_b32 s4, 1, 0 +; GISEL-NEXT: v_cvt_f32_u32_e32 v5, 0 +; GISEL-NEXT: v_mov_b32_e32 v6, v4 +; GISEL-NEXT: s_and_b32 s4, s4, 1 +; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v6 +; GISEL-NEXT: s_cmp_lg_u32 s4, 0 +; GISEL-NEXT: s_subb_u32 s9, 0, 0 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 +; GISEL-NEXT: v_mul_f32_e32 v6, 0x2f800000, v4 +; GISEL-NEXT: s_sub_u32 s10, 0, s12 +; GISEL-NEXT: s_cselect_b32 s4, 1, 0 +; GISEL-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 +; GISEL-NEXT: v_trunc_f32_e32 v6, v6 +; GISEL-NEXT: s_and_b32 s4, s4, 1 +; GISEL-NEXT: v_trunc_f32_e32 v7, v7 +; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6 +; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GISEL-NEXT: v_mac_f32_e32 v5, 
0xcf800000, v7 +; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7 +; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 +; GISEL-NEXT: s_cmp_lg_u32 s4, 0 +; GISEL-NEXT: s_subb_u32 s6, 0, 0 +; GISEL-NEXT: v_mul_lo_u32 v8, s10, v6 +; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 +; GISEL-NEXT: v_mul_lo_u32 v9, s8, v7 +; GISEL-NEXT: v_mul_lo_u32 v10, s10, v4 +; GISEL-NEXT: v_mul_lo_u32 v11, s6, v4 +; GISEL-NEXT: v_mul_hi_u32 v12, s10, v4 +; GISEL-NEXT: v_mul_lo_u32 v13, s8, v5 +; GISEL-NEXT: v_mul_lo_u32 v14, s9, v5 +; GISEL-NEXT: v_mul_hi_u32 v15, s8, v5 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v8 +; GISEL-NEXT: v_mul_lo_u32 v11, v6, v10 +; GISEL-NEXT: v_mul_hi_u32 v16, v4, v10 +; GISEL-NEXT: v_mul_hi_u32 v10, v6, v10 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v14, v9 +; GISEL-NEXT: v_mul_lo_u32 v14, v7, v13 +; GISEL-NEXT: v_mul_hi_u32 v17, v5, v13 +; GISEL-NEXT: v_mul_hi_u32 v13, v7, v13 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15 +; GISEL-NEXT: v_mul_lo_u32 v12, v4, v8 +; GISEL-NEXT: v_mul_lo_u32 v15, v6, v8 +; GISEL-NEXT: v_mul_hi_u32 v18, v4, v8 +; GISEL-NEXT: v_mul_hi_u32 v8, v6, v8 +; GISEL-NEXT: v_mul_lo_u32 v19, v5, v9 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v19 +; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17 +; GISEL-NEXT: v_mul_lo_u32 v14, v7, v9 +; GISEL-NEXT: v_mul_hi_u32 v17, v5, v9 +; GISEL-NEXT: v_mul_hi_u32 v9, v7, v9 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v15, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; GISEL-NEXT: 
v_add_i32_e32 v13, vcc, v13, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v16 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v19, v18 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v14, v15 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v12 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10 +; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], v6, v8, vcc +; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8 +; GISEL-NEXT: v_mul_lo_u32 v8, s10, v4 +; GISEL-NEXT: v_mul_lo_u32 v11, s6, v4 +; GISEL-NEXT: v_mul_hi_u32 v12, s10, v4 +; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v13 +; GISEL-NEXT: v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v7, s[6:7], v7, v9 +; GISEL-NEXT: v_mul_lo_u32 v9, s8, v5 +; GISEL-NEXT: v_mul_lo_u32 v14, s9, v5 +; GISEL-NEXT: v_mul_hi_u32 v15, s8, v5 +; GISEL-NEXT: v_mul_lo_u32 v16, s10, v10 +; GISEL-NEXT: v_mul_lo_u32 v17, v10, v8 +; GISEL-NEXT: v_mul_hi_u32 v18, v4, v8 +; GISEL-NEXT: v_mul_hi_u32 v8, v10, v8 +; GISEL-NEXT: v_mul_lo_u32 v19, s8, v13 +; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v16 +; GISEL-NEXT: v_mul_lo_u32 v16, v13, v9 +; GISEL-NEXT: v_add_i32_e64 v14, s[6:7], v14, v19 +; GISEL-NEXT: v_mul_hi_u32 v19, v5, v9 +; GISEL-NEXT: v_mul_hi_u32 v9, v13, v9 +; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v12 +; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v14, v15 +; GISEL-NEXT: v_mul_lo_u32 v14, v4, v11 +; GISEL-NEXT: v_mul_lo_u32 v15, v5, v12 +; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v16, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v15, v19 +; GISEL-NEXT: v_mul_lo_u32 v15, v10, 
v11 +; GISEL-NEXT: v_mul_hi_u32 v19, v4, v11 +; GISEL-NEXT: v_mul_hi_u32 v10, v10, v11 +; GISEL-NEXT: v_mul_lo_u32 v11, v13, v12 +; GISEL-NEXT: v_mul_hi_u32 v13, v13, v12 +; GISEL-NEXT: v_mul_hi_u32 v12, v5, v12 +; GISEL-NEXT: v_add_i32_e64 v14, s[8:9], v17, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v8, s[8:9], v15, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v9, s[8:9], v11, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v14, s[8:9], v14, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v8, s[8:9], v8, v19 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9] +; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v14, s[6:7], v17, v14 +; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v15, v18 +; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v19 +; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v12 +; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v8, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v15, v12 +; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v14 +; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v12 +; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v13, v11 +; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v6, v10, vcc +; GISEL-NEXT: v_addc_u32_e64 v7, vcc, v7, v11, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 +; GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc +; GISEL-NEXT: v_mul_lo_u32 v8, v3, v4 +; GISEL-NEXT: v_mul_hi_u32 v10, v2, v4 +; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc +; GISEL-NEXT: v_mul_lo_u32 v9, v1, v5 +; GISEL-NEXT: v_mul_hi_u32 v11, v0, v5 +; GISEL-NEXT: v_mul_hi_u32 
v5, v1, v5 +; GISEL-NEXT: v_mul_lo_u32 v12, v2, v6 +; GISEL-NEXT: v_mul_lo_u32 v13, v3, v6 +; GISEL-NEXT: v_mul_hi_u32 v14, v2, v6 +; GISEL-NEXT: v_mul_hi_u32 v6, v3, v6 +; GISEL-NEXT: v_mul_lo_u32 v15, v0, v7 +; GISEL-NEXT: v_mul_lo_u32 v16, v1, v7 +; GISEL-NEXT: v_mul_hi_u32 v17, v0, v7 +; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v13, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v16, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v12, v8 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v15, v9 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v16, v11 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; GISEL-NEXT: v_mul_lo_u32 v10, s12, v4 +; GISEL-NEXT: v_mul_lo_u32 v12, 0, v4 +; GISEL-NEXT: v_mul_hi_u32 v4, s12, v4 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; GISEL-NEXT: v_mul_lo_u32 v11, s12, v5 +; GISEL-NEXT: v_mul_lo_u32 v13, 0, v5 +; GISEL-NEXT: v_mul_hi_u32 v5, s12, v5 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; GISEL-NEXT: v_mul_lo_u32 v6, s12, v6 +; GISEL-NEXT: v_mul_lo_u32 v7, s12, v7 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v12, v6 
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, v13, v7 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 +; GISEL-NEXT: v_subb_u32_e64 v6, s[4:5], v3, v4, vcc +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s12, v2 +; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] +; GISEL-NEXT: v_subrev_i32_e64 v7, s[4:5], s12, v2 +; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v0, v11 +; GISEL-NEXT: v_subb_u32_e64 v8, s[8:9], v1, v5, s[6:7] +; GISEL-NEXT: v_sub_i32_e64 v1, s[8:9], v1, v5 +; GISEL-NEXT: v_cmp_le_u32_e64 s[8:9], s12, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[8:9] +; GISEL-NEXT: v_subrev_i32_e64 v9, s[8:9], s12, v0 +; GISEL-NEXT: v_cmp_le_u32_e64 s[10:11], 0, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[10:11] +; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s12, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc +; GISEL-NEXT: v_subrev_i32_e32 v12, vcc, s12, v7 +; GISEL-NEXT: v_cmp_le_u32_e64 s[10:11], 0, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[10:11] +; GISEL-NEXT: v_subbrev_u32_e64 v1, s[6:7], 0, v1, s[6:7] +; GISEL-NEXT: v_cmp_le_u32_e64 s[6:7], s12, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[6:7] +; GISEL-NEXT: v_subrev_i32_e64 v15, s[6:7], s12, v9 +; GISEL-NEXT: v_cmp_eq_u32_e64 s[10:11], 0, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v4, v10, v4, s[10:11] +; GISEL-NEXT: v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[4:5] +; GISEL-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[8:9] +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] +; GISEL-NEXT: v_subbrev_u32_e32 v13, vcc, 0, v3, vcc +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, -1, vcc +; GISEL-NEXT: v_subbrev_u32_e64 v17, vcc, 0, v1, s[6:7] +; GISEL-NEXT: 
v_cmp_eq_u32_e32 vcc, 0, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v10, v10, v11, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v11, v16, v14, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 +; GISEL-NEXT: v_cndmask_b32_e32 v7, v7, v12, vcc +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v9, v9, v15, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v13, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v9, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v1, v8, v1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: v_urem_v2i64_pow2k_denom: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: s_movk_i32 s12, 0x1000 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0 +; CGP-NEXT: v_cvt_f32_u32_e32 v5, s12 +; CGP-NEXT: s_mov_b32 s8, 0xfffff000 +; CGP-NEXT: v_mov_b32_e32 v6, v5 +; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v4 +; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v4 +; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v6 +; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 +; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v4 +; CGP-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 +; CGP-NEXT: v_trunc_f32_e32 v6, v6 +; CGP-NEXT: v_trunc_f32_e32 v7, v7 +; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 +; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 +; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 +; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 +; CGP-NEXT: v_mul_lo_u32 v8, s8, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CGP-NEXT: v_mul_lo_u32 v9, s8, v7 +; CGP-NEXT: v_mul_lo_u32 v10, s8, v4 +; CGP-NEXT: v_mul_lo_u32 v11, -1, v4 +; CGP-NEXT: v_mul_hi_u32 v12, s8, v4 +; CGP-NEXT: v_mul_lo_u32 v13, s8, v5 +; 
CGP-NEXT: v_mul_lo_u32 v14, -1, v5 +; CGP-NEXT: v_mul_hi_u32 v15, s8, v5 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v8 +; CGP-NEXT: v_mul_lo_u32 v11, v6, v10 +; CGP-NEXT: v_mul_hi_u32 v16, v4, v10 +; CGP-NEXT: v_mul_hi_u32 v10, v6, v10 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v14, v9 +; CGP-NEXT: v_mul_lo_u32 v14, v7, v13 +; CGP-NEXT: v_mul_hi_u32 v17, v5, v13 +; CGP-NEXT: v_mul_hi_u32 v13, v7, v13 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v12 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v15 +; CGP-NEXT: v_mul_lo_u32 v12, v4, v8 +; CGP-NEXT: v_mul_lo_u32 v15, v6, v8 +; CGP-NEXT: v_mul_hi_u32 v18, v4, v8 +; CGP-NEXT: v_mul_hi_u32 v8, v6, v8 +; CGP-NEXT: v_mul_lo_u32 v19, v5, v9 +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v19 +; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v17 +; CGP-NEXT: v_mul_lo_u32 v14, v7, v9 +; CGP-NEXT: v_mul_hi_u32 v17, v5, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v7, v9 +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v12 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v15, v10 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v16 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v18 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v17 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v16 +; CGP-NEXT: v_add_i32_e32 v15, vcc, v19, v18 +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v17 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; CGP-NEXT: v_add_i32_e32 v12, 
vcc, v14, v15 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10 +; CGP-NEXT: v_addc_u32_e64 v10, s[4:5], v6, v8, vcc +; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8 +; CGP-NEXT: v_mul_lo_u32 v8, s8, v4 +; CGP-NEXT: v_mul_lo_u32 v11, -1, v4 +; CGP-NEXT: v_mul_hi_u32 v12, s8, v4 +; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v5, v13 +; CGP-NEXT: v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5] +; CGP-NEXT: v_add_i32_e64 v7, s[6:7], v7, v9 +; CGP-NEXT: v_mul_lo_u32 v9, s8, v5 +; CGP-NEXT: v_mul_lo_u32 v14, -1, v5 +; CGP-NEXT: v_mul_hi_u32 v15, s8, v5 +; CGP-NEXT: v_mul_lo_u32 v16, s8, v10 +; CGP-NEXT: v_mul_lo_u32 v17, v10, v8 +; CGP-NEXT: v_mul_hi_u32 v18, v4, v8 +; CGP-NEXT: v_mul_hi_u32 v8, v10, v8 +; CGP-NEXT: v_mul_lo_u32 v19, s8, v13 +; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v11, v16 +; CGP-NEXT: v_mul_lo_u32 v16, v13, v9 +; CGP-NEXT: v_add_i32_e64 v14, s[6:7], v14, v19 +; CGP-NEXT: v_mul_hi_u32 v19, v5, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v13, v9 +; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v11, v12 +; CGP-NEXT: v_add_i32_e64 v12, s[6:7], v14, v15 +; CGP-NEXT: v_mul_lo_u32 v14, v4, v11 +; CGP-NEXT: v_mul_lo_u32 v15, v5, v12 +; CGP-NEXT: v_add_i32_e64 v15, s[6:7], v16, v15 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v15, s[6:7], v15, v19 +; CGP-NEXT: v_mul_lo_u32 v15, v10, v11 +; CGP-NEXT: v_mul_hi_u32 v19, v4, v11 +; CGP-NEXT: v_mul_hi_u32 v10, v10, v11 +; CGP-NEXT: v_mul_lo_u32 v11, v13, v12 +; CGP-NEXT: v_mul_hi_u32 v13, v13, v12 +; CGP-NEXT: v_mul_hi_u32 v12, v5, v12 +; CGP-NEXT: v_add_i32_e64 v14, s[8:9], v17, v14 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9] +; CGP-NEXT: v_add_i32_e64 v8, s[8:9], v15, v8 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[8:9] +; CGP-NEXT: v_add_i32_e64 v9, s[8:9], v11, v9 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[8:9] +; CGP-NEXT: v_add_i32_e64 v14, s[8:9], v14, v18 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[8:9] +; CGP-NEXT: 
v_add_i32_e64 v8, s[8:9], v8, v19 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9] +; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v9, s[6:7], v9, v12 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v14, s[6:7], v17, v14 +; CGP-NEXT: v_add_i32_e64 v15, s[6:7], v15, v18 +; CGP-NEXT: v_add_i32_e64 v16, s[6:7], v16, v19 +; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v11, v12 +; CGP-NEXT: v_add_i32_e64 v8, s[6:7], v8, v14 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v9, s[6:7], v9, v16 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v12, s[6:7], v15, v12 +; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v11, v14 +; CGP-NEXT: v_add_i32_e64 v10, s[6:7], v10, v12 +; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v13, v11 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v10, vcc +; CGP-NEXT: v_addc_u32_e64 v7, vcc, v7, v11, s[4:5] +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v8 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc +; CGP-NEXT: v_mul_lo_u32 v8, v3, v4 +; CGP-NEXT: v_mul_hi_u32 v10, v2, v4 +; CGP-NEXT: v_mul_hi_u32 v4, v3, v4 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; CGP-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc +; CGP-NEXT: v_mul_lo_u32 v9, v1, v5 +; CGP-NEXT: v_mul_hi_u32 v11, v0, v5 +; CGP-NEXT: v_mul_hi_u32 v5, v1, v5 +; CGP-NEXT: v_mul_lo_u32 v12, v2, v6 +; CGP-NEXT: v_mul_lo_u32 v13, v3, v6 +; CGP-NEXT: v_mul_hi_u32 v14, v2, v6 +; CGP-NEXT: v_mul_hi_u32 v6, v3, v6 +; CGP-NEXT: v_mul_lo_u32 v15, v0, v7 +; CGP-NEXT: v_mul_lo_u32 v16, v1, v7 +; CGP-NEXT: v_mul_hi_u32 v17, v0, v7 +; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v12 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v15 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v16, v5 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; CGP-NEXT: 
v_add_i32_e32 v8, vcc, v8, v10 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v14 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v17 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v12, v8 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v13, v10 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v15, v9 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v16, v11 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v8 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_mul_lo_u32 v10, s12, v4 +; CGP-NEXT: v_mul_lo_u32 v12, 0, v4 +; CGP-NEXT: v_mul_hi_u32 v4, s12, v4 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CGP-NEXT: v_mul_lo_u32 v11, s12, v5 +; CGP-NEXT: v_mul_lo_u32 v13, 0, v5 +; CGP-NEXT: v_mul_hi_u32 v5, s12, v5 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_mul_lo_u32 v6, s12, v6 +; CGP-NEXT: v_mul_lo_u32 v7, s12, v7 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v12, v6 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v13, v7 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 +; CGP-NEXT: v_subb_u32_e64 v6, s[4:5], v3, v4, vcc +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 +; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s12, v2 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] +; CGP-NEXT: v_subrev_i32_e64 v7, s[4:5], s12, v2 +; CGP-NEXT: v_sub_i32_e64 v0, s[6:7], v0, v11 +; CGP-NEXT: v_subb_u32_e64 v8, s[8:9], v1, v5, s[6:7] +; CGP-NEXT: v_sub_i32_e64 v1, s[8:9], v1, v5 +; CGP-NEXT: v_cmp_le_u32_e64 s[8:9], s12, v0 +; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[8:9] +; CGP-NEXT: v_subrev_i32_e64 v9, s[8:9], s12, v0 +; CGP-NEXT: v_cmp_le_u32_e64 s[10:11], 0, v6 +; CGP-NEXT: 
v_cndmask_b32_e64 v10, 0, -1, s[10:11] +; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s12, v7 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc +; CGP-NEXT: v_subrev_i32_e32 v12, vcc, s12, v7 +; CGP-NEXT: v_cmp_le_u32_e64 s[10:11], 0, v8 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[10:11] +; CGP-NEXT: v_subbrev_u32_e64 v1, s[6:7], 0, v1, s[6:7] +; CGP-NEXT: v_cmp_le_u32_e64 s[6:7], s12, v9 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[6:7] +; CGP-NEXT: v_subrev_i32_e64 v15, s[6:7], s12, v9 +; CGP-NEXT: v_cmp_eq_u32_e64 s[10:11], 0, v6 +; CGP-NEXT: v_cndmask_b32_e64 v4, v10, v4, s[10:11] +; CGP-NEXT: v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[4:5] +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8 +; CGP-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[4:5] +; CGP-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[8:9] +; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v3 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] +; CGP-NEXT: v_subbrev_u32_e32 v13, vcc, 0, v3, vcc +; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, -1, vcc +; CGP-NEXT: v_subbrev_u32_e64 v17, vcc, 0, v1, s[6:7] +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; CGP-NEXT: v_cndmask_b32_e32 v10, v10, v11, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v11, v16, v14, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 +; CGP-NEXT: v_cndmask_b32_e32 v7, v7, v12, vcc +; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v11 +; CGP-NEXT: v_cndmask_b32_e64 v9, v9, v15, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v13, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc +; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] +; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v5 +; CGP-NEXT: v_cndmask_b32_e64 v0, v0, v9, s[4:5] +; CGP-NEXT: v_cndmask_b32_e64 v1, v8, v1, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc +; CGP-NEXT: s_setpc_b64 s[30:31] + %result = urem <2 x i64> %num, + ret <2 x i64> %result +} + +define i64 
@v_urem_i64_oddk_denom(i64 %num) { +; CHECK-LABEL: v_urem_i64_oddk_denom: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_mov_b32 s8, 0x12d8fb +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0 +; CHECK-NEXT: s_mov_b32 s6, 0xffed2705 +; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s8 +; CHECK-NEXT: v_mac_f32_e32 v3, 0x4f800000, v2 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v3 +; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 +; CHECK-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 +; CHECK-NEXT: v_trunc_f32_e32 v3, v3 +; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3 +; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 +; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 +; CHECK-NEXT: v_mul_lo_u32 v4, s6, v3 +; CHECK-NEXT: v_mul_lo_u32 v5, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v6, -1, v2 +; CHECK-NEXT: v_mul_hi_u32 v7, s6, v2 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, v3, v5 +; CHECK-NEXT: v_mul_hi_u32 v8, v2, v5 +; CHECK-NEXT: v_mul_hi_u32 v5, v3, v5 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; CHECK-NEXT: v_mul_lo_u32 v7, v2, v4 +; CHECK-NEXT: v_mul_lo_u32 v9, v3, v4 +; CHECK-NEXT: v_mul_hi_u32 v10, v2, v4 +; CHECK-NEXT: v_mul_hi_u32 v4, v3, v4 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v9, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v8 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 +; CHECK-NEXT: v_addc_u32_e64 v5, s[4:5], v3, v4, vcc +; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v3, v4 +; CHECK-NEXT: v_mul_lo_u32 v4, 
s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v6, -1, v2 +; CHECK-NEXT: v_mul_hi_u32 v7, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, s6, v5 +; CHECK-NEXT: v_mul_lo_u32 v9, v5, v4 +; CHECK-NEXT: v_mul_hi_u32 v10, v2, v4 +; CHECK-NEXT: v_mul_hi_u32 v4, v5, v4 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v7 +; CHECK-NEXT: v_mul_lo_u32 v7, v2, v6 +; CHECK-NEXT: v_mul_lo_u32 v8, v5, v6 +; CHECK-NEXT: v_mul_hi_u32 v11, v2, v6 +; CHECK-NEXT: v_mul_hi_u32 v5, v5, v6 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v9, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v8, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v7, v6 +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v8, v9 +; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v7, v6 +; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v5, v6 +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; CHECK-NEXT: v_mul_lo_u32 v4, v1, v2 +; CHECK-NEXT: v_mul_hi_u32 v5, v0, v2 +; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 +; CHECK-NEXT: v_mul_lo_u32 v6, v0, v3 +; CHECK-NEXT: v_mul_lo_u32 v7, v1, v3 +; CHECK-NEXT: v_mul_hi_u32 v8, v0, v3 +; CHECK-NEXT: v_mul_hi_u32 v3, v1, v3 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v4, 
vcc, v6, v4 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 +; CHECK-NEXT: v_mul_lo_u32 v5, s8, v2 +; CHECK-NEXT: v_mul_lo_u32 v6, 0, v2 +; CHECK-NEXT: v_mul_hi_u32 v2, s8, v2 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CHECK-NEXT: v_mul_lo_u32 v3, s8, v3 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v6, v3 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 +; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v1, v2, vcc +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2 +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[4:5] +; CHECK-NEXT: v_subrev_i32_e64 v4, s[4:5], s8, v0 +; CHECK-NEXT: v_cmp_le_u32_e64 s[6:7], 0, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[6:7] +; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s8, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; CHECK-NEXT: v_subrev_i32_e32 v7, vcc, s8, v4 +; CHECK-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v2, v5, v2, s[6:7] +; CHECK-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5] +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v1 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] +; CHECK-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v1, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v5, v5, v6, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; CHECK-NEXT: s_setpc_b64 s[30:31] + %result = urem i64 %num, 1235195 + ret i64 %result +} + +define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) { +; GISEL-LABEL: v_urem_v2i64_oddk_denom: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_mov_b32 s12, 0x12d8fb +; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s12 +; GISEL-NEXT: s_sub_u32 s8, 0, s12 +; GISEL-NEXT: s_cselect_b32 s4, 1, 0 +; GISEL-NEXT: v_cvt_f32_u32_e32 v5, 0 +; GISEL-NEXT: v_mov_b32_e32 v6, v4 +; GISEL-NEXT: s_and_b32 s4, s4, 1 +; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v6 +; GISEL-NEXT: s_cmp_lg_u32 s4, 0 +; GISEL-NEXT: s_subb_u32 s9, 0, 0 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 +; GISEL-NEXT: v_mul_f32_e32 v6, 0x2f800000, v4 +; GISEL-NEXT: s_sub_u32 s10, 0, s12 +; GISEL-NEXT: s_cselect_b32 s4, 1, 0 +; GISEL-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 +; GISEL-NEXT: v_trunc_f32_e32 v6, v6 +; GISEL-NEXT: s_and_b32 s4, s4, 1 +; GISEL-NEXT: v_trunc_f32_e32 v7, v7 +; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6 +; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 +; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7 +; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 +; GISEL-NEXT: s_cmp_lg_u32 s4, 0 +; GISEL-NEXT: s_subb_u32 s6, 0, 0 +; GISEL-NEXT: v_mul_lo_u32 v8, s10, v6 +; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 +; GISEL-NEXT: v_mul_lo_u32 v9, s8, v7 +; GISEL-NEXT: v_mul_lo_u32 v10, s10, v4 +; GISEL-NEXT: v_mul_lo_u32 v11, s6, v4 +; GISEL-NEXT: v_mul_hi_u32 v12, s10, v4 +; GISEL-NEXT: v_mul_lo_u32 v13, s8, v5 +; GISEL-NEXT: v_mul_lo_u32 v14, s9, v5 +; GISEL-NEXT: v_mul_hi_u32 v15, s8, v5 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v8 +; GISEL-NEXT: v_mul_lo_u32 v11, v6, v10 +; GISEL-NEXT: v_mul_hi_u32 v16, v4, v10 +; GISEL-NEXT: v_mul_hi_u32 v10, v6, v10 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v14, v9 +; GISEL-NEXT: v_mul_lo_u32 v14, v7, v13 +; GISEL-NEXT: v_mul_hi_u32 v17, v5, v13 +; GISEL-NEXT: v_mul_hi_u32 v13, v7, v13 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15 +; 
GISEL-NEXT: v_mul_lo_u32 v12, v4, v8 +; GISEL-NEXT: v_mul_lo_u32 v15, v6, v8 +; GISEL-NEXT: v_mul_hi_u32 v18, v4, v8 +; GISEL-NEXT: v_mul_hi_u32 v8, v6, v8 +; GISEL-NEXT: v_mul_lo_u32 v19, v5, v9 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v19 +; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17 +; GISEL-NEXT: v_mul_lo_u32 v14, v7, v9 +; GISEL-NEXT: v_mul_hi_u32 v17, v5, v9 +; GISEL-NEXT: v_mul_hi_u32 v9, v7, v9 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v15, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v16 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v19, v18 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v14, v15 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v12 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10 +; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], v6, v8, vcc +; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8 +; GISEL-NEXT: v_mul_lo_u32 v8, s10, v4 +; GISEL-NEXT: v_mul_lo_u32 v11, s6, v4 +; GISEL-NEXT: v_mul_hi_u32 v12, s10, v4 +; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, 
v13 +; GISEL-NEXT: v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v7, s[6:7], v7, v9 +; GISEL-NEXT: v_mul_lo_u32 v9, s8, v5 +; GISEL-NEXT: v_mul_lo_u32 v14, s9, v5 +; GISEL-NEXT: v_mul_hi_u32 v15, s8, v5 +; GISEL-NEXT: v_mul_lo_u32 v16, s10, v10 +; GISEL-NEXT: v_mul_lo_u32 v17, v10, v8 +; GISEL-NEXT: v_mul_hi_u32 v18, v4, v8 +; GISEL-NEXT: v_mul_hi_u32 v8, v10, v8 +; GISEL-NEXT: v_mul_lo_u32 v19, s8, v13 +; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v16 +; GISEL-NEXT: v_mul_lo_u32 v16, v13, v9 +; GISEL-NEXT: v_add_i32_e64 v14, s[6:7], v14, v19 +; GISEL-NEXT: v_mul_hi_u32 v19, v5, v9 +; GISEL-NEXT: v_mul_hi_u32 v9, v13, v9 +; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v12 +; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v14, v15 +; GISEL-NEXT: v_mul_lo_u32 v14, v4, v11 +; GISEL-NEXT: v_mul_lo_u32 v15, v5, v12 +; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v16, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v15, v19 +; GISEL-NEXT: v_mul_lo_u32 v15, v10, v11 +; GISEL-NEXT: v_mul_hi_u32 v19, v4, v11 +; GISEL-NEXT: v_mul_hi_u32 v10, v10, v11 +; GISEL-NEXT: v_mul_lo_u32 v11, v13, v12 +; GISEL-NEXT: v_mul_hi_u32 v13, v13, v12 +; GISEL-NEXT: v_mul_hi_u32 v12, v5, v12 +; GISEL-NEXT: v_add_i32_e64 v14, s[8:9], v17, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v8, s[8:9], v15, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v9, s[8:9], v11, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v14, s[8:9], v14, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v8, s[8:9], v8, v19 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9] +; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v14, s[6:7], v17, v14 +; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v15, v18 
+; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v19 +; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v12 +; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v8, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v15, v12 +; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v14 +; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v12 +; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v13, v11 +; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v6, v10, vcc +; GISEL-NEXT: v_addc_u32_e64 v7, vcc, v7, v11, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 +; GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc +; GISEL-NEXT: v_mul_lo_u32 v8, v3, v4 +; GISEL-NEXT: v_mul_hi_u32 v10, v2, v4 +; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc +; GISEL-NEXT: v_mul_lo_u32 v9, v1, v5 +; GISEL-NEXT: v_mul_hi_u32 v11, v0, v5 +; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5 +; GISEL-NEXT: v_mul_lo_u32 v12, v2, v6 +; GISEL-NEXT: v_mul_lo_u32 v13, v3, v6 +; GISEL-NEXT: v_mul_hi_u32 v14, v2, v6 +; GISEL-NEXT: v_mul_hi_u32 v6, v3, v6 +; GISEL-NEXT: v_mul_lo_u32 v15, v0, v7 +; GISEL-NEXT: v_mul_lo_u32 v16, v1, v7 +; GISEL-NEXT: v_mul_hi_u32 v17, v0, v7 +; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v13, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v16, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; GISEL-NEXT: v_cndmask_b32_e64 
v9, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v12, v8 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v15, v9 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v16, v11 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; GISEL-NEXT: v_mul_lo_u32 v10, s12, v4 +; GISEL-NEXT: v_mul_lo_u32 v12, 0, v4 +; GISEL-NEXT: v_mul_hi_u32 v4, s12, v4 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; GISEL-NEXT: v_mul_lo_u32 v11, s12, v5 +; GISEL-NEXT: v_mul_lo_u32 v13, 0, v5 +; GISEL-NEXT: v_mul_hi_u32 v5, s12, v5 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; GISEL-NEXT: v_mul_lo_u32 v6, s12, v6 +; GISEL-NEXT: v_mul_lo_u32 v7, s12, v7 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v12, v6 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v13, v7 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 +; GISEL-NEXT: v_subb_u32_e64 v6, s[4:5], v3, v4, vcc +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s12, v2 +; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] +; GISEL-NEXT: v_subrev_i32_e64 v7, s[4:5], s12, v2 +; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v0, v11 +; GISEL-NEXT: v_subb_u32_e64 v8, s[8:9], v1, v5, s[6:7] +; GISEL-NEXT: v_sub_i32_e64 v1, s[8:9], v1, v5 +; GISEL-NEXT: v_cmp_le_u32_e64 s[8:9], s12, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[8:9] +; GISEL-NEXT: v_subrev_i32_e64 v9, s[8:9], s12, v0 +; GISEL-NEXT: v_cmp_le_u32_e64 s[10:11], 0, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[10:11] +; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s12, v7 +; GISEL-NEXT: 
v_cndmask_b32_e64 v11, 0, -1, vcc +; GISEL-NEXT: v_subrev_i32_e32 v12, vcc, s12, v7 +; GISEL-NEXT: v_cmp_le_u32_e64 s[10:11], 0, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[10:11] +; GISEL-NEXT: v_subbrev_u32_e64 v1, s[6:7], 0, v1, s[6:7] +; GISEL-NEXT: v_cmp_le_u32_e64 s[6:7], s12, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[6:7] +; GISEL-NEXT: v_subrev_i32_e64 v15, s[6:7], s12, v9 +; GISEL-NEXT: v_cmp_eq_u32_e64 s[10:11], 0, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v4, v10, v4, s[10:11] +; GISEL-NEXT: v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[4:5] +; GISEL-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[8:9] +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] +; GISEL-NEXT: v_subbrev_u32_e32 v13, vcc, 0, v3, vcc +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, -1, vcc +; GISEL-NEXT: v_subbrev_u32_e64 v17, vcc, 0, v1, s[6:7] +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v10, v10, v11, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v11, v16, v14, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 +; GISEL-NEXT: v_cndmask_b32_e32 v7, v7, v12, vcc +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v9, v9, v15, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v13, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v9, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v1, v8, v1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: v_urem_v2i64_oddk_denom: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: s_mov_b32 s12, 0x12d8fb +; CGP-NEXT: 
v_cvt_f32_u32_e32 v4, 0 +; CGP-NEXT: v_cvt_f32_u32_e32 v5, s12 +; CGP-NEXT: s_mov_b32 s8, 0xffed2705 +; CGP-NEXT: v_mov_b32_e32 v6, v5 +; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v4 +; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v4 +; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v6 +; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 +; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v4 +; CGP-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 +; CGP-NEXT: v_trunc_f32_e32 v6, v6 +; CGP-NEXT: v_trunc_f32_e32 v7, v7 +; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 +; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 +; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 +; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 +; CGP-NEXT: v_mul_lo_u32 v8, s8, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CGP-NEXT: v_mul_lo_u32 v9, s8, v7 +; CGP-NEXT: v_mul_lo_u32 v10, s8, v4 +; CGP-NEXT: v_mul_lo_u32 v11, -1, v4 +; CGP-NEXT: v_mul_hi_u32 v12, s8, v4 +; CGP-NEXT: v_mul_lo_u32 v13, s8, v5 +; CGP-NEXT: v_mul_lo_u32 v14, -1, v5 +; CGP-NEXT: v_mul_hi_u32 v15, s8, v5 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v8 +; CGP-NEXT: v_mul_lo_u32 v11, v6, v10 +; CGP-NEXT: v_mul_hi_u32 v16, v4, v10 +; CGP-NEXT: v_mul_hi_u32 v10, v6, v10 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v14, v9 +; CGP-NEXT: v_mul_lo_u32 v14, v7, v13 +; CGP-NEXT: v_mul_hi_u32 v17, v5, v13 +; CGP-NEXT: v_mul_hi_u32 v13, v7, v13 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v12 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v15 +; CGP-NEXT: v_mul_lo_u32 v12, v4, v8 +; CGP-NEXT: v_mul_lo_u32 v15, v6, v8 +; CGP-NEXT: v_mul_hi_u32 v18, v4, v8 +; CGP-NEXT: v_mul_hi_u32 v8, v6, v8 +; CGP-NEXT: v_mul_lo_u32 v19, v5, v9 +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v19 +; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v17 +; CGP-NEXT: v_mul_lo_u32 v14, v7, v9 +; CGP-NEXT: v_mul_hi_u32 v17, v5, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v7, v9 +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], 
v11, v12 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v15, v10 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v16 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v18 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v17 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v16 +; CGP-NEXT: v_add_i32_e32 v15, vcc, v19, v18 +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v17 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v15 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10 +; CGP-NEXT: v_addc_u32_e64 v10, s[4:5], v6, v8, vcc +; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8 +; CGP-NEXT: v_mul_lo_u32 v8, s8, v4 +; CGP-NEXT: v_mul_lo_u32 v11, -1, v4 +; CGP-NEXT: v_mul_hi_u32 v12, s8, v4 +; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v5, v13 +; CGP-NEXT: v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5] +; CGP-NEXT: v_add_i32_e64 v7, s[6:7], v7, v9 +; CGP-NEXT: v_mul_lo_u32 v9, s8, v5 +; CGP-NEXT: v_mul_lo_u32 v14, -1, v5 +; CGP-NEXT: v_mul_hi_u32 v15, s8, v5 +; CGP-NEXT: v_mul_lo_u32 v16, s8, v10 +; CGP-NEXT: v_mul_lo_u32 v17, v10, v8 +; CGP-NEXT: v_mul_hi_u32 v18, v4, v8 +; CGP-NEXT: v_mul_hi_u32 v8, v10, v8 +; CGP-NEXT: v_mul_lo_u32 v19, s8, v13 +; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v11, v16 +; CGP-NEXT: v_mul_lo_u32 v16, v13, v9 +; CGP-NEXT: v_add_i32_e64 v14, s[6:7], v14, v19 +; CGP-NEXT: 
v_mul_hi_u32 v19, v5, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v13, v9 +; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v11, v12 +; CGP-NEXT: v_add_i32_e64 v12, s[6:7], v14, v15 +; CGP-NEXT: v_mul_lo_u32 v14, v4, v11 +; CGP-NEXT: v_mul_lo_u32 v15, v5, v12 +; CGP-NEXT: v_add_i32_e64 v15, s[6:7], v16, v15 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v15, s[6:7], v15, v19 +; CGP-NEXT: v_mul_lo_u32 v15, v10, v11 +; CGP-NEXT: v_mul_hi_u32 v19, v4, v11 +; CGP-NEXT: v_mul_hi_u32 v10, v10, v11 +; CGP-NEXT: v_mul_lo_u32 v11, v13, v12 +; CGP-NEXT: v_mul_hi_u32 v13, v13, v12 +; CGP-NEXT: v_mul_hi_u32 v12, v5, v12 +; CGP-NEXT: v_add_i32_e64 v14, s[8:9], v17, v14 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9] +; CGP-NEXT: v_add_i32_e64 v8, s[8:9], v15, v8 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[8:9] +; CGP-NEXT: v_add_i32_e64 v9, s[8:9], v11, v9 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[8:9] +; CGP-NEXT: v_add_i32_e64 v14, s[8:9], v14, v18 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[8:9] +; CGP-NEXT: v_add_i32_e64 v8, s[8:9], v8, v19 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9] +; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v9, s[6:7], v9, v12 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v14, s[6:7], v17, v14 +; CGP-NEXT: v_add_i32_e64 v15, s[6:7], v15, v18 +; CGP-NEXT: v_add_i32_e64 v16, s[6:7], v16, v19 +; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v11, v12 +; CGP-NEXT: v_add_i32_e64 v8, s[6:7], v8, v14 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v9, s[6:7], v9, v16 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v12, s[6:7], v15, v12 +; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v11, v14 +; CGP-NEXT: v_add_i32_e64 v10, s[6:7], v10, v12 +; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v13, v11 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v10, vcc +; CGP-NEXT: v_addc_u32_e64 v7, vcc, v7, v11, s[4:5] +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v8 +; CGP-NEXT: 
v_addc_u32_e32 v6, vcc, 0, v6, vcc +; CGP-NEXT: v_mul_lo_u32 v8, v3, v4 +; CGP-NEXT: v_mul_hi_u32 v10, v2, v4 +; CGP-NEXT: v_mul_hi_u32 v4, v3, v4 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; CGP-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc +; CGP-NEXT: v_mul_lo_u32 v9, v1, v5 +; CGP-NEXT: v_mul_hi_u32 v11, v0, v5 +; CGP-NEXT: v_mul_hi_u32 v5, v1, v5 +; CGP-NEXT: v_mul_lo_u32 v12, v2, v6 +; CGP-NEXT: v_mul_lo_u32 v13, v3, v6 +; CGP-NEXT: v_mul_hi_u32 v14, v2, v6 +; CGP-NEXT: v_mul_hi_u32 v6, v3, v6 +; CGP-NEXT: v_mul_lo_u32 v15, v0, v7 +; CGP-NEXT: v_mul_lo_u32 v16, v1, v7 +; CGP-NEXT: v_mul_hi_u32 v17, v0, v7 +; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v12 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v15 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v16, v5 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v14 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v17 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v12, v8 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v13, v10 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v15, v9 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v16, v11 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v8 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_mul_lo_u32 v10, s12, v4 +; CGP-NEXT: v_mul_lo_u32 v12, 0, v4 +; CGP-NEXT: v_mul_hi_u32 v4, s12, v4 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CGP-NEXT: v_mul_lo_u32 v11, s12, v5 +; CGP-NEXT: v_mul_lo_u32 v13, 
0, v5 +; CGP-NEXT: v_mul_hi_u32 v5, s12, v5 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_mul_lo_u32 v6, s12, v6 +; CGP-NEXT: v_mul_lo_u32 v7, s12, v7 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v12, v6 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v13, v7 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 +; CGP-NEXT: v_subb_u32_e64 v6, s[4:5], v3, v4, vcc +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 +; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s12, v2 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] +; CGP-NEXT: v_subrev_i32_e64 v7, s[4:5], s12, v2 +; CGP-NEXT: v_sub_i32_e64 v0, s[6:7], v0, v11 +; CGP-NEXT: v_subb_u32_e64 v8, s[8:9], v1, v5, s[6:7] +; CGP-NEXT: v_sub_i32_e64 v1, s[8:9], v1, v5 +; CGP-NEXT: v_cmp_le_u32_e64 s[8:9], s12, v0 +; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[8:9] +; CGP-NEXT: v_subrev_i32_e64 v9, s[8:9], s12, v0 +; CGP-NEXT: v_cmp_le_u32_e64 s[10:11], 0, v6 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[10:11] +; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s12, v7 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc +; CGP-NEXT: v_subrev_i32_e32 v12, vcc, s12, v7 +; CGP-NEXT: v_cmp_le_u32_e64 s[10:11], 0, v8 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[10:11] +; CGP-NEXT: v_subbrev_u32_e64 v1, s[6:7], 0, v1, s[6:7] +; CGP-NEXT: v_cmp_le_u32_e64 s[6:7], s12, v9 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[6:7] +; CGP-NEXT: v_subrev_i32_e64 v15, s[6:7], s12, v9 +; CGP-NEXT: v_cmp_eq_u32_e64 s[10:11], 0, v6 +; CGP-NEXT: v_cndmask_b32_e64 v4, v10, v4, s[10:11] +; CGP-NEXT: v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[4:5] +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8 +; CGP-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[4:5] +; CGP-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[8:9] +; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v3 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] +; CGP-NEXT: v_subbrev_u32_e32 v13, vcc, 0, v3, 
vcc +; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, -1, vcc +; CGP-NEXT: v_subbrev_u32_e64 v17, vcc, 0, v1, s[6:7] +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; CGP-NEXT: v_cndmask_b32_e32 v10, v10, v11, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v11, v16, v14, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 +; CGP-NEXT: v_cndmask_b32_e32 v7, v7, v12, vcc +; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v11 +; CGP-NEXT: v_cndmask_b32_e64 v9, v9, v15, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v13, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc +; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] +; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v5 +; CGP-NEXT: v_cndmask_b32_e64 v0, v0, v9, s[4:5] +; CGP-NEXT: v_cndmask_b32_e64 v1, v8, v1, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc +; CGP-NEXT: s_setpc_b64 s[30:31] + %result = urem <2 x i64> %num, + ret <2 x i64> %result +} + +define i64 @v_urem_i64_pow2_shl_denom(i64 %x, i64 %y) { +; CHECK-LABEL: v_urem_i64_pow2_shl_denom: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_movk_i32 s4, 0x1000 +; CHECK-NEXT: s_mov_b32 s5, 0 +; CHECK-NEXT: v_mov_b32_e32 v6, 0 +; CHECK-NEXT: v_lshl_b64 v[4:5], s[4:5], v2 +; CHECK-NEXT: v_or_b32_e32 v7, v1, v5 +; CHECK-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7] +; CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1 +; CHECK-NEXT: s_xor_b64 s[4:5], vcc, s[4:5] +; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3 +; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[6:7] +; CHECK-NEXT: s_cbranch_execnz BB7_2 +; CHECK-NEXT: ; %bb.1: +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, v4 +; CHECK-NEXT: v_cvt_f32_u32_e32 v3, v5 +; CHECK-NEXT: v_sub_i32_e32 v6, vcc, 0, v4 +; CHECK-NEXT: v_subb_u32_e32 v7, vcc, 0, v5, vcc +; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 +; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 
+; CHECK-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 +; CHECK-NEXT: v_trunc_f32_e32 v3, v3 +; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3 +; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 +; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, v6, v3 +; CHECK-NEXT: v_mul_lo_u32 v9, v6, v2 +; CHECK-NEXT: v_mul_lo_u32 v10, v7, v2 +; CHECK-NEXT: v_mul_hi_u32 v11, v6, v2 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CHECK-NEXT: v_mul_lo_u32 v10, v3, v9 +; CHECK-NEXT: v_mul_hi_u32 v12, v2, v9 +; CHECK-NEXT: v_mul_hi_u32 v9, v3, v9 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CHECK-NEXT: v_mul_lo_u32 v11, v2, v8 +; CHECK-NEXT: v_mul_lo_u32 v13, v3, v8 +; CHECK-NEXT: v_mul_hi_u32 v14, v2, v8 +; CHECK-NEXT: v_mul_hi_u32 v8, v3, v8 +; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v13, v9 +; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v12 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v14 +; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CHECK-NEXT: v_add_i32_e32 v11, vcc, v13, v12 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v9 +; CHECK-NEXT: v_addc_u32_e64 v9, s[4:5], v3, v8, vcc +; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v3, v8 +; CHECK-NEXT: v_mul_lo_u32 v8, v6, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, v7, v2 +; CHECK-NEXT: v_mul_hi_u32 v10, v6, v2 +; CHECK-NEXT: v_mul_lo_u32 v6, v6, v9 +; CHECK-NEXT: v_mul_lo_u32 v11, v9, v8 +; CHECK-NEXT: v_mul_hi_u32 v12, v2, v8 +; CHECK-NEXT: v_mul_hi_u32 v8, v9, v8 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v7, v6 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v10 +; CHECK-NEXT: v_mul_lo_u32 v7, v2, v6 +; CHECK-NEXT: v_mul_lo_u32 v10, 
v9, v6 +; CHECK-NEXT: v_mul_hi_u32 v13, v2, v6 +; CHECK-NEXT: v_mul_hi_u32 v6, v9, v6 +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v11, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v10, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v12 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v8, v13 +; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v9, v7 +; CHECK-NEXT: v_add_i32_e64 v9, s[4:5], v10, v11 +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v8, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v9, v8 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8 +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7 +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; CHECK-NEXT: v_mul_lo_u32 v6, v1, v2 +; CHECK-NEXT: v_mul_hi_u32 v7, v0, v2 +; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, v0, v3 +; CHECK-NEXT: v_mul_lo_u32 v9, v1, v3 +; CHECK-NEXT: v_mul_hi_u32 v10, v0, v3 +; CHECK-NEXT: v_mul_hi_u32 v3, v1, v3 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v9, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CHECK-NEXT: v_mul_lo_u32 v7, v4, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, v5, v2 +; CHECK-NEXT: v_mul_hi_u32 v2, v4, v2 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v6 +; CHECK-NEXT: v_mul_lo_u32 v3, v4, v3 +; 
CHECK-NEXT: v_add_i32_e32 v3, vcc, v8, v3 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v0, v7 +; CHECK-NEXT: v_subb_u32_e64 v6, s[4:5], v1, v2, vcc +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v6, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc +; CHECK-NEXT: v_sub_i32_e32 v7, vcc, v3, v4 +; CHECK-NEXT: v_subb_u32_e64 v8, s[4:5], v1, v5, vcc +; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v7, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc +; CHECK-NEXT: v_sub_i32_e32 v10, vcc, v7, v4 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] +; CHECK-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v8, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v1, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v5, v11, v9, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v5, v7, v10, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v3, v5, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v3, v6, v1, vcc +; CHECK-NEXT: BB7_2: ; %Flow +; CHECK-NEXT: s_or_saveexec_b64 s[8:9], s[6:7] +; CHECK-NEXT: s_xor_b64 exec, exec, s[8:9] +; CHECK-NEXT: s_cbranch_execz BB7_4 +; CHECK-NEXT: ; %bb.3: +; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v4 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 +; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_lo_u32 v2, v1, v4 +; CHECK-NEXT: v_mul_hi_u32 v3, v1, v4 +; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v2 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc +; CHECK-NEXT: v_mul_hi_u32 v2, v2, v1 
+; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v1, v2 +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0 +; CHECK-NEXT: v_mul_lo_u32 v1, v1, v4 +; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v0, v1 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4 +; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v2, v4 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v1 +; CHECK-NEXT: v_sub_i32_e64 v0, s[6:7], v2, v4 +; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5] +; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v2, v3, v0, s[4:5] +; CHECK-NEXT: v_mov_b32_e32 v3, 0 +; CHECK-NEXT: BB7_4: +; CHECK-NEXT: s_or_b64 exec, exec, s[8:9] +; CHECK-NEXT: v_mov_b32_e32 v0, v2 +; CHECK-NEXT: v_mov_b32_e32 v1, v3 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %shl.y = shl i64 4096, %y + %r = urem i64 %x, %shl.y + ret i64 %r +} + +define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { +; GISEL-LABEL: v_urem_v2i64_pow2_shl_denom: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_movk_i32 s4, 0x1000 +; GISEL-NEXT: s_mov_b32 s5, 0 +; GISEL-NEXT: v_lshl_b64 v[4:5], s[4:5], v4 +; GISEL-NEXT: v_lshl_b64 v[6:7], s[4:5], v6 +; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v4 +; GISEL-NEXT: v_cvt_f32_u32_e32 v9, v5 +; GISEL-NEXT: v_mac_f32_e32 v8, 0x4f800000, v9 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v8, v8 +; GISEL-NEXT: v_mul_f32_e32 v8, 0x5f7ffffc, v8 +; GISEL-NEXT: v_mul_f32_e32 v9, 0x2f800000, v8 +; GISEL-NEXT: v_trunc_f32_e32 v9, v9 +; GISEL-NEXT: v_mac_f32_e32 v8, 0xcf800000, v9 +; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8 +; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v9 +; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v4 +; GISEL-NEXT: v_subb_u32_e32 v11, vcc, 0, v5, vcc +; GISEL-NEXT: v_mul_lo_u32 v12, v10, v8 +; GISEL-NEXT: v_mul_lo_u32 v13, v11, v8 +; GISEL-NEXT: v_mul_lo_u32 v14, v10, v9 +; GISEL-NEXT: v_mul_hi_u32 v15, v10, v8 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 +; GISEL-NEXT: 
v_add_i32_e32 v13, vcc, v13, v15 +; GISEL-NEXT: v_mul_lo_u32 v14, v9, v12 +; GISEL-NEXT: v_mul_lo_u32 v15, v8, v13 +; GISEL-NEXT: v_mul_hi_u32 v16, v8, v12 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 +; GISEL-NEXT: v_mul_lo_u32 v15, v9, v13 +; GISEL-NEXT: v_mul_hi_u32 v12, v9, v12 +; GISEL-NEXT: v_mul_hi_u32 v16, v8, v13 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 +; GISEL-NEXT: v_mul_hi_u32 v13, v9, v13 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 +; GISEL-NEXT: v_addc_u32_e64 v12, s[4:5], v9, v13, vcc +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v13 +; GISEL-NEXT: v_mul_lo_u32 v13, v10, v8 +; GISEL-NEXT: v_mul_lo_u32 v11, v11, v8 +; GISEL-NEXT: v_mul_lo_u32 v14, v10, v12 +; GISEL-NEXT: v_mul_hi_u32 v10, v10, v8 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v11, v10 +; GISEL-NEXT: v_mul_lo_u32 v11, v12, v13 +; GISEL-NEXT: v_mul_lo_u32 v14, v8, v10 +; GISEL-NEXT: v_mul_hi_u32 v15, v8, v13 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v14, v11 +; GISEL-NEXT: v_mul_lo_u32 v14, v12, v10 +; GISEL-NEXT: v_mul_hi_u32 v13, v12, v13 +; GISEL-NEXT: v_mul_hi_u32 v15, v8, v10 +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 +; GISEL-NEXT: v_cndmask_b32_e64 
v14, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v13, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v14, v15 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v13, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 +; GISEL-NEXT: v_mul_hi_u32 v10, v12, v10 +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v13 +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v8, v11 +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v10, vcc +; GISEL-NEXT: v_addc_u32_e64 v9, vcc, 0, v9, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v10, v1, v8 +; GISEL-NEXT: v_mul_lo_u32 v11, v0, v9 +; GISEL-NEXT: v_mul_hi_u32 v12, v0, v8 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; GISEL-NEXT: v_mul_lo_u32 v11, v1, v9 +; GISEL-NEXT: v_mul_hi_u32 v8, v1, v8 +; GISEL-NEXT: v_mul_hi_u32 v12, v0, v9 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; GISEL-NEXT: v_mul_hi_u32 v9, v1, v9 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; GISEL-NEXT: v_mul_lo_u32 v10, v4, v8 +; GISEL-NEXT: v_mul_lo_u32 v11, v5, v8 +; GISEL-NEXT: v_mul_lo_u32 v9, v4, v9 +; GISEL-NEXT: v_mul_hi_u32 v8, v4, v8 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 +; GISEL-NEXT: v_subb_u32_e64 v9, s[4:5], v1, v8, vcc +; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v8 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 
-1, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v9, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v8, v8, v10, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v10, s[4:5], v0, v4 +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc +; GISEL-NEXT: v_subbrev_u32_e64 v11, vcc, 0, v1, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v11, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v10, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v11, v5 +; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v13, vcc +; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v10, v4 +; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v1, v5, s[4:5] +; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 +; GISEL-NEXT: v_cndmask_b32_e32 v4, v10, v4, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v1, v11, v1, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc +; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v6 +; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v7 +; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 +; GISEL-NEXT: v_trunc_f32_e32 v5, v5 +; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 +; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 +; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 +; GISEL-NEXT: v_sub_i32_e32 v8, vcc, 0, v6 +; GISEL-NEXT: v_subb_u32_e32 v9, vcc, 0, v7, vcc +; GISEL-NEXT: v_mul_lo_u32 v10, v8, v4 +; GISEL-NEXT: v_mul_lo_u32 v11, v9, v4 +; GISEL-NEXT: v_mul_lo_u32 v12, v8, v5 +; GISEL-NEXT: v_mul_hi_u32 v13, v8, v4 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; GISEL-NEXT: v_mul_lo_u32 v12, v5, v10 +; GISEL-NEXT: v_mul_lo_u32 v13, v4, v11 +; GISEL-NEXT: v_mul_hi_u32 v14, v4, v10 
+; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; GISEL-NEXT: v_mul_lo_u32 v13, v5, v11 +; GISEL-NEXT: v_mul_hi_u32 v10, v5, v10 +; GISEL-NEXT: v_mul_hi_u32 v14, v4, v11 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; GISEL-NEXT: v_mul_hi_u32 v11, v5, v11 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10 +; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], v5, v11, vcc +; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v11 +; GISEL-NEXT: v_mul_lo_u32 v11, v8, v4 +; GISEL-NEXT: v_mul_lo_u32 v9, v9, v4 +; GISEL-NEXT: v_mul_lo_u32 v12, v8, v10 +; GISEL-NEXT: v_mul_hi_u32 v8, v8, v4 +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v12 +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v9, v8 +; GISEL-NEXT: v_mul_lo_u32 v9, v10, v11 +; GISEL-NEXT: v_mul_lo_u32 v12, v4, v8 +; GISEL-NEXT: v_mul_hi_u32 v13, v4, v11 +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v12, v9 +; GISEL-NEXT: v_mul_lo_u32 v12, v10, v8 +; GISEL-NEXT: v_mul_hi_u32 v11, v10, v11 +; GISEL-NEXT: v_mul_hi_u32 v13, v4, v8 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v13 +; 
GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v11, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 +; GISEL-NEXT: v_mul_hi_u32 v8, v10, v8 +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v8, v11 +; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v4, v9 +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v8, vcc +; GISEL-NEXT: v_addc_u32_e64 v5, vcc, 0, v5, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v8, v3, v4 +; GISEL-NEXT: v_mul_lo_u32 v9, v2, v5 +; GISEL-NEXT: v_mul_hi_u32 v10, v2, v4 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; GISEL-NEXT: v_mul_lo_u32 v9, v3, v5 +; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4 +; GISEL-NEXT: v_mul_hi_u32 v10, v2, v5 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v9, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; GISEL-NEXT: v_mul_hi_u32 v5, v3, v5 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; GISEL-NEXT: v_mul_lo_u32 v8, v6, v4 +; GISEL-NEXT: v_mul_lo_u32 v9, v7, v4 +; GISEL-NEXT: v_mul_lo_u32 v5, v6, v5 +; GISEL-NEXT: v_mul_hi_u32 v4, v6, v4 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v9, v5 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v5, v4 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v8 +; GISEL-NEXT: v_subb_u32_e64 v5, s[4:5], v3, v4, vcc +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v5, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v4, v4, v8, s[4:5] +; 
GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], v2, v6 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc +; GISEL-NEXT: v_subbrev_u32_e64 v9, vcc, 0, v3, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v9, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v7 +; GISEL-NEXT: v_cndmask_b32_e32 v10, v10, v11, vcc +; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v8, v6 +; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v7, s[4:5] +; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 +; GISEL-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: v_urem_v2i64_pow2_shl_denom: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: v_mov_b32_e32 v5, v0 +; CGP-NEXT: v_mov_b32_e32 v7, v1 +; CGP-NEXT: s_movk_i32 s4, 0x1000 +; CGP-NEXT: s_mov_b32 s5, 0 +; CGP-NEXT: v_mov_b32_e32 v0, 0 +; CGP-NEXT: v_lshl_b64 v[10:11], s[4:5], v4 +; CGP-NEXT: v_lshl_b64 v[8:9], s[4:5], v6 +; CGP-NEXT: v_or_b32_e32 v1, v7, v11 +; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] +; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1 +; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5] +; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1 +; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7] +; CGP-NEXT: s_cbranch_execnz BB8_2 +; CGP-NEXT: ; %bb.1: +; CGP-NEXT: v_cvt_f32_u32_e32 v0, v10 +; CGP-NEXT: v_cvt_f32_u32_e32 v1, v11 +; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v10 +; CGP-NEXT: v_subb_u32_e32 v6, vcc, 0, v11, vcc +; CGP-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 +; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 +; CGP-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 +; CGP-NEXT: 
v_trunc_f32_e32 v1, v1 +; CGP-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 +; CGP-NEXT: v_cvt_u32_f32_e32 v1, v1 +; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 +; CGP-NEXT: v_mul_lo_u32 v12, v4, v1 +; CGP-NEXT: v_mul_lo_u32 v13, v4, v0 +; CGP-NEXT: v_mul_lo_u32 v14, v6, v0 +; CGP-NEXT: v_mul_hi_u32 v15, v4, v0 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12 +; CGP-NEXT: v_mul_lo_u32 v14, v1, v13 +; CGP-NEXT: v_mul_hi_u32 v16, v0, v13 +; CGP-NEXT: v_mul_hi_u32 v13, v1, v13 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v15 +; CGP-NEXT: v_mul_lo_u32 v15, v0, v12 +; CGP-NEXT: v_mul_lo_u32 v17, v1, v12 +; CGP-NEXT: v_mul_hi_u32 v18, v0, v12 +; CGP-NEXT: v_mul_hi_u32 v12, v1, v12 +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v17, v13 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v16 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v18 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 +; CGP-NEXT: v_add_i32_e32 v15, vcc, v17, v16 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v13 +; CGP-NEXT: v_addc_u32_e64 v13, s[4:5], v1, v12, vcc +; CGP-NEXT: v_add_i32_e64 v1, s[4:5], v1, v12 +; CGP-NEXT: v_mul_lo_u32 v12, v4, v0 +; CGP-NEXT: v_mul_lo_u32 v6, v6, v0 +; CGP-NEXT: v_mul_hi_u32 v14, v4, v0 +; CGP-NEXT: v_mul_lo_u32 v4, v4, v13 +; CGP-NEXT: v_mul_lo_u32 v15, v13, v12 +; CGP-NEXT: v_mul_hi_u32 v16, v0, v12 +; CGP-NEXT: v_mul_hi_u32 v12, v13, v12 +; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v6, v4 +; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v4, v14 +; CGP-NEXT: v_mul_lo_u32 v6, v0, v4 +; CGP-NEXT: v_mul_lo_u32 v14, v13, v4 +; CGP-NEXT: v_mul_hi_u32 v17, v0, v4 +; CGP-NEXT: v_mul_hi_u32 v4, v13, v4 +; CGP-NEXT: v_add_i32_e64 v6, 
s[4:5], v15, v6 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v14, v12 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v6, v16 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v12, v17 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v13, v6 +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v14, v15 +; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v12, v6 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v13, v12 +; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v4, v12 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v6 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: v_mul_lo_u32 v4, v7, v0 +; CGP-NEXT: v_mul_hi_u32 v6, v5, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v7, v0 +; CGP-NEXT: v_mul_lo_u32 v12, v5, v1 +; CGP-NEXT: v_mul_lo_u32 v13, v7, v1 +; CGP-NEXT: v_mul_hi_u32 v14, v5, v1 +; CGP-NEXT: v_mul_hi_u32 v1, v7, v1 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v12 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v13, v0 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v14 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v12, v4 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v13, v6 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CGP-NEXT: v_mul_lo_u32 v6, v10, v0 +; CGP-NEXT: v_mul_lo_u32 v12, v11, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v10, v0 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4 +; CGP-NEXT: v_mul_lo_u32 v1, v10, v1 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v12, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v1, v0 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v5, v6 +; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v7, v0, vcc 
+; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v7, v0 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v10 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v11 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] +; CGP-NEXT: v_subb_u32_e32 v0, vcc, v0, v11, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v4, v11 +; CGP-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc +; CGP-NEXT: v_sub_i32_e32 v7, vcc, v1, v10 +; CGP-NEXT: v_subb_u32_e64 v12, s[4:5], v0, v11, vcc +; CGP-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v7, v10 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc +; CGP-NEXT: v_sub_i32_e32 v14, vcc, v7, v10 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v11 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[4:5] +; CGP-NEXT: v_subbrev_u32_e32 v12, vcc, 0, v12, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v0, v11 +; CGP-NEXT: v_cndmask_b32_e32 v11, v15, v13, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 +; CGP-NEXT: v_cndmask_b32_e32 v7, v7, v14, vcc +; CGP-NEXT: v_cndmask_b32_e32 v11, v0, v12, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 +; CGP-NEXT: v_cndmask_b32_e32 v0, v1, v7, vcc +; CGP-NEXT: v_cndmask_b32_e32 v1, v4, v11, vcc +; CGP-NEXT: BB8_2: ; %Flow1 +; CGP-NEXT: s_or_saveexec_b64 s[8:9], s[6:7] +; CGP-NEXT: s_xor_b64 exec, exec, s[8:9] +; CGP-NEXT: s_cbranch_execz BB8_4 +; CGP-NEXT: ; %bb.3: +; CGP-NEXT: v_cvt_f32_u32_e32 v0, v10 +; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 +; CGP-NEXT: v_mul_lo_u32 v1, v0, v10 +; CGP-NEXT: v_mul_hi_u32 v4, v0, v10 +; CGP-NEXT: v_sub_i32_e32 v6, vcc, 0, v1 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc +; CGP-NEXT: v_mul_hi_u32 v1, v1, v0 +; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v0, v1 +; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; CGP-NEXT: v_mul_hi_u32 v0, v0, v5 +; CGP-NEXT: v_mul_lo_u32 v0, v0, v10 +; CGP-NEXT: v_sub_i32_e32 
v1, vcc, v5, v0 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v10 +; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v1, v10 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v0 +; CGP-NEXT: v_sub_i32_e64 v0, s[6:7], v1, v10 +; CGP-NEXT: s_and_b64 vcc, vcc, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; CGP-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[4:5] +; CGP-NEXT: v_mov_b32_e32 v1, 0 +; CGP-NEXT: BB8_4: +; CGP-NEXT: s_or_b64 exec, exec, s[8:9] +; CGP-NEXT: v_or_b32_e32 v5, v3, v9 +; CGP-NEXT: v_mov_b32_e32 v4, 0 +; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5] +; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1 +; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5] +; CGP-NEXT: ; implicit-def: $vgpr4_vgpr5 +; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7] +; CGP-NEXT: s_cbranch_execnz BB8_6 +; CGP-NEXT: ; %bb.5: +; CGP-NEXT: v_cvt_f32_u32_e32 v4, v8 +; CGP-NEXT: v_cvt_f32_u32_e32 v5, v9 +; CGP-NEXT: v_sub_i32_e32 v6, vcc, 0, v8 +; CGP-NEXT: v_subb_u32_e32 v7, vcc, 0, v9, vcc +; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; CGP-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 +; CGP-NEXT: v_trunc_f32_e32 v5, v5 +; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 +; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 +; CGP-NEXT: v_mul_lo_u32 v10, v6, v5 +; CGP-NEXT: v_mul_lo_u32 v11, v6, v4 +; CGP-NEXT: v_mul_lo_u32 v12, v7, v4 +; CGP-NEXT: v_mul_hi_u32 v13, v6, v4 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 +; CGP-NEXT: v_mul_lo_u32 v12, v5, v11 +; CGP-NEXT: v_mul_hi_u32 v14, v4, v11 +; CGP-NEXT: v_mul_hi_u32 v11, v5, v11 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 +; CGP-NEXT: v_mul_lo_u32 v13, v4, v10 +; CGP-NEXT: v_mul_lo_u32 v15, v5, v10 +; CGP-NEXT: v_mul_hi_u32 v16, v4, v10 +; CGP-NEXT: v_mul_hi_u32 v10, v5, v10 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v11 +; CGP-NEXT: 
v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v16 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v14 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11 +; CGP-NEXT: v_addc_u32_e64 v11, s[4:5], v5, v10, vcc +; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v5, v10 +; CGP-NEXT: v_mul_lo_u32 v10, v6, v4 +; CGP-NEXT: v_mul_lo_u32 v7, v7, v4 +; CGP-NEXT: v_mul_hi_u32 v12, v6, v4 +; CGP-NEXT: v_mul_lo_u32 v6, v6, v11 +; CGP-NEXT: v_mul_lo_u32 v13, v11, v10 +; CGP-NEXT: v_mul_hi_u32 v14, v4, v10 +; CGP-NEXT: v_mul_hi_u32 v10, v11, v10 +; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v7, v6 +; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v6, v12 +; CGP-NEXT: v_mul_lo_u32 v7, v4, v6 +; CGP-NEXT: v_mul_lo_u32 v12, v11, v6 +; CGP-NEXT: v_mul_hi_u32 v15, v4, v6 +; CGP-NEXT: v_mul_hi_u32 v6, v11, v6 +; CGP-NEXT: v_add_i32_e64 v7, s[4:5], v13, v7 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v12, v10 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v7, s[4:5], v7, v14 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v15 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v7, s[4:5], v11, v7 +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v12, v13 +; CGP-NEXT: v_add_i32_e64 v7, s[4:5], v10, v7 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v11, v10 +; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v6, v10 +; CGP-NEXT: v_addc_u32_e32 v5, vcc, v5, v6, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc +; CGP-NEXT: v_mul_lo_u32 v6, 
v3, v4 +; CGP-NEXT: v_mul_hi_u32 v7, v2, v4 +; CGP-NEXT: v_mul_hi_u32 v4, v3, v4 +; CGP-NEXT: v_mul_lo_u32 v10, v2, v5 +; CGP-NEXT: v_mul_lo_u32 v11, v3, v5 +; CGP-NEXT: v_mul_hi_u32 v12, v2, v5 +; CGP-NEXT: v_mul_hi_u32 v5, v3, v5 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v11, v4 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v12 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v10, v6 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v11, v7 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CGP-NEXT: v_mul_lo_u32 v7, v8, v4 +; CGP-NEXT: v_mul_lo_u32 v10, v9, v4 +; CGP-NEXT: v_mul_hi_u32 v4, v8, v4 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; CGP-NEXT: v_mul_lo_u32 v5, v8, v5 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v5 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 +; CGP-NEXT: v_sub_i32_e32 v5, vcc, v2, v7 +; CGP-NEXT: v_subb_u32_e64 v6, s[4:5], v3, v4, vcc +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v8 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v9 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v9, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v6, v9 +; CGP-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc +; CGP-NEXT: v_sub_i32_e32 v7, vcc, v5, v8 +; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v3, v9, vcc +; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v7, v8 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc +; CGP-NEXT: v_sub_i32_e32 v12, vcc, v7, v8 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v9 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5] +; CGP-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v10, vcc +; CGP-NEXT: 
v_cmp_eq_u32_e32 vcc, v3, v9 +; CGP-NEXT: v_cndmask_b32_e32 v9, v13, v11, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; CGP-NEXT: v_cndmask_b32_e32 v7, v7, v12, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v10, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; CGP-NEXT: v_cndmask_b32_e32 v4, v5, v7, vcc +; CGP-NEXT: v_cndmask_b32_e32 v5, v6, v3, vcc +; CGP-NEXT: BB8_6: ; %Flow +; CGP-NEXT: s_or_saveexec_b64 s[8:9], s[6:7] +; CGP-NEXT: s_xor_b64 exec, exec, s[8:9] +; CGP-NEXT: s_cbranch_execz BB8_8 +; CGP-NEXT: ; %bb.7: +; CGP-NEXT: v_cvt_f32_u32_e32 v3, v8 +; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; CGP-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3 +; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3 +; CGP-NEXT: v_mul_lo_u32 v4, v3, v8 +; CGP-NEXT: v_mul_hi_u32 v5, v3, v8 +; CGP-NEXT: v_sub_i32_e32 v6, vcc, 0, v4 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; CGP-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc +; CGP-NEXT: v_mul_hi_u32 v4, v4, v3 +; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4 +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 +; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; CGP-NEXT: v_mul_hi_u32 v3, v3, v2 +; CGP-NEXT: v_mul_lo_u32 v3, v3, v8 +; CGP-NEXT: v_sub_i32_e32 v4, vcc, v2, v3 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v8 +; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v4, v8 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v3 +; CGP-NEXT: v_sub_i32_e64 v2, s[6:7], v4, v8 +; CGP-NEXT: s_and_b64 vcc, vcc, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc +; CGP-NEXT: v_cndmask_b32_e64 v4, v5, v2, s[4:5] +; CGP-NEXT: v_mov_b32_e32 v5, 0 +; CGP-NEXT: BB8_8: +; CGP-NEXT: s_or_b64 exec, exec, s[8:9] +; CGP-NEXT: v_mov_b32_e32 v2, v4 +; CGP-NEXT: v_mov_b32_e32 v3, v5 +; CGP-NEXT: s_setpc_b64 s[30:31] + %shl.y = shl <2 x i64> , %y + %r = urem <2 x i64> %x, %shl.y + ret <2 x i64> %r +} + +define i64 @v_urem_i64_24bit(i64 %num, i64 %den) { +; GISEL-LABEL: v_urem_i64_24bit: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_mov_b32 s4, 0xffffff +; GISEL-NEXT: 
v_and_b32_e32 v0, s4, v0 +; GISEL-NEXT: v_and_b32_e32 v1, s4, v2 +; GISEL-NEXT: v_cvt_f32_u32_e32 v2, v1 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2 +; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 +; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1 +; GISEL-NEXT: v_mul_hi_u32 v4, v2, v1 +; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; GISEL-NEXT: v_mul_hi_u32 v3, v3, v2 +; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v2, v3 +; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GISEL-NEXT: v_mul_hi_u32 v2, v2, v0 +; GISEL-NEXT: v_mul_lo_u32 v2, v2, v1 +; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v0, v2 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v3, v1 +; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v3, v1 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v2 +; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v3, v1 +; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[4:5] +; GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: v_urem_i64_24bit: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: s_mov_b32 s4, 0xffffff +; CGP-NEXT: v_and_b32_e32 v0, s4, v0 +; CGP-NEXT: v_and_b32_e32 v1, s4, v2 +; CGP-NEXT: v_cvt_f32_u32_e32 v2, v0 +; CGP-NEXT: v_cvt_f32_u32_e32 v3, v1 +; CGP-NEXT: v_rcp_f32_e32 v4, v3 +; CGP-NEXT: v_mul_f32_e32 v4, v2, v4 +; CGP-NEXT: v_trunc_f32_e32 v4, v4 +; CGP-NEXT: v_mad_f32 v2, -v4, v3, v2 +; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 +; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v2|, v3 +; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e32 v2, vcc, v4, v2 +; CGP-NEXT: v_mul_lo_u32 v1, v2, v1 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; CGP-NEXT: v_and_b32_e32 v0, 0xffffff, v0 +; CGP-NEXT: v_mov_b32_e32 v1, 0 +; CGP-NEXT: s_setpc_b64 s[30:31] + %num.mask = and i64 %num, 
16777215 + %den.mask = and i64 %den, 16777215 + %result = urem i64 %num.mask, %den.mask + ret i64 %result +} + +define <2 x i64> @v_urem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) { +; GISEL-LABEL: v_urem_v2i64_24bit: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_mov_b32 s6, 0xffffff +; GISEL-NEXT: v_cvt_f32_u32_e32 v1, 0 +; GISEL-NEXT: v_and_b32_e32 v3, s6, v4 +; GISEL-NEXT: v_and_b32_e32 v4, s6, v6 +; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v3 +; GISEL-NEXT: v_sub_i32_e32 v6, vcc, 0, v3 +; GISEL-NEXT: v_subb_u32_e64 v7, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v4 +; GISEL-NEXT: v_sub_i32_e32 v9, vcc, 0, v4 +; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_mac_f32_e32 v5, 0x4f800000, v1 +; GISEL-NEXT: v_mac_f32_e32 v8, 0x4f800000, v1 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v1, v5 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v8 +; GISEL-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 +; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 +; GISEL-NEXT: v_mul_f32_e32 v8, 0x2f800000, v1 +; GISEL-NEXT: v_mul_f32_e32 v11, 0x2f800000, v5 +; GISEL-NEXT: v_trunc_f32_e32 v8, v8 +; GISEL-NEXT: v_trunc_f32_e32 v11, v11 +; GISEL-NEXT: v_mac_f32_e32 v1, 0xcf800000, v8 +; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8 +; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v11 +; GISEL-NEXT: v_cvt_u32_f32_e32 v11, v11 +; GISEL-NEXT: v_cvt_u32_f32_e32 v1, v1 +; GISEL-NEXT: v_mul_lo_u32 v12, v6, v8 +; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 +; GISEL-NEXT: v_mul_lo_u32 v13, v9, v11 +; GISEL-NEXT: v_mul_lo_u32 v14, v6, v1 +; GISEL-NEXT: v_mul_lo_u32 v15, v7, v1 +; GISEL-NEXT: v_mul_hi_u32 v16, v6, v1 +; GISEL-NEXT: v_mul_lo_u32 v17, v9, v5 +; GISEL-NEXT: v_mul_lo_u32 v18, v10, v5 +; GISEL-NEXT: v_mul_hi_u32 v19, v9, v5 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v12 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v18, v13 +; GISEL-NEXT: v_mul_lo_u32 v15, v11, v17 +; GISEL-NEXT: v_mul_hi_u32 v18, v5, v17 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v19 +; GISEL-NEXT: 
v_mul_lo_u32 v19, v5, v13 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v19 +; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v18 +; GISEL-NEXT: v_mul_lo_u32 v15, v8, v14 +; GISEL-NEXT: v_mul_hi_u32 v18, v1, v14 +; GISEL-NEXT: v_mul_hi_u32 v14, v8, v14 +; GISEL-NEXT: v_mul_hi_u32 v17, v11, v17 +; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v16 +; GISEL-NEXT: v_mul_lo_u32 v16, v1, v12 +; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v18 +; GISEL-NEXT: v_mul_lo_u32 v15, v8, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v16, s[4:5], v16, v18 +; GISEL-NEXT: v_mul_hi_u32 v18, v1, v12 +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v15, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v14, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v18, vcc, v19, v18 +; GISEL-NEXT: v_mul_lo_u32 v19, v11, v13 +; GISEL-NEXT: v_add_i32_e32 v17, vcc, v19, v17 +; GISEL-NEXT: v_mul_hi_u32 v19, v5, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v19 +; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v19, vcc, v20, v19 +; GISEL-NEXT: v_and_b32_e32 v0, s6, v0 +; GISEL-NEXT: v_and_b32_e32 v2, s6, v2 +; GISEL-NEXT: v_mul_hi_u32 v12, v8, v12 +; GISEL-NEXT: v_mul_hi_u32 v13, v11, v13 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v18 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v16 +; GISEL-NEXT: 
v_add_i32_e32 v1, vcc, v1, v14 +; GISEL-NEXT: v_addc_u32_e64 v14, s[4:5], v8, v12, vcc +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v8, v12 +; GISEL-NEXT: v_mul_lo_u32 v12, v6, v1 +; GISEL-NEXT: v_mul_lo_u32 v7, v7, v1 +; GISEL-NEXT: v_mul_hi_u32 v15, v6, v1 +; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v17 +; GISEL-NEXT: v_addc_u32_e64 v16, s[6:7], v11, v13, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v13 +; GISEL-NEXT: v_mul_lo_u32 v13, v9, v5 +; GISEL-NEXT: v_mul_lo_u32 v10, v10, v5 +; GISEL-NEXT: v_mul_hi_u32 v17, v9, v5 +; GISEL-NEXT: v_mul_lo_u32 v6, v6, v14 +; GISEL-NEXT: v_mul_lo_u32 v18, v14, v12 +; GISEL-NEXT: v_mul_hi_u32 v19, v1, v12 +; GISEL-NEXT: v_mul_hi_u32 v12, v14, v12 +; GISEL-NEXT: v_mul_lo_u32 v9, v9, v16 +; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v7, v6 +; GISEL-NEXT: v_mul_lo_u32 v7, v16, v13 +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v10, v9 +; GISEL-NEXT: v_mul_hi_u32 v10, v5, v13 +; GISEL-NEXT: v_mul_hi_u32 v13, v16, v13 +; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v6, v15 +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v17 +; GISEL-NEXT: v_mul_lo_u32 v15, v1, v6 +; GISEL-NEXT: v_mul_lo_u32 v17, v5, v9 +; GISEL-NEXT: v_add_i32_e64 v7, s[6:7], v7, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v7, s[6:7], v7, v10 +; GISEL-NEXT: v_mul_lo_u32 v7, v14, v6 +; GISEL-NEXT: v_mul_hi_u32 v10, v1, v6 +; GISEL-NEXT: v_mul_hi_u32 v6, v14, v6 +; GISEL-NEXT: v_mul_lo_u32 v14, v16, v9 +; GISEL-NEXT: v_mul_hi_u32 v16, v16, v9 +; GISEL-NEXT: v_mul_hi_u32 v9, v5, v9 +; GISEL-NEXT: v_add_i32_e64 v15, s[8:9], v18, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v7, s[8:9], v7, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v13, s[8:9], v14, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v15, s[8:9], v15, v19 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v7, s[8:9], v7, v10 +; GISEL-NEXT: 
v_cndmask_b32_e64 v10, 0, 1, s[8:9] +; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v13, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v18, v15 +; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v12, v10 +; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v17, v19 +; GISEL-NEXT: v_add_i32_e64 v13, s[6:7], v14, v13 +; GISEL-NEXT: v_add_i32_e64 v7, s[6:7], v7, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v14 +; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v13, v12 +; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v6, v10 +; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v16, v12 +; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v8, v6, vcc +; GISEL-NEXT: v_addc_u32_e64 v8, vcc, v11, v10, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v7 +; GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc +; GISEL-NEXT: v_mul_lo_u32 v7, 0, v1 +; GISEL-NEXT: v_mul_hi_u32 v10, v0, v1 +; GISEL-NEXT: v_mul_hi_u32 v1, 0, v1 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v8, vcc +; GISEL-NEXT: v_mul_lo_u32 v9, 0, v5 +; GISEL-NEXT: v_mul_hi_u32 v11, v2, v5 +; GISEL-NEXT: v_mul_hi_u32 v5, 0, v5 +; GISEL-NEXT: v_mul_lo_u32 v12, v0, v6 +; GISEL-NEXT: v_mul_lo_u32 v13, 0, v6 +; GISEL-NEXT: v_mul_hi_u32 v14, v0, v6 +; GISEL-NEXT: v_mul_hi_u32 v6, 0, v6 +; GISEL-NEXT: v_mul_lo_u32 v15, v2, v8 +; GISEL-NEXT: v_mul_lo_u32 v16, 0, v8 +; GISEL-NEXT: v_mul_hi_u32 v17, v2, v8 +; GISEL-NEXT: v_mul_hi_u32 v8, 0, v8 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v16, v5 +; GISEL-NEXT: v_cndmask_b32_e64 
v16, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v12, v7 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v15, v9 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v16, v11 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v10, v7 +; GISEL-NEXT: v_mul_lo_u32 v10, v3, v1 +; GISEL-NEXT: v_mul_lo_u32 v12, 0, v1 +; GISEL-NEXT: v_mul_hi_u32 v1, v3, v1 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; GISEL-NEXT: v_mul_lo_u32 v11, v4, v5 +; GISEL-NEXT: v_mul_lo_u32 v13, 0, v5 +; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v9 +; GISEL-NEXT: v_mul_lo_u32 v6, v3, v6 +; GISEL-NEXT: v_mul_lo_u32 v7, v4, v7 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v12, v6 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v13, v7 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v6, v1 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 +; GISEL-NEXT: v_subb_u32_e64 v6, s[4:5], 0, v1, vcc +; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], 0, v1 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], v0, v3 +; GISEL-NEXT: v_sub_i32_e64 v2, s[6:7], v2, v11 +; GISEL-NEXT: v_subb_u32_e64 v9, s[8:9], 0, v5, s[6:7] +; GISEL-NEXT: v_sub_i32_e64 v5, s[8:9], 0, v5 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v2, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[8:9] +; GISEL-NEXT: 
v_sub_i32_e64 v11, s[8:9], v2, v4 +; GISEL-NEXT: v_cmp_le_u32_e64 s[10:11], 0, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[10:11] +; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc +; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v8, v3 +; GISEL-NEXT: v_cmp_le_u32_e64 s[10:11], 0, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[10:11] +; GISEL-NEXT: v_subbrev_u32_e64 v5, s[6:7], 0, v5, s[6:7] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v11, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[6:7] +; GISEL-NEXT: v_sub_i32_e64 v4, s[6:7], v11, v4 +; GISEL-NEXT: v_cmp_eq_u32_e64 s[10:11], 0, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v7, v12, v7, s[10:11] +; GISEL-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v10, v14, v10, s[4:5] +; GISEL-NEXT: v_subbrev_u32_e64 v5, s[4:5], 0, v5, s[8:9] +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] +; GISEL-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v1, vcc +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, 0, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, -1, vcc +; GISEL-NEXT: v_subbrev_u32_e64 v17, vcc, 0, v5, s[6:7] +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v13, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; GISEL-NEXT: v_cndmask_b32_e32 v13, v16, v15, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 +; GISEL-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v4, v11, v4, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v14, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v3, v5, v17, s[4:5] +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc +; 
GISEL-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[4:5] +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: v_urem_v2i64_24bit: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: s_mov_b32 s6, 0xffffff +; CGP-NEXT: v_mov_b32_e32 v1, 0 +; CGP-NEXT: v_and_b32_e32 v0, s6, v0 +; CGP-NEXT: v_and_b32_e32 v2, s6, v2 +; CGP-NEXT: v_and_b32_e32 v3, s6, v4 +; CGP-NEXT: v_and_b32_e32 v4, s6, v6 +; CGP-NEXT: v_cvt_f32_u32_e32 v5, v0 +; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3 +; CGP-NEXT: v_cvt_f32_u32_e32 v7, v2 +; CGP-NEXT: v_cvt_f32_u32_e32 v8, v4 +; CGP-NEXT: v_rcp_f32_e32 v9, v6 +; CGP-NEXT: v_rcp_f32_e32 v10, v8 +; CGP-NEXT: v_mul_f32_e32 v9, v5, v9 +; CGP-NEXT: v_mul_f32_e32 v10, v7, v10 +; CGP-NEXT: v_trunc_f32_e32 v9, v9 +; CGP-NEXT: v_trunc_f32_e32 v10, v10 +; CGP-NEXT: v_mad_f32 v5, -v9, v6, v5 +; CGP-NEXT: v_cvt_u32_f32_e32 v9, v9 +; CGP-NEXT: v_mad_f32 v7, -v10, v8, v7 +; CGP-NEXT: v_cvt_u32_f32_e32 v10, v10 +; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v5|, v6 +; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5] +; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v7|, v8 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e32 v5, vcc, v9, v5 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v10, v6 +; CGP-NEXT: v_mul_lo_u32 v3, v5, v3 +; CGP-NEXT: v_mul_lo_u32 v4, v6, v4 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v4 +; CGP-NEXT: v_and_b32_e32 v0, s6, v0 +; CGP-NEXT: v_and_b32_e32 v2, s6, v2 +; CGP-NEXT: v_mov_b32_e32 v3, v1 +; CGP-NEXT: s_setpc_b64 s[30:31] + %num.mask = and <2 x i64> %num, + %den.mask = and <2 x i64> %den, + %result = urem <2 x i64> %num.mask, %den.mask + ret <2 x i64> %result +}